aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S
diff options
context:
space:
mode:
authorAlexander Smirnov <alex@ydb.tech>2024-10-16 12:11:24 +0000
committerAlexander Smirnov <alex@ydb.tech>2024-10-16 12:11:24 +0000
commit40811e93f3fdf9342a9295369994012420fac548 (patch)
treea8d85e094a9c21e10aa250f537c101fc2016a049 /contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S
parent30ebe5357bb143648c6be4d151ecd4944af81ada (diff)
parent28a0c4a9f297064538a018c512cd9bbd00a1a35d (diff)
downloadydb-40811e93f3fdf9342a9295369994012420fac548.tar.gz
Merge branch 'rightlib' into mergelibs-241016-1210
Diffstat (limited to 'contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S')
-rw-r--r--contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S396
1 files changed, 396 insertions, 0 deletions
diff --git a/contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S b/contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S
new file mode 100644
index 0000000000..1b0d34550f
--- /dev/null
+++ b/contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S
@@ -0,0 +1,396 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Double Precision Multiply
+
+#define A r1:0
+#define AH r1
+#define AL r0
+#define B r3:2
+#define BH r3
+#define BL r2
+
+#define EXPA r4
+#define EXPB r5
+#define EXPB_A r5:4
+
+#define ZTMP r7:6
+#define ZTMPH r7
+#define ZTMPL r6
+
+#define ATMP r13:12
+#define ATMPH r13
+#define ATMPL r12
+
+#define BTMP r9:8
+#define BTMPH r9
+#define BTMPL r8
+
+#define ATMP2 r11:10
+#define ATMP2H r11
+#define ATMP2L r10
+
+#define EXPDIFF r15
+#define EXTRACTOFF r14
+#define EXTRACTAMT r15:14
+
+#define TMP r28
+
+#define MANTBITS 52
+#define HI_MANTBITS 20
+#define EXPBITS 11
+#define BIAS 1024
+#define MANTISSA_TO_INT_BIAS 52
+#define SR_BIT_INEXACT 5
+
+#ifndef SR_ROUND_OFF
+#define SR_ROUND_OFF 22
+#endif
+
+#define NORMAL p3
+#define BIGB p2
+
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
+#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
+#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
+#define END(TAG) .size TAG,.-TAG
+
+ .text
+ .global __hexagon_adddf3
+ .global __hexagon_subdf3
+ .type __hexagon_adddf3, @function
+ .type __hexagon_subdf3, @function
+
+Q6_ALIAS(adddf3)
+FAST_ALIAS(adddf3)
+FAST2_ALIAS(adddf3)
+Q6_ALIAS(subdf3)
+FAST_ALIAS(subdf3)
+FAST2_ALIAS(subdf3)
+
+ .p2align 5
+__hexagon_adddf3:
+ {
+ EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
+ EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
+ ATMP = combine(##0x20000000,#0)
+ }
+ {
+ NORMAL = dfclass(A,#2)
+ NORMAL = dfclass(B,#2)
+ BTMP = ATMP
+ BIGB = cmp.gtu(EXPB,EXPA) // Is B substantially greater than A?
+ }
+ {
+ if (!NORMAL) jump .Ladd_abnormal // If abnormal, go to special code
+ if (BIGB) A = B // if B >> A, swap A and B
+ if (BIGB) B = A // If B >> A, swap A and B
+ if (BIGB) EXPB_A = combine(EXPA,EXPB) // swap exponents
+ }
+ {
+ ATMP = insert(A,#MANTBITS,#EXPBITS-2) // Q1.62
+ BTMP = insert(B,#MANTBITS,#EXPBITS-2) // Q1.62
+ EXPDIFF = sub(EXPA,EXPB)
+ ZTMP = combine(#62,#1)
+ }
+#undef BIGB
+#undef NORMAL
+#define B_POS p3
+#define A_POS p2
+#define NO_STICKIES p1
+.Ladd_continue:
+ {
+ EXPDIFF = min(EXPDIFF,ZTMPH) // If exponent difference >= ~60,
+ // will collapse to sticky bit
+ ATMP2 = neg(ATMP)
+ A_POS = cmp.gt(AH,#-1)
+ EXTRACTOFF = #0
+ }
+ {
+ if (!A_POS) ATMP = ATMP2
+ ATMP2 = extractu(BTMP,EXTRACTAMT)
+ BTMP = ASR(BTMP,EXPDIFF)
+#undef EXTRACTAMT
+#undef EXPDIFF
+#undef EXTRACTOFF
+#define ZERO r15:14
+ ZERO = #0
+ }
+ {
+ NO_STICKIES = cmp.eq(ATMP2,ZERO)
+ if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)
+ EXPB = add(EXPA,#-BIAS-60)
+ B_POS = cmp.gt(BH,#-1)
+ }
+ {
+ ATMP = add(ATMP,BTMP) // ADD!!!
+ ATMP2 = sub(ATMP,BTMP) // Negate and ADD --> SUB!!!
+ ZTMP = combine(#54,##2045)
+ }
+ {
+ p0 = cmp.gtu(EXPA,ZTMPH) // must be pretty high in case of large cancellation
+ p0 = !cmp.gtu(EXPA,ZTMPL)
+ if (!p0.new) jump:nt .Ladd_ovf_unf
+ if (!B_POS) ATMP = ATMP2 // if B neg, pick difference
+ }
+ {
+ A = convert_d2df(ATMP) // Convert to Double Precision, taking care of flags, etc. So nice!
+ p0 = cmp.eq(ATMPH,#0)
+ p0 = cmp.eq(ATMPL,#0)
+ if (p0.new) jump:nt .Ladd_zero // or maybe conversion handles zero case correctly?
+ }
+ {
+ AH += asl(EXPB,#HI_MANTBITS)
+ jumpr r31
+ }
+ .falign
+__hexagon_subdf3:
+ {
+ BH = togglebit(BH,#31)
+ jump __qdsp_adddf3
+ }
+
+
+ .falign
+.Ladd_zero:
+ // True zero, full cancellation
+ // +0 unless round towards negative infinity
+ {
+ TMP = USR
+ A = #0
+ BH = #1
+ }
+ {
+ TMP = extractu(TMP,#2,#22)
+ BH = asl(BH,#31)
+ }
+ {
+ p0 = cmp.eq(TMP,#2)
+ if (p0.new) AH = xor(AH,BH)
+ jumpr r31
+ }
+ .falign
+.Ladd_ovf_unf:
+ // Overflow or Denormal is possible
+ // Good news: Underflow flag is not possible!
+
+ // ATMP has 2's complement value
+ //
+ // EXPA has A's exponent, EXPB has EXPA-BIAS-60
+ //
+ // Convert, extract exponent, add adjustment.
+ // If > 2046, overflow
+ // If <= 0, denormal
+ //
+ // Note that we've not done our zero check yet, so do that too
+
+ {
+ A = convert_d2df(ATMP)
+ p0 = cmp.eq(ATMPH,#0)
+ p0 = cmp.eq(ATMPL,#0)
+ if (p0.new) jump:nt .Ladd_zero
+ }
+ {
+ TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)
+ AH += asl(EXPB,#HI_MANTBITS)
+ }
+ {
+ EXPB = add(EXPB,TMP)
+ B = combine(##0x00100000,#0)
+ }
+ {
+ p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
+ if (p0.new) jump:nt .Ladd_ovf
+ }
+ {
+ p0 = cmp.gt(EXPB,#0)
+ if (p0.new) jumpr:t r31
+ TMP = sub(#1,EXPB)
+ }
+ {
+ B = insert(A,#MANTBITS,#0)
+ A = ATMP
+ }
+ {
+ B = lsr(B,TMP)
+ }
+ {
+ A = insert(B,#63,#0)
+ jumpr r31
+ }
+ .falign
+.Ladd_ovf:
+ // We get either max finite value or infinity. Either way, overflow+inexact
+ {
+ A = ATMP // 2's complement value
+ TMP = USR
+ ATMP = combine(##0x7fefffff,#-1) // positive max finite
+ }
+ {
+ EXPB = extractu(TMP,#2,#SR_ROUND_OFF) // rounding bits
+ TMP = or(TMP,#0x28) // inexact + overflow
+ BTMP = combine(##0x7ff00000,#0) // positive infinity
+ }
+ {
+ USR = TMP
+ EXPB ^= lsr(AH,#31) // Does sign match rounding?
+ TMP = EXPB // unmodified rounding mode
+ }
+ {
+ p0 = !cmp.eq(TMP,#1) // If not round-to-zero and
+ p0 = !cmp.eq(EXPB,#2) // Not rounding the other way,
+ if (p0.new) ATMP = BTMP // we should get infinity
+ }
+ {
+ A = insert(ATMP,#63,#0) // insert inf/maxfinite, leave sign
+ }
+ {
+ p0 = dfcmp.eq(A,A)
+ jumpr r31
+ }
+
+.Ladd_abnormal:
+ {
+ ATMP = extractu(A,#63,#0) // strip off sign
+ BTMP = extractu(B,#63,#0) // strip off sign
+ }
+ {
+ p3 = cmp.gtu(ATMP,BTMP)
+ if (!p3.new) A = B // sort values
+ if (!p3.new) B = A // sort values
+ }
+ {
+ // Any NaN --> NaN, possibly raise invalid if sNaN
+ p0 = dfclass(A,#0x0f) // A not NaN?
+ if (!p0.new) jump:nt .Linvalid_nan_add
+ if (!p3) ATMP = BTMP
+ if (!p3) BTMP = ATMP
+ }
+ {
+ // Infinity + non-infinity number is infinity
+ // Infinity + infinity --> inf or nan
+ p1 = dfclass(A,#0x08) // A is infinity
+ if (p1.new) jump:nt .Linf_add
+ }
+ {
+ p2 = dfclass(B,#0x01) // B is zero
+ if (p2.new) jump:nt .LB_zero // so return A or special 0+0
+ ATMP = #0
+ }
+ // We are left with adding one or more subnormals
+ {
+ p0 = dfclass(A,#4)
+ if (p0.new) jump:nt .Ladd_two_subnormal
+ ATMP = combine(##0x20000000,#0)
+ }
+ {
+ EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
+ EXPB = #1
+ // BTMP already ABS(B)
+ BTMP = asl(BTMP,#EXPBITS-2)
+ }
+#undef ZERO
+#define EXTRACTOFF r14
+#define EXPDIFF r15
+ {
+ ATMP = insert(A,#MANTBITS,#EXPBITS-2)
+ EXPDIFF = sub(EXPA,EXPB)
+ ZTMP = combine(#62,#1)
+ jump .Ladd_continue
+ }
+
+.Ladd_two_subnormal:
+ {
+ ATMP = extractu(A,#63,#0)
+ BTMP = extractu(B,#63,#0)
+ }
+ {
+ ATMP = neg(ATMP)
+ BTMP = neg(BTMP)
+ p0 = cmp.gt(AH,#-1)
+ p1 = cmp.gt(BH,#-1)
+ }
+ {
+ if (p0) ATMP = A
+ if (p1) BTMP = B
+ }
+ {
+ ATMP = add(ATMP,BTMP)
+ }
+ {
+ BTMP = neg(ATMP)
+ p0 = cmp.gt(ATMPH,#-1)
+ B = #0
+ }
+ {
+ if (!p0) A = BTMP
+ if (p0) A = ATMP
+ BH = ##0x80000000
+ }
+ {
+ if (!p0) AH = or(AH,BH)
+ p0 = dfcmp.eq(A,B)
+ if (p0.new) jump:nt .Lzero_plus_zero
+ }
+ {
+ jumpr r31
+ }
+
+.Linvalid_nan_add:
+ {
+ TMP = convert_df2sf(A) // will generate invalid if sNaN
+ p0 = dfclass(B,#0x0f) // if B is not NaN
+ if (p0.new) B = A // make it whatever A is
+ }
+ {
+ BL = convert_df2sf(B) // will generate invalid if sNaN
+ A = #-1
+ jumpr r31
+ }
+ .falign
+.LB_zero:
+ {
+ p0 = dfcmp.eq(ATMP,A) // is A also zero?
+ if (!p0.new) jumpr:t r31 // If not, just return A
+ }
+ // 0 + 0 is special
+ // if equal integral values, they have the same sign, which is fine for all rounding
+ // modes.
+ // If unequal in sign, we get +0 for all rounding modes except round down
+.Lzero_plus_zero:
+ {
+ p0 = cmp.eq(A,B)
+ if (p0.new) jumpr:t r31
+ }
+ {
+ TMP = USR
+ }
+ {
+ TMP = extractu(TMP,#2,#SR_ROUND_OFF)
+ A = #0
+ }
+ {
+ p0 = cmp.eq(TMP,#2)
+ if (p0.new) AH = ##0x80000000
+ jumpr r31
+ }
+.Linf_add:
+ // adding infinities is only OK if they are equal
+ {
+ p0 = !cmp.eq(AH,BH) // Do they have different signs
+ p0 = dfclass(B,#8) // And is B also infinite?
+ if (!p0.new) jumpr:t r31 // If not, just a normal inf
+ }
+ {
+ BL = ##0x7f800001 // sNAN
+ }
+ {
+ A = convert_sf2df(BL) // trigger invalid, set NaN
+ jumpr r31
+ }
+END(__hexagon_adddf3)