Merge branch 'rightlib' into mergelibs-241016-1210

author: Alexander Smirnov <alex@ydb.tech> 2024-10-16 12:11:24 +0000
committer: Alexander Smirnov <alex@ydb.tech> 2024-10-16 12:11:24 +0000
commit: 40811e93f3fdf9342a9295369994012420fac548 (patch)
tree: a8d85e094a9c21e10aa250f537c101fc2016a049 /contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S
parent: 30ebe5357bb143648c6be4d151ecd4944af81ada (diff)
parent: 28a0c4a9f297064538a018c512cd9bbd00a1a35d (diff)
download: ydb-40811e93f3fdf9342a9295369994012420fac548.tar.gz
1 files changed, 396 insertions, 0 deletions
diff --git a/contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S b/contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S
new file mode 100644
index 0000000000..1b0d34550f
--- /dev/null
+++ b/contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S
@@ -0,0 +1,396 @@
+//===----------------------Hexagon builtin routine ------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// Double Precision Multiply
+
+#define A r1:0
+#define AH r1
+#define AL r0
+#define B r3:2
+#define BH r3
+#define BL r2
+
+#define EXPA r4
+#define EXPB r5
+#define EXPB_A r5:4
+
+#define ZTMP r7:6
+#define ZTMPH r7
+#define ZTMPL r6
+
+#define ATMP r13:12
+#define ATMPH r13
+#define ATMPL r12
+
+#define BTMP r9:8
+#define BTMPH r9
+#define BTMPL r8
+
+#define ATMP2 r11:10
+#define ATMP2H r11
+#define ATMP2L r10
+
+#define EXPDIFF r15
+#define EXTRACTOFF r14
+#define EXTRACTAMT r15:14
+
+#define TMP r28
+
+#define MANTBITS 52
+#define HI_MANTBITS 20
+#define EXPBITS 11
+#define BIAS 1024
+#define MANTISSA_TO_INT_BIAS 52
+#define SR_BIT_INEXACT 5
+
+#ifndef SR_ROUND_OFF
+#define SR_ROUND_OFF 22
+#endif
+
+#define NORMAL p3
+#define BIGB p2
+
+#define Q6_ALIAS(TAG) .global __qdsp_##TAG ; .set __qdsp_##TAG, __hexagon_##TAG
+#define FAST_ALIAS(TAG) .global __hexagon_fast_##TAG ; .set __hexagon_fast_##TAG, __hexagon_##TAG
+#define FAST2_ALIAS(TAG) .global __hexagon_fast2_##TAG ; .set __hexagon_fast2_##TAG, __hexagon_##TAG
+#define END(TAG) .size TAG,.-TAG
+
+	.text
+	.global __hexagon_adddf3
+	.global __hexagon_subdf3
+	.type __hexagon_adddf3, @function
+	.type __hexagon_subdf3, @function
+
+Q6_ALIAS(adddf3)
+FAST_ALIAS(adddf3)
+FAST2_ALIAS(adddf3)
+Q6_ALIAS(subdf3)
+FAST_ALIAS(subdf3)
+FAST2_ALIAS(subdf3)
+
+	.p2align 5
+__hexagon_adddf3:
+	{
+		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
+		EXPB = extractu(BH,#EXPBITS,#HI_MANTBITS)
+		ATMP = combine(##0x20000000,#0)
+	}
+	{
+		NORMAL = dfclass(A,#2)
+		NORMAL = dfclass(B,#2)
+		BTMP = ATMP
+		BIGB = cmp.gtu(EXPB,EXPA)			// Is B substantially greater than A?
+	}
+	{
+		if (!NORMAL) jump .Ladd_abnormal		// If abnormal, go to special code
+		if (BIGB) A = B				// if B >> A, swap A and B
+		if (BIGB) B = A				// If B >> A, swap A and B
+		if (BIGB) EXPB_A = combine(EXPA,EXPB)	// swap exponents
+	}
+	{
+		ATMP = insert(A,#MANTBITS,#EXPBITS-2)	// Q1.62
+		BTMP = insert(B,#MANTBITS,#EXPBITS-2)	// Q1.62
+		EXPDIFF = sub(EXPA,EXPB)
+		ZTMP = combine(#62,#1)
+	}
+#undef BIGB
+#undef NORMAL
+#define B_POS p3
+#define A_POS p2
+#define NO_STICKIES p1
+.Ladd_continue:
+	{
+		EXPDIFF = min(EXPDIFF,ZTMPH)		// If exponent difference >= ~60,
+							// will collapse to sticky bit
+		ATMP2 = neg(ATMP)
+		A_POS = cmp.gt(AH,#-1)
+		EXTRACTOFF = #0
+	}
+	{
+		if (!A_POS) ATMP = ATMP2
+		ATMP2 = extractu(BTMP,EXTRACTAMT)
+		BTMP = ASR(BTMP,EXPDIFF)
+#undef EXTRACTAMT
+#undef EXPDIFF
+#undef EXTRACTOFF
+#define ZERO r15:14
+		ZERO = #0
+	}
+	{
+		NO_STICKIES = cmp.eq(ATMP2,ZERO)
+		if (!NO_STICKIES.new) BTMPL = or(BTMPL,ZTMPL)
+		EXPB = add(EXPA,#-BIAS-60)
+		B_POS = cmp.gt(BH,#-1)
+	}
+	{
+		ATMP = add(ATMP,BTMP)			// ADD!!!
+		ATMP2 = sub(ATMP,BTMP)			// Negate and ADD --> SUB!!!
+		ZTMP = combine(#54,##2045)
+	}
+	{
+		p0 = cmp.gtu(EXPA,ZTMPH)		// must be pretty high in case of large cancellation
+		p0 = !cmp.gtu(EXPA,ZTMPL)
+		if (!p0.new) jump:nt .Ladd_ovf_unf
+		if (!B_POS) ATMP = ATMP2		// if B neg, pick difference
+	}
+	{
+		A = convert_d2df(ATMP)			// Convert to Double Precision, taking care of flags, etc.  So nice!
+		p0 = cmp.eq(ATMPH,#0)
+		p0 = cmp.eq(ATMPL,#0)
+		if (p0.new) jump:nt .Ladd_zero		// or maybe conversion handles zero case correctly?
+	}
+	{
+		AH += asl(EXPB,#HI_MANTBITS)
+		jumpr r31
+	}
+	.falign
+__hexagon_subdf3:
+	{
+		BH = togglebit(BH,#31)
+		jump __qdsp_adddf3
+	}
+
+
+	.falign
+.Ladd_zero:
+	// True zero, full cancellation
+	// +0 unless round towards negative infinity
+	{
+		TMP = USR
+		A = #0
+		BH = #1
+	}
+	{
+		TMP = extractu(TMP,#2,#22)
+		BH = asl(BH,#31)
+	}
+	{
+		p0 = cmp.eq(TMP,#2)
+		if (p0.new) AH = xor(AH,BH)
+		jumpr r31
+	}
+	.falign
+.Ladd_ovf_unf:
+	// Overflow or Denormal is possible
+	// Good news: Underflow flag is not possible!
+
+	// ATMP has 2's complement value
+	//
+	// EXPA has A's exponent, EXPB has EXPA-BIAS-60
+	//
+	// Convert, extract exponent, add adjustment.
+	// If > 2046, overflow
+	// If <= 0, denormal
+	//
+	// Note that we've not done our zero check yet, so do that too
+
+	{
+		A = convert_d2df(ATMP)
+		p0 = cmp.eq(ATMPH,#0)
+		p0 = cmp.eq(ATMPL,#0)
+		if (p0.new) jump:nt .Ladd_zero
+	}
+	{
+		TMP = extractu(AH,#EXPBITS,#HI_MANTBITS)
+		AH += asl(EXPB,#HI_MANTBITS)
+	}
+	{
+		EXPB = add(EXPB,TMP)
+		B = combine(##0x00100000,#0)
+	}
+	{
+		p0 = cmp.gt(EXPB,##BIAS+BIAS-2)
+		if (p0.new) jump:nt .Ladd_ovf
+	}
+	{
+		p0 = cmp.gt(EXPB,#0)
+		if (p0.new) jumpr:t r31
+		TMP = sub(#1,EXPB)
+	}
+	{
+		B = insert(A,#MANTBITS,#0)
+		A = ATMP
+	}
+	{
+		B = lsr(B,TMP)
+	}
+	{
+		A = insert(B,#63,#0)
+		jumpr r31
+	}
+	.falign
+.Ladd_ovf:
+	// We get either max finite value or infinity.  Either way, overflow+inexact
+	{
+		A = ATMP				// 2's complement value
+		TMP = USR
+		ATMP = combine(##0x7fefffff,#-1)	// positive max finite
+	}
+	{
+		EXPB = extractu(TMP,#2,#SR_ROUND_OFF)	// rounding bits
+		TMP = or(TMP,#0x28)			// inexact + overflow
+		BTMP = combine(##0x7ff00000,#0)		// positive infinity
+	}
+	{
+		USR = TMP
+		EXPB ^= lsr(AH,#31)			// Does sign match rounding?
+		TMP = EXPB				// unmodified rounding mode
+	}
+	{
+		p0 = !cmp.eq(TMP,#1)			// If not round-to-zero and
+		p0 = !cmp.eq(EXPB,#2)			// Not rounding the other way,
+		if (p0.new) ATMP = BTMP			// we should get infinity
+	}
+	{
+		A = insert(ATMP,#63,#0)			// insert inf/maxfinite, leave sign
+	}
+	{
+		p0 = dfcmp.eq(A,A)
+		jumpr r31
+	}
+
+.Ladd_abnormal:
+	{
+		ATMP = extractu(A,#63,#0)		// strip off sign
+		BTMP = extractu(B,#63,#0)		// strip off sign
+	}
+	{
+		p3 = cmp.gtu(ATMP,BTMP)
+		if (!p3.new) A = B			// sort values
+		if (!p3.new) B = A			// sort values
+	}
+	{
+		// Any NaN --> NaN, possibly raise invalid if sNaN
+		p0 = dfclass(A,#0x0f)		// A not NaN?
+		if (!p0.new) jump:nt .Linvalid_nan_add
+		if (!p3) ATMP = BTMP
+		if (!p3) BTMP = ATMP
+	}
+	{
+		// Infinity + non-infinity number is infinity
+		// Infinity + infinity --> inf or nan
+		p1 = dfclass(A,#0x08)		// A is infinity
+		if (p1.new) jump:nt .Linf_add
+	}
+	{
+		p2 = dfclass(B,#0x01)		// B is zero
+		if (p2.new) jump:nt .LB_zero	// so return A or special 0+0
+		ATMP = #0
+	}
+	// We are left with adding one or more subnormals
+	{
+		p0 = dfclass(A,#4)
+		if (p0.new) jump:nt .Ladd_two_subnormal
+		ATMP = combine(##0x20000000,#0)
+	}
+	{
+		EXPA = extractu(AH,#EXPBITS,#HI_MANTBITS)
+		EXPB = #1
+		// BTMP already ABS(B)
+		BTMP = asl(BTMP,#EXPBITS-2)
+	}
+#undef ZERO
+#define EXTRACTOFF r14
+#define EXPDIFF r15
+	{
+		ATMP = insert(A,#MANTBITS,#EXPBITS-2)
+		EXPDIFF = sub(EXPA,EXPB)
+		ZTMP = combine(#62,#1)
+		jump .Ladd_continue
+	}
+
+.Ladd_two_subnormal:
+	{
+		ATMP = extractu(A,#63,#0)
+		BTMP = extractu(B,#63,#0)
+	}
+	{
+		ATMP = neg(ATMP)
+		BTMP = neg(BTMP)
+		p0 = cmp.gt(AH,#-1)
+		p1 = cmp.gt(BH,#-1)
+	}
+	{
+		if (p0) ATMP = A
+		if (p1) BTMP = B
+	}
+	{
+		ATMP = add(ATMP,BTMP)
+	}
+	{
+		BTMP = neg(ATMP)
+		p0 = cmp.gt(ATMPH,#-1)
+		B = #0
+	}
+	{
+		if (!p0) A = BTMP
+		if (p0) A = ATMP
+		BH = ##0x80000000
+	}
+	{
+		if (!p0) AH = or(AH,BH)
+		p0 = dfcmp.eq(A,B)
+		if (p0.new) jump:nt .Lzero_plus_zero
+	}
+	{
+		jumpr r31
+	}
+
+.Linvalid_nan_add:
+	{
+		TMP = convert_df2sf(A)			// will generate invalid if sNaN
+		p0 = dfclass(B,#0x0f)			// if B is not NaN
+		if (p0.new) B = A 			// make it whatever A is
+	}
+	{
+		BL = convert_df2sf(B)			// will generate invalid if sNaN
+		A = #-1
+		jumpr r31
+	}
+	.falign
+.LB_zero:
+	{
+		p0 = dfcmp.eq(ATMP,A)			// is A also zero?
+		if (!p0.new) jumpr:t r31		// If not, just return A
+	}
+	// 0 + 0 is special
+	// if equal integral values, they have the same sign, which is fine for all rounding
+	// modes.
+	// If unequal in sign, we get +0 for all rounding modes except round down
+.Lzero_plus_zero:
+	{
+		p0 = cmp.eq(A,B)
+		if (p0.new) jumpr:t r31
+	}
+	{
+		TMP = USR
+	}
+	{
+		TMP = extractu(TMP,#2,#SR_ROUND_OFF)
+		A = #0
+	}
+	{
+		p0 = cmp.eq(TMP,#2)
+		if (p0.new) AH = ##0x80000000
+		jumpr r31
+	}
+.Linf_add:
+	// adding infinities is only OK if they are equal
+	{
+		p0 = !cmp.eq(AH,BH)			// Do they have different signs
+		p0 = dfclass(B,#8)			// And is B also infinite?
+		if (!p0.new) jumpr:t r31		// If not, just a normal inf
+	}
+	{
+		BL = ##0x7f800001			// sNAN
+	}
+	{
+		A = convert_sf2df(BL)			// trigger invalid, set NaN
+		jumpr r31
+	}
+END(__hexagon_adddf3)
author	Alexander Smirnov <alex@ydb.tech>	2024-10-16 12:11:24 +0000
committer	Alexander Smirnov <alex@ydb.tech>	2024-10-16 12:11:24 +0000
commit	40811e93f3fdf9342a9295369994012420fac548 (patch)
tree	a8d85e094a9c21e10aa250f537c101fc2016a049 /contrib/libs/cxxsupp/builtins/hexagon/dfaddsub.S
parent	30ebe5357bb143648c6be4d151ecd4944af81ada (diff)
parent	28a0c4a9f297064538a018c512cd9bbd00a1a35d (diff)
download	ydb-40811e93f3fdf9342a9295369994012420fac548.tar.gz