summaryrefslogtreecommitdiffstats
path: root/contrib/libs/cxxsupp/openmp/z_Linux_asm.S
diff options
context:
space:
mode:
authormikhnenko <[email protected]>2025-07-15 20:05:43 +0300
committermikhnenko <[email protected]>2025-07-15 20:52:16 +0300
commita40bd4f45bbc18fd95b1596e655b8942ceb2cf4b (patch)
treebce599ca02c778c277198de6d131d37db71997d0 /contrib/libs/cxxsupp/openmp/z_Linux_asm.S
parent728e0eaef4dc1f1152d2c3a4cc1bbdf597f3ef3d (diff)
Update contrib/libs/cxxsupp/openmp to 20.1.7
commit_hash:722dd5fe79203d22ad4a0be288ac0caeb6b3dd68
Diffstat (limited to 'contrib/libs/cxxsupp/openmp/z_Linux_asm.S')
-rw-r--r--contrib/libs/cxxsupp/openmp/z_Linux_asm.S789
1 files changed, 773 insertions, 16 deletions
diff --git a/contrib/libs/cxxsupp/openmp/z_Linux_asm.S b/contrib/libs/cxxsupp/openmp/z_Linux_asm.S
index b4a45c1ac6f..0bf9f07a13f 100644
--- a/contrib/libs/cxxsupp/openmp/z_Linux_asm.S
+++ b/contrib/libs/cxxsupp/openmp/z_Linux_asm.S
@@ -19,6 +19,16 @@
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
+# if defined(__ELF__) && defined(__CET__) && defined(__has_include) // CET-enabled ELF build
+# if __has_include(<cet.h>)
+# include <cet.h> // expected to supply _CET_ENDBR -- NOTE(review): confirm against the toolchain header
+# endif
+# endif
+// Fallback so the PROC macros can emit _CET_ENDBR unconditionally.
+# if !defined(_CET_ENDBR)
+# define _CET_ENDBR // expands to nothing when no definition was provided above
+# endif
+
# if KMP_MIC
// the 'delay r16/r32/r64' should be used instead of the 'pause'.
// The delay operation has the effect of removing the current thread from
@@ -66,6 +76,7 @@
ALIGN 4
.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
+ _CET_ENDBR
.endmacro
# else // KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols
@@ -92,6 +103,7 @@ KMP_PREFIX_UNDERSCORE($0):
.globl KMP_PREFIX_UNDERSCORE(\proc)
KMP_PREFIX_UNDERSCORE(\proc):
.cfi_startproc
+ _CET_ENDBR
.endm
.macro KMP_CFI_DEF_OFFSET sz
.cfi_def_cfa_offset \sz
@@ -108,7 +120,7 @@ KMP_PREFIX_UNDERSCORE(\proc):
# endif // KMP_OS_DARWIN
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
-#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
+#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
# if KMP_OS_DARWIN
# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols
@@ -129,7 +141,25 @@ KMP_PREFIX_UNDERSCORE(\proc):
.globl KMP_PREFIX_UNDERSCORE($0)
KMP_PREFIX_UNDERSCORE($0):
.endmacro
-# else // KMP_OS_DARWIN
+# elif KMP_OS_WINDOWS
+# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Windows/ARM64 symbols
+// Format labels so that they don't override function names in gdb's backtraces
+# define KMP_LABEL(x) .L_##x // local label hidden from backtraces
+// ALIGN size: forwards (1 << size) as the operand of .align.
+.macro ALIGN size
+ .align 1<<(\size)
+.endm
+// DEBUG_INFO proc: only emits alignment in this variant (the proc argument is unused).
+.macro DEBUG_INFO proc
+ ALIGN 2
+.endm
+// PROC proc: aligns, then exports and defines the entry label for proc.
+.macro PROC proc
+ ALIGN 2
+ .globl KMP_PREFIX_UNDERSCORE(\proc)
+KMP_PREFIX_UNDERSCORE(\proc):
+.endm
+# else // KMP_OS_DARWIN || KMP_OS_WINDOWS
# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols
// Format labels so that they don't override function names in gdb's backtraces
# define KMP_LABEL(x) .L_##x // local label hidden from backtraces
@@ -142,7 +172,11 @@ KMP_PREFIX_UNDERSCORE($0):
.cfi_endproc
// Not sure why we need .type and .size for the functions
ALIGN 2
+#if KMP_ARCH_ARM
+ .type \proc,%function
+#else
.type \proc,@function
+#endif
.size \proc,.-\proc
.endm
@@ -154,7 +188,64 @@ KMP_PREFIX_UNDERSCORE(\proc):
.endm
# endif // KMP_OS_DARWIN
-#endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
+# if KMP_OS_LINUX
+// BTI and PAC gnu property note (written into .note.gnu.property by GNU_PROPERTY)
+# define NT_GNU_PROPERTY_TYPE_0 5 // note type, emitted after the two size words
+# define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 // property type passed by GNU_PROPERTY_BTI_PAC
+# define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1 // feature bit selected through BTI_FLAG
+# define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2 // feature bit selected through PAC_FLAG
+// GNU_PROPERTY(type, value): emits words 4, 16, note type, the name "GNU", then type, 4, value, 0.
+# define GNU_PROPERTY(type, value) \
+ .pushsection .note.gnu.property, "a"; \
+ .p2align 3; \
+ .word 4; \
+ .word 16; \
+ .word NT_GNU_PROPERTY_TYPE_0; \
+ .asciz "GNU"; \
+ .word type; \
+ .word 4; \
+ .word value; \
+ .word 0; \
+ .popsection
+# endif
+// NOTE(review): the *_FEATURE_1_* names exist only under KMP_OS_LINUX, so elsewhere the flags below evaluate as 0 in #if -- confirm intended.
+# if defined(__ARM_FEATURE_BTI_DEFAULT)
+# define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+# else
+# define BTI_FLAG 0
+# endif
+# if __ARM_FEATURE_PAC_DEFAULT & 3 // true when either of the two low bits is set
+# define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC
+# else
+# define PAC_FLAG 0
+# endif
+// PACBTI_C is placed at function entry and PACBTI_RET right before ret in __kmp_invoke_microtask.
+# if (BTI_FLAG | PAC_FLAG) != 0
+# if PAC_FLAG != 0
+# define PACBTI_C hint #25 // hint encoding of PACIASP
+# define PACBTI_RET hint #29 // hint encoding of AUTIASP
+# else
+# define PACBTI_C hint #34 // hint encoding of BTI c
+# define PACBTI_RET // nothing to emit before ret when only BTI is enabled
+# endif
+# define GNU_PROPERTY_BTI_PAC \
+ GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG)
+# else
+# define PACBTI_C // expands to nothing
+# define PACBTI_RET // expands to nothing
+# define GNU_PROPERTY_BTI_PAC // expands to nothing
+# endif
+#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM)
+
+.macro COMMON name, size, align_power // declare a common symbol, the alignment operand differs per OS
+#if KMP_OS_DARWIN
+ .comm \name, \size // align_power is not forwarded on Darwin
+#elif KMP_OS_WINDOWS
+ .comm \name, \size, \align_power // exponent forwarded unchanged
+#else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS
+ .comm \name, \size, (1<<(\align_power)) // exponent converted to 1 << align_power
+#endif
+.endm
// -----------------------------------------------------------------------
// data
@@ -1118,6 +1209,9 @@ KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers.
movq %rdi, %rbx // pkfn -> %rbx
leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn)
leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn)
+ // Check if argc is 0
+ cmpq $0, %rax
+ je KMP_LABEL(kmp_no_args) // Jump ahead
movq %r8, %r11 // p_argv -> %r11
@@ -1163,6 +1257,7 @@ KMP_LABEL(kmp_1_exit):
cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn)
#endif // KMP_MIC
+KMP_LABEL(kmp_no_args):
call *%rbx // call (*pkfn)();
movq $1, %rax // move 1 into return register;
@@ -1204,7 +1299,7 @@ KMP_LABEL(kmp_1_exit):
#endif /* KMP_ARCH_X86_64 */
// '
-#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64
+#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)
//------------------------------------------------------------------------
// int
@@ -1260,6 +1355,7 @@ __tid = 8
// mark_begin;
.text
PROC __kmp_invoke_microtask
+ PACBTI_C
stp x29, x30, [sp, #-16]!
# if OMPT_SUPPORT
@@ -1323,12 +1419,158 @@ KMP_LABEL(kmp_1):
ldp x19, x20, [sp], #16
# endif
ldp x29, x30, [sp], #16
+ PACBTI_RET
ret
DEBUG_INFO __kmp_invoke_microtask
// -- End __kmp_invoke_microtask
-#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */
+#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */
+
+#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM
+
+//------------------------------------------------------------------------
+// int
+// __kmp_invoke_microtask( void (*pkfn) (int *gtid, int *tid, ...),
+// int gtid, int tid,
+// int argc, void *p_argv[]
+// #if OMPT_SUPPORT
+// ,
+// void **exit_frame_ptr
+// #endif
+// ) {
+// #if OMPT_SUPPORT
+// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+// (*pkfn)( & gtid, & tid, argv[0], ... );
+//
+// // FIXME: This is done at call-site and can be removed here.
+// #if OMPT_SUPPORT
+// *exit_frame_ptr = 0;
+// #endif
+//
+// return 1;
+// }
+//
+// parameters:
+// r0: pkfn
+// r1: gtid
+// r2: tid
+// r3: argc
+// [sp]: p_argv (first stack argument, loaded into r4)
+// [sp, #4]: &exit_frame (second stack argument, loaded into r5 if OMPT_SUPPORT)
+//
+// locals:
+// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn
+// __tid: tid parm pushed on stack so can pass &tid to pkfn
+//
+// reg temps:
+// r4: used to hold pkfn address
+// r5: used as temporary for number of pkfn parms
+// r6: used to traverse p_argv array
+// r7: frame pointer (in some configurations)
+// r8: used as temporary for stack placement calculation
+// and as pointer to base of callee saved area
+// r12: used as temporary for stack parameters
+// r10: used to preserve exit_frame_ptr, callee-save
+// r11: frame pointer (in some configurations)
+//
+// return: r0 (always 1/TRUE)
+//
+
+__gtid = 4
+__tid = 8
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+ .text
+ PROC __kmp_invoke_microtask
+
+ // Pushing one extra register (r3) to keep the stack aligned
+ // for when we call pkfn below
+ push {r3-r11,lr} // 10 words pushed
+ // Load p_argv and &exit_frame
+ ldr r4, [sp, #10*4] // p_argv sits just above the 10 pushed words
+# if OMPT_SUPPORT
+ ldr r5, [sp, #11*4] // exit_frame_ptr is the next stack argument
+# endif
+
+# if KMP_OS_DARWIN || (defined(__thumb__) && !KMP_OS_WINDOWS)
+# define FP r7
+# define FPOFF 4*4 // byte offset of the saved r7 in the push block (r3-r6 sit below it)
+#else
+# define FP r11
+# define FPOFF 8*4 // byte offset of the saved r11 in the push block (r3-r10 sit below it)
+#endif
+ add FP, sp, #FPOFF // FP = address of the saved FP slot
+# if OMPT_SUPPORT
+ mov r10, r5 // keep exit_frame_ptr in r10 for the store after the call
+ str FP, [r10] // *exit_frame_ptr = FP
+# endif
+ mov r8, sp // r8 = sp right after the push (base for the gtid/tid slots)
+
+ // Calculate how much stack to allocate, in increments of 8 bytes.
+ // We strictly need 4*(argc-2) bytes (2 arguments are passed in
+ // registers) but allocate 4*argc for simplicity (to avoid needing
+ // to handle the argc<2 cases). We align the number of bytes
+ // allocated to 8 bytes, to keep the stack aligned. (Since we
+ // already allocate more than enough, it's ok to round down
+ // instead of up for the alignment.) We allocate another extra
+ // 8 bytes for gtid and tid.
+ mov r5, #1
+ add r5, r5, r3, lsr #1 // r5 = 1 + argc/2
+ sub sp, sp, r5, lsl #3 // sp -= 8 * r5
+
+ str r1, [r8, #-__gtid] // gtid stored at r8 - 4
+ str r2, [r8, #-__tid] // tid stored at r8 - 8
+ mov r5, r3 // r5 = argc
+ mov r6, r4 // r6 = p_argv
+ mov r4, r0 // r4 = pkfn
+
+ // Prepare the first 2 parameters to pkfn - pointers to gtid and tid
+ // in our stack frame.
+ sub r0, r8, #__gtid // r0 = &gtid
+ sub r1, r8, #__tid // r1 = &tid
+
+ mov r8, sp // r8 = write cursor for stack-passed arguments
+
+ // Load p_argv[0] and p_argv[1] into r2 and r3, if argc >= 1/2
+ cmp r5, #0
+ beq KMP_LABEL(kmp_1) // argc == 0: no extra arguments
+ ldr r2, [r6] // r2 = p_argv[0]
+
+ subs r5, r5, #1
+ beq KMP_LABEL(kmp_1) // argc == 1: done
+ ldr r3, [r6, #4]! // r3 = p_argv[1], r6 advanced to &p_argv[1]
+
+ // Loop, loading the rest of p_argv and writing the elements on the
+ // stack.
+KMP_LABEL(kmp_0):
+ subs r5, r5, #1 // one fewer argument remaining
+ beq KMP_LABEL(kmp_1) // none left: go call pkfn
+ ldr r12, [r6, #4]! // pre-increment r6, then load the next p_argv entry
+ str r12, [r8], #4 // store it at the cursor, then advance the cursor
+ b KMP_LABEL(kmp_0)
+KMP_LABEL(kmp_1):
+ blx r4 // call pkfn
+ mov r0, #1 // return value is always 1
+
+ sub r4, FP, #FPOFF // recompute sp as it was right after the push (relies on pkfn preserving FP)
+ mov sp, r4
+# undef FP
+# undef FPOFF
+
+# if OMPT_SUPPORT
+ mov r1, #0
+ str r1, [r10] // *exit_frame_ptr = 0
+# endif
+ pop {r3-r11,pc} // restore registers and return through the saved lr
+
+ DEBUG_INFO __kmp_invoke_microtask
+// -- End __kmp_invoke_microtask
+
+#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */
#if KMP_ARCH_PPC64
@@ -1725,23 +1967,533 @@ __kmp_invoke_microtask:
#endif /* KMP_ARCH_RISCV64 */
-#if KMP_ARCH_ARM || KMP_ARCH_MIPS
+#if KMP_ARCH_LOONGARCH64
+
+//------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)(int *gtid, int *tid, ...);
+//
+// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
+// void *p_argv[]
+// #if OMPT_SUPPORT
+// ,
+// void **exit_frame_ptr
+// #endif
+// ) {
+// #if OMPT_SUPPORT
+// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+// (*pkfn)(&gtid, &tid, argv[0], ...);
+//
+// return 1;
+// }
+//
+// Parameters:
+// a0: pkfn
+// a1: gtid
+// a2: tid
+// a3: argc
+// a4: p_argv
+// a5: exit_frame_ptr
+//
+// Locals:
+// __gtid: gtid param stored at $fp-20 so can pass &gtid to pkfn
+// __tid: tid param stored at $fp-24 so can pass &tid to pkfn
+//
+// Temp registers:
+//
+// t0: used to calculate the dynamic stack size / used to hold pkfn address
+// t1: used as temporary for stack placement calculation
+// t2: used as temporary for stack arguments
+// t3: used as temporary for number of remaining pkfn parms
+// t4: used to traverse p_argv array
+//
+// return: a0 (always 1/TRUE)
+//
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+ .text
+ .globl __kmp_invoke_microtask
+ .p2align 2
+ .type __kmp_invoke_microtask,@function
+__kmp_invoke_microtask:
+ .cfi_startproc
+
+ // First, save ra and fp
+ addi.d $sp, $sp, -16
+ st.d $ra, $sp, 8 // ra saved at entry sp - 8
+ st.d $fp, $sp, 0 // fp saved at entry sp - 16
+ addi.d $fp, $sp, 16 // fp = sp value on entry
+ .cfi_def_cfa 22, 0 // CFA = register 22 ($fp)
+ .cfi_offset 1, -8 // register 1 ($ra) saved at CFA - 8
+ .cfi_offset 22, -16 // register 22 ($fp) saved at CFA - 16
+
+ // Compute the dynamic stack size:
+ //
+ // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by
+ // reference
+ // - We need 8 bytes for each argument that cannot be passed to the 'pkfn'
+ // function by register. Given that we have 8 of such registers (a[0-7])
+ // and two + 'argc' arguments (consider &gtid and &tid), we need to
+ // reserve max(0, argc - 6)*8 extra bytes
+ //
+ // The total number of bytes is then max(0, argc - 6)*8 + 8
+
+ addi.d $t0, $a3, -6 // t0 = argc - 6
+ slt $t1, $t0, $zero // t1 = 1 when t0 is negative
+ masknez $t0, $t0, $t1 // t0 = 0 when t1 is set, i.e. max(0, argc - 6)
+ addi.d $t0, $t0, 1 // one more slot for gtid/tid
+ slli.d $t0, $t0, 3 // slots -> bytes
+ sub.d $sp, $sp, $t0
+
+ // Align the stack to 16 bytes
+ bstrins.d $sp, $zero, 3, 0 // clear bits 3..0 of sp
+
+ move $t0, $a0 // t0 = pkfn
+ move $t3, $a3 // t3 = argc (remaining-argument counter)
+ move $t4, $a4 // t4 = p_argv
+
+#if OMPT_SUPPORT
+ // Save frame pointer into exit_frame
+ st.d $fp, $a5, 0
+#endif
+
+ // Prepare arguments for the pkfn function (first 8 using a0-a7 registers)
+
+ st.w $a1, $fp, -20 // gtid
+ st.w $a2, $fp, -24 // tid
+
+ addi.d $a0, $fp, -20 // a0 = &gtid
+ addi.d $a1, $fp, -24 // a1 = &tid
+
+ beqz $t3, .L_kmp_3 // argc == 0
+ ld.d $a2, $t4, 0 // a2 = p_argv[0]
+
+ addi.d $t3, $t3, -1
+ beqz $t3, .L_kmp_3 // argc == 1
+ ld.d $a3, $t4, 8 // a3 = p_argv[1]
+
+ addi.d $t3, $t3, -1
+ beqz $t3, .L_kmp_3 // argc == 2
+ ld.d $a4, $t4, 16 // a4 = p_argv[2]
+
+ addi.d $t3, $t3, -1
+ beqz $t3, .L_kmp_3 // argc == 3
+ ld.d $a5, $t4, 24 // a5 = p_argv[3]
+
+ addi.d $t3, $t3, -1
+ beqz $t3, .L_kmp_3 // argc == 4
+ ld.d $a6, $t4, 32 // a6 = p_argv[4]
+
+ addi.d $t3, $t3, -1
+ beqz $t3, .L_kmp_3 // argc == 5
+ ld.d $a7, $t4, 40 // a7 = p_argv[5]
+
+ // Prepare any additional argument passed through the stack
+ addi.d $t4, $t4, 48 // t4 = &p_argv[6]
+ move $t1, $sp // t1 = write cursor, starting at sp
+ b .L_kmp_2 // enter the loop at its test
+.L_kmp_1:
+ ld.d $t2, $t4, 0 // load the next p_argv entry
+ st.d $t2, $t1, 0 // store it at the cursor
+ addi.d $t4, $t4, 8
+ addi.d $t1, $t1, 8
+.L_kmp_2:
+ addi.d $t3, $t3, -1 // one fewer argument remaining
+ bnez $t3, .L_kmp_1 // keep copying while arguments remain
+
+.L_kmp_3:
+ // Call pkfn function
+ jirl $ra, $t0, 0
+
+ // Restore stack and return
+
+ addi.d $a0, $zero, 1 // return value is always 1
+
+ addi.d $sp, $fp, -16 // sp = address of the saved fp/ra pair
+ ld.d $fp, $sp, 0
+ ld.d $ra, $sp, 8
+ addi.d $sp, $sp, 16 // back to the entry sp
+ jr $ra
+.Lfunc_end0:
+ .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
+ .cfi_endproc
+
+// -- End __kmp_invoke_microtask
+
+#endif /* KMP_ARCH_LOONGARCH64 */
+
+#if KMP_ARCH_VE
+
+//------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)(int *gtid, int *tid, ...);
+//
+// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
+// void *p_argv[]
+// #if OMPT_SUPPORT
+// ,
+// void **exit_frame_ptr
+// #endif
+// ) {
+// #if OMPT_SUPPORT
+// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+// (*pkfn)(&gtid, &tid, argv[0], ...);
+//
+// return 1;
+// }
+//
+// Parameters:
+// s0: pkfn
+// s1: gtid
+// s2: tid
+// s3: argc
+// s4: p_argv
+// s5: exit_frame_ptr
+//
+// Locals:
+// __gtid: gtid param pushed on stack so can pass &gtid to pkfn
+// __tid: tid param pushed on stack so can pass &tid to pkfn
+//
+// Temp. registers:
+//
+// s34: used to calculate the dynamic stack size
+// s35: used as temporary for stack placement calculation
+// s36: used as temporary for stack arguments
+// s37: used as temporary for number of remaining pkfn parms
+// s38: used to traverse p_argv array
+//
+// return: s0 (always 1/TRUE)
+//
+
+__gtid = -4
+__tid = -8
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+ .text
+ .globl __kmp_invoke_microtask
+ // A function requires 8 bytes align.
+ .p2align 3
+ .type __kmp_invoke_microtask,@function
+__kmp_invoke_microtask:
+ .cfi_startproc
+
+ // First, save fp and lr. VE stores them at caller stack frame.
+ st %fp, 0(, %sp)
+ st %lr, 8(, %sp)
+ or %fp, 0, %sp // fp = sp value on entry
+ .cfi_def_cfa %fp, 0
+ .cfi_offset %lr, 8
+ .cfi_offset %fp, 0
+
+ // Compute the dynamic stack size:
+ //
+ // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them
+ // by reference
+ // - We need 8 bytes for each argument. We have two + 'argc'
+ // arguments (consider &gtid and &tid). We need to reserve
+ // (argc + 2) * 8 bytes.
+ // - We need 176 bytes for RSA and others
+ //
+ // The total number of bytes is then (argc + 2) * 8 + 8 + 176.
+ //
+ // |------------------------------|
+ // | return address of callee | 8(%fp)
+ // |------------------------------|
+ // | frame pointer of callee | 0(%fp)
+ // |------------------------------| <------------------ %fp
+ // | __tid / __gtid | -8(%fp) / -4(%fp)
+ // |------------------------------|
+ // | argc+2 for arguments | 176(%sp)
+ // |------------------------------|
+ // | RSA |
+ // |------------------------------|
+ // | return address |
+ // |------------------------------|
+ // | frame pointer |
+ // |------------------------------| <------------------ %sp
+
+ adds.w.sx %s34, 2, %s3 // s34 = argc + 2
+ sll %s34, %s34, 3 // slots -> bytes
+ lea %s34, 184(, %s34) // + 184 (8 for gtid/tid, 176 for RSA and others)
+ subs.l %sp, %sp, %s34
+
+ // Align the stack to 16 bytes.
+ and %sp, -16, %sp
+
+ // Save pkfn.
+ or %s12, 0, %s0
+
+ // Call host to allocate stack if it is necessary.
+ brge.l %sp, %sl, .L_kmp_pass // skipped when sp has not dropped below sl
+ ld %s61, 24(, %tp)
+ lea %s63, 0x13b
+ shm.l %s63, 0(%s61)
+ shm.l %sl, 8(%s61)
+ shm.l %sp, 16(%s61)
+ monc
+
+.L_kmp_pass:
+ lea %s35, 176(, %sp) // s35 = base of the outgoing argument area
+ adds.w.sx %s37, 0, %s3 // s37 = argc
+ or %s38, 0, %s4 // s38 = p_argv
+
+#if OMPT_SUPPORT
+ // Save frame pointer into exit_frame.
+ st %fp, 0(%s5)
+#endif
+
+ // Prepare arguments for the pkfn function (first 8 using s0-s7
+ // registers, but need to store stack also because of varargs).
+
+ stl %s1, __gtid(%fp) // gtid at fp - 4
+ stl %s2, __tid(%fp) // tid at fp - 8
+
+ adds.l %s0, __gtid, %fp // s0 = &gtid
+ st %s0, 0(, %s35)
+ adds.l %s1, __tid, %fp // s1 = &tid
+ st %s1, 8(, %s35)
+
+ breq.l 0, %s37, .L_kmp_call // argc == 0
+ ld %s2, 0(, %s38) // p_argv[0]
+ st %s2, 16(, %s35)
+
+ breq.l 1, %s37, .L_kmp_call // argc == 1
+ ld %s3, 8(, %s38) // p_argv[1]
+ st %s3, 24(, %s35)
+
+ breq.l 2, %s37, .L_kmp_call // argc == 2
+ ld %s4, 16(, %s38) // p_argv[2]
+ st %s4, 32(, %s35)
+
+ breq.l 3, %s37, .L_kmp_call // argc == 3
+ ld %s5, 24(, %s38) // p_argv[3]
+ st %s5, 40(, %s35)
+
+ breq.l 4, %s37, .L_kmp_call // argc == 4
+ ld %s6, 32(, %s38) // p_argv[4]
+ st %s6, 48(, %s35)
+
+ breq.l 5, %s37, .L_kmp_call // argc == 5
+ ld %s7, 40(, %s38) // p_argv[5]
+ st %s7, 56(, %s35)
+
+ breq.l 6, %s37, .L_kmp_call // argc == 6
+
+ // Prepare any additional argument passed through the stack.
+ adds.l %s37, -6, %s37 // s37 = number of arguments still to copy
+ lea %s38, 48(, %s38) // s38 = &p_argv[6]
+ lea %s35, 64(, %s35) // s35 = slot after the eight register arguments
+.L_kmp_loop:
+ ld %s36, 0(, %s38)
+ st %s36, 0(, %s35)
+ adds.l %s37, -1, %s37
+ adds.l %s38, 8, %s38
+ adds.l %s35, 8, %s35
+ brne.l 0, %s37, .L_kmp_loop // repeat until the counter reaches 0
+
+.L_kmp_call:
+ // Call pkfn function.
+ bsic %lr, (, %s12)
+
+ // Return value.
+ lea %s0, 1
+
+ // Restore stack and return.
+ or %sp, 0, %fp // sp = entry sp
+ ld %lr, 8(, %sp)
+ ld %fp, 0(, %sp)
+ b.l.t (, %lr)
+.Lfunc_end0:
+ .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
+ .cfi_endproc
+
+// -- End __kmp_invoke_microtask
+
+#endif /* KMP_ARCH_VE */
+
+#if KMP_ARCH_S390X
+
+//------------------------------------------------------------------------
+//
+// typedef void (*microtask_t)(int *gtid, int *tid, ...);
+//
+// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
+// void *p_argv[]
+// #if OMPT_SUPPORT
+// ,
+// void **exit_frame_ptr
+// #endif
+// ) {
+// #if OMPT_SUPPORT
+// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
+// #endif
+//
+// (*pkfn)(&gtid, &tid, argv[0], ...);
+//
+// return 1;
+// }
+//
+// Parameters:
+// r2: pkfn
+// r3: gtid
+// r4: tid
+// r5: argc
+// r6: p_argv
+// SP+160: exit_frame_ptr
+//
+// Locals:
+// __gtid: gtid param stored in the frame (160(r12)) so can pass &gtid to pkfn
+// __tid: tid param stored in the frame (168(r12)) so can pass &tid to pkfn
+//
+// Temp. registers:
+//
+// r0: used to fetch argv slots
+// r7: used as temporary for number of remaining pkfn parms
+// r8: argv
+// r9: pkfn
+// r10: stack size
+// r11: stack pointer on entry (frame base, CFA = r11 + 160)
+// r12: base for the gtid/tid slots (new r15 + stack-parameter bytes)
+// r13: byte offset of the current argv slot
+//
+// return: r2 (always 1/TRUE)
+//
+
+// -- Begin __kmp_invoke_microtask
+// mark_begin;
+ .text
+ .globl __kmp_invoke_microtask
+ .p2align 1
+ .type __kmp_invoke_microtask,@function
+__kmp_invoke_microtask:
+ .cfi_startproc
+
+ stmg %r6,%r15,48(%r15) // save r6-r15, r15 included so the slot named by the CFI below is written
+ .cfi_offset %r6, -112
+ .cfi_offset %r7, -104
+ .cfi_offset %r8, -96
+ .cfi_offset %r9, -88
+ .cfi_offset %r10, -80
+ .cfi_offset %r11, -72
+ .cfi_offset %r12, -64
+ .cfi_offset %r13, -56
+ .cfi_offset %r14, -48
+ .cfi_offset %r15, -40
+ lgr %r11,%r15 // r11 = stack pointer on entry
+ .cfi_def_cfa %r11, 160
+
+ // Compute the dynamic stack size:
+ //
+ // - 160 bytes for the standard frame plus 16 bytes holding 'gtid' and
+ // 'tid' (one 8-byte slot each), so we can pass them by reference
+ // - 8 bytes for each argument that cannot be passed to the 'pkfn'
+ // function by register. With 5 such registers (r[2-6]) and two + 'argc'
+ // arguments (consider &gtid and &tid), max(0, argc - 3) slots are
+ // needed; the code reserves max(0, argc - 2) slots, i.e. one spare
+ //
+ // The total number of bytes is then max(0, argc - 2)*8 + 176
+
+ lgr %r10,%r5
+ aghi %r10,-2 // r10 = argc - 2
+ jnm 0f // keep it when not negative
+ lghi %r10,0 // otherwise clamp to 0
+0:
+ sllg %r10,%r10,3 // slots -> bytes
+ lgr %r12,%r10
+ aghi %r10,176 // add the fixed part of the frame
+ sgr %r15,%r10 // allocate the frame
+ agr %r12,%r15 // r12 = new r15 + stack-parameter bytes
+ stg %r11,0(%r15) // store the entry stack pointer at the bottom of the new frame
+
+ lgr %r9,%r2 // pkfn
+
+#if OMPT_SUPPORT
+ // Save frame pointer into exit_frame
+ lg %r8,160(%r11) // r8 = exit_frame_ptr (stack argument at entry SP+160)
+ stg %r11,0(%r8)
+#endif
+
+ // Prepare arguments for the pkfn function (first 5 using r2-r6 registers)
+
+ stg %r3,160(%r12) // 8-byte store, the int value is in the low word at +4
+ la %r2,164(%r12) // &gtid
+ stg %r4,168(%r12) // 8-byte store, the int value is in the low word at +4
+ la %r3,172(%r12) // &tid
+ lgr %r8,%r6 // argv
+
+ // If argc > 0
+ ltgr %r7,%r5 // r7 = argc, also sets the condition code
+ jz 1f
+
+ lg %r4,0(%r8) // argv[0]
+ aghi %r7,-1
+ jz 1f
+
+ // If argc > 1
+ lg %r5,8(%r8) // argv[1]
+ aghi %r7,-1
+ jz 1f
+
+ // If argc > 2
+ lg %r6,16(%r8) // argv[2]
+ aghi %r7,-1
+ jz 1f
+
+ lghi %r13,0 // Byte offset 8*n, starting at n = 0
+2:
+ lg %r0,24(%r13,%r8) // argv[3+n]
+ stg %r0,160(%r13,%r15) // stack parameter slot n
+ aghi %r13,8 // Next
+ aghi %r7,-1
+ jnz 2b
+
+1:
+ basr %r14,%r9 // Call pkfn
+
+ // Restore stack and return
+
+ lgr %r15,%r11 // r15 = stack pointer on entry
+ lmg %r6,%r14,48(%r15) // r15 is already restored, so reload r6-r14 only
+ lghi %r2,1 // return value is always 1
+ br %r14
+.Lfunc_end0:
+ .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask
+ .cfi_endproc
+
+// -- End __kmp_invoke_microtask
+
+#endif /* KMP_ARCH_S390X */
+
+#if KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32
+#ifndef KMP_PREFIX_UNDERSCORE
+# define KMP_PREFIX_UNDERSCORE(x) x // fallback when no earlier section defined it
+#endif
.data
- .comm .gomp_critical_user_,32,8
+ COMMON .gomp_critical_user_, 32, 3 // size 32, alignment exponent 3 (see the COMMON macro)
.data
.align 4
- .global __kmp_unnamed_critical_addr
-__kmp_unnamed_critical_addr:
+ .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
+KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): // 4-byte slot holding the address of .gomp_critical_user_
.4byte .gomp_critical_user_
- .size __kmp_unnamed_critical_addr,4
-#endif /* KMP_ARCH_ARM */
+#ifdef __ELF__
+ .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4 // .size is emitted for ELF objects only
+#endif
+#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 */
-#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64
+#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \
+ KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || \
+ KMP_ARCH_S390X
#ifndef KMP_PREFIX_UNDERSCORE
# define KMP_PREFIX_UNDERSCORE(x) x
#endif
.data
- .comm .gomp_critical_user_,32,8
+ COMMON .gomp_critical_user_, 32, 3
.data
.align 8
.global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr)
@@ -1751,12 +2503,17 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr):
.size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8
#endif
#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 ||
- KMP_ARCH_RISCV64 */
+ KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE ||
+ KMP_ARCH_S390X */
#if KMP_OS_LINUX
-# if KMP_ARCH_ARM
+# if KMP_ARCH_ARM || KMP_ARCH_AARCH64
.section .note.GNU-stack,"",%progbits
-# else
+# elif !KMP_ARCH_WASM
.section .note.GNU-stack,"",@progbits
# endif
#endif
+
+#if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32)
+GNU_PROPERTY_BTI_PAC // emits the .note.gnu.property entry, or nothing when neither BTI nor PAC is enabled
+#endif