diff options
author | mikhnenko <[email protected]> | 2025-07-15 20:05:43 +0300 |
---|---|---|
committer | mikhnenko <[email protected]> | 2025-07-15 20:52:16 +0300 |
commit | a40bd4f45bbc18fd95b1596e655b8942ceb2cf4b (patch) | |
tree | bce599ca02c778c277198de6d131d37db71997d0 /contrib/libs/cxxsupp/openmp/z_Linux_asm.S | |
parent | 728e0eaef4dc1f1152d2c3a4cc1bbdf597f3ef3d (diff) |
Update contrib/libs/cxxsupp/openmp to 20.1.7
commit_hash:722dd5fe79203d22ad4a0be288ac0caeb6b3dd68
Diffstat (limited to 'contrib/libs/cxxsupp/openmp/z_Linux_asm.S')
-rw-r--r-- | contrib/libs/cxxsupp/openmp/z_Linux_asm.S | 789 |
1 files changed, 773 insertions, 16 deletions
diff --git a/contrib/libs/cxxsupp/openmp/z_Linux_asm.S b/contrib/libs/cxxsupp/openmp/z_Linux_asm.S index b4a45c1ac6f..0bf9f07a13f 100644 --- a/contrib/libs/cxxsupp/openmp/z_Linux_asm.S +++ b/contrib/libs/cxxsupp/openmp/z_Linux_asm.S @@ -19,6 +19,16 @@ #if KMP_ARCH_X86 || KMP_ARCH_X86_64 +# if defined(__ELF__) && defined(__CET__) && defined(__has_include) +# if __has_include(<cet.h>) +# include <cet.h> +# endif +# endif + +# if !defined(_CET_ENDBR) +# define _CET_ENDBR +# endif + # if KMP_MIC // the 'delay r16/r32/r64' should be used instead of the 'pause'. // The delay operation has the effect of removing the current thread from @@ -66,6 +76,7 @@ ALIGN 4 .globl KMP_PREFIX_UNDERSCORE($0) KMP_PREFIX_UNDERSCORE($0): + _CET_ENDBR .endmacro # else // KMP_OS_DARWIN # define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols @@ -92,6 +103,7 @@ KMP_PREFIX_UNDERSCORE($0): .globl KMP_PREFIX_UNDERSCORE(\proc) KMP_PREFIX_UNDERSCORE(\proc): .cfi_startproc + _CET_ENDBR .endm .macro KMP_CFI_DEF_OFFSET sz .cfi_def_cfa_offset \sz @@ -108,7 +120,7 @@ KMP_PREFIX_UNDERSCORE(\proc): # endif // KMP_OS_DARWIN #endif // KMP_ARCH_X86 || KMP_ARCH_x86_64 -#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 +#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM) # if KMP_OS_DARWIN # define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols @@ -129,7 +141,25 @@ KMP_PREFIX_UNDERSCORE(\proc): .globl KMP_PREFIX_UNDERSCORE($0) KMP_PREFIX_UNDERSCORE($0): .endmacro -# else // KMP_OS_DARWIN +# elif KMP_OS_WINDOWS +# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Windows/ARM64 symbols +// Format labels so that they don't override function names in gdb's backtraces +# define KMP_LABEL(x) .L_##x // local label hidden from backtraces + +.macro ALIGN size + .align 1<<(\size) +.endm + +.macro DEBUG_INFO proc + ALIGN 2 +.endm + +.macro PROC proc + ALIGN 2 + .globl KMP_PREFIX_UNDERSCORE(\proc) 
+KMP_PREFIX_UNDERSCORE(\proc): +.endm +# else // KMP_OS_DARWIN || KMP_OS_WINDOWS # define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols // Format labels so that they don't override function names in gdb's backtraces # define KMP_LABEL(x) .L_##x // local label hidden from backtraces @@ -142,7 +172,11 @@ KMP_PREFIX_UNDERSCORE($0): .cfi_endproc // Not sure why we need .type and .size for the functions ALIGN 2 +#if KMP_ARCH_ARM + .type \proc,%function +#else .type \proc,@function +#endif .size \proc,.-\proc .endm @@ -154,7 +188,64 @@ KMP_PREFIX_UNDERSCORE(\proc): .endm # endif // KMP_OS_DARWIN -#endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 +# if KMP_OS_LINUX +// BTI and PAC gnu property note +# define NT_GNU_PROPERTY_TYPE_0 5 +# define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 +# define GNU_PROPERTY_AARCH64_FEATURE_1_BTI 1 +# define GNU_PROPERTY_AARCH64_FEATURE_1_PAC 2 + +# define GNU_PROPERTY(type, value) \ + .pushsection .note.gnu.property, "a"; \ + .p2align 3; \ + .word 4; \ + .word 16; \ + .word NT_GNU_PROPERTY_TYPE_0; \ + .asciz "GNU"; \ + .word type; \ + .word 4; \ + .word value; \ + .word 0; \ + .popsection +# endif + +# if defined(__ARM_FEATURE_BTI_DEFAULT) +# define BTI_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_BTI +# else +# define BTI_FLAG 0 +# endif +# if __ARM_FEATURE_PAC_DEFAULT & 3 +# define PAC_FLAG GNU_PROPERTY_AARCH64_FEATURE_1_PAC +# else +# define PAC_FLAG 0 +# endif + +# if (BTI_FLAG | PAC_FLAG) != 0 +# if PAC_FLAG != 0 +# define PACBTI_C hint #25 +# define PACBTI_RET hint #29 +# else +# define PACBTI_C hint #34 +# define PACBTI_RET +# endif +# define GNU_PROPERTY_BTI_PAC \ + GNU_PROPERTY(GNU_PROPERTY_AARCH64_FEATURE_1_AND, BTI_FLAG | PAC_FLAG) +# else +# define PACBTI_C +# define PACBTI_RET +# define GNU_PROPERTY_BTI_PAC +# endif +#endif // (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32 || KMP_ARCH_ARM) + +.macro COMMON name, size, align_power +#if KMP_OS_DARWIN 
+ .comm \name, \size +#elif KMP_OS_WINDOWS + .comm \name, \size, \align_power +#else // !KMP_OS_DARWIN && !KMP_OS_WINDOWS + .comm \name, \size, (1<<(\align_power)) +#endif +.endm // ----------------------------------------------------------------------- // data @@ -1118,6 +1209,9 @@ KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers. movq %rdi, %rbx // pkfn -> %rbx leaq __gtid(%rbp), %rdi // &gtid -> %rdi (store 1st parm to pkfn) leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn) + // Check if argc is 0 + cmpq $0, %rax + je KMP_LABEL(kmp_no_args) // Jump ahead movq %r8, %r11 // p_argv -> %r11 @@ -1163,6 +1257,7 @@ KMP_LABEL(kmp_1_exit): cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) #endif // KMP_MIC +KMP_LABEL(kmp_no_args): call *%rbx // call (*pkfn)(); movq $1, %rax // move 1 into return register; @@ -1204,7 +1299,7 @@ KMP_LABEL(kmp_1_exit): #endif /* KMP_ARCH_X86_64 */ // ' -#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 +#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) //------------------------------------------------------------------------ // int @@ -1260,6 +1355,7 @@ __tid = 8 // mark_begin; .text PROC __kmp_invoke_microtask + PACBTI_C stp x29, x30, [sp, #-16]! 
# if OMPT_SUPPORT @@ -1323,12 +1419,158 @@ KMP_LABEL(kmp_1): ldp x19, x20, [sp], #16 # endif ldp x29, x30, [sp], #16 + PACBTI_RET ret DEBUG_INFO __kmp_invoke_microtask // -- End __kmp_invoke_microtask -#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */ +#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) */ + +#if (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM + +//------------------------------------------------------------------------ +// int +// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), +// int gtid, int tid, +// int argc, void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)( & gtid, & tid, argv[0], ... ); +// +// // FIXME: This is done at call-site and can be removed here. +// #if OMPT_SUPPORT +// *exit_frame_ptr = 0; +// #endif +// +// return 1; +// } +// +// parameters: +// r0: pkfn +// r1: gtid +// r2: tid +// r3: argc +// r4(stack): p_argv +// r5(stack): &exit_frame +// +// locals: +// __gtid: gtid parm pushed on stack so can pass &gtid to pkfn +// __tid: tid parm pushed on stack so can pass &tid to pkfn +// +// reg temps: +// r4: used to hold pkfn address +// r5: used as temporary for number of pkfn parms +// r6: used to traverse p_argv array +// r7: frame pointer (in some configurations) +// r8: used as temporary for stack placement calculation +// and as pointer to base of callee saved area +// r9: used as temporary for stack parameters +// r10: used to preserve exit_frame_ptr, callee-save +// r11: frame pointer (in some configurations) +// +// return: r0 (always 1/TRUE) +// + +__gtid = 4 +__tid = 8 + +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + PROC __kmp_invoke_microtask + + // Pushing one extra register (r3) to keep the stack aligned + // for when we call pkfn below + push {r3-r11,lr} + // Load p_argv 
and &exit_frame + ldr r4, [sp, #10*4] +# if OMPT_SUPPORT + ldr r5, [sp, #11*4] +# endif + +# if KMP_OS_DARWIN || (defined(__thumb__) && !KMP_OS_WINDOWS) +# define FP r7 +# define FPOFF 4*4 +#else +# define FP r11 +# define FPOFF 8*4 +#endif + add FP, sp, #FPOFF +# if OMPT_SUPPORT + mov r10, r5 + str FP, [r10] +# endif + mov r8, sp + + // Calculate how much stack to allocate, in increments of 8 bytes. + // We strictly need 4*(argc-2) bytes (2 arguments are passed in + // registers) but allocate 4*argc for simplicity (to avoid needing + // to handle the argc<2 cases). We align the number of bytes + // allocated to 8 bytes, to keep the stack aligned. (Since we + // already allocate more than enough, it's ok to round down + // instead of up for the alignment.) We allocate another extra + // 8 bytes for gtid and tid. + mov r5, #1 + add r5, r5, r3, lsr #1 + sub sp, sp, r5, lsl #3 + + str r1, [r8, #-__gtid] + str r2, [r8, #-__tid] + mov r5, r3 + mov r6, r4 + mov r4, r0 + + // Prepare the first 2 parameters to pkfn - pointers to gtid and tid + // in our stack frame. + sub r0, r8, #__gtid + sub r1, r8, #__tid + + mov r8, sp + + // Load p_argv[0] and p_argv[1] into r2 and r3, if argc >= 1/2 + cmp r5, #0 + beq KMP_LABEL(kmp_1) + ldr r2, [r6] + + subs r5, r5, #1 + beq KMP_LABEL(kmp_1) + ldr r3, [r6, #4]! + + // Loop, loading the rest of p_argv and writing the elements on the + // stack. +KMP_LABEL(kmp_0): + subs r5, r5, #1 + beq KMP_LABEL(kmp_1) + ldr r12, [r6, #4]! 
+ str r12, [r8], #4 + b KMP_LABEL(kmp_0) +KMP_LABEL(kmp_1): + blx r4 + mov r0, #1 + + sub r4, FP, #FPOFF + mov sp, r4 +# undef FP +# undef FPOFF + +# if OMPT_SUPPORT + mov r1, #0 + str r1, [r10] +# endif + pop {r3-r11,pc} + + DEBUG_INFO __kmp_invoke_microtask +// -- End __kmp_invoke_microtask + +#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN || KMP_OS_WINDOWS) && KMP_ARCH_ARM */ #if KMP_ARCH_PPC64 @@ -1725,23 +1967,533 @@ __kmp_invoke_microtask: #endif /* KMP_ARCH_RISCV64 */ -#if KMP_ARCH_ARM || KMP_ARCH_MIPS +#if KMP_ARCH_LOONGARCH64 + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)(int *gtid, int *tid, ...); +// +// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, +// void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)(&gtid, &tid, argv[0], ...); +// +// return 1; +// } +// +// Parameters: +// a0: pkfn +// a1: gtid +// a2: tid +// a3: argc +// a4: p_argv +// a5: exit_frame_ptr +// +// Locals: +// __gtid: gtid param pushed on stack so can pass &gtid to pkfn +// __tid: tid param pushed on stack so can pass &tid to pkfn +// +// Temp registers: +// +// t0: used to calculate the dynamic stack size / used to hold pkfn address +// t1: used as temporary for stack placement calculation +// t2: used as temporary for stack arguments +// t3: used as temporary for number of remaining pkfn parms +// t4: used to traverse p_argv array +// +// return: a0 (always 1/TRUE) +// + +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + .globl __kmp_invoke_microtask + .p2align 2 + .type __kmp_invoke_microtask,@function +__kmp_invoke_microtask: + .cfi_startproc + + // First, save ra and fp + addi.d $sp, $sp, -16 + st.d $ra, $sp, 8 + st.d $fp, $sp, 0 + addi.d $fp, $sp, 16 + .cfi_def_cfa 22, 0 + .cfi_offset 1, -8 + .cfi_offset 22, -16 + + // Compute the dynamic stack size: + 
// + // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by + // reference + // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' + // function by register. Given that we have 8 of such registers (a[0-7]) + // and two + 'argc' arguments (consider &gtid and &tid), we need to + // reserve max(0, argc - 6)*8 extra bytes + // + // The total number of bytes is then max(0, argc - 6)*8 + 8 + + addi.d $t0, $a3, -6 + slt $t1, $t0, $zero + masknez $t0, $t0, $t1 + addi.d $t0, $t0, 1 + slli.d $t0, $t0, 3 + sub.d $sp, $sp, $t0 + + // Align the stack to 16 bytes + bstrins.d $sp, $zero, 3, 0 + + move $t0, $a0 + move $t3, $a3 + move $t4, $a4 + +#if OMPT_SUPPORT + // Save frame pointer into exit_frame + st.d $fp, $a5, 0 +#endif + + // Prepare arguments for the pkfn function (first 8 using a0-a7 registers) + + st.w $a1, $fp, -20 + st.w $a2, $fp, -24 + + addi.d $a0, $fp, -20 + addi.d $a1, $fp, -24 + + beqz $t3, .L_kmp_3 + ld.d $a2, $t4, 0 + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a3, $t4, 8 + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a4, $t4, 16 + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a5, $t4, 24 + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a6, $t4, 32 + + addi.d $t3, $t3, -1 + beqz $t3, .L_kmp_3 + ld.d $a7, $t4, 40 + + // Prepare any additional argument passed through the stack + addi.d $t4, $t4, 48 + move $t1, $sp + b .L_kmp_2 +.L_kmp_1: + ld.d $t2, $t4, 0 + st.d $t2, $t1, 0 + addi.d $t4, $t4, 8 + addi.d $t1, $t1, 8 +.L_kmp_2: + addi.d $t3, $t3, -1 + bnez $t3, .L_kmp_1 + +.L_kmp_3: + // Call pkfn function + jirl $ra, $t0, 0 + + // Restore stack and return + + addi.d $a0, $zero, 1 + + addi.d $sp, $fp, -16 + ld.d $fp, $sp, 0 + ld.d $ra, $sp, 8 + addi.d $sp, $sp, 16 + jr $ra +.Lfunc_end0: + .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask + .cfi_endproc + +// -- End __kmp_invoke_microtask + +#endif /* KMP_ARCH_LOONGARCH64 */ + +#if KMP_ARCH_VE + 
+//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)(int *gtid, int *tid, ...); +// +// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, +// void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)(&gtid, &tid, argv[0], ...); +// +// return 1; +// } +// +// Parameters: +// s0: pkfn +// s1: gtid +// s2: tid +// s3: argc +// s4: p_argv +// s5: exit_frame_ptr +// +// Locals: +// __gtid: gtid param pushed on stack so can pass &gtid to pkfn +// __tid: tid param pushed on stack so can pass &tid to pkfn +// +// Temp. registers: +// +// s34: used to calculate the dynamic stack size +// s35: used as temporary for stack placement calculation +// s36: used as temporary for stack arguments +// s37: used as temporary for number of remaining pkfn parms +// s38: used to traverse p_argv array +// +// return: s0 (always 1/TRUE) +// + +__gtid = -4 +__tid = -8 + +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + .globl __kmp_invoke_microtask + // A function requires 8 bytes align. + .p2align 3 + .type __kmp_invoke_microtask,@function +__kmp_invoke_microtask: + .cfi_startproc + + // First, save fp and lr. VE stores them at caller stack frame. + st %fp, 0(, %sp) + st %lr, 8(, %sp) + or %fp, 0, %sp + .cfi_def_cfa %fp, 0 + .cfi_offset %lr, 8 + .cfi_offset %fp, 0 + + // Compute the dynamic stack size: + // + // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them + // by reference + // - We need 8 bytes for whole arguments. We have two + 'argc' + // arguments (consider &gtid and &tid). We need to reserve + // (argc + 2) * 8 bytes. + // - We need 176 bytes for RSA and others + // + // The total number of bytes is then (argc + 2) * 8 + 8 + 176. 
+ // + // |------------------------------| + // | return address of callee | 8(%fp) + // |------------------------------| + // | frame pointer of callee | 0(%fp) + // |------------------------------| <------------------ %fp + // | __tid / __gtid | -8(%fp) / -4(%fp) + // |------------------------------| + // | argc+2 for arguments | 176(%sp) + // |------------------------------| + // | RSA | + // |------------------------------| + // | return address | + // |------------------------------| + // | frame pointer | + // |------------------------------| <------------------ %sp + + adds.w.sx %s34, 2, %s3 + sll %s34, %s34, 3 + lea %s34, 184(, %s34) + subs.l %sp, %sp, %s34 + + // Align the stack to 16 bytes. + and %sp, -16, %sp + + // Save pkfn. + or %s12, 0, %s0 + + // Call host to allocate stack if it is necessary. + brge.l %sp, %sl, .L_kmp_pass + ld %s61, 24(, %tp) + lea %s63, 0x13b + shm.l %s63, 0(%s61) + shm.l %sl, 8(%s61) + shm.l %sp, 16(%s61) + monc + +.L_kmp_pass: + lea %s35, 176(, %sp) + adds.w.sx %s37, 0, %s3 + or %s38, 0, %s4 + +#if OMPT_SUPPORT + // Save frame pointer into exit_frame. + st %fp, 0(%s5) +#endif + + // Prepare arguments for the pkfn function (first 8 using s0-s7 + // registers, but need to store stack also because of varargs). + + stl %s1, __gtid(%fp) + stl %s2, __tid(%fp) + + adds.l %s0, __gtid, %fp + st %s0, 0(, %s35) + adds.l %s1, __tid, %fp + st %s1, 8(, %s35) + + breq.l 0, %s37, .L_kmp_call + ld %s2, 0(, %s38) + st %s2, 16(, %s35) + + breq.l 1, %s37, .L_kmp_call + ld %s3, 8(, %s38) + st %s3, 24(, %s35) + + breq.l 2, %s37, .L_kmp_call + ld %s4, 16(, %s38) + st %s4, 32(, %s35) + + breq.l 3, %s37, .L_kmp_call + ld %s5, 24(, %s38) + st %s5, 40(, %s35) + + breq.l 4, %s37, .L_kmp_call + ld %s6, 32(, %s38) + st %s6, 48(, %s35) + + breq.l 5, %s37, .L_kmp_call + ld %s7, 40(, %s38) + st %s7, 56(, %s35) + + breq.l 6, %s37, .L_kmp_call + + // Prepare any additional argument passed through the stack. 
+ adds.l %s37, -6, %s37 + lea %s38, 48(, %s38) + lea %s35, 64(, %s35) +.L_kmp_loop: + ld %s36, 0(, %s38) + st %s36, 0(, %s35) + adds.l %s37, -1, %s37 + adds.l %s38, 8, %s38 + adds.l %s35, 8, %s35 + brne.l 0, %s37, .L_kmp_loop + +.L_kmp_call: + // Call pkfn function. + bsic %lr, (, %s12) + + // Return value. + lea %s0, 1 + + // Restore stack and return. + or %sp, 0, %fp + ld %lr, 8(, %sp) + ld %fp, 0(, %sp) + b.l.t (, %lr) +.Lfunc_end0: + .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask + .cfi_endproc + +// -- End __kmp_invoke_microtask + +#endif /* KMP_ARCH_VE */ + +#if KMP_ARCH_S390X + +//------------------------------------------------------------------------ +// +// typedef void (*microtask_t)(int *gtid, int *tid, ...); +// +// int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, +// void *p_argv[] +// #if OMPT_SUPPORT +// , +// void **exit_frame_ptr +// #endif +// ) { +// #if OMPT_SUPPORT +// *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0); +// #endif +// +// (*pkfn)(&gtid, &tid, argv[0], ...); +// +// return 1; +// } +// +// Parameters: +// r2: pkfn +// r3: gtid +// r4: tid +// r5: argc +// r6: p_argv +// SP+160: exit_frame_ptr +// +// Locals: +// __gtid: gtid param pushed on stack so can pass &gtid to pkfn +// __tid: tid param pushed on stack so can pass &tid to pkfn +// +// Temp. 
registers: +// +// r0: used to fetch argv slots +// r7: used as temporary for number of remaining pkfn parms +// r8: argv +// r9: pkfn +// r10: stack size +// r11: previous fp +// r12: stack parameter area +// r13: argv slot +// +// return: r2 (always 1/TRUE) +// + +// -- Begin __kmp_invoke_microtask +// mark_begin; + .text + .globl __kmp_invoke_microtask + .p2align 1 + .type __kmp_invoke_microtask,@function +__kmp_invoke_microtask: + .cfi_startproc + + stmg %r6,%r14,48(%r15) + .cfi_offset %r6, -112 + .cfi_offset %r7, -104 + .cfi_offset %r8, -96 + .cfi_offset %r9, -88 + .cfi_offset %r10, -80 + .cfi_offset %r11, -72 + .cfi_offset %r12, -64 + .cfi_offset %r13, -56 + .cfi_offset %r14, -48 + .cfi_offset %r15, -40 + lgr %r11,%r15 + .cfi_def_cfa %r11, 160 + + // Compute the dynamic stack size: + // + // - We need 8 bytes for storing 'gtid' and 'tid', so we can pass them by + // reference + // - We need 8 bytes for each argument that cannot be passed to the 'pkfn' + // function by register. Given that we have 5 of such registers (r[2-6]) + // and two + 'argc' arguments (consider &gtid and &tid), we need to + // reserve max(0, argc - 3)*8 extra bytes + // + // The total number of bytes is then max(0, argc - 3)*8 + 8 + + lgr %r10,%r5 + aghi %r10,-2 + jnm 0f + lghi %r10,0 +0: + sllg %r10,%r10,3 + lgr %r12,%r10 + aghi %r10,176 + sgr %r15,%r10 + agr %r12,%r15 + stg %r11,0(%r15) + + lgr %r9,%r2 // pkfn + +#if OMPT_SUPPORT + // Save frame pointer into exit_frame + lg %r8,160(%r11) + stg %r11,0(%r8) +#endif + + // Prepare arguments for the pkfn function (first 5 using r2-r6 registers) + + stg %r3,160(%r12) + la %r2,164(%r12) // gid + stg %r4,168(%r12) + la %r3,172(%r12) // tid + lgr %r8,%r6 // argv + + // If argc > 0 + ltgr %r7,%r5 + jz 1f + + lg %r4,0(%r8) // argv[0] + aghi %r7,-1 + jz 1f + + // If argc > 1 + lg %r5,8(%r8) // argv[1] + aghi %r7,-1 + jz 1f + + // If argc > 2 + lg %r6,16(%r8) // argv[2] + aghi %r7,-1 + jz 1f + + lghi %r13,0 // Index [n] +2: + lg %r0,24(%r13,%r8) // 
argv[2+n] + stg %r0,160(%r13,%r15) // parm[2+n] + aghi %r13,8 // Next + aghi %r7,-1 + jnz 2b + +1: + basr %r14,%r9 // Call pkfn + + // Restore stack and return + + lgr %r15,%r11 + lmg %r6,%r14,48(%r15) + lghi %r2,1 + br %r14 +.Lfunc_end0: + .size __kmp_invoke_microtask, .Lfunc_end0-__kmp_invoke_microtask + .cfi_endproc + +// -- End __kmp_invoke_microtask + +#endif /* KMP_ARCH_S390X */ + +#if KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 +#ifndef KMP_PREFIX_UNDERSCORE +# define KMP_PREFIX_UNDERSCORE(x) x +#endif .data - .comm .gomp_critical_user_,32,8 + COMMON .gomp_critical_user_, 32, 3 .data .align 4 - .global __kmp_unnamed_critical_addr -__kmp_unnamed_critical_addr: + .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr) +KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): .4byte .gomp_critical_user_ - .size __kmp_unnamed_critical_addr,4 -#endif /* KMP_ARCH_ARM */ +#ifdef __ELF__ + .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),4 +#endif +#endif /* KMP_ARCH_ARM || KMP_ARCH_MIPS || KMP_ARCH_AARCH64_32 */ -#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || KMP_ARCH_RISCV64 +#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || \ + KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || \ + KMP_ARCH_S390X #ifndef KMP_PREFIX_UNDERSCORE # define KMP_PREFIX_UNDERSCORE(x) x #endif .data - .comm .gomp_critical_user_,32,8 + COMMON .gomp_critical_user_, 32, 3 .data .align 8 .global KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr) @@ -1751,12 +2503,17 @@ KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr): .size KMP_PREFIX_UNDERSCORE(__kmp_unnamed_critical_addr),8 #endif #endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 || - KMP_ARCH_RISCV64 */ + KMP_ARCH_RISCV64 || KMP_ARCH_LOONGARCH64 || KMP_ARCH_VE || + KMP_ARCH_S390X */ #if KMP_OS_LINUX -# if KMP_ARCH_ARM +# if KMP_ARCH_ARM || KMP_ARCH_AARCH64 .section .note.GNU-stack,"",%progbits -# else +# elif !KMP_ARCH_WASM .section .note.GNU-stack,"",@progbits # endif #endif 
+ +#if KMP_OS_LINUX && (KMP_ARCH_AARCH64 || KMP_ARCH_AARCH64_32) +GNU_PROPERTY_BTI_PAC +#endif |