author    | robot-contrib <robot-contrib@yandex-team.com> | 2024-12-02 20:23:41 +0300
committer | robot-contrib <robot-contrib@yandex-team.com> | 2024-12-02 20:47:52 +0300
commit    | 81518d12414d1a5f8f1e3d3e13d884306230609b (patch)
tree      | 3eea40f5490048f98fcca4f5b22e1597d2364f54
parent    | c6bd6398f1bec61405c83f91872481e3b5e33510 (diff)
download  | ydb-81518d12414d1a5f8f1e3d3e13d884306230609b.tar.gz
Update contrib/libs/cxxsupp/builtins to 19.1.3
commit_hash:4898490dcc35775adf8be6d67c2ca83001fb5311
34 files changed, 1167 insertions, 363 deletions
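[Editor's note] For orientation: the cpu_model changes in this diff feed AArch64 function multi-versioning (FMV). The compiler emits several variants of a function, and an ifunc resolver picks one at load time from the __aarch64_cpu_features bitmask that the files below initialize. A minimal consumer-side sketch, assuming a recent clang targeting AArch64 Linux — illustrative only, not part of this commit:

    // Illustrative only: the consumer side of the feature detection this
    // commit updates. Each target_version variant is registered with an
    // ifunc resolver that consults __aarch64_cpu_features at load time.
    __attribute__((target_version("sve2"))) int dot(void) { return 2; }
    __attribute__((target_version("default"))) int dot(void) { return 1; }

    int main(void) { return dot(); } // 2 on SVE2 hardware, otherwise 1
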
diff --git a/build/sysincl/darwin.yml b/build/sysincl/darwin.yml index 06eaa50480f..d583225dfad 100644 --- a/build/sysincl/darwin.yml +++ b/build/sysincl/darwin.yml @@ -37,6 +37,7 @@ - MacTypes.h - TargetConditionals.h - architecture/byte_order.h + - arm/cpu_capabilities_public.h - asl.h - copyfile.h - crt_externs.h diff --git a/contrib/libs/cxxsupp/builtins/.yandex_meta/build.ym b/contrib/libs/cxxsupp/builtins/.yandex_meta/build.ym index 20967507889..e454ac162ac 100644 --- a/contrib/libs/cxxsupp/builtins/.yandex_meta/build.ym +++ b/contrib/libs/cxxsupp/builtins/.yandex_meta/build.ym @@ -1,6 +1,6 @@ {% extends '//builtin/bag.ym' %} -{% block current_version %}18.1.8{% endblock %} +{% block current_version %}19.1.3{% endblock %} {% block current_url %} https://github.com/llvm/llvm-project/releases/download/llvmorg-{{self.version().strip()}}/compiler-rt-{{self.version().strip()}}.src.tar.xz diff --git a/contrib/libs/cxxsupp/builtins/.yandex_meta/devtools.licenses.report b/contrib/libs/cxxsupp/builtins/.yandex_meta/devtools.licenses.report index ce8fb1f4bb7..874c592edd9 100644 --- a/contrib/libs/cxxsupp/builtins/.yandex_meta/devtools.licenses.report +++ b/contrib/libs/cxxsupp/builtins/.yandex_meta/devtools.licenses.report @@ -103,7 +103,9 @@ BELONGS ya.make aarch64/chkstk.S [1:2] aarch64/fp_mode.c [3:4] aarch64/sme-abi-init.c [1:2] + aarch64/sme-abi-vg.c [1:2] aarch64/sme-abi.S [1:2] + aarch64/sme-libc-mem-routines.S [1:2] absvdi2.c [3:4] absvsi2.c [3:4] absvti2.c [3:4] @@ -237,7 +239,9 @@ BELONGS ya.make comparedf2.c [3:4] comparesf2.c [3:4] comparetf2.c [3:4] + cpu_model/AArch64CPUFeatures.inc [3:4] cpu_model/aarch64.c [3:4] + cpu_model/aarch64.h [3:4] cpu_model/cpu_model.h [3:4] cpu_model/x86.c [3:4] crtbegin.c [3:4] @@ -261,6 +265,7 @@ BELONGS ya.make emutls.c [3:4] enable_execute_stack.c [3:4] eprintf.c [3:4] + extendbfsf2.c [3:4] extenddftf2.c [3:4] extendhfsf2.c [3:4] extendsfdf2.c [3:4] @@ -486,7 +491,9 @@ BELONGS ya.make aarch64/chkstk.S [1:2] aarch64/fp_mode.c [3:4] aarch64/sme-abi-init.c [1:2] + aarch64/sme-abi-vg.c [1:2] aarch64/sme-abi.S [1:2] + aarch64/sme-libc-mem-routines.S [1:2] absvdi2.c [3:4] absvsi2.c [3:4] absvti2.c [3:4] @@ -620,7 +627,9 @@ BELONGS ya.make comparedf2.c [3:4] comparesf2.c [3:4] comparetf2.c [3:4] + cpu_model/AArch64CPUFeatures.inc [3:4] cpu_model/aarch64.c [3:4] + cpu_model/aarch64.h [3:4] cpu_model/cpu_model.h [3:4] cpu_model/x86.c [3:4] crtbegin.c [3:4] @@ -644,6 +653,7 @@ BELONGS ya.make emutls.c [3:4] enable_execute_stack.c [3:4] eprintf.c [3:4] + extendbfsf2.c [3:4] extenddftf2.c [3:4] extendhfsf2.c [3:4] extendsfdf2.c [3:4] @@ -927,7 +937,9 @@ BELONGS ya.make aarch64/chkstk.S [3:3] aarch64/fp_mode.c [5:5] aarch64/sme-abi-init.c [3:3] + aarch64/sme-abi-vg.c [3:3] aarch64/sme-abi.S [3:3] + aarch64/sme-libc-mem-routines.S [3:3] absvdi2.c [5:5] absvsi2.c [5:5] absvti2.c [5:5] @@ -1061,7 +1073,9 @@ BELONGS ya.make comparedf2.c [5:5] comparesf2.c [5:5] comparetf2.c [5:5] + cpu_model/AArch64CPUFeatures.inc [5:5] cpu_model/aarch64.c [5:5] + cpu_model/aarch64.h [5:5] cpu_model/cpu_model.h [5:5] cpu_model/x86.c [5:5] crtbegin.c [5:5] @@ -1085,6 +1099,7 @@ BELONGS ya.make emutls.c [5:5] enable_execute_stack.c [5:5] eprintf.c [5:5] + extendbfsf2.c [5:5] extenddftf2.c [5:5] extendhfsf2.c [5:5] extendsfdf2.c [5:5] @@ -1310,7 +1325,9 @@ BELONGS ya.make aarch64/chkstk.S [3:3] aarch64/fp_mode.c [5:5] aarch64/sme-abi-init.c [3:3] + aarch64/sme-abi-vg.c [3:3] aarch64/sme-abi.S [3:3] + aarch64/sme-libc-mem-routines.S [3:3] absvdi2.c [5:5] absvsi2.c [5:5] absvti2.c 
[5:5] @@ -1444,7 +1461,9 @@ BELONGS ya.make comparedf2.c [5:5] comparesf2.c [5:5] comparetf2.c [5:5] + cpu_model/AArch64CPUFeatures.inc [5:5] cpu_model/aarch64.c [5:5] + cpu_model/aarch64.h [5:5] cpu_model/cpu_model.h [5:5] cpu_model/x86.c [5:5] crtbegin.c [5:5] @@ -1468,6 +1487,7 @@ BELONGS ya.make emutls.c [5:5] enable_execute_stack.c [5:5] eprintf.c [5:5] + extendbfsf2.c [5:5] extenddftf2.c [5:5] extendhfsf2.c [5:5] extendsfdf2.c [5:5] diff --git a/contrib/libs/cxxsupp/builtins/CODE_OWNERS.TXT b/contrib/libs/cxxsupp/builtins/CODE_OWNERS.TXT index ad136edf967..bd51a1073cc 100644 --- a/contrib/libs/cxxsupp/builtins/CODE_OWNERS.TXT +++ b/contrib/libs/cxxsupp/builtins/CODE_OWNERS.TXT @@ -67,3 +67,11 @@ D: ThreadSanitizer N: Bill Wendling E: isanbard@gmail.com D: Profile runtime library + +N: Christopher Apple, David Trevelyan +E: cja-private@pm.me, realtime.sanitizer@gmail.com +D: Realtime Sanitizer (RTSan) + +N: Alexander Shaposhnikov +E: alexander.v.shaposhnikov@gmail.com +D: Numerical Sanitizer (NSAN) diff --git a/contrib/libs/cxxsupp/builtins/README.txt b/contrib/libs/cxxsupp/builtins/README.txt index 2d213d95f33..19f26c92a0f 100644 --- a/contrib/libs/cxxsupp/builtins/README.txt +++ b/contrib/libs/cxxsupp/builtins/README.txt @@ -272,6 +272,11 @@ switch32 switch8 switchu8 +// This function generates a custom trampoline function with the specific +// realFunc and localsPtr values. +void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, + const void* realFunc, void* localsPtr); + // There is no C interface to the *_vfp_d8_d15_regs functions. There are // called in the prolog and epilog of Thumb1 functions. When the C++ ABI use // SJLJ for exceptions, each function with a catch clause or destructors needs diff --git a/contrib/libs/cxxsupp/builtins/aarch64/sme-abi-vg.c b/contrib/libs/cxxsupp/builtins/aarch64/sme-abi-vg.c new file mode 100644 index 00000000000..20061012e16 --- /dev/null +++ b/contrib/libs/cxxsupp/builtins/aarch64/sme-abi-vg.c @@ -0,0 +1,21 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "../cpu_model/aarch64.h" + +struct FEATURES { + unsigned long long features; +}; + +extern struct FEATURES __aarch64_cpu_features; + +#if __GNUC__ >= 9 +#pragma GCC diagnostic ignored "-Wprio-ctor-dtor" +#endif +__attribute__((constructor(90))) static void get_aarch64_cpu_features(void) { + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) + return; + + __init_cpu_features(); +} diff --git a/contrib/libs/cxxsupp/builtins/aarch64/sme-abi.S b/contrib/libs/cxxsupp/builtins/aarch64/sme-abi.S index d470ecaf7aa..cd8153f6067 100644 --- a/contrib/libs/cxxsupp/builtins/aarch64/sme-abi.S +++ b/contrib/libs/cxxsupp/builtins/aarch64/sme-abi.S @@ -12,11 +12,15 @@ #if !defined(__APPLE__) #define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) #define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0) +#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features) +#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features) #else // MachO requires @page/@pageoff directives because the global is defined // in a different file. Otherwise this file may fail to build. 
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page #define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff +#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page +#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff #endif .arch armv9-a+sme @@ -26,9 +30,10 @@ // abort(). Note that there is no need to preserve any state before the call, // because the function does not return. DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort) -.cfi_startproc - .variant_pcs SYMBOL_NAME(do_abort) - stp x29, x30, [sp, #-32]! + .cfi_startproc + .variant_pcs SYMBOL_NAME(do_abort) + BTI_C + stp x29, x30, [sp, #-32]! cntd x0 // Store VG to a stack location that we describe with .cfi_offset str x0, [sp, #16] @@ -36,22 +41,23 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort) .cfi_offset w30, -24 .cfi_offset w29, -32 .cfi_offset 46, -16 - bl __arm_sme_state - tbz x0, #0, 2f + bl __arm_sme_state + tbz x0, #0, 2f 1: - smstop sm + smstop sm 2: // We can't make this into a tail-call because the unwinder would // need to restore the value of VG. - bl SYMBOL_NAME(abort) -.cfi_endproc + bl SYMBOL_NAME(abort) + .cfi_endproc END_COMPILERRT_FUNCTION(do_abort) // __arm_sme_state fills the result registers based on a local // that is set as part of the compiler-rt startup code. // __aarch64_has_sme_and_tpidr2_el0 DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) - .variant_pcs __arm_sme_state + .variant_pcs __arm_sme_state + BTI_C mov x0, xzr mov x1, xzr @@ -68,7 +74,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state) END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) - .variant_pcs __arm_tpidr2_restore + .variant_pcs __arm_tpidr2_restore + BTI_C // If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific // manner. mrs x14, TPIDR2_EL0 @@ -103,7 +110,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore) END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) - .variant_pcs __arm_tpidr2_restore + .variant_pcs __arm_tpidr2_restore + BTI_C // If the current thread does not have access to TPIDR2_EL0, the subroutine // does nothing. adrp x14, TPIDR2_SYMBOL @@ -143,7 +151,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save) END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save) DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) - .variant_pcs __arm_tpidr2_restore + .variant_pcs __arm_tpidr2_restore + BTI_C // If the current thread does not have access to SME, the subroutine does // nothing. adrp x14, TPIDR2_SYMBOL @@ -174,3 +183,48 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable) 0: ret END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable) + +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg) + .variant_pcs __arm_get_current_vg + BTI_C + + stp x29, x30, [sp, #-16]! 
+ .cfi_def_cfa_offset 16 + mov x29, sp + .cfi_def_cfa w29, 16 + .cfi_offset w30, -8 + .cfi_offset w29, -16 + adrp x17, CPU_FEATS_SYMBOL + ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET] + tbnz w17, #30, 0f + adrp x16, TPIDR2_SYMBOL + ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET] + cbz w16, 1f +0: + mov x18, x1 + bl __arm_sme_state + mov x1, x18 + and x17, x17, #0x40000000 + bfxil x17, x0, #0, #1 + cbz x17, 1f + cntd x0 + .cfi_def_cfa wsp, 16 + ldp x29, x30, [sp], #16 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + ret +1: + mov x0, xzr + .cfi_def_cfa wsp, 16 + ldp x29, x30, [sp], #16 + .cfi_def_cfa_offset 0 + .cfi_restore w30 + .cfi_restore w29 + ret +END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg) + +NO_EXEC_STACK_DIRECTIVE + +// GNU property note for BTI and PAC +GNU_PROPERTY_BTI_PAC diff --git a/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-mem-routines.S b/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-mem-routines.S new file mode 100644 index 00000000000..0318d9a6f1e --- /dev/null +++ b/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-mem-routines.S @@ -0,0 +1,352 @@ +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +// Routines taken from libc/AOR_v20.02/string/aarch64 + +#include "../assembly.h" + +#ifdef __aarch64__ + +#define L(l) .L ## l + +// +// __arm_sc_memcpy / __arm_sc_memmove +// + +#define dstin x0 +#define src x1 +#define count x2 +#define dst x3 +#define srcend1 x4 +#define dstend1 x5 +#define A_l x6 +#define A_lw w6 +#define A_h x7 +#define B_l x8 +#define B_lw w8 +#define B_h x9 +#define C_l x10 +#define C_lw w10 +#define C_h x11 +#define D_l x12 +#define D_h x13 +#define E_l x14 +#define E_h x15 +#define F_l x16 +#define F_h x17 +#define G_l count +#define G_h dst +#define H_l src +#define H_h srcend1 +#define tmp1 x14 + +/* This implementation handles overlaps and supports both memcpy and memmove + from a single entry point. It uses unaligned accesses and branchless + sequences to keep the code small, simple and improve performance. + + Copies are split into 3 main cases: small copies of up to 32 bytes, medium + copies of up to 128 bytes, and large copies. The overhead of the overlap + check is negligible since it is only required for large copies. + + Large copies use a software pipelined loop processing 64 bytes per iteration. + The destination pointer is 16-byte aligned to minimize unaligned accesses. + The loop tail is handled by always copying 64 bytes from the end. +*/ + +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memcpy) + add srcend1, src, count + add dstend1, dstin, count + cmp count, 128 + b.hi L(copy_long) + cmp count, 32 + b.hi L(copy32_128) + + /* Small copies: 0..32 bytes. */ + cmp count, 16 + b.lo L(copy16) + ldp A_l, A_h, [src] + ldp D_l, D_h, [srcend1, -16] + stp A_l, A_h, [dstin] + stp D_l, D_h, [dstend1, -16] + ret + + /* Copy 8-15 bytes. */ +L(copy16): + tbz count, 3, L(copy8) + ldr A_l, [src] + ldr A_h, [srcend1, -8] + str A_l, [dstin] + str A_h, [dstend1, -8] + ret + + .p2align 3 + /* Copy 4-7 bytes. */ +L(copy8): + tbz count, 2, L(copy4) + ldr A_lw, [src] + ldr B_lw, [srcend1, -4] + str A_lw, [dstin] + str B_lw, [dstend1, -4] + ret + + /* Copy 0..3 bytes using a branchless sequence. 
*/ +L(copy4): + cbz count, L(copy0) + lsr tmp1, count, 1 + ldrb A_lw, [src] + ldrb C_lw, [srcend1, -1] + ldrb B_lw, [src, tmp1] + strb A_lw, [dstin] + strb B_lw, [dstin, tmp1] + strb C_lw, [dstend1, -1] +L(copy0): + ret + + .p2align 4 + /* Medium copies: 33..128 bytes. */ +L(copy32_128): + ldp A_l, A_h, [src] + ldp B_l, B_h, [src, 16] + ldp C_l, C_h, [srcend1, -32] + ldp D_l, D_h, [srcend1, -16] + cmp count, 64 + b.hi L(copy128) + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstend1, -32] + stp D_l, D_h, [dstend1, -16] + ret + + .p2align 4 + /* Copy 65..128 bytes. */ +L(copy128): + ldp E_l, E_h, [src, 32] + ldp F_l, F_h, [src, 48] + cmp count, 96 + b.ls L(copy96) + ldp G_l, G_h, [srcend1, -64] + ldp H_l, H_h, [srcend1, -48] + stp G_l, G_h, [dstend1, -64] + stp H_l, H_h, [dstend1, -48] +L(copy96): + stp A_l, A_h, [dstin] + stp B_l, B_h, [dstin, 16] + stp E_l, E_h, [dstin, 32] + stp F_l, F_h, [dstin, 48] + stp C_l, C_h, [dstend1, -32] + stp D_l, D_h, [dstend1, -16] + ret + + .p2align 4 + /* Copy more than 128 bytes. */ +L(copy_long): + /* Use backwards copy if there is an overlap. */ + sub tmp1, dstin, src + cbz tmp1, L(copy0) + cmp tmp1, count + b.lo L(copy_long_backwards) + + /* Copy 16 bytes and then align dst to 16-byte alignment. */ + + ldp D_l, D_h, [src] + and tmp1, dstin, 15 + bic dst, dstin, 15 + sub src, src, tmp1 + add count, count, tmp1 /* Count is now 16 too large. */ + ldp A_l, A_h, [src, 16] + stp D_l, D_h, [dstin] + ldp B_l, B_h, [src, 32] + ldp C_l, C_h, [src, 48] + ldp D_l, D_h, [src, 64]! + subs count, count, 128 + 16 /* Test and readjust count. */ + b.ls L(copy64_from_end) +L(loop64): + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [src, 16] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [src, 32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [src, 48] + stp D_l, D_h, [dst, 64]! + ldp D_l, D_h, [src, 64]! + subs count, count, 64 + b.hi L(loop64) + + /* Write the last iteration and copy 64 bytes from the end. */ +L(copy64_from_end): + ldp E_l, E_h, [srcend1, -64] + stp A_l, A_h, [dst, 16] + ldp A_l, A_h, [srcend1, -48] + stp B_l, B_h, [dst, 32] + ldp B_l, B_h, [srcend1, -32] + stp C_l, C_h, [dst, 48] + ldp C_l, C_h, [srcend1, -16] + stp D_l, D_h, [dst, 64] + stp E_l, E_h, [dstend1, -64] + stp A_l, A_h, [dstend1, -48] + stp B_l, B_h, [dstend1, -32] + stp C_l, C_h, [dstend1, -16] + ret + + .p2align 4 + + /* Large backwards copy for overlapping copies. + Copy 16 bytes and then align dst to 16-byte alignment. */ +L(copy_long_backwards): + ldp D_l, D_h, [srcend1, -16] + and tmp1, dstend1, 15 + sub srcend1, srcend1, tmp1 + sub count, count, tmp1 + ldp A_l, A_h, [srcend1, -16] + stp D_l, D_h, [dstend1, -16] + ldp B_l, B_h, [srcend1, -32] + ldp C_l, C_h, [srcend1, -48] + ldp D_l, D_h, [srcend1, -64]! + sub dstend1, dstend1, tmp1 + subs count, count, 128 + b.ls L(copy64_from_start) + +L(loop64_backwards): + stp A_l, A_h, [dstend1, -16] + ldp A_l, A_h, [srcend1, -16] + stp B_l, B_h, [dstend1, -32] + ldp B_l, B_h, [srcend1, -32] + stp C_l, C_h, [dstend1, -48] + ldp C_l, C_h, [srcend1, -48] + stp D_l, D_h, [dstend1, -64]! + ldp D_l, D_h, [srcend1, -64]! + subs count, count, 64 + b.hi L(loop64_backwards) + + /* Write the last iteration and copy 64 bytes from the start. 
*/ +L(copy64_from_start): + ldp G_l, G_h, [src, 48] + stp A_l, A_h, [dstend1, -16] + ldp A_l, A_h, [src, 32] + stp B_l, B_h, [dstend1, -32] + ldp B_l, B_h, [src, 16] + stp C_l, C_h, [dstend1, -48] + ldp C_l, C_h, [src] + stp D_l, D_h, [dstend1, -64] + stp G_l, G_h, [dstin, 48] + stp A_l, A_h, [dstin, 32] + stp B_l, B_h, [dstin, 16] + stp C_l, C_h, [dstin] + ret +END_COMPILERRT_OUTLINE_FUNCTION(__arm_sc_memcpy) + +DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy) + + +// +// __arm_sc_memset +// + +#define dstin x0 +#define val x1 +#define valw w1 +#define count x2 +#define dst x3 +#define dstend2 x4 +#define zva_val x5 + +DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset) +#ifdef __ARM_FEATURE_SVE + mov z0.b, valw +#else + bfi valw, valw, #8, #8 + bfi valw, valw, #16, #16 + bfi val, val, #32, #32 + fmov d0, val + fmov v0.d[1], val +#endif + add dstend2, dstin, count + + cmp count, 96 + b.hi L(set_long) + cmp count, 16 + b.hs L(set_medium) + mov val, v0.D[0] + + /* Set 0..15 bytes. */ + tbz count, 3, 1f + str val, [dstin] + str val, [dstend2, -8] + ret + nop +1: tbz count, 2, 2f + str valw, [dstin] + str valw, [dstend2, -4] + ret +2: cbz count, 3f + strb valw, [dstin] + tbz count, 1, 3f + strh valw, [dstend2, -2] +3: ret + + /* Set 17..96 bytes. */ +L(set_medium): + str q0, [dstin] + tbnz count, 6, L(set96) + str q0, [dstend2, -16] + tbz count, 5, 1f + str q0, [dstin, 16] + str q0, [dstend2, -32] +1: ret + + .p2align 4 + /* Set 64..96 bytes. Write 64 bytes from the start and + 32 bytes from the end. */ +L(set96): + str q0, [dstin, 16] + stp q0, q0, [dstin, 32] + stp q0, q0, [dstend2, -32] + ret + + .p2align 4 +L(set_long): + and valw, valw, 255 + bic dst, dstin, 15 + str q0, [dstin] + cmp count, 160 + ccmp valw, 0, 0, hs + b.ne L(no_zva) + +#ifndef SKIP_ZVA_CHECK + mrs zva_val, dczid_el0 + and zva_val, zva_val, 31 + cmp zva_val, 4 /* ZVA size is 64 bytes. */ + b.ne L(no_zva) +#endif + str q0, [dst, 16] + stp q0, q0, [dst, 32] + bic dst, dst, 63 + sub count, dstend2, dst /* Count is now 64 too large. */ + sub count, count, 128 /* Adjust count and bias for loop. */ + + .p2align 4 +L(zva_loop): + add dst, dst, 64 + dc zva, dst + subs count, count, 64 + b.hi L(zva_loop) + stp q0, q0, [dstend2, -64] + stp q0, q0, [dstend2, -32] + ret + +L(no_zva): + sub count, dstend2, dst /* Count is 16 too large. */ + sub dst, dst, 16 /* Dst is biased by -32. */ + sub count, count, 64 + 16 /* Adjust count and bias for loop. */ +L(no_zva_loop): + stp q0, q0, [dst, 32] + stp q0, q0, [dst, 64]! + subs count, count, 64 + b.hi L(no_zva_loop) + stp q0, q0, [dstend2, -64] + stp q0, q0, [dstend2, -32] + ret +END_COMPILERRT_OUTLINE_FUNCTION(__arm_sc_memset) + +#endif // __aarch64__ diff --git a/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-routines.c b/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-routines.c index cd73025a19c..315490e73ea 100644 --- a/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-routines.c +++ b/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-routines.c @@ -1,79 +1,4 @@ -#include <stdlib.h> - -// WARNING: When building the scalar versions of these functions you need to -// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang -// from recognising a loop idiom and planting calls to memcpy! 
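[Editor's note] An aside on the warning being deleted just below: LLVM's loop-idiom pass rewrites plain byte-copy loops into memcpy calls, so the scalar C routines could only be built safely with "-mllvm -disable-loop-idiom-all". A sketch of the kind of loop that trips the transform (illustrative, not code from this commit):

    // Illustrative: at -O2, clang's loop-idiom recognition may replace this
    // loop with a call to memcpy() -- fatal for a routine that is itself
    // supposed to be a streaming-safe memcpy replacement.
    #include <stddef.h>

    void *byte_copy(void *dest, const void *src, size_t n) {
      unsigned char *d = (unsigned char *)dest;
      const unsigned char *s = (const unsigned char *)src;
      for (size_t i = 0; i < n; ++i) // recognizable memcpy idiom
        d[i] = s[i];
      return dest;
    }

Replacing these C routines with the hand-written assembly in sme-libc-mem-routines.S above sidesteps the problem entirely.
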
- -static void *__arm_sc_memcpy_fwd(void *dest, const void *src, - size_t n) __arm_streaming_compatible { - unsigned char *destp = (unsigned char *)dest; - const unsigned char *srcp = (const unsigned char *)src; - for (size_t i = 0; i < n; ++i) - destp[i] = srcp[i]; - - return dest; -} - -// If dest and src overlap then behaviour is undefined, hence we can add the -// restrict keywords here. This also matches the definition of the libc memcpy -// according to the man page. -void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src, - size_t n) __arm_streaming_compatible { - return __arm_sc_memcpy_fwd(dest, src, n); -} - -void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible { - unsigned char *destp = (unsigned char *)dest; - unsigned char c8 = (unsigned char)c; - for (size_t i = 0; i < n; ++i) - destp[i] = c8; - - return dest; -} - -static void *__arm_sc_memcpy_rev(void *dest, const void *src, - size_t n) __arm_streaming_compatible { - unsigned char *destp = (unsigned char *)dest; - const unsigned char *srcp = (const unsigned char *)src; - // TODO: Improve performance by copying larger chunks in reverse, or by - // using SVE. - while (n > 0) { - --n; - destp[n] = srcp[n]; - } - return dest; -} - -// Semantically a memmove is equivalent to the following: -// 1. Copy the entire contents of src to a temporary array that does not -// overlap with src or dest. -// 2. Copy the contents of the temporary array into dest. -void *__arm_sc_memmove(void *dest, const void *src, - size_t n) __arm_streaming_compatible { - unsigned char *destp = (unsigned char *)dest; - const unsigned char *srcp = (const unsigned char *)src; - - // If src and dest don't overlap then just invoke memcpy - if ((srcp > (destp + n)) || (destp > (srcp + n))) - return __arm_sc_memcpy_fwd(dest, src, n); - - // Overlap case 1: - // src: Low | -> | High - // dest: Low | -> | High - // Here src is always ahead of dest at a higher addres. If we first read a - // chunk of data from src we can safely write the same chunk to dest without - // corrupting future reads of src. - if (srcp > destp) - return __arm_sc_memcpy_fwd(dest, src, n); - - // Overlap case 2: - // src: Low | -> | High - // dest: Low | -> | High - // While we're in the overlap region we're always corrupting future reads of - // src when writing to dest. An efficient way to do this is to copy the data - // in reverse by starting at the highest address. - return __arm_sc_memcpy_rev(dest, src, n); -} +#include <stddef.h> const void *__arm_sc_memchr(const void *src, int c, size_t n) __arm_streaming_compatible { diff --git a/contrib/libs/cxxsupp/builtins/atomic.c b/contrib/libs/cxxsupp/builtins/atomic.c index 852bb20f086..aded25d9baa 100644 --- a/contrib/libs/cxxsupp/builtins/atomic.c +++ b/contrib/libs/cxxsupp/builtins/atomic.c @@ -12,7 +12,7 @@ // // 1) This code must work with C programs that do not link to anything // (including pthreads) and so it should not depend on any pthread -// functions. +// functions. If the user wishes to opt into using pthreads, they may do so. // 2) Atomic operations, rather than explicit mutexes, are most commonly used // on code where contended operations are rate. // @@ -56,7 +56,17 @@ static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1; // defined. Each platform should define the Lock type, and corresponding // lock() and unlock() functions. 
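[Editor's note] Context for the _LIBATOMIC_USE_PTHREAD branch added above: atomic.c falls back to a fixed table of locks for atomics wider than the hardware supports, picking a lock by hashing the object's address. A paraphrased sketch of that scheme under the new pthread configuration (names follow the file, but this is not verbatim compiler-rt code):

    #include <pthread.h>
    #include <stdint.h>

    // Paraphrased sketch, not verbatim: oversized atomic operations take
    // one of these locks, chosen by address, so unrelated objects rarely
    // contend on the same lock.
    #define SPINLOCK_COUNT (1 << 10)
    static const intptr_t SPINLOCK_MASK = SPINLOCK_COUNT - 1;

    typedef pthread_mutex_t Lock;      // as in the new pthread branch
    static Lock locks[SPINLOCK_COUNT]; // zero-initialized; assumes that is
                                       // a valid unlocked mutex here

    static Lock *lock_for_pointer(void *ptr) {
      intptr_t hash = (intptr_t)ptr;
      hash >>= 4; // low bits carry little entropy for aligned objects
      return locks + (hash & SPINLOCK_MASK);
    }

    static void lock(Lock *l) { pthread_mutex_lock(l); }
    static void unlock(Lock *l) { pthread_mutex_unlock(l); }
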
//////////////////////////////////////////////////////////////////////////////// -#if defined(__FreeBSD__) || defined(__DragonFly__) +#if defined(_LIBATOMIC_USE_PTHREAD) +#include <pthread.h> +typedef pthread_mutex_t Lock; +/// Unlock a lock. This is a release operation. +__inline static void unlock(Lock *l) { pthread_mutex_unlock(l); } +/// Locks a lock. +__inline static void lock(Lock *l) { pthread_mutex_lock(l); } +/// locks for atomic operations +static Lock locks[SPINLOCK_COUNT]; + +#elif defined(__FreeBSD__) || defined(__DragonFly__) #include <errno.h> // clang-format off #include <sys/types.h> diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/AArch64CPUFeatures.inc b/contrib/libs/cxxsupp/builtins/cpu_model/AArch64CPUFeatures.inc new file mode 100644 index 00000000000..e78bb88cfed --- /dev/null +++ b/contrib/libs/cxxsupp/builtins/cpu_model/AArch64CPUFeatures.inc @@ -0,0 +1,91 @@ +//===- AArch64CPUFeatures.inc - AArch64 CPU Features enum -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the CPUFeatures enum for AArch64 to facilitate better +// testing of this code between LLVM and compiler-rt, primarily that the files +// are an exact match. +// +// This file has two identical copies. The primary copy lives in LLVM and +// the other one sits in compiler-rt/lib/builtins/cpu_model directory. To make +// changes in this file, first modify the primary copy and copy it over to +// compiler-rt. compiler-rt tests will fail if the two files are not synced up. +// +//===----------------------------------------------------------------------===// + +#ifndef AARCH64_CPU_FEATURS_INC_H +#define AARCH64_CPU_FEATURS_INC_H + +// Function Multi Versioning CPU features. 
+enum CPUFeatures { + FEAT_RNG, + FEAT_FLAGM, + FEAT_FLAGM2, + FEAT_FP16FML, + FEAT_DOTPROD, + FEAT_SM4, + FEAT_RDM, + FEAT_LSE, + FEAT_FP, + FEAT_SIMD, + FEAT_CRC, + FEAT_SHA1, + FEAT_SHA2, + FEAT_SHA3, + FEAT_AES, + FEAT_PMULL, + FEAT_FP16, + FEAT_DIT, + FEAT_DPB, + FEAT_DPB2, + FEAT_JSCVT, + FEAT_FCMA, + FEAT_RCPC, + FEAT_RCPC2, + FEAT_FRINTTS, + FEAT_DGH, + FEAT_I8MM, + FEAT_BF16, + FEAT_EBF16, + FEAT_RPRES, + FEAT_SVE, + FEAT_SVE_BF16, + FEAT_SVE_EBF16, + FEAT_SVE_I8MM, + FEAT_SVE_F32MM, + FEAT_SVE_F64MM, + FEAT_SVE2, + FEAT_SVE_AES, + FEAT_SVE_PMULL128, + FEAT_SVE_BITPERM, + FEAT_SVE_SHA3, + FEAT_SVE_SM4, + FEAT_SME, + FEAT_MEMTAG, + FEAT_MEMTAG2, + FEAT_MEMTAG3, + FEAT_SB, + FEAT_PREDRES, + FEAT_SSBS, + FEAT_SSBS2, + FEAT_BTI, + FEAT_LS64, + FEAT_LS64_V, + FEAT_LS64_ACCDATA, + FEAT_WFXT, + FEAT_SME_F64, + FEAT_SME_I64, + FEAT_SME2, + FEAT_RCPC3, + FEAT_MOPS, + FEAT_MAX, + FEAT_EXT = 62, // Reserved to indicate presence of additional features field + // in __aarch64_cpu_features + FEAT_INIT // Used as flag of features initialization completion +}; + +#endif diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.c b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.c index 17bddfca46f..b868caa991b 100644 --- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.c +++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.c @@ -12,7 +12,7 @@ // //===----------------------------------------------------------------------===// -#include "cpu_model.h" +#include "aarch64.h" #if !defined(__aarch64__) #error This file is intended only for aarch64-based targets @@ -53,74 +53,6 @@ _Bool __aarch64_have_lse_atomics #endif #if !defined(DISABLE_AARCH64_FMV) -// CPUFeatures must correspond to the same AArch64 features in -// AArch64TargetParser.h -enum CPUFeatures { - FEAT_RNG, - FEAT_FLAGM, - FEAT_FLAGM2, - FEAT_FP16FML, - FEAT_DOTPROD, - FEAT_SM4, - FEAT_RDM, - FEAT_LSE, - FEAT_FP, - FEAT_SIMD, - FEAT_CRC, - FEAT_SHA1, - FEAT_SHA2, - FEAT_SHA3, - FEAT_AES, - FEAT_PMULL, - FEAT_FP16, - FEAT_DIT, - FEAT_DPB, - FEAT_DPB2, - FEAT_JSCVT, - FEAT_FCMA, - FEAT_RCPC, - FEAT_RCPC2, - FEAT_FRINTTS, - FEAT_DGH, - FEAT_I8MM, - FEAT_BF16, - FEAT_EBF16, - FEAT_RPRES, - FEAT_SVE, - FEAT_SVE_BF16, - FEAT_SVE_EBF16, - FEAT_SVE_I8MM, - FEAT_SVE_F32MM, - FEAT_SVE_F64MM, - FEAT_SVE2, - FEAT_SVE_AES, - FEAT_SVE_PMULL128, - FEAT_SVE_BITPERM, - FEAT_SVE_SHA3, - FEAT_SVE_SM4, - FEAT_SME, - FEAT_MEMTAG, - FEAT_MEMTAG2, - FEAT_MEMTAG3, - FEAT_SB, - FEAT_PREDRES, - FEAT_SSBS, - FEAT_SSBS2, - FEAT_BTI, - FEAT_LS64, - FEAT_LS64_V, - FEAT_LS64_ACCDATA, - FEAT_WFXT, - FEAT_SME_F64, - FEAT_SME_I64, - FEAT_SME2, - FEAT_RCPC3, - FEAT_MOPS, - FEAT_MAX, - FEAT_EXT = 62, // Reserved to indicate presence of additional features field - // in __aarch64_cpu_features - FEAT_INIT // Used as flag of features initialization completion -}; // Architecture features used // in Function Multi Versioning diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.h b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.h new file mode 100644 index 00000000000..f6cbf75d582 --- /dev/null +++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.h @@ -0,0 +1,21 @@ +//===-- cpu_model/aarch64.h --------------------------------------------- -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "cpu_model.h" + +#if !defined(__aarch64__) +#error This file is intended only for aarch64-based targets +#endif + +#if !defined(DISABLE_AARCH64_FMV) + +#include "AArch64CPUFeatures.inc" + +void __init_cpu_features(void); + +#endif // !defined(DISABLE_AARCH64_FMV) diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/android.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/android.inc index f711431489c..a9e3594e93c 100644 --- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/android.inc +++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/android.inc @@ -1,6 +1,6 @@ void __init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) { - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; // ifunc resolvers don't have hwcaps in arguments on Android API lower @@ -17,7 +17,7 @@ void __init_cpu_features_resolver(unsigned long hwcap, void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { // CPU features already initialized. - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; // Don't set any CPU features, diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/apple.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/apple.inc index 0bb755f4b30..f0694900f23 100644 --- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/apple.inc +++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/apple.inc @@ -1,8 +1,27 @@ #include <TargetConditionals.h> #if TARGET_OS_OSX || TARGET_OS_IPHONE -#include <dispatch/dispatch.h> #include <sys/sysctl.h> +#if __has_include(<arm/cpu_capabilities_public.h>) +#include <arm/cpu_capabilities_public.h> +#define HAS_CPU_CAPABILITIES_PUBLIC_H 1 + +// FB13964283 - A few of these didn't make it into the public SDK yet. +#ifndef CAP_BIT_FEAT_SME +#define CAP_BIT_FEAT_SME 40 +#endif +#ifndef CAP_BIT_FEAT_SME2 +#define CAP_BIT_FEAT_SME2 41 +#endif +#ifndef CAP_BIT_FEAT_SME_F64F64 +#define CAP_BIT_FEAT_SME_F64F64 42 +#endif +#ifndef CAP_BIT_FEAT_SME_I16I64 +#define CAP_BIT_FEAT_SME_I16I64 43 +#endif + +#endif + static bool isKnownAndSupported(const char *name) { int32_t val = 0; size_t size = sizeof(val); @@ -11,61 +30,130 @@ static bool isKnownAndSupported(const char *name) { return val; } +static uint64_t deriveImplicitFeatures(uint64_t features) { + // FEAT_SSBS2 implies FEAT_SSBS + if ((1ULL << FEAT_SSBS2) & features) + features |= (1ULL << FEAT_SSBS); + + // FEAT_FP is always enabled + features |= (1ULL << FEAT_FP); + + features |= (1ULL << FEAT_INIT); + + return features; +} + void __init_cpu_features_resolver(void) { // On Darwin platforms, this may be called concurrently by multiple threads // because the resolvers that use it are called lazily at runtime (unlike on // ELF platforms, where IFuncs are resolved serially at load time). This // function's effect on __aarch64_cpu_features must be idempotent. 
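[Editor's note] The idempotency contract spelled out above is the recurring theme of this update: every resolver now reads the features word with a relaxed atomic load before doing any work, and publishes only a fully computed bitmask with a single atomic store. A condensed sketch of the pattern (simplified names; not verbatim from any one .inc file):

    #include <stdint.h>

    // Simplified sketch of the init pattern applied across android.inc,
    // freebsd.inc, fuchsia.inc, mrs.inc, sysauxv.inc and apple.inc.
    static uint64_t cpu_features; // stands in for
                                  // __aarch64_cpu_features.features

    static uint64_t detect_features(void) {
      return 0; // placeholder for the platform-specific probing
    }

    void init_cpu_features_once(void) {
      // Nonzero means detection already completed: FEAT_INIT is always set
      // on success, so zero reliably encodes "not yet initialized".
      if (__atomic_load_n(&cpu_features, __ATOMIC_RELAXED))
        return;
      uint64_t feat = detect_features() | (1ULL << 63); // 63 == FEAT_INIT
                                                        // per the enum above
      // Publish only the finished value: racing callers either see 0 and
      // redo the idempotent detection, or see the complete bitmask.
      __atomic_store_n(&cpu_features, feat, __ATOMIC_RELAXED);
    }
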
- if (!__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) { - uint64_t features = 0; - - // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics - static const struct { - const char *sysctl_name; - enum CPUFeatures feature; - } feature_checks[] = { - {"hw.optional.arm.FEAT_FlagM", FEAT_FLAGM}, - {"hw.optional.arm.FEAT_FlagM2", FEAT_FLAGM2}, - {"hw.optional.arm.FEAT_FHM", FEAT_FP16FML}, - {"hw.optional.arm.FEAT_DotProd", FEAT_DOTPROD}, - {"hw.optional.arm.FEAT_RDM", FEAT_RDM}, - {"hw.optional.arm.FEAT_LSE", FEAT_LSE}, - {"hw.optional.floatingpoint", FEAT_FP}, - {"hw.optional.AdvSIMD", FEAT_SIMD}, - {"hw.optional.armv8_crc32", FEAT_CRC}, - {"hw.optional.arm.FEAT_SHA1", FEAT_SHA1}, - {"hw.optional.arm.FEAT_SHA256", FEAT_SHA2}, - {"hw.optional.arm.FEAT_SHA3", FEAT_SHA3}, - {"hw.optional.arm.FEAT_AES", FEAT_AES}, - {"hw.optional.arm.FEAT_PMULL", FEAT_PMULL}, - {"hw.optional.arm.FEAT_FP16", FEAT_FP16}, - {"hw.optional.arm.FEAT_DIT", FEAT_DIT}, - {"hw.optional.arm.FEAT_DPB", FEAT_DPB}, - {"hw.optional.arm.FEAT_DPB2", FEAT_DPB2}, - {"hw.optional.arm.FEAT_JSCVT", FEAT_JSCVT}, - {"hw.optional.arm.FEAT_FCMA", FEAT_FCMA}, - {"hw.optional.arm.FEAT_LRCPC", FEAT_RCPC}, - {"hw.optional.arm.FEAT_LRCPC2", FEAT_RCPC2}, - {"hw.optional.arm.FEAT_FRINTTS", FEAT_FRINTTS}, - {"hw.optional.arm.FEAT_I8MM", FEAT_I8MM}, - {"hw.optional.arm.FEAT_BF16", FEAT_BF16}, - {"hw.optional.arm.FEAT_SB", FEAT_SB}, - {"hw.optional.arm.FEAT_SPECRES", FEAT_PREDRES}, - {"hw.optional.arm.FEAT_SSBS", FEAT_SSBS2}, - {"hw.optional.arm.FEAT_BTI", FEAT_BTI}, - }; - - for (size_t I = 0, E = sizeof(feature_checks) / sizeof(feature_checks[0]); - I != E; ++I) - if (isKnownAndSupported(feature_checks[I].sysctl_name)) - features |= (1ULL << feature_checks[I].feature); - - features |= (1ULL << FEAT_INIT); + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) + return; + + uint64_t features = 0; + +#ifdef HAS_CPU_CAPABILITIES_PUBLIC_H + uint8_t feats_bitvec[(CAP_BIT_NB + 7) / 8] = {0}; + size_t len = sizeof(feats_bitvec); + // When hw.optional.arm.feats is available (macOS 15.0+, iOS 18.0+), use the + // fast path to get all the feature bits, otherwise fall back to the slow + // ~20-something sysctls path. 
+ if (!sysctlbyname("hw.optional.arm.caps", &feats_bitvec, &len, 0, 0)) { + +#define CHECK_BIT(FROM, TO) \ + do { \ + if (feats_bitvec[FROM / 8] & (1u << ((FROM) & 7))) { \ + features |= (1ULL << TO); \ + } \ + } while (0) + + CHECK_BIT(CAP_BIT_FEAT_FlagM, FEAT_FLAGM); + CHECK_BIT(CAP_BIT_FEAT_FlagM2, FEAT_FLAGM2); + CHECK_BIT(CAP_BIT_FEAT_FHM, FEAT_FP16FML); + CHECK_BIT(CAP_BIT_FEAT_DotProd, FEAT_DOTPROD); + CHECK_BIT(CAP_BIT_FEAT_SHA3, FEAT_SHA3); + CHECK_BIT(CAP_BIT_FEAT_RDM, FEAT_RDM); + CHECK_BIT(CAP_BIT_FEAT_LSE, FEAT_LSE); + CHECK_BIT(CAP_BIT_FEAT_SHA256, FEAT_SHA2); + CHECK_BIT(CAP_BIT_FEAT_SHA1, FEAT_SHA1); + CHECK_BIT(CAP_BIT_FEAT_AES, FEAT_AES); + CHECK_BIT(CAP_BIT_FEAT_PMULL, FEAT_PMULL); + CHECK_BIT(CAP_BIT_FEAT_SPECRES, FEAT_PREDRES); + CHECK_BIT(CAP_BIT_FEAT_SB, FEAT_SB); + CHECK_BIT(CAP_BIT_FEAT_FRINTTS, FEAT_FRINTTS); + CHECK_BIT(CAP_BIT_FEAT_LRCPC, FEAT_RCPC); + CHECK_BIT(CAP_BIT_FEAT_LRCPC2, FEAT_RCPC2); + CHECK_BIT(CAP_BIT_FEAT_FCMA, FEAT_FCMA); + CHECK_BIT(CAP_BIT_FEAT_JSCVT, FEAT_JSCVT); + CHECK_BIT(CAP_BIT_FEAT_DPB, FEAT_DPB); + CHECK_BIT(CAP_BIT_FEAT_DPB2, FEAT_DPB2); + CHECK_BIT(CAP_BIT_FEAT_BF16, FEAT_BF16); + CHECK_BIT(CAP_BIT_FEAT_I8MM, FEAT_I8MM); + CHECK_BIT(CAP_BIT_FEAT_DIT, FEAT_DIT); + CHECK_BIT(CAP_BIT_FEAT_FP16, FEAT_FP16); + CHECK_BIT(CAP_BIT_FEAT_SSBS, FEAT_SSBS2); + CHECK_BIT(CAP_BIT_FEAT_BTI, FEAT_BTI); + CHECK_BIT(CAP_BIT_AdvSIMD, FEAT_SIMD); + CHECK_BIT(CAP_BIT_CRC32, FEAT_CRC); + CHECK_BIT(CAP_BIT_FEAT_SME, FEAT_SME); + CHECK_BIT(CAP_BIT_FEAT_SME2, FEAT_SME2); + CHECK_BIT(CAP_BIT_FEAT_SME_F64F64, FEAT_SME_F64); + CHECK_BIT(CAP_BIT_FEAT_SME_I16I64, FEAT_SME_I64); + + features = deriveImplicitFeatures(features); __atomic_store(&__aarch64_cpu_features.features, &features, __ATOMIC_RELAXED); + return; } +#endif + + // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics + static const struct { + const char *sysctl_name; + enum CPUFeatures feature; + } feature_checks[] = { + {"hw.optional.arm.FEAT_FlagM", FEAT_FLAGM}, + {"hw.optional.arm.FEAT_FlagM2", FEAT_FLAGM2}, + {"hw.optional.arm.FEAT_FHM", FEAT_FP16FML}, + {"hw.optional.arm.FEAT_DotProd", FEAT_DOTPROD}, + {"hw.optional.arm.FEAT_RDM", FEAT_RDM}, + {"hw.optional.arm.FEAT_LSE", FEAT_LSE}, + {"hw.optional.AdvSIMD", FEAT_SIMD}, + {"hw.optional.armv8_crc32", FEAT_CRC}, + {"hw.optional.arm.FEAT_SHA1", FEAT_SHA1}, + {"hw.optional.arm.FEAT_SHA256", FEAT_SHA2}, + {"hw.optional.arm.FEAT_SHA3", FEAT_SHA3}, + {"hw.optional.arm.FEAT_AES", FEAT_AES}, + {"hw.optional.arm.FEAT_PMULL", FEAT_PMULL}, + {"hw.optional.arm.FEAT_FP16", FEAT_FP16}, + {"hw.optional.arm.FEAT_DIT", FEAT_DIT}, + {"hw.optional.arm.FEAT_DPB", FEAT_DPB}, + {"hw.optional.arm.FEAT_DPB2", FEAT_DPB2}, + {"hw.optional.arm.FEAT_JSCVT", FEAT_JSCVT}, + {"hw.optional.arm.FEAT_FCMA", FEAT_FCMA}, + {"hw.optional.arm.FEAT_LRCPC", FEAT_RCPC}, + {"hw.optional.arm.FEAT_LRCPC2", FEAT_RCPC2}, + {"hw.optional.arm.FEAT_FRINTTS", FEAT_FRINTTS}, + {"hw.optional.arm.FEAT_I8MM", FEAT_I8MM}, + {"hw.optional.arm.FEAT_BF16", FEAT_BF16}, + {"hw.optional.arm.FEAT_SB", FEAT_SB}, + {"hw.optional.arm.FEAT_SPECRES", FEAT_PREDRES}, + {"hw.optional.arm.FEAT_SSBS", FEAT_SSBS2}, + {"hw.optional.arm.FEAT_BTI", FEAT_BTI}, + }; + + for (size_t I = 0, E = sizeof(feature_checks) / sizeof(feature_checks[0]); + I != E; ++I) + if (isKnownAndSupported(feature_checks[I].sysctl_name)) + features |= (1ULL << feature_checks[I].feature); + + features = deriveImplicitFeatures(features); + + 
__atomic_store(&__aarch64_cpu_features.features, &features, + __ATOMIC_RELAXED); } #endif // TARGET_OS_OSX || TARGET_OS_IPHONE diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/freebsd.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/freebsd.inc index 793adef44b9..aa975dc854f 100644 --- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/freebsd.inc +++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/freebsd.inc @@ -1,6 +1,6 @@ void __init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) { - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; __init_cpu_features_constructor(hwcap, arg); @@ -10,7 +10,7 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { unsigned long hwcap = 0; unsigned long hwcap2 = 0; // CPU features already initialized. - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; int res = 0; diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/fuchsia.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/fuchsia.inc index 329b6b43a8a..fd0800dd11e 100644 --- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/fuchsia.inc +++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/fuchsia.inc @@ -2,7 +2,7 @@ #include <zircon/syscalls.h> void __init_cpu_features_resolver() { - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; // This ensures the vDSO is a direct link-time dependency of anything that @@ -13,8 +13,8 @@ void __init_cpu_features_resolver() { if (status != ZX_OK) return; -#define setCPUFeature(cpu_feature) \ - __aarch64_cpu_features.features |= 1ULL << cpu_feature + unsigned long long feat = 0; +#define setCPUFeature(cpu_feature) feat |= 1ULL << cpu_feature if (features & ZX_ARM64_FEATURE_ISA_FP) setCPUFeature(FEAT_FP); @@ -48,4 +48,6 @@ void __init_cpu_features_resolver() { setCPUFeature(FEAT_SVE); setCPUFeature(FEAT_INIT); + + __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED); } diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/mrs.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/mrs.inc index 32a21a2fba9..e4d5e7f2bd7 100644 --- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/mrs.inc +++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/mrs.inc @@ -3,11 +3,10 @@ #define HAVE_SYS_AUXV_H #endif - - static void __init_cpu_features_constructor(unsigned long hwcap, const __ifunc_arg_t *arg) { -#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F + unsigned long long feat = 0; +#define setCPUFeature(F) feat |= 1ULL << F #define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr)) #define extractBits(val, start, number) \ (val & ((1ULL << number) - 1ULL) << start) >> start @@ -20,26 +19,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_PMULL); if (hwcap & HWCAP_FLAGM) setCPUFeature(FEAT_FLAGM); - if (hwcap2 & HWCAP2_FLAGM2) { - setCPUFeature(FEAT_FLAGM); + if (hwcap2 & HWCAP2_FLAGM2) setCPUFeature(FEAT_FLAGM2); - } - if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4) + if (hwcap & HWCAP_SM4) setCPUFeature(FEAT_SM4); if (hwcap & HWCAP_ASIMDDP) setCPUFeature(FEAT_DOTPROD); if (hwcap & HWCAP_ASIMDFHM) setCPUFeature(FEAT_FP16FML); - if (hwcap & HWCAP_FPHP) { + if (hwcap & HWCAP_FPHP) setCPUFeature(FEAT_FP16); - setCPUFeature(FEAT_FP); - } if (hwcap & HWCAP_DIT) setCPUFeature(FEAT_DIT); if 
(hwcap & HWCAP_ASIMDRDM) setCPUFeature(FEAT_RDM); - if (hwcap & HWCAP_ILRCPC) - setCPUFeature(FEAT_RCPC2); if (hwcap & HWCAP_AES) setCPUFeature(FEAT_AES); if (hwcap & HWCAP_SHA1) @@ -52,23 +45,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_FCMA); if (hwcap & HWCAP_SB) setCPUFeature(FEAT_SB); - if (hwcap & HWCAP_SSBS) + if (hwcap & HWCAP_SSBS) { + setCPUFeature(FEAT_SSBS); setCPUFeature(FEAT_SSBS2); + } if (hwcap2 & HWCAP2_MTE) { setCPUFeature(FEAT_MEMTAG); setCPUFeature(FEAT_MEMTAG2); } - if (hwcap2 & HWCAP2_MTE3) { - setCPUFeature(FEAT_MEMTAG); - setCPUFeature(FEAT_MEMTAG2); + if (hwcap2 & HWCAP2_MTE3) setCPUFeature(FEAT_MEMTAG3); - } if (hwcap2 & HWCAP2_SVEAES) setCPUFeature(FEAT_SVE_AES); - if (hwcap2 & HWCAP2_SVEPMULL) { - setCPUFeature(FEAT_SVE_AES); + if (hwcap2 & HWCAP2_SVEPMULL) setCPUFeature(FEAT_SVE_PMULL128); - } if (hwcap2 & HWCAP2_SVEBITPERM) setCPUFeature(FEAT_SVE_BITPERM); if (hwcap2 & HWCAP2_SVESHA3) @@ -105,6 +95,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_WFXT); if (hwcap2 & HWCAP2_SME) setCPUFeature(FEAT_SME); + if (hwcap2 & HWCAP2_SME2) + setCPUFeature(FEAT_SME2); if (hwcap2 & HWCAP2_SME_I16I64) setCPUFeature(FEAT_SME_I64); if (hwcap2 & HWCAP2_SME_F64F64) @@ -113,86 +105,45 @@ static void __init_cpu_features_constructor(unsigned long hwcap, setCPUFeature(FEAT_MOPS); if (hwcap & HWCAP_CPUID) { unsigned long ftr; - getCPUFeature(ID_AA64PFR1_EL1, ftr); - // ID_AA64PFR1_EL1.MTE >= 0b0001 - if (extractBits(ftr, 8, 4) >= 0x1) - setCPUFeature(FEAT_MEMTAG); - // ID_AA64PFR1_EL1.SSBS == 0b0001 - if (extractBits(ftr, 4, 4) == 0x1) - setCPUFeature(FEAT_SSBS); - // ID_AA64PFR1_EL1.SME == 0b0010 - if (extractBits(ftr, 24, 4) == 0x2) - setCPUFeature(FEAT_SME2); - getCPUFeature(ID_AA64PFR0_EL1, ftr); - // ID_AA64PFR0_EL1.FP != 0b1111 - if (extractBits(ftr, 16, 4) != 0xF) { - setCPUFeature(FEAT_FP); - // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP - setCPUFeature(FEAT_SIMD); - } - // ID_AA64PFR0_EL1.SVE != 0b0000 - if (extractBits(ftr, 32, 4) != 0x0) { - // get ID_AA64ZFR0_EL1, that name supported - // if sve enabled only - getCPUFeature(S3_0_C0_C4_4, ftr); - // ID_AA64ZFR0_EL1.SVEver == 0b0000 - if (extractBits(ftr, 0, 4) == 0x0) - setCPUFeature(FEAT_SVE); - // ID_AA64ZFR0_EL1.SVEver == 0b0001 - if (extractBits(ftr, 0, 4) == 0x1) - setCPUFeature(FEAT_SVE2); - // ID_AA64ZFR0_EL1.BF16 != 0b0000 - if (extractBits(ftr, 20, 4) != 0x0) - setCPUFeature(FEAT_SVE_BF16); - } - getCPUFeature(ID_AA64ISAR0_EL1, ftr); - // ID_AA64ISAR0_EL1.SHA3 != 0b0000 - if (extractBits(ftr, 32, 4) != 0x0) - setCPUFeature(FEAT_SHA3); + getCPUFeature(ID_AA64ISAR1_EL1, ftr); - // ID_AA64ISAR1_EL1.DPB >= 0b0001 - if (extractBits(ftr, 0, 4) >= 0x1) - setCPUFeature(FEAT_DPB); - // ID_AA64ISAR1_EL1.LRCPC != 0b0000 - if (extractBits(ftr, 20, 4) != 0x0) - setCPUFeature(FEAT_RCPC); - // ID_AA64ISAR1_EL1.LRCPC == 0b0011 - if (extractBits(ftr, 20, 4) == 0x3) - setCPUFeature(FEAT_RCPC3); - // ID_AA64ISAR1_EL1.SPECRES == 0b0001 - if (extractBits(ftr, 40, 4) == 0x2) + /* ID_AA64ISAR1_EL1.SPECRES >= 0b0001 */ + if (extractBits(ftr, 40, 4) >= 0x1) setCPUFeature(FEAT_PREDRES); - // ID_AA64ISAR1_EL1.BF16 != 0b0000 - if (extractBits(ftr, 44, 4) != 0x0) - setCPUFeature(FEAT_BF16); - // ID_AA64ISAR1_EL1.LS64 >= 0b0001 + /* ID_AA64ISAR1_EL1.LS64 >= 0b0001 */ if (extractBits(ftr, 60, 4) >= 0x1) setCPUFeature(FEAT_LS64); - // ID_AA64ISAR1_EL1.LS64 >= 0b0010 + /* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */ if (extractBits(ftr, 60, 4) >= 
0x2) setCPUFeature(FEAT_LS64_V); - // ID_AA64ISAR1_EL1.LS64 >= 0b0011 + /* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */ if (extractBits(ftr, 60, 4) >= 0x3) setCPUFeature(FEAT_LS64_ACCDATA); - } else { - // Set some features in case of no CPUID support - if (hwcap & (HWCAP_FP | HWCAP_FPHP)) { - setCPUFeature(FEAT_FP); - // FP and AdvSIMD fields have the same value - setCPUFeature(FEAT_SIMD); - } - if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP) - setCPUFeature(FEAT_DPB); - if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC) - setCPUFeature(FEAT_RCPC); - if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16) - setCPUFeature(FEAT_BF16); - if (hwcap2 & HWCAP2_SVEBF16) - setCPUFeature(FEAT_SVE_BF16); - if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE) - setCPUFeature(FEAT_SVE2); - if (hwcap & HWCAP_SHA3) - setCPUFeature(FEAT_SHA3); } + if (hwcap & HWCAP_FP) { + setCPUFeature(FEAT_FP); + // FP and AdvSIMD fields have the same value + setCPUFeature(FEAT_SIMD); + } + if (hwcap & HWCAP_DCPOP) + setCPUFeature(FEAT_DPB); + if (hwcap & HWCAP_LRCPC) + setCPUFeature(FEAT_RCPC); + if (hwcap & HWCAP_ILRCPC) + setCPUFeature(FEAT_RCPC2); + if (hwcap2 & HWCAP2_LRCPC3) + setCPUFeature(FEAT_RCPC3); + if (hwcap2 & HWCAP2_BF16) + setCPUFeature(FEAT_BF16); + if (hwcap2 & HWCAP2_SVEBF16) + setCPUFeature(FEAT_SVE_BF16); + if (hwcap & HWCAP_SVE) + setCPUFeature(FEAT_SVE); + if (hwcap2 & HWCAP2_SVE2) + setCPUFeature(FEAT_SVE2); + if (hwcap & HWCAP_SHA3) + setCPUFeature(FEAT_SHA3); setCPUFeature(FEAT_INIT); + + __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED); } diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/sysauxv.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/sysauxv.inc index fb5722c4306..486f77a1e4d 100644 --- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/sysauxv.inc +++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/sysauxv.inc @@ -1,13 +1,13 @@ void __init_cpu_features_resolver(unsigned long hwcap, const __ifunc_arg_t *arg) { - if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; __init_cpu_features_constructor(hwcap, arg); } void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) { // CPU features already initialized. 
- if (__aarch64_cpu_features.features) + if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) return; unsigned long hwcap = getauxval(AT_HWCAP); diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/hwcap.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/hwcap.inc index 7ddc125b26d..41aba82ef95 100644 --- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/hwcap.inc +++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/hwcap.inc @@ -178,6 +178,12 @@ #ifndef HWCAP2_SVE_EBF16 #define HWCAP2_SVE_EBF16 (1ULL << 33) #endif +#ifndef HWCAP2_SME2 +#define HWCAP2_SME2 (1UL << 37) +#endif #ifndef HWCAP2_MOPS #define HWCAP2_MOPS (1ULL << 43) #endif +#ifndef HWCAP2_LRCPC3 +#define HWCAP2_LRCPC3 (1UL << 46) +#endif diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/x86.c b/contrib/libs/cxxsupp/builtins/cpu_model/x86.c index 0750e29f989..b1c4abd9d11 100644 --- a/contrib/libs/cxxsupp/builtins/cpu_model/x86.c +++ b/contrib/libs/cxxsupp/builtins/cpu_model/x86.c @@ -59,6 +59,7 @@ enum ProcessorTypes { INTEL_SIERRAFOREST, INTEL_GRANDRIDGE, INTEL_CLEARWATERFOREST, + AMDFAM1AH, CPU_TYPE_MAX }; @@ -97,6 +98,7 @@ enum ProcessorSubtypes { INTEL_COREI7_ARROWLAKE, INTEL_COREI7_ARROWLAKE_S, INTEL_COREI7_PANTHERLAKE, + AMDFAM1AH_ZNVER5, CPU_SUBTYPE_MAX }; @@ -139,20 +141,88 @@ enum ProcessorFeatures { FEATURE_AVX512BITALG, FEATURE_AVX512BF16, FEATURE_AVX512VP2INTERSECT, - - FEATURE_CMPXCHG16B = 46, - FEATURE_F16C = 49, + // FIXME: Below Features has some missings comparing to gcc, it's because gcc + // has some not one-to-one mapped in llvm. + // FEATURE_3DNOW, + // FEATURE_3DNOWP, + FEATURE_ADX = 40, + // FEATURE_ABM, + FEATURE_CLDEMOTE = 42, + FEATURE_CLFLUSHOPT, + FEATURE_CLWB, + FEATURE_CLZERO, + FEATURE_CMPXCHG16B, + // FIXME: Not adding FEATURE_CMPXCHG8B is a workaround to make 'generic' as + // a cpu string with no X86_FEATURE_COMPAT features, which is required in + // current implementantion of cpu_specific/cpu_dispatch FMV feature. 
+ // FEATURE_CMPXCHG8B, + FEATURE_ENQCMD = 48, + FEATURE_F16C, + FEATURE_FSGSBASE, + // FEATURE_FXSAVE, + // FEATURE_HLE, + // FEATURE_IBT, FEATURE_LAHF_LM = 54, FEATURE_LM, - FEATURE_WP, + FEATURE_LWP, FEATURE_LZCNT, FEATURE_MOVBE, - - FEATURE_AVX512FP16 = 94, + FEATURE_MOVDIR64B, + FEATURE_MOVDIRI, + FEATURE_MWAITX, + // FEATURE_OSXSAVE, + FEATURE_PCONFIG = 63, + FEATURE_PKU, + FEATURE_PREFETCHWT1, + FEATURE_PRFCHW, + FEATURE_PTWRITE, + FEATURE_RDPID, + FEATURE_RDRND, + FEATURE_RDSEED, + FEATURE_RTM, + FEATURE_SERIALIZE, + FEATURE_SGX, + FEATURE_SHA, + FEATURE_SHSTK, + FEATURE_TBM, + FEATURE_TSXLDTRK, + FEATURE_VAES, + FEATURE_WAITPKG, + FEATURE_WBNOINVD, + FEATURE_XSAVE, + FEATURE_XSAVEC, + FEATURE_XSAVEOPT, + FEATURE_XSAVES, + FEATURE_AMX_TILE, + FEATURE_AMX_INT8, + FEATURE_AMX_BF16, + FEATURE_UINTR, + FEATURE_HRESET, + FEATURE_KL, + // FEATURE_AESKLE, + FEATURE_WIDEKL = 92, + FEATURE_AVXVNNI, + FEATURE_AVX512FP16, FEATURE_X86_64_BASELINE, FEATURE_X86_64_V2, FEATURE_X86_64_V3, FEATURE_X86_64_V4, + FEATURE_AVXIFMA, + FEATURE_AVXVNNIINT8, + FEATURE_AVXNECONVERT, + FEATURE_CMPCCXADD, + FEATURE_AMX_FP16, + FEATURE_PREFETCHI, + FEATURE_RAOINT, + FEATURE_AMX_COMPLEX, + FEATURE_AVXVNNIINT16, + FEATURE_SM3, + FEATURE_SHA512, + FEATURE_SM4, + FEATURE_APXF, + FEATURE_USERMSR, + FEATURE_AVX10_1_256, + FEATURE_AVX10_1_512, CPU_FEATURE_MAX }; @@ -299,13 +369,13 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } } +#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0 + static const char *getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, const unsigned *Features, unsigned *Type, unsigned *Subtype) { -#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0 - // We select CPU strings to match the code in Host.cpp, but we don't use them // in compiler-rt. const char *CPU = 0; @@ -594,14 +664,48 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, const unsigned *Features, unsigned *Type, unsigned *Subtype) { - // We select CPU strings to match the code in Host.cpp, but we don't use them - // in compiler-rt. const char *CPU = 0; switch (Family) { + case 4: + CPU = "i486"; + break; + case 5: + CPU = "pentium"; + switch (Model) { + case 6: + case 7: + CPU = "k6"; + break; + case 8: + CPU = "k6-2"; + break; + case 9: + case 13: + CPU = "k6-3"; + break; + case 10: + CPU = "geode"; + break; + } + break; + case 6: + if (testFeature(FEATURE_SSE)) { + CPU = "athlon-xp"; + break; + } + CPU = "athlon"; + break; + case 15: + if (testFeature(FEATURE_SSE3)) { + CPU = "k8-sse3"; + break; + } + CPU = "k8"; + break; case 16: CPU = "amdfam10"; - *Type = AMDFAM10H; + *Type = AMDFAM10H; // "amdfam10" switch (Model) { case 2: *Subtype = AMDFAM10H_BARCELONA; @@ -677,7 +781,7 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, case 25: CPU = "znver3"; *Type = AMDFAM19H; - if ((Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) || + if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) || (Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) || (Model >= 0x50 && Model <= 0x5f)) { // Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3 @@ -701,6 +805,24 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, break; // "znver4" } break; // family 19h + case 26: + CPU = "znver5"; + *Type = AMDFAM1AH; + if (Model <= 0x77) { + // Models 00h-0Fh (Breithorn). + // Models 10h-1Fh (Breithorn-Dense). + // Models 20h-2Fh (Strix 1). + // Models 30h-37h (Strix 2). + // Models 38h-3Fh (Strix 3). 
+ // Models 40h-4Fh (Granite Ridge). + // Models 50h-5Fh (Weisshorn). + // Models 60h-6Fh (Krackan1). + // Models 70h-77h (Sarlak). + CPU = "znver5"; + *Subtype = AMDFAM1AH_ZNVER5; + break; // "znver5" + } + break; default: break; // Unknown AMD CPU. } @@ -708,6 +830,8 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family, return CPU; } +#undef testFeature + static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, unsigned *Features) { unsigned EAX = 0, EBX = 0; @@ -746,13 +870,15 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(FEATURE_AES); if ((ECX >> 29) & 1) setFeature(FEATURE_F16C); + if ((ECX >> 30) & 1) + setFeature(FEATURE_RDRND); // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context // switch, then we have full AVX support. const unsigned AVXBits = (1 << 27) | (1 << 28); - bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && - ((EAX & 0x6) == 0x6); + bool HasAVXSave = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) && + ((EAX & 0x6) == 0x6); #if defined(__APPLE__) // Darwin lazily saves the AVX512 context on first use: trust that the OS will // save the AVX512 context if we use AVX512 instructions, even the bit is not @@ -760,45 +886,76 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, bool HasAVX512Save = true; #else // AVX512 requires additional context to be saved by the OS. - bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0); + bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0); #endif + // AMX requires additional context to be saved by the OS. + const unsigned AMXBits = (1 << 17) | (1 << 18); + bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX); + bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits); - if (HasAVX) + if (HasAVXSave) setFeature(FEATURE_AVX); + if (((ECX >> 26) & 1) && HasAVXSave) + setFeature(FEATURE_XSAVE); + bool HasLeaf7 = MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX); + if (HasLeaf7 && ((EBX >> 0) & 1)) + setFeature(FEATURE_FSGSBASE); + if (HasLeaf7 && ((EBX >> 2) & 1)) + setFeature(FEATURE_SGX); if (HasLeaf7 && ((EBX >> 3) & 1)) setFeature(FEATURE_BMI); - if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX) + if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave) setFeature(FEATURE_AVX2); if (HasLeaf7 && ((EBX >> 8) & 1)) setFeature(FEATURE_BMI2); + if (HasLeaf7 && ((EBX >> 11) & 1)) + setFeature(FEATURE_RTM); if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512F); if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512DQ); + if (HasLeaf7 && ((EBX >> 18) & 1)) + setFeature(FEATURE_RDSEED); + if (HasLeaf7 && ((EBX >> 19) & 1)) + setFeature(FEATURE_ADX); if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512IFMA); + if (HasLeaf7 && ((EBX >> 24) & 1)) + setFeature(FEATURE_CLWB); if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512PF); if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512ER); if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512CD); + if (HasLeaf7 && ((EBX >> 29) & 1)) + setFeature(FEATURE_SHA); if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512BW); if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VL); + if (HasLeaf7 && ((ECX >> 0) & 1)) + setFeature(FEATURE_PREFETCHWT1); if (HasLeaf7 
&& ((ECX >> 1) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VBMI); + if (HasLeaf7 && ((ECX >> 4) & 1)) + setFeature(FEATURE_PKU); + if (HasLeaf7 && ((ECX >> 5) & 1)) + setFeature(FEATURE_WAITPKG); if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VBMI2); + if (HasLeaf7 && ((ECX >> 7) & 1)) + setFeature(FEATURE_SHSTK); if (HasLeaf7 && ((ECX >> 8) & 1)) setFeature(FEATURE_GFNI); - if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX) + if (HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave) + setFeature(FEATURE_VAES); + if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave) setFeature(FEATURE_VPCLMULQDQ); if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VNNI); @@ -806,23 +963,100 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(FEATURE_AVX512BITALG); if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VPOPCNTDQ); + if (HasLeaf7 && ((ECX >> 22) & 1)) + setFeature(FEATURE_RDPID); + if (HasLeaf7 && ((ECX >> 23) & 1)) + setFeature(FEATURE_KL); + if (HasLeaf7 && ((ECX >> 25) & 1)) + setFeature(FEATURE_CLDEMOTE); + if (HasLeaf7 && ((ECX >> 27) & 1)) + setFeature(FEATURE_MOVDIRI); + if (HasLeaf7 && ((ECX >> 28) & 1)) + setFeature(FEATURE_MOVDIR64B); + if (HasLeaf7 && ((ECX >> 29) & 1)) + setFeature(FEATURE_ENQCMD); if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save) setFeature(FEATURE_AVX5124VNNIW); if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save) setFeature(FEATURE_AVX5124FMAPS); + if (HasLeaf7 && ((EDX >> 5) & 1)) + setFeature(FEATURE_UINTR); if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VP2INTERSECT); + if (HasLeaf7 && ((EDX >> 14) & 1)) + setFeature(FEATURE_SERIALIZE); + if (HasLeaf7 && ((EDX >> 16) & 1)) + setFeature(FEATURE_TSXLDTRK); + if (HasLeaf7 && ((EDX >> 18) & 1)) + setFeature(FEATURE_PCONFIG); + if (HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave) + setFeature(FEATURE_AMX_BF16); if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512FP16); + if (HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave) + setFeature(FEATURE_AMX_TILE); + if (HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave) + setFeature(FEATURE_AMX_INT8); // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't // return all 0s for invalid subleaves so check the limit. 
@@ -806,23 +963,100 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(FEATURE_AVX512BITALG);
   if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
     setFeature(FEATURE_AVX512VPOPCNTDQ);
+  if (HasLeaf7 && ((ECX >> 22) & 1))
+    setFeature(FEATURE_RDPID);
+  if (HasLeaf7 && ((ECX >> 23) & 1))
+    setFeature(FEATURE_KL);
+  if (HasLeaf7 && ((ECX >> 25) & 1))
+    setFeature(FEATURE_CLDEMOTE);
+  if (HasLeaf7 && ((ECX >> 27) & 1))
+    setFeature(FEATURE_MOVDIRI);
+  if (HasLeaf7 && ((ECX >> 28) & 1))
+    setFeature(FEATURE_MOVDIR64B);
+  if (HasLeaf7 && ((ECX >> 29) & 1))
+    setFeature(FEATURE_ENQCMD);
   if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
     setFeature(FEATURE_AVX5124VNNIW);
   if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
     setFeature(FEATURE_AVX5124FMAPS);
+  if (HasLeaf7 && ((EDX >> 5) & 1))
+    setFeature(FEATURE_UINTR);
   if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
     setFeature(FEATURE_AVX512VP2INTERSECT);
+  if (HasLeaf7 && ((EDX >> 14) & 1))
+    setFeature(FEATURE_SERIALIZE);
+  if (HasLeaf7 && ((EDX >> 16) & 1))
+    setFeature(FEATURE_TSXLDTRK);
+  if (HasLeaf7 && ((EDX >> 18) & 1))
+    setFeature(FEATURE_PCONFIG);
+  if (HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave)
+    setFeature(FEATURE_AMX_BF16);
   if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
     setFeature(FEATURE_AVX512FP16);
+  if (HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave)
+    setFeature(FEATURE_AMX_TILE);
+  if (HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave)
+    setFeature(FEATURE_AMX_INT8);

   // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
   // return all 0s for invalid subleaves so check the limit.
   bool HasLeaf7Subleaf1 = HasLeaf7 && EAX >= 1 &&
       !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+  if (HasLeaf7Subleaf1 && ((EAX >> 0) & 1))
+    setFeature(FEATURE_SHA512);
+  if (HasLeaf7Subleaf1 && ((EAX >> 1) & 1))
+    setFeature(FEATURE_SM3);
+  if (HasLeaf7Subleaf1 && ((EAX >> 2) & 1))
+    setFeature(FEATURE_SM4);
+  if (HasLeaf7Subleaf1 && ((EAX >> 3) & 1))
+    setFeature(FEATURE_RAOINT);
+  if (HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave)
+    setFeature(FEATURE_AVXVNNI);
   if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
     setFeature(FEATURE_AVX512BF16);
+  if (HasLeaf7Subleaf1 && ((EAX >> 7) & 1))
+    setFeature(FEATURE_CMPCCXADD);
+  if (HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave)
+    setFeature(FEATURE_AMX_FP16);
+  if (HasLeaf7Subleaf1 && ((EAX >> 22) & 1))
+    setFeature(FEATURE_HRESET);
+  if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
+    setFeature(FEATURE_AVXIFMA);
+
+  if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
+    setFeature(FEATURE_AVXVNNIINT8);
+  if (HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave)
+    setFeature(FEATURE_AVXNECONVERT);
+  if (HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave)
+    setFeature(FEATURE_AMX_COMPLEX);
+  if (HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave)
+    setFeature(FEATURE_AVXVNNIINT16);
+  if (HasLeaf7Subleaf1 && ((EDX >> 14) & 1))
+    setFeature(FEATURE_PREFETCHI);
+  if (HasLeaf7Subleaf1 && ((EDX >> 15) & 1))
+    setFeature(FEATURE_USERMSR);
+  if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1))
+    setFeature(FEATURE_AVX10_1_256);
+  if (HasLeaf7Subleaf1 && ((EDX >> 21) & 1))
+    setFeature(FEATURE_APXF);
+
+  unsigned MaxLevel;
+  getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX);
+  bool HasLeafD = MaxLevel >= 0xd &&
+                  !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
+  if (HasLeafD && ((EAX >> 0) & 1) && HasAVXSave)
+    setFeature(FEATURE_XSAVEOPT);
+  if (HasLeafD && ((EAX >> 1) & 1) && HasAVXSave)
+    setFeature(FEATURE_XSAVEC);
+  if (HasLeafD && ((EAX >> 3) & 1) && HasAVXSave)
+    setFeature(FEATURE_XSAVES);
+
+  bool HasLeaf24 =
+      MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
+  if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24 && ((EBX >> 18) & 1))
+    setFeature(FEATURE_AVX10_1_512);

   unsigned MaxExtLevel;
   getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
@@ -836,14 +1070,40 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(FEATURE_LZCNT);
   if (((ECX >> 6) & 1))
     setFeature(FEATURE_SSE4_A);
+  if (((ECX >> 8) & 1))
+    setFeature(FEATURE_PRFCHW);
   if (((ECX >> 11) & 1))
     setFeature(FEATURE_XOP);
+  if (((ECX >> 15) & 1))
+    setFeature(FEATURE_LWP);
   if (((ECX >> 16) & 1))
     setFeature(FEATURE_FMA4);
+  if (((ECX >> 21) & 1))
+    setFeature(FEATURE_TBM);
+  if (((ECX >> 29) & 1))
+    setFeature(FEATURE_MWAITX);
+
   if (((EDX >> 29) & 1))
     setFeature(FEATURE_LM);
 }

+  bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
+                     !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
+  if (HasExtLeaf8 && ((EBX >> 0) & 1))
+    setFeature(FEATURE_CLZERO);
+  if (HasExtLeaf8 && ((EBX >> 9) & 1))
+    setFeature(FEATURE_WBNOINVD);
+
+  bool HasLeaf14 = MaxLevel >= 0x14 &&
+                   !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
+  if (HasLeaf14 && ((EBX >> 4) & 1))
+    setFeature(FEATURE_PTWRITE);
+
+  bool HasLeaf19 =
+      MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
+  if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
+    setFeature(FEATURE_WIDEKL);
+
   if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
     setFeature(FEATURE_X86_64_BASELINE);
     if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
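The x86.c hunk ends here. Downstream, these tables are what GCC and Clang's __builtin_cpu_supports()/__builtin_cpu_is() consult when compiler-rt provides the runtime; a small consumer sketch (the "znver5" name assumes a compiler recent enough to know it):

```c
// Consumer-side view of the feature bits getAvailableFeatures() fills in.
#include <stdio.h>

int main(void) {
  __builtin_cpu_init(); // runs the CPUID probing once
  if (__builtin_cpu_supports("avx512f"))
    puts("AVX-512F usable (CPU bit set and OS saves the state)");
  if (__builtin_cpu_is("znver5"))
    puts("running on an AMD Zen 5 part");
  return 0;
}
```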
diff --git a/contrib/libs/cxxsupp/builtins/divtc3.c b/contrib/libs/cxxsupp/builtins/divtc3.c
index 099de5802da..c393de81533 100644
--- a/contrib/libs/cxxsupp/builtins/divtc3.c
+++ b/contrib/libs/cxxsupp/builtins/divtc3.c
@@ -13,7 +13,7 @@
 #define QUAD_PRECISION
 #include "fp_lib.h"

-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)

 // Returns: the quotient of (a + ib) / (c + id)

diff --git a/contrib/libs/cxxsupp/builtins/extendbfsf2.c b/contrib/libs/cxxsupp/builtins/extendbfsf2.c
new file mode 100644
index 00000000000..e159d7997f6
--- /dev/null
+++ b/contrib/libs/cxxsupp/builtins/extendbfsf2.c
@@ -0,0 +1,13 @@
+//===-- lib/extendbfsf2.c - bfloat -> single conversion -----------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_BFLOAT16
+#define DST_SINGLE
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI float __extendbfsf2(src_t a) { return __extendXfYf2__(a); }
diff --git a/contrib/libs/cxxsupp/builtins/fp_add_impl.inc b/contrib/libs/cxxsupp/builtins/fp_add_impl.inc
index 7133358df9b..d20599921e7 100644
--- a/contrib/libs/cxxsupp/builtins/fp_add_impl.inc
+++ b/contrib/libs/cxxsupp/builtins/fp_add_impl.inc
@@ -91,7 +91,7 @@ static __inline fp_t __addXf3__(fp_t a, fp_t b) {

   // Shift the significand of b by the difference in exponents, with a sticky
   // bottom bit to get rounding correct.
-  const unsigned int align = aExponent - bExponent;
+  const unsigned int align = (unsigned int)(aExponent - bExponent);
   if (align) {
     if (align < typeWidth) {
       const bool sticky = (bSignificand << (typeWidth - align)) != 0;
diff --git a/contrib/libs/cxxsupp/builtins/fp_extend.h b/contrib/libs/cxxsupp/builtins/fp_extend.h
index 95ea2a7ac4b..22bf2b2514e 100644
--- a/contrib/libs/cxxsupp/builtins/fp_extend.h
+++ b/contrib/libs/cxxsupp/builtins/fp_extend.h
@@ -37,16 +37,7 @@ static const int srcSigFracBits = 52;
 // srcBits - srcSigFracBits - 1
 static const int srcExpBits = 11;

-static inline int src_rep_t_clz_impl(src_rep_t a) {
-#if defined __LP64__
-  return __builtin_clzl(a);
-#else
-  if (a & REP_C(0xffffffff00000000))
-    return clzsi(a >> 32);
-  else
-    return 32 + clzsi(a & REP_C(0xffffffff));
-#endif
-}
+static inline int src_rep_t_clz_impl(src_rep_t a) { return __builtin_clzll(a); }
 #define src_rep_t_clz src_rep_t_clz_impl

 #elif defined SRC_80
@@ -81,6 +72,21 @@ static inline int src_rep_t_clz_impl(src_rep_t a) {

 #define src_rep_t_clz src_rep_t_clz_impl

+#elif defined SRC_BFLOAT16
+#ifdef COMPILER_RT_HAS_BFLOAT16
+typedef __bf16 src_t;
+#else
+typedef uint16_t src_t;
+#endif
+typedef uint16_t src_rep_t;
+#define SRC_REP_C UINT16_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 7;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 8;
+#define src_rep_t_clz __builtin_clz
+
 #else
 #error Source should be half, single, or double precision!
 #endif // end source precision
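The new __extendbfsf2 works because bfloat16 is exactly the top half of an IEEE-754 binary32 (1 sign bit, 8 exponent bits, 7 fraction bits), so widening is a lossless 16-bit shift. A stand-alone illustration of that layout, not the builtin itself:

```c
// bfloat16 payload -> float: place the 16 bits in the high half of a
// binary32 pattern and bit-cast.
#include <stdint.h>
#include <string.h>

static float bf16_bits_to_float(uint16_t rep16) {
  uint32_t rep32 = (uint32_t)rep16 << 16; // bf16 is the high half of a float32
  float f;
  memcpy(&f, &rep32, sizeof f); // bit-cast without aliasing violations
  return f;
}
```

Sign, infinities, and NaNs carry over unchanged, which is why this direction needs no special cases; only the narrowing direction has to round.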
diff --git a/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc b/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc
index 3556bad9990..2f2f77ce781 100644
--- a/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc
+++ b/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc
@@ -34,7 +34,7 @@ static __inline fixint_t __fixint(fp_t a) {
   // If 0 <= exponent < significandBits, right shift to get the result.
   // Otherwise, shift left.
   if (exponent < significandBits)
-    return sign * (significand >> (significandBits - exponent));
+    return (fixint_t)(sign * (significand >> (significandBits - exponent)));
   else
-    return sign * ((fixuint_t)significand << (exponent - significandBits));
+    return (fixint_t)(sign * ((fixuint_t)significand << (exponent - significandBits)));
 }
diff --git a/contrib/libs/cxxsupp/builtins/fp_lib.h b/contrib/libs/cxxsupp/builtins/fp_lib.h
index c4f0a5b9587..b2a89506135 100644
--- a/contrib/libs/cxxsupp/builtins/fp_lib.h
+++ b/contrib/libs/cxxsupp/builtins/fp_lib.h
@@ -43,8 +43,8 @@ static __inline int rep_clz(rep_t a) { return clzsi(a); }
 // 32x32 --> 64 bit multiply
 static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
   const uint64_t product = (uint64_t)a * b;
-  *hi = product >> 32;
-  *lo = product;
+  *hi = (rep_t)(product >> 32);
+  *lo = (rep_t)product;
 }

 COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b);
@@ -58,16 +58,7 @@ typedef double fp_t;
 #define REP_C UINT64_C
 #define significandBits 52

-static __inline int rep_clz(rep_t a) {
-#if defined __LP64__
-  return __builtin_clzl(a);
-#else
-  if (a & REP_C(0xffffffff00000000))
-    return clzsi(a >> 32);
-  else
-    return 32 + clzsi(a & REP_C(0xffffffff));
-#endif
-}
+static inline int rep_clz(rep_t a) { return __builtin_clzll(a); }

 #define loWord(a) (a & 0xffffffffU)
 #define hiWord(a) (a >> 32)
@@ -239,7 +230,7 @@ static __inline int normalize(rep_t *significand) {
   return 1 - shift;
 }

-static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
+static __inline void wideLeftShift(rep_t *hi, rep_t *lo, unsigned int count) {
   *hi = *hi << count | *lo >> (typeWidth - count);
   *lo = *lo << count;
 }
diff --git a/contrib/libs/cxxsupp/builtins/int_types.h b/contrib/libs/cxxsupp/builtins/int_types.h
index ca97391fc28..48862f36421 100644
--- a/contrib/libs/cxxsupp/builtins/int_types.h
+++ b/contrib/libs/cxxsupp/builtins/int_types.h
@@ -107,8 +107,8 @@ typedef union {
 static __inline ti_int make_ti(di_int h, di_int l) {
   twords r;
-  r.s.high = h;
-  r.s.low = l;
+  r.s.high = (du_int)h;
+  r.s.low = (du_int)l;
   return r.all;
 }
diff --git a/contrib/libs/cxxsupp/builtins/multc3.c b/contrib/libs/cxxsupp/builtins/multc3.c
index 61a3f45e472..a89832f0e88 100644
--- a/contrib/libs/cxxsupp/builtins/multc3.c
+++ b/contrib/libs/cxxsupp/builtins/multc3.c
@@ -15,7 +15,7 @@
 #include "int_lib.h"
 #include "int_math.h"

-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)

 // Returns: the product of a + ib and c + id

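The fp_lib.h change above only makes an existing narrowing explicit; the single-precision wideMultiply splits a 64-bit product into halves, as in this stand-alone sketch:

```c
// 32x32 -> 64 bit multiply, split into high and low words, with the
// narrowing casts spelled out as in the patched wideMultiply.
#include <stdint.h>

static void wide_multiply32(uint32_t a, uint32_t b,
                            uint32_t *hi, uint32_t *lo) {
  const uint64_t product = (uint64_t)a * b;
  *hi = (uint32_t)(product >> 32); // upper 32 bits of the product
  *lo = (uint32_t)product;         // lower 32 bits, truncation made explicit
}
```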
diff --git a/contrib/libs/cxxsupp/builtins/os_version_check.c b/contrib/libs/cxxsupp/builtins/os_version_check.c
index 182eabe7a6a..01fae834ab2 100644
--- a/contrib/libs/cxxsupp/builtins/os_version_check.c
+++ b/contrib/libs/cxxsupp/builtins/os_version_check.c
@@ -316,8 +316,8 @@ int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor, int32_t Subminor) {
   static pthread_once_t once = PTHREAD_ONCE_INIT;
   pthread_once(&once, readSystemProperties);

-  return SdkVersion >= Major ||
-         (IsPreRelease && Major == __ANDROID_API_FUTURE__);
+  // Allow all on pre-release. Note that we still rely on compile-time checks.
+  return SdkVersion >= Major || IsPreRelease;
 }

 #else
diff --git a/contrib/libs/cxxsupp/builtins/riscv/restore.S b/contrib/libs/cxxsupp/builtins/riscv/restore.S
index 6f43842c8ca..d87dfc1ac71 100644
--- a/contrib/libs/cxxsupp/builtins/riscv/restore.S
+++ b/contrib/libs/cxxsupp/builtins/riscv/restore.S
@@ -22,7 +22,7 @@

 #if __riscv_xlen == 32

-#ifndef __riscv_32e
+#ifndef __riscv_abi_rve

   .globl  __riscv_restore_12
   .type   __riscv_restore_12,@function
@@ -109,7 +109,7 @@ __riscv_restore_0:

 #elif __riscv_xlen == 64

-#ifndef __riscv_64e
+#ifndef __riscv_abi_rve

   .globl  __riscv_restore_12
   .type   __riscv_restore_12,@function
diff --git a/contrib/libs/cxxsupp/builtins/riscv/save.S b/contrib/libs/cxxsupp/builtins/riscv/save.S
index 3e044179ff7..6324e05e971 100644
--- a/contrib/libs/cxxsupp/builtins/riscv/save.S
+++ b/contrib/libs/cxxsupp/builtins/riscv/save.S
@@ -18,7 +18,7 @@

 #if __riscv_xlen == 32

-#ifndef __riscv_32e
+#ifndef __riscv_abi_rve

   .globl  __riscv_save_12
   .type   __riscv_save_12,@function
@@ -115,7 +115,7 @@ __riscv_save_0:

 #elif __riscv_xlen == 64

-#ifndef __riscv_64e
+#ifndef __riscv_abi_rve

   .globl  __riscv_save_12
   .type   __riscv_save_12,@function
diff --git a/contrib/libs/cxxsupp/builtins/trampoline_setup.c b/contrib/libs/cxxsupp/builtins/trampoline_setup.c
index 844eb279441..830e25e4c03 100644
--- a/contrib/libs/cxxsupp/builtins/trampoline_setup.c
+++ b/contrib/libs/cxxsupp/builtins/trampoline_setup.c
@@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
   __clear_cache(trampOnStack, &trampOnStack[10]);
 }
 #endif // __powerpc__ && !defined(__powerpc64__)
+
+// The AArch64 compiler generates calls to __trampoline_setup() when creating
+// trampoline functions on the stack for use with nested functions.
+// This function creates a custom 36-byte trampoline function on the stack
+// which loads x18 with a pointer to the outer function's locals
+// and then jumps to the target nested function.
+// Note: x18 is a reserved platform register on Windows and macOS.
+
+#if defined(__aarch64__) && defined(__ELF__)
+COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
+                                        int trampSizeAllocated,
+                                        const void *realFunc, void *localsPtr) {
+  // This should never happen, but if the compiler did not allocate
+  // enough space on the stack for the trampoline, abort.
+  if (trampSizeAllocated < 36)
+    compilerrt_abort();
+
+  // Create the trampoline.
+  // Load realFunc into x17. mov/movk 16 bits at a time.
+  trampOnStack[0] =
+      0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11;
+  trampOnStack[1] =
+      0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11;
+  trampOnStack[2] =
+      0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11;
+  trampOnStack[3] =
+      0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11;
+  // Load localsPtr into x18.
+  trampOnStack[4] =
+      0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12;
+  trampOnStack[5] =
+      0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12;
+  trampOnStack[6] =
+      0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12;
+  trampOnStack[7] =
+      0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12;
+  trampOnStack[8] = 0xd61f0220; // br x17
+
+  // Clear the instruction cache.
+  __clear_cache(trampOnStack, &trampOnStack[9]);
+}
+#endif // defined(__aarch64__) && defined(__ELF__)
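The magic constants in the trampoline are MOVZ/MOVK encodings: the 16-bit immediate sits at bits [20:5] and the destination register number at bits [4:0], while the selector for which 16-bit slice to write is baked into each base opcode (0xd2800000 is MOVZ; 0xf2a00000, 0xf2c00000, 0xf2e00000 are MOVK with LSL #16, #32, #48). A sketch of the shared pattern as a hypothetical helper, not present in the source:

```c
// Compose one MOVZ/MOVK word: base opcode, 16-bit slice of value, register.
#include <stdint.h>

static uint32_t encode_mov16(uint32_t base_opcode, uint64_t value,
                             unsigned shift, unsigned reg) {
  return base_opcode | ((uint32_t)((value >> shift) & 0xffffu) << 5) | reg;
}

// encode_mov16(0xd2800000u, (uint64_t)realFunc, 0, 17) reproduces
// trampOnStack[0] above; 17 and 18 are the x17/x18 register numbers
// (the 0x11 and 0x12 in the original constants).
```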
diff --git a/contrib/libs/cxxsupp/builtins/ya.make b/contrib/libs/cxxsupp/builtins/ya.make
index 5f9c60552aa..ae250c5db79 100644
--- a/contrib/libs/cxxsupp/builtins/ya.make
+++ b/contrib/libs/cxxsupp/builtins/ya.make
@@ -12,9 +12,9 @@ LICENSE(

 LICENSE_TEXTS(.yandex_meta/licenses.list.txt)

-VERSION(18.1.8)
+VERSION(19.1.3)

-ORIGINAL_SOURCE(https://github.com/llvm/llvm-project/releases/download/llvmorg-18.1.8/compiler-rt-18.1.8.src.tar.xz)
+ORIGINAL_SOURCE(https://github.com/llvm/llvm-project/releases/download/llvmorg-19.1.3/compiler-rt-19.1.3.src.tar.xz)

 NO_COMPILER_WARNINGS()

@@ -65,7 +65,9 @@ IF (ARCH_AARCH64)
         aarch64/chkstk.S
         aarch64/fp_mode.c
         aarch64/sme-abi-init.c
+        aarch64/sme-abi-vg.c
         aarch64/sme-abi.S
+        aarch64/sme-libc-mem-routines.S
         absvdi2.c
         absvsi2.c
         absvti2.c
@@ -117,6 +119,7 @@ IF (ARCH_AARCH64)
         emutls.c
         enable_execute_stack.c
         eprintf.c
+        extendbfsf2.c
         extenddftf2.c
         extendhfsf2.c
         extendhftf2.c
@@ -284,6 +287,7 @@ ELSEIF (ARCH_X86_64)
         emutls.c
         enable_execute_stack.c
         eprintf.c
+        extendbfsf2.c
         extenddftf2.c
         extendhfsf2.c
         extendhftf2.c
@@ -467,6 +471,7 @@ ELSE()
         emutls.c
         enable_execute_stack.c
         eprintf.c
+        extendbfsf2.c
         extenddftf2.c
         extendhfsf2.c
         extendhftf2.c