author     robot-contrib <robot-contrib@yandex-team.com>  2024-12-02 20:23:41 +0300
committer  robot-contrib <robot-contrib@yandex-team.com>  2024-12-02 20:47:52 +0300
commit     81518d12414d1a5f8f1e3d3e13d884306230609b (patch)
tree       3eea40f5490048f98fcca4f5b22e1597d2364f54
parent     c6bd6398f1bec61405c83f91872481e3b5e33510 (diff)
download   ydb-81518d12414d1a5f8f1e3d3e13d884306230609b.tar.gz
Update contrib/libs/cxxsupp/builtins to 19.1.3
commit_hash:4898490dcc35775adf8be6d67c2ca83001fb5311
-rw-r--r--  build/sysincl/darwin.yml  1
-rw-r--r--  contrib/libs/cxxsupp/builtins/.yandex_meta/build.ym  2
-rw-r--r--  contrib/libs/cxxsupp/builtins/.yandex_meta/devtools.licenses.report  20
-rw-r--r--  contrib/libs/cxxsupp/builtins/CODE_OWNERS.TXT  8
-rw-r--r--  contrib/libs/cxxsupp/builtins/README.txt  5
-rw-r--r--  contrib/libs/cxxsupp/builtins/aarch64/sme-abi-vg.c  21
-rw-r--r--  contrib/libs/cxxsupp/builtins/aarch64/sme-abi.S  78
-rw-r--r--  contrib/libs/cxxsupp/builtins/aarch64/sme-libc-mem-routines.S  352
-rw-r--r--  contrib/libs/cxxsupp/builtins/aarch64/sme-libc-routines.c  77
-rw-r--r--  contrib/libs/cxxsupp/builtins/atomic.c  14
-rw-r--r--  contrib/libs/cxxsupp/builtins/cpu_model/AArch64CPUFeatures.inc  91
-rw-r--r--  contrib/libs/cxxsupp/builtins/cpu_model/aarch64.c  70
-rw-r--r--  contrib/libs/cxxsupp/builtins/cpu_model/aarch64.h  21
-rw-r--r--  contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/android.inc  4
-rw-r--r--  contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/apple.inc  180
-rw-r--r--  contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/freebsd.inc  4
-rw-r--r--  contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/fuchsia.inc  8
-rw-r--r--  contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/mrs.inc  135
-rw-r--r--  contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/sysauxv.inc  4
-rw-r--r--  contrib/libs/cxxsupp/builtins/cpu_model/aarch64/hwcap.inc  6
-rw-r--r--  contrib/libs/cxxsupp/builtins/cpu_model/x86.c  296
-rw-r--r--  contrib/libs/cxxsupp/builtins/divtc3.c  2
-rw-r--r--  contrib/libs/cxxsupp/builtins/extendbfsf2.c  13
-rw-r--r--  contrib/libs/cxxsupp/builtins/fp_add_impl.inc  2
-rw-r--r--  contrib/libs/cxxsupp/builtins/fp_extend.h  26
-rw-r--r--  contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc  4
-rw-r--r--  contrib/libs/cxxsupp/builtins/fp_lib.h  17
-rw-r--r--  contrib/libs/cxxsupp/builtins/int_types.h  4
-rw-r--r--  contrib/libs/cxxsupp/builtins/multc3.c  2
-rw-r--r--  contrib/libs/cxxsupp/builtins/os_version_check.c  4
-rw-r--r--  contrib/libs/cxxsupp/builtins/riscv/restore.S  4
-rw-r--r--  contrib/libs/cxxsupp/builtins/riscv/save.S  4
-rw-r--r--  contrib/libs/cxxsupp/builtins/trampoline_setup.c  42
-rw-r--r--  contrib/libs/cxxsupp/builtins/ya.make  9
34 files changed, 1167 insertions, 363 deletions
diff --git a/build/sysincl/darwin.yml b/build/sysincl/darwin.yml
index 06eaa50480f..d583225dfad 100644
--- a/build/sysincl/darwin.yml
+++ b/build/sysincl/darwin.yml
@@ -37,6 +37,7 @@
- MacTypes.h
- TargetConditionals.h
- architecture/byte_order.h
+ - arm/cpu_capabilities_public.h
- asl.h
- copyfile.h
- crt_externs.h
diff --git a/contrib/libs/cxxsupp/builtins/.yandex_meta/build.ym b/contrib/libs/cxxsupp/builtins/.yandex_meta/build.ym
index 20967507889..e454ac162ac 100644
--- a/contrib/libs/cxxsupp/builtins/.yandex_meta/build.ym
+++ b/contrib/libs/cxxsupp/builtins/.yandex_meta/build.ym
@@ -1,6 +1,6 @@
{% extends '//builtin/bag.ym' %}
-{% block current_version %}18.1.8{% endblock %}
+{% block current_version %}19.1.3{% endblock %}
{% block current_url %}
https://github.com/llvm/llvm-project/releases/download/llvmorg-{{self.version().strip()}}/compiler-rt-{{self.version().strip()}}.src.tar.xz
diff --git a/contrib/libs/cxxsupp/builtins/.yandex_meta/devtools.licenses.report b/contrib/libs/cxxsupp/builtins/.yandex_meta/devtools.licenses.report
index ce8fb1f4bb7..874c592edd9 100644
--- a/contrib/libs/cxxsupp/builtins/.yandex_meta/devtools.licenses.report
+++ b/contrib/libs/cxxsupp/builtins/.yandex_meta/devtools.licenses.report
@@ -103,7 +103,9 @@ BELONGS ya.make
aarch64/chkstk.S [1:2]
aarch64/fp_mode.c [3:4]
aarch64/sme-abi-init.c [1:2]
+ aarch64/sme-abi-vg.c [1:2]
aarch64/sme-abi.S [1:2]
+ aarch64/sme-libc-mem-routines.S [1:2]
absvdi2.c [3:4]
absvsi2.c [3:4]
absvti2.c [3:4]
@@ -237,7 +239,9 @@ BELONGS ya.make
comparedf2.c [3:4]
comparesf2.c [3:4]
comparetf2.c [3:4]
+ cpu_model/AArch64CPUFeatures.inc [3:4]
cpu_model/aarch64.c [3:4]
+ cpu_model/aarch64.h [3:4]
cpu_model/cpu_model.h [3:4]
cpu_model/x86.c [3:4]
crtbegin.c [3:4]
@@ -261,6 +265,7 @@ BELONGS ya.make
emutls.c [3:4]
enable_execute_stack.c [3:4]
eprintf.c [3:4]
+ extendbfsf2.c [3:4]
extenddftf2.c [3:4]
extendhfsf2.c [3:4]
extendsfdf2.c [3:4]
@@ -486,7 +491,9 @@ BELONGS ya.make
aarch64/chkstk.S [1:2]
aarch64/fp_mode.c [3:4]
aarch64/sme-abi-init.c [1:2]
+ aarch64/sme-abi-vg.c [1:2]
aarch64/sme-abi.S [1:2]
+ aarch64/sme-libc-mem-routines.S [1:2]
absvdi2.c [3:4]
absvsi2.c [3:4]
absvti2.c [3:4]
@@ -620,7 +627,9 @@ BELONGS ya.make
comparedf2.c [3:4]
comparesf2.c [3:4]
comparetf2.c [3:4]
+ cpu_model/AArch64CPUFeatures.inc [3:4]
cpu_model/aarch64.c [3:4]
+ cpu_model/aarch64.h [3:4]
cpu_model/cpu_model.h [3:4]
cpu_model/x86.c [3:4]
crtbegin.c [3:4]
@@ -644,6 +653,7 @@ BELONGS ya.make
emutls.c [3:4]
enable_execute_stack.c [3:4]
eprintf.c [3:4]
+ extendbfsf2.c [3:4]
extenddftf2.c [3:4]
extendhfsf2.c [3:4]
extendsfdf2.c [3:4]
@@ -927,7 +937,9 @@ BELONGS ya.make
aarch64/chkstk.S [3:3]
aarch64/fp_mode.c [5:5]
aarch64/sme-abi-init.c [3:3]
+ aarch64/sme-abi-vg.c [3:3]
aarch64/sme-abi.S [3:3]
+ aarch64/sme-libc-mem-routines.S [3:3]
absvdi2.c [5:5]
absvsi2.c [5:5]
absvti2.c [5:5]
@@ -1061,7 +1073,9 @@ BELONGS ya.make
comparedf2.c [5:5]
comparesf2.c [5:5]
comparetf2.c [5:5]
+ cpu_model/AArch64CPUFeatures.inc [5:5]
cpu_model/aarch64.c [5:5]
+ cpu_model/aarch64.h [5:5]
cpu_model/cpu_model.h [5:5]
cpu_model/x86.c [5:5]
crtbegin.c [5:5]
@@ -1085,6 +1099,7 @@ BELONGS ya.make
emutls.c [5:5]
enable_execute_stack.c [5:5]
eprintf.c [5:5]
+ extendbfsf2.c [5:5]
extenddftf2.c [5:5]
extendhfsf2.c [5:5]
extendsfdf2.c [5:5]
@@ -1310,7 +1325,9 @@ BELONGS ya.make
aarch64/chkstk.S [3:3]
aarch64/fp_mode.c [5:5]
aarch64/sme-abi-init.c [3:3]
+ aarch64/sme-abi-vg.c [3:3]
aarch64/sme-abi.S [3:3]
+ aarch64/sme-libc-mem-routines.S [3:3]
absvdi2.c [5:5]
absvsi2.c [5:5]
absvti2.c [5:5]
@@ -1444,7 +1461,9 @@ BELONGS ya.make
comparedf2.c [5:5]
comparesf2.c [5:5]
comparetf2.c [5:5]
+ cpu_model/AArch64CPUFeatures.inc [5:5]
cpu_model/aarch64.c [5:5]
+ cpu_model/aarch64.h [5:5]
cpu_model/cpu_model.h [5:5]
cpu_model/x86.c [5:5]
crtbegin.c [5:5]
@@ -1468,6 +1487,7 @@ BELONGS ya.make
emutls.c [5:5]
enable_execute_stack.c [5:5]
eprintf.c [5:5]
+ extendbfsf2.c [5:5]
extenddftf2.c [5:5]
extendhfsf2.c [5:5]
extendsfdf2.c [5:5]
diff --git a/contrib/libs/cxxsupp/builtins/CODE_OWNERS.TXT b/contrib/libs/cxxsupp/builtins/CODE_OWNERS.TXT
index ad136edf967..bd51a1073cc 100644
--- a/contrib/libs/cxxsupp/builtins/CODE_OWNERS.TXT
+++ b/contrib/libs/cxxsupp/builtins/CODE_OWNERS.TXT
@@ -67,3 +67,11 @@ D: ThreadSanitizer
N: Bill Wendling
E: isanbard@gmail.com
D: Profile runtime library
+
+N: Christopher Apple, David Trevelyan
+E: cja-private@pm.me, realtime.sanitizer@gmail.com
+D: Realtime Sanitizer (RTSan)
+
+N: Alexander Shaposhnikov
+E: alexander.v.shaposhnikov@gmail.com
+D: Numerical Sanitizer (NSAN)
diff --git a/contrib/libs/cxxsupp/builtins/README.txt b/contrib/libs/cxxsupp/builtins/README.txt
index 2d213d95f33..19f26c92a0f 100644
--- a/contrib/libs/cxxsupp/builtins/README.txt
+++ b/contrib/libs/cxxsupp/builtins/README.txt
@@ -272,6 +272,11 @@ switch32
switch8
switchu8
+// This function generates a custom trampoline function with the specific
+// realFunc and localsPtr values.
+void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
+ const void* realFunc, void* localsPtr);
+
// There is no C interface to the *_vfp_d8_d15_regs functions. There are
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use
// SJLJ for exceptions, each function with a catch clause or destructors needs
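
Editor's note: code that allocates a trampoline calls __trampoline_setup and then invokes the buffer as an ordinary function pointer. The sketch below is a conceptual illustration only; the buffer size, the need for executable memory, and any cache maintenance (e.g. __clear_cache) are target-specific assumptions, not details stated in this README.

    #include <stdint.h>

    extern void __trampoline_setup(uint32_t *trampOnStack, int trampSizeAllocated,
                                   const void *realFunc, void *localsPtr);

    void demo(void (*real_func)(void), void *locals) {
      uint32_t tramp[16]; // assumed large enough; must be executable memory
      __trampoline_setup(tramp, sizeof(tramp), (const void *)real_func, locals);
      void (*fn)(void) = (void (*)(void))(uintptr_t)tramp;
      fn(); // real_func runs and can reach locals through the trampoline
    }
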
diff --git a/contrib/libs/cxxsupp/builtins/aarch64/sme-abi-vg.c b/contrib/libs/cxxsupp/builtins/aarch64/sme-abi-vg.c
new file mode 100644
index 00000000000..20061012e16
--- /dev/null
+++ b/contrib/libs/cxxsupp/builtins/aarch64/sme-abi-vg.c
@@ -0,0 +1,21 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "../cpu_model/aarch64.h"
+
+struct FEATURES {
+ unsigned long long features;
+};
+
+extern struct FEATURES __aarch64_cpu_features;
+
+#if __GNUC__ >= 9
+#pragma GCC diagnostic ignored "-Wprio-ctor-dtor"
+#endif
+__attribute__((constructor(90))) static void get_aarch64_cpu_features(void) {
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+
+ __init_cpu_features();
+}
diff --git a/contrib/libs/cxxsupp/builtins/aarch64/sme-abi.S b/contrib/libs/cxxsupp/builtins/aarch64/sme-abi.S
index d470ecaf7aa..cd8153f6067 100644
--- a/contrib/libs/cxxsupp/builtins/aarch64/sme-abi.S
+++ b/contrib/libs/cxxsupp/builtins/aarch64/sme-abi.S
@@ -12,11 +12,15 @@
#if !defined(__APPLE__)
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
#define TPIDR2_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)
+#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)
+#define CPU_FEATS_SYMBOL_OFFSET :lo12:SYMBOL_NAME(__aarch64_cpu_features)
#else
// MachO requires @page/@pageoff directives because the global is defined
// in a different file. Otherwise this file may fail to build.
#define TPIDR2_SYMBOL SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@page
#define TPIDR2_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_has_sme_and_tpidr2_el0)@pageoff
+#define CPU_FEATS_SYMBOL SYMBOL_NAME(__aarch64_cpu_features)@page
+#define CPU_FEATS_SYMBOL_OFFSET SYMBOL_NAME(__aarch64_cpu_features)@pageoff
#endif
.arch armv9-a+sme
@@ -26,9 +30,10 @@
// abort(). Note that there is no need to preserve any state before the call,
// because the function does not return.
DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
-.cfi_startproc
- .variant_pcs SYMBOL_NAME(do_abort)
- stp x29, x30, [sp, #-32]!
+ .cfi_startproc
+ .variant_pcs SYMBOL_NAME(do_abort)
+ BTI_C
+ stp x29, x30, [sp, #-32]!
cntd x0
// Store VG to a stack location that we describe with .cfi_offset
str x0, [sp, #16]
@@ -36,22 +41,23 @@ DEFINE_COMPILERRT_PRIVATE_FUNCTION(do_abort)
.cfi_offset w30, -24
.cfi_offset w29, -32
.cfi_offset 46, -16
- bl __arm_sme_state
- tbz x0, #0, 2f
+ bl __arm_sme_state
+ tbz x0, #0, 2f
1:
- smstop sm
+ smstop sm
2:
// We can't make this into a tail-call because the unwinder would
// need to restore the value of VG.
- bl SYMBOL_NAME(abort)
-.cfi_endproc
+ bl SYMBOL_NAME(abort)
+ .cfi_endproc
END_COMPILERRT_FUNCTION(do_abort)
// __arm_sme_state fills the result registers based on a local
// that is set as part of the compiler-rt startup code.
// __aarch64_has_sme_and_tpidr2_el0
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
- .variant_pcs __arm_sme_state
+ .variant_pcs __arm_sme_state
+ BTI_C
mov x0, xzr
mov x1, xzr
@@ -68,7 +74,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sme_state)
END_COMPILERRT_OUTLINE_FUNCTION(__arm_sme_state)
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
- .variant_pcs __arm_tpidr2_restore
+ .variant_pcs __arm_tpidr2_restore
+ BTI_C
// If TPIDR2_EL0 is nonnull, the subroutine aborts in some platform-specific
// manner.
mrs x14, TPIDR2_EL0
@@ -103,7 +110,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_restore)
END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_restore)
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
- .variant_pcs __arm_tpidr2_restore
+ .variant_pcs __arm_tpidr2_restore
+ BTI_C
// If the current thread does not have access to TPIDR2_EL0, the subroutine
// does nothing.
adrp x14, TPIDR2_SYMBOL
@@ -143,7 +151,8 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_tpidr2_save)
END_COMPILERRT_OUTLINE_FUNCTION(__arm_tpidr2_save)
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
- .variant_pcs __arm_tpidr2_restore
+ .variant_pcs __arm_tpidr2_restore
+ BTI_C
// If the current thread does not have access to SME, the subroutine does
// nothing.
adrp x14, TPIDR2_SYMBOL
@@ -174,3 +183,48 @@ DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_za_disable)
0:
ret
END_COMPILERRT_OUTLINE_FUNCTION(__arm_za_disable)
+
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_get_current_vg)
+ .variant_pcs __arm_get_current_vg
+ BTI_C
+
+ stp x29, x30, [sp, #-16]!
+ .cfi_def_cfa_offset 16
+ mov x29, sp
+ .cfi_def_cfa w29, 16
+ .cfi_offset w30, -8
+ .cfi_offset w29, -16
+ adrp x17, CPU_FEATS_SYMBOL
+ ldr w17, [x17, CPU_FEATS_SYMBOL_OFFSET]
+ tbnz w17, #30, 0f
+ adrp x16, TPIDR2_SYMBOL
+ ldrb w16, [x16, TPIDR2_SYMBOL_OFFSET]
+ cbz w16, 1f
+0:
+ mov x18, x1
+ bl __arm_sme_state
+ mov x1, x18
+ and x17, x17, #0x40000000
+ bfxil x17, x0, #0, #1
+ cbz x17, 1f
+ cntd x0
+ .cfi_def_cfa wsp, 16
+ ldp x29, x30, [sp], #16
+ .cfi_def_cfa_offset 0
+ .cfi_restore w30
+ .cfi_restore w29
+ ret
+1:
+ mov x0, xzr
+ .cfi_def_cfa wsp, 16
+ ldp x29, x30, [sp], #16
+ .cfi_def_cfa_offset 0
+ .cfi_restore w30
+ .cfi_restore w29
+ ret
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_get_current_vg)
+
+NO_EXEC_STACK_DIRECTIVE
+
+// GNU property note for BTI and PAC
+GNU_PROPERTY_BTI_PAC
diff --git a/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-mem-routines.S b/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-mem-routines.S
new file mode 100644
index 00000000000..0318d9a6f1e
--- /dev/null
+++ b/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-mem-routines.S
@@ -0,0 +1,352 @@
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+// Routines taken from libc/AOR_v20.02/string/aarch64
+
+#include "../assembly.h"
+
+#ifdef __aarch64__
+
+#define L(l) .L ## l
+
+//
+// __arm_sc_memcpy / __arm_sc_memmove
+//
+
+#define dstin x0
+#define src x1
+#define count x2
+#define dst x3
+#define srcend1 x4
+#define dstend1 x5
+#define A_l x6
+#define A_lw w6
+#define A_h x7
+#define B_l x8
+#define B_lw w8
+#define B_h x9
+#define C_l x10
+#define C_lw w10
+#define C_h x11
+#define D_l x12
+#define D_h x13
+#define E_l x14
+#define E_h x15
+#define F_l x16
+#define F_h x17
+#define G_l count
+#define G_h dst
+#define H_l src
+#define H_h srcend1
+#define tmp1 x14
+
+/* This implementation handles overlaps and supports both memcpy and memmove
+ from a single entry point. It uses unaligned accesses and branchless
+ sequences to keep the code small, simple and improve performance.
+
+ Copies are split into 3 main cases: small copies of up to 32 bytes, medium
+ copies of up to 128 bytes, and large copies. The overhead of the overlap
+ check is negligible since it is only required for large copies.
+
+ Large copies use a software pipelined loop processing 64 bytes per iteration.
+ The destination pointer is 16-byte aligned to minimize unaligned accesses.
+ The loop tail is handled by always copying 64 bytes from the end.
+*/
+
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memcpy)
+ add srcend1, src, count
+ add dstend1, dstin, count
+ cmp count, 128
+ b.hi L(copy_long)
+ cmp count, 32
+ b.hi L(copy32_128)
+
+ /* Small copies: 0..32 bytes. */
+ cmp count, 16
+ b.lo L(copy16)
+ ldp A_l, A_h, [src]
+ ldp D_l, D_h, [srcend1, -16]
+ stp A_l, A_h, [dstin]
+ stp D_l, D_h, [dstend1, -16]
+ ret
+
+ /* Copy 8-15 bytes. */
+L(copy16):
+ tbz count, 3, L(copy8)
+ ldr A_l, [src]
+ ldr A_h, [srcend1, -8]
+ str A_l, [dstin]
+ str A_h, [dstend1, -8]
+ ret
+
+ .p2align 3
+ /* Copy 4-7 bytes. */
+L(copy8):
+ tbz count, 2, L(copy4)
+ ldr A_lw, [src]
+ ldr B_lw, [srcend1, -4]
+ str A_lw, [dstin]
+ str B_lw, [dstend1, -4]
+ ret
+
+ /* Copy 0..3 bytes using a branchless sequence. */
+L(copy4):
+ cbz count, L(copy0)
+ lsr tmp1, count, 1
+ ldrb A_lw, [src]
+ ldrb C_lw, [srcend1, -1]
+ ldrb B_lw, [src, tmp1]
+ strb A_lw, [dstin]
+ strb B_lw, [dstin, tmp1]
+ strb C_lw, [dstend1, -1]
+L(copy0):
+ ret
+
+ .p2align 4
+ /* Medium copies: 33..128 bytes. */
+L(copy32_128):
+ ldp A_l, A_h, [src]
+ ldp B_l, B_h, [src, 16]
+ ldp C_l, C_h, [srcend1, -32]
+ ldp D_l, D_h, [srcend1, -16]
+ cmp count, 64
+ b.hi L(copy128)
+ stp A_l, A_h, [dstin]
+ stp B_l, B_h, [dstin, 16]
+ stp C_l, C_h, [dstend1, -32]
+ stp D_l, D_h, [dstend1, -16]
+ ret
+
+ .p2align 4
+ /* Copy 65..128 bytes. */
+L(copy128):
+ ldp E_l, E_h, [src, 32]
+ ldp F_l, F_h, [src, 48]
+ cmp count, 96
+ b.ls L(copy96)
+ ldp G_l, G_h, [srcend1, -64]
+ ldp H_l, H_h, [srcend1, -48]
+ stp G_l, G_h, [dstend1, -64]
+ stp H_l, H_h, [dstend1, -48]
+L(copy96):
+ stp A_l, A_h, [dstin]
+ stp B_l, B_h, [dstin, 16]
+ stp E_l, E_h, [dstin, 32]
+ stp F_l, F_h, [dstin, 48]
+ stp C_l, C_h, [dstend1, -32]
+ stp D_l, D_h, [dstend1, -16]
+ ret
+
+ .p2align 4
+ /* Copy more than 128 bytes. */
+L(copy_long):
+ /* Use backwards copy if there is an overlap. */
+ sub tmp1, dstin, src
+ cbz tmp1, L(copy0)
+ cmp tmp1, count
+ b.lo L(copy_long_backwards)
+
+ /* Copy 16 bytes and then align dst to 16-byte alignment. */
+
+ ldp D_l, D_h, [src]
+ and tmp1, dstin, 15
+ bic dst, dstin, 15
+ sub src, src, tmp1
+ add count, count, tmp1 /* Count is now 16 too large. */
+ ldp A_l, A_h, [src, 16]
+ stp D_l, D_h, [dstin]
+ ldp B_l, B_h, [src, 32]
+ ldp C_l, C_h, [src, 48]
+ ldp D_l, D_h, [src, 64]!
+ subs count, count, 128 + 16 /* Test and readjust count. */
+ b.ls L(copy64_from_end)
+L(loop64):
+ stp A_l, A_h, [dst, 16]
+ ldp A_l, A_h, [src, 16]
+ stp B_l, B_h, [dst, 32]
+ ldp B_l, B_h, [src, 32]
+ stp C_l, C_h, [dst, 48]
+ ldp C_l, C_h, [src, 48]
+ stp D_l, D_h, [dst, 64]!
+ ldp D_l, D_h, [src, 64]!
+ subs count, count, 64
+ b.hi L(loop64)
+
+ /* Write the last iteration and copy 64 bytes from the end. */
+L(copy64_from_end):
+ ldp E_l, E_h, [srcend1, -64]
+ stp A_l, A_h, [dst, 16]
+ ldp A_l, A_h, [srcend1, -48]
+ stp B_l, B_h, [dst, 32]
+ ldp B_l, B_h, [srcend1, -32]
+ stp C_l, C_h, [dst, 48]
+ ldp C_l, C_h, [srcend1, -16]
+ stp D_l, D_h, [dst, 64]
+ stp E_l, E_h, [dstend1, -64]
+ stp A_l, A_h, [dstend1, -48]
+ stp B_l, B_h, [dstend1, -32]
+ stp C_l, C_h, [dstend1, -16]
+ ret
+
+ .p2align 4
+
+ /* Large backwards copy for overlapping copies.
+ Copy 16 bytes and then align dst to 16-byte alignment. */
+L(copy_long_backwards):
+ ldp D_l, D_h, [srcend1, -16]
+ and tmp1, dstend1, 15
+ sub srcend1, srcend1, tmp1
+ sub count, count, tmp1
+ ldp A_l, A_h, [srcend1, -16]
+ stp D_l, D_h, [dstend1, -16]
+ ldp B_l, B_h, [srcend1, -32]
+ ldp C_l, C_h, [srcend1, -48]
+ ldp D_l, D_h, [srcend1, -64]!
+ sub dstend1, dstend1, tmp1
+ subs count, count, 128
+ b.ls L(copy64_from_start)
+
+L(loop64_backwards):
+ stp A_l, A_h, [dstend1, -16]
+ ldp A_l, A_h, [srcend1, -16]
+ stp B_l, B_h, [dstend1, -32]
+ ldp B_l, B_h, [srcend1, -32]
+ stp C_l, C_h, [dstend1, -48]
+ ldp C_l, C_h, [srcend1, -48]
+ stp D_l, D_h, [dstend1, -64]!
+ ldp D_l, D_h, [srcend1, -64]!
+ subs count, count, 64
+ b.hi L(loop64_backwards)
+
+ /* Write the last iteration and copy 64 bytes from the start. */
+L(copy64_from_start):
+ ldp G_l, G_h, [src, 48]
+ stp A_l, A_h, [dstend1, -16]
+ ldp A_l, A_h, [src, 32]
+ stp B_l, B_h, [dstend1, -32]
+ ldp B_l, B_h, [src, 16]
+ stp C_l, C_h, [dstend1, -48]
+ ldp C_l, C_h, [src]
+ stp D_l, D_h, [dstend1, -64]
+ stp G_l, G_h, [dstin, 48]
+ stp A_l, A_h, [dstin, 32]
+ stp B_l, B_h, [dstin, 16]
+ stp C_l, C_h, [dstin]
+ ret
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_sc_memcpy)
+
+DEFINE_COMPILERRT_FUNCTION_ALIAS(__arm_sc_memmove, __arm_sc_memcpy)
+
+
+//
+// __arm_sc_memset
+//
+
+#define dstin x0
+#define val x1
+#define valw w1
+#define count x2
+#define dst x3
+#define dstend2 x4
+#define zva_val x5
+
+DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(__arm_sc_memset)
+#ifdef __ARM_FEATURE_SVE
+ mov z0.b, valw
+#else
+ bfi valw, valw, #8, #8
+ bfi valw, valw, #16, #16
+ bfi val, val, #32, #32
+ fmov d0, val
+ fmov v0.d[1], val
+#endif
+ add dstend2, dstin, count
+
+ cmp count, 96
+ b.hi L(set_long)
+ cmp count, 16
+ b.hs L(set_medium)
+ mov val, v0.D[0]
+
+ /* Set 0..15 bytes. */
+ tbz count, 3, 1f
+ str val, [dstin]
+ str val, [dstend2, -8]
+ ret
+ nop
+1: tbz count, 2, 2f
+ str valw, [dstin]
+ str valw, [dstend2, -4]
+ ret
+2: cbz count, 3f
+ strb valw, [dstin]
+ tbz count, 1, 3f
+ strh valw, [dstend2, -2]
+3: ret
+
+ /* Set 17..96 bytes. */
+L(set_medium):
+ str q0, [dstin]
+ tbnz count, 6, L(set96)
+ str q0, [dstend2, -16]
+ tbz count, 5, 1f
+ str q0, [dstin, 16]
+ str q0, [dstend2, -32]
+1: ret
+
+ .p2align 4
+ /* Set 64..96 bytes. Write 64 bytes from the start and
+ 32 bytes from the end. */
+L(set96):
+ str q0, [dstin, 16]
+ stp q0, q0, [dstin, 32]
+ stp q0, q0, [dstend2, -32]
+ ret
+
+ .p2align 4
+L(set_long):
+ and valw, valw, 255
+ bic dst, dstin, 15
+ str q0, [dstin]
+ cmp count, 160
+ ccmp valw, 0, 0, hs
+ b.ne L(no_zva)
+
+#ifndef SKIP_ZVA_CHECK
+ mrs zva_val, dczid_el0
+ and zva_val, zva_val, 31
+ cmp zva_val, 4 /* ZVA size is 64 bytes. */
+ b.ne L(no_zva)
+#endif
+ str q0, [dst, 16]
+ stp q0, q0, [dst, 32]
+ bic dst, dst, 63
+ sub count, dstend2, dst /* Count is now 64 too large. */
+ sub count, count, 128 /* Adjust count and bias for loop. */
+
+ .p2align 4
+L(zva_loop):
+ add dst, dst, 64
+ dc zva, dst
+ subs count, count, 64
+ b.hi L(zva_loop)
+ stp q0, q0, [dstend2, -64]
+ stp q0, q0, [dstend2, -32]
+ ret
+
+L(no_zva):
+ sub count, dstend2, dst /* Count is 16 too large. */
+ sub dst, dst, 16 /* Dst is biased by -32. */
+ sub count, count, 64 + 16 /* Adjust count and bias for loop. */
+L(no_zva_loop):
+ stp q0, q0, [dst, 32]
+ stp q0, q0, [dst, 64]!
+ subs count, count, 64
+ b.hi L(no_zva_loop)
+ stp q0, q0, [dstend2, -64]
+ stp q0, q0, [dstend2, -32]
+ ret
+END_COMPILERRT_OUTLINE_FUNCTION(__arm_sc_memset)
+
+#endif // __aarch64__
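
Editor's note: the "small copies" cases above avoid byte loops by loading the first and last chunks and storing both, so the accesses overlap for in-between lengths. A portable C sketch of the 8-15 byte case, for illustration only:

    #include <stddef.h>
    #include <string.h>

    // Branchless 8-15 byte copy: the head and tail 8-byte chunks overlap
    // in the middle, covering every length in range with four accesses.
    static void copy8_15(void *dst, const void *src, size_t n) {
      unsigned long long head, tail;
      memcpy(&head, src, 8);
      memcpy(&tail, (const char *)src + n - 8, 8);
      memcpy(dst, &head, 8);
      memcpy((char *)dst + n - 8, &tail, 8);
    }
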
diff --git a/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-routines.c b/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-routines.c
index cd73025a19c..315490e73ea 100644
--- a/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-routines.c
+++ b/contrib/libs/cxxsupp/builtins/aarch64/sme-libc-routines.c
@@ -1,79 +1,4 @@
-#include <stdlib.h>
-
-// WARNING: When building the scalar versions of these functions you need to
-// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
-// from recognising a loop idiom and planting calls to memcpy!
-
-static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
- size_t n) __arm_streaming_compatible {
- unsigned char *destp = (unsigned char *)dest;
- const unsigned char *srcp = (const unsigned char *)src;
- for (size_t i = 0; i < n; ++i)
- destp[i] = srcp[i];
-
- return dest;
-}
-
-// If dest and src overlap then behaviour is undefined, hence we can add the
-// restrict keywords here. This also matches the definition of the libc memcpy
-// according to the man page.
-void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
- size_t n) __arm_streaming_compatible {
- return __arm_sc_memcpy_fwd(dest, src, n);
-}
-
-void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
- unsigned char *destp = (unsigned char *)dest;
- unsigned char c8 = (unsigned char)c;
- for (size_t i = 0; i < n; ++i)
- destp[i] = c8;
-
- return dest;
-}
-
-static void *__arm_sc_memcpy_rev(void *dest, const void *src,
- size_t n) __arm_streaming_compatible {
- unsigned char *destp = (unsigned char *)dest;
- const unsigned char *srcp = (const unsigned char *)src;
- // TODO: Improve performance by copying larger chunks in reverse, or by
- // using SVE.
- while (n > 0) {
- --n;
- destp[n] = srcp[n];
- }
- return dest;
-}
-
-// Semantically a memmove is equivalent to the following:
-// 1. Copy the entire contents of src to a temporary array that does not
-// overlap with src or dest.
-// 2. Copy the contents of the temporary array into dest.
-void *__arm_sc_memmove(void *dest, const void *src,
- size_t n) __arm_streaming_compatible {
- unsigned char *destp = (unsigned char *)dest;
- const unsigned char *srcp = (const unsigned char *)src;
-
- // If src and dest don't overlap then just invoke memcpy
- if ((srcp > (destp + n)) || (destp > (srcp + n)))
- return __arm_sc_memcpy_fwd(dest, src, n);
-
- // Overlap case 1:
- // src: Low | -> | High
- // dest: Low | -> | High
- // Here src is always ahead of dest at a higher addres. If we first read a
- // chunk of data from src we can safely write the same chunk to dest without
- // corrupting future reads of src.
- if (srcp > destp)
- return __arm_sc_memcpy_fwd(dest, src, n);
-
- // Overlap case 2:
- // src: Low | -> | High
- // dest: Low | -> | High
- // While we're in the overlap region we're always corrupting future reads of
- // src when writing to dest. An efficient way to do this is to copy the data
- // in reverse by starting at the highest address.
- return __arm_sc_memcpy_rev(dest, src, n);
-}
+#include <stddef.h>
const void *__arm_sc_memchr(const void *src, int c,
size_t n) __arm_streaming_compatible {
diff --git a/contrib/libs/cxxsupp/builtins/atomic.c b/contrib/libs/cxxsupp/builtins/atomic.c
index 852bb20f086..aded25d9baa 100644
--- a/contrib/libs/cxxsupp/builtins/atomic.c
+++ b/contrib/libs/cxxsupp/builtins/atomic.c
@@ -12,7 +12,7 @@
//
// 1) This code must work with C programs that do not link to anything
// (including pthreads) and so it should not depend on any pthread
-// functions.
+// functions. If the user wishes to opt into using pthreads, they may do so.
// 2) Atomic operations, rather than explicit mutexes, are most commonly used
// on code where contended operations are rate.
//
@@ -56,7 +56,17 @@ static const long SPINLOCK_MASK = SPINLOCK_COUNT - 1;
// defined. Each platform should define the Lock type, and corresponding
// lock() and unlock() functions.
////////////////////////////////////////////////////////////////////////////////
-#if defined(__FreeBSD__) || defined(__DragonFly__)
+#if defined(_LIBATOMIC_USE_PTHREAD)
+#include <pthread.h>
+typedef pthread_mutex_t Lock;
+/// Unlock a lock. This is a release operation.
+__inline static void unlock(Lock *l) { pthread_mutex_unlock(l); }
+/// Locks a lock.
+__inline static void lock(Lock *l) { pthread_mutex_lock(l); }
+/// locks for atomic operations
+static Lock locks[SPINLOCK_COUNT];
+
+#elif defined(__FreeBSD__) || defined(__DragonFly__)
#include <errno.h>
// clang-format off
#include <sys/types.h>
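
Editor's note: whichever Lock type is selected, atomic.c serializes non-lock-free operations by hashing the object's address into the fixed locks[] table. A hedged sketch of that pattern, using the Lock/locks/SPINLOCK_MASK/lock/unlock definitions from the hunk above (the exact hash in atomic.c's lock_for_pointer() may differ):

    #include <stdint.h>
    #include <string.h>

    // Map an address to one of SPINLOCK_COUNT locks; drop alignment bits
    // so nearby objects spread across the table.
    static Lock *lock_for_pointer(void *ptr) {
      uintptr_t hash = (uintptr_t)ptr >> 4;
      return &locks[hash & SPINLOCK_MASK];
    }

    void generic_store(void *dest, const void *src, size_t size) {
      Lock *l = lock_for_pointer(dest);
      lock(l);
      memcpy(dest, src, size); // serialized against other locked accesses
      unlock(l);
    }
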
diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/AArch64CPUFeatures.inc b/contrib/libs/cxxsupp/builtins/cpu_model/AArch64CPUFeatures.inc
new file mode 100644
index 00000000000..e78bb88cfed
--- /dev/null
+++ b/contrib/libs/cxxsupp/builtins/cpu_model/AArch64CPUFeatures.inc
@@ -0,0 +1,91 @@
+//===- AArch64CPUFeatures.inc - AArch64 CPU Features enum -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CPUFeatures enum for AArch64 to facilitate better
+// testing of this code between LLVM and compiler-rt, primarily that the files
+// are an exact match.
+//
+// This file has two identical copies. The primary copy lives in LLVM and
+// the other one sits in compiler-rt/lib/builtins/cpu_model directory. To make
+// changes in this file, first modify the primary copy and copy it over to
+// compiler-rt. compiler-rt tests will fail if the two files are not synced up.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AARCH64_CPU_FEATURS_INC_H
+#define AARCH64_CPU_FEATURS_INC_H
+
+// Function Multi Versioning CPU features.
+enum CPUFeatures {
+ FEAT_RNG,
+ FEAT_FLAGM,
+ FEAT_FLAGM2,
+ FEAT_FP16FML,
+ FEAT_DOTPROD,
+ FEAT_SM4,
+ FEAT_RDM,
+ FEAT_LSE,
+ FEAT_FP,
+ FEAT_SIMD,
+ FEAT_CRC,
+ FEAT_SHA1,
+ FEAT_SHA2,
+ FEAT_SHA3,
+ FEAT_AES,
+ FEAT_PMULL,
+ FEAT_FP16,
+ FEAT_DIT,
+ FEAT_DPB,
+ FEAT_DPB2,
+ FEAT_JSCVT,
+ FEAT_FCMA,
+ FEAT_RCPC,
+ FEAT_RCPC2,
+ FEAT_FRINTTS,
+ FEAT_DGH,
+ FEAT_I8MM,
+ FEAT_BF16,
+ FEAT_EBF16,
+ FEAT_RPRES,
+ FEAT_SVE,
+ FEAT_SVE_BF16,
+ FEAT_SVE_EBF16,
+ FEAT_SVE_I8MM,
+ FEAT_SVE_F32MM,
+ FEAT_SVE_F64MM,
+ FEAT_SVE2,
+ FEAT_SVE_AES,
+ FEAT_SVE_PMULL128,
+ FEAT_SVE_BITPERM,
+ FEAT_SVE_SHA3,
+ FEAT_SVE_SM4,
+ FEAT_SME,
+ FEAT_MEMTAG,
+ FEAT_MEMTAG2,
+ FEAT_MEMTAG3,
+ FEAT_SB,
+ FEAT_PREDRES,
+ FEAT_SSBS,
+ FEAT_SSBS2,
+ FEAT_BTI,
+ FEAT_LS64,
+ FEAT_LS64_V,
+ FEAT_LS64_ACCDATA,
+ FEAT_WFXT,
+ FEAT_SME_F64,
+ FEAT_SME_I64,
+ FEAT_SME2,
+ FEAT_RCPC3,
+ FEAT_MOPS,
+ FEAT_MAX,
+ FEAT_EXT = 62, // Reserved to indicate presence of additional features field
+ // in __aarch64_cpu_features
+ FEAT_INIT // Used as flag of features initialization completion
+};
+
+#endif
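
Editor's note: each enumerator above is a bit position in the 64-bit mask __aarch64_cpu_features.features. A minimal sketch of the read side; the resolver shape is an assumption for illustration, not code from this diff:

    // Test one FMV feature bit in the shared mask.
    extern struct {
      unsigned long long features;
    } __aarch64_cpu_features;

    static int has_feature(unsigned bit) { // bit is a CPUFeatures value
      return (__aarch64_cpu_features.features >> bit) & 1;
    }
    // e.g. has_feature(FEAT_LSE) selects an LSE-based ifunc implementation.
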
diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.c b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.c
index 17bddfca46f..b868caa991b 100644
--- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.c
+++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.c
@@ -12,7 +12,7 @@
//
//===----------------------------------------------------------------------===//
-#include "cpu_model.h"
+#include "aarch64.h"
#if !defined(__aarch64__)
#error This file is intended only for aarch64-based targets
@@ -53,74 +53,6 @@ _Bool __aarch64_have_lse_atomics
#endif
#if !defined(DISABLE_AARCH64_FMV)
-// CPUFeatures must correspond to the same AArch64 features in
-// AArch64TargetParser.h
-enum CPUFeatures {
- FEAT_RNG,
- FEAT_FLAGM,
- FEAT_FLAGM2,
- FEAT_FP16FML,
- FEAT_DOTPROD,
- FEAT_SM4,
- FEAT_RDM,
- FEAT_LSE,
- FEAT_FP,
- FEAT_SIMD,
- FEAT_CRC,
- FEAT_SHA1,
- FEAT_SHA2,
- FEAT_SHA3,
- FEAT_AES,
- FEAT_PMULL,
- FEAT_FP16,
- FEAT_DIT,
- FEAT_DPB,
- FEAT_DPB2,
- FEAT_JSCVT,
- FEAT_FCMA,
- FEAT_RCPC,
- FEAT_RCPC2,
- FEAT_FRINTTS,
- FEAT_DGH,
- FEAT_I8MM,
- FEAT_BF16,
- FEAT_EBF16,
- FEAT_RPRES,
- FEAT_SVE,
- FEAT_SVE_BF16,
- FEAT_SVE_EBF16,
- FEAT_SVE_I8MM,
- FEAT_SVE_F32MM,
- FEAT_SVE_F64MM,
- FEAT_SVE2,
- FEAT_SVE_AES,
- FEAT_SVE_PMULL128,
- FEAT_SVE_BITPERM,
- FEAT_SVE_SHA3,
- FEAT_SVE_SM4,
- FEAT_SME,
- FEAT_MEMTAG,
- FEAT_MEMTAG2,
- FEAT_MEMTAG3,
- FEAT_SB,
- FEAT_PREDRES,
- FEAT_SSBS,
- FEAT_SSBS2,
- FEAT_BTI,
- FEAT_LS64,
- FEAT_LS64_V,
- FEAT_LS64_ACCDATA,
- FEAT_WFXT,
- FEAT_SME_F64,
- FEAT_SME_I64,
- FEAT_SME2,
- FEAT_RCPC3,
- FEAT_MOPS,
- FEAT_MAX,
- FEAT_EXT = 62, // Reserved to indicate presence of additional features field
- // in __aarch64_cpu_features
- FEAT_INIT // Used as flag of features initialization completion
-};
// Architecture features used
// in Function Multi Versioning
diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.h b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.h
new file mode 100644
index 00000000000..f6cbf75d582
--- /dev/null
+++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64.h
@@ -0,0 +1,21 @@
+//===-- cpu_model/aarch64.h --------------------------------------------- -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "cpu_model.h"
+
+#if !defined(__aarch64__)
+#error This file is intended only for aarch64-based targets
+#endif
+
+#if !defined(DISABLE_AARCH64_FMV)
+
+#include "AArch64CPUFeatures.inc"
+
+void __init_cpu_features(void);
+
+#endif // !defined(DISABLE_AARCH64_FMV)
diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/android.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/android.inc
index f711431489c..a9e3594e93c 100644
--- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/android.inc
+++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/android.inc
@@ -1,6 +1,6 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
// ifunc resolvers don't have hwcaps in arguments on Android API lower
@@ -17,7 +17,7 @@ void __init_cpu_features_resolver(unsigned long hwcap,
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
// CPU features already initialized.
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
// Don't set any CPU features,
diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/apple.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/apple.inc
index 0bb755f4b30..f0694900f23 100644
--- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/apple.inc
+++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/apple.inc
@@ -1,8 +1,27 @@
#include <TargetConditionals.h>
#if TARGET_OS_OSX || TARGET_OS_IPHONE
-#include <dispatch/dispatch.h>
#include <sys/sysctl.h>
+#if __has_include(<arm/cpu_capabilities_public.h>)
+#include <arm/cpu_capabilities_public.h>
+#define HAS_CPU_CAPABILITIES_PUBLIC_H 1
+
+// FB13964283 - A few of these didn't make it into the public SDK yet.
+#ifndef CAP_BIT_FEAT_SME
+#define CAP_BIT_FEAT_SME 40
+#endif
+#ifndef CAP_BIT_FEAT_SME2
+#define CAP_BIT_FEAT_SME2 41
+#endif
+#ifndef CAP_BIT_FEAT_SME_F64F64
+#define CAP_BIT_FEAT_SME_F64F64 42
+#endif
+#ifndef CAP_BIT_FEAT_SME_I16I64
+#define CAP_BIT_FEAT_SME_I16I64 43
+#endif
+
+#endif
+
static bool isKnownAndSupported(const char *name) {
int32_t val = 0;
size_t size = sizeof(val);
@@ -11,61 +30,130 @@ static bool isKnownAndSupported(const char *name) {
return val;
}
+static uint64_t deriveImplicitFeatures(uint64_t features) {
+ // FEAT_SSBS2 implies FEAT_SSBS
+ if ((1ULL << FEAT_SSBS2) & features)
+ features |= (1ULL << FEAT_SSBS);
+
+ // FEAT_FP is always enabled
+ features |= (1ULL << FEAT_FP);
+
+ features |= (1ULL << FEAT_INIT);
+
+ return features;
+}
+
void __init_cpu_features_resolver(void) {
// On Darwin platforms, this may be called concurrently by multiple threads
// because the resolvers that use it are called lazily at runtime (unlike on
// ELF platforms, where IFuncs are resolved serially at load time). This
// function's effect on __aarch64_cpu_features must be idempotent.
- if (!__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED)) {
- uint64_t features = 0;
-
- // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
- static const struct {
- const char *sysctl_name;
- enum CPUFeatures feature;
- } feature_checks[] = {
- {"hw.optional.arm.FEAT_FlagM", FEAT_FLAGM},
- {"hw.optional.arm.FEAT_FlagM2", FEAT_FLAGM2},
- {"hw.optional.arm.FEAT_FHM", FEAT_FP16FML},
- {"hw.optional.arm.FEAT_DotProd", FEAT_DOTPROD},
- {"hw.optional.arm.FEAT_RDM", FEAT_RDM},
- {"hw.optional.arm.FEAT_LSE", FEAT_LSE},
- {"hw.optional.floatingpoint", FEAT_FP},
- {"hw.optional.AdvSIMD", FEAT_SIMD},
- {"hw.optional.armv8_crc32", FEAT_CRC},
- {"hw.optional.arm.FEAT_SHA1", FEAT_SHA1},
- {"hw.optional.arm.FEAT_SHA256", FEAT_SHA2},
- {"hw.optional.arm.FEAT_SHA3", FEAT_SHA3},
- {"hw.optional.arm.FEAT_AES", FEAT_AES},
- {"hw.optional.arm.FEAT_PMULL", FEAT_PMULL},
- {"hw.optional.arm.FEAT_FP16", FEAT_FP16},
- {"hw.optional.arm.FEAT_DIT", FEAT_DIT},
- {"hw.optional.arm.FEAT_DPB", FEAT_DPB},
- {"hw.optional.arm.FEAT_DPB2", FEAT_DPB2},
- {"hw.optional.arm.FEAT_JSCVT", FEAT_JSCVT},
- {"hw.optional.arm.FEAT_FCMA", FEAT_FCMA},
- {"hw.optional.arm.FEAT_LRCPC", FEAT_RCPC},
- {"hw.optional.arm.FEAT_LRCPC2", FEAT_RCPC2},
- {"hw.optional.arm.FEAT_FRINTTS", FEAT_FRINTTS},
- {"hw.optional.arm.FEAT_I8MM", FEAT_I8MM},
- {"hw.optional.arm.FEAT_BF16", FEAT_BF16},
- {"hw.optional.arm.FEAT_SB", FEAT_SB},
- {"hw.optional.arm.FEAT_SPECRES", FEAT_PREDRES},
- {"hw.optional.arm.FEAT_SSBS", FEAT_SSBS2},
- {"hw.optional.arm.FEAT_BTI", FEAT_BTI},
- };
-
- for (size_t I = 0, E = sizeof(feature_checks) / sizeof(feature_checks[0]);
- I != E; ++I)
- if (isKnownAndSupported(feature_checks[I].sysctl_name))
- features |= (1ULL << feature_checks[I].feature);
-
- features |= (1ULL << FEAT_INIT);
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
+ return;
+
+ uint64_t features = 0;
+
+#ifdef HAS_CPU_CAPABILITIES_PUBLIC_H
+ uint8_t feats_bitvec[(CAP_BIT_NB + 7) / 8] = {0};
+ size_t len = sizeof(feats_bitvec);
+ // When hw.optional.arm.feats is available (macOS 15.0+, iOS 18.0+), use the
+ // fast path to get all the feature bits, otherwise fall back to the slow
+ // ~20-something sysctls path.
+ if (!sysctlbyname("hw.optional.arm.caps", &feats_bitvec, &len, 0, 0)) {
+
+#define CHECK_BIT(FROM, TO) \
+ do { \
+ if (feats_bitvec[FROM / 8] & (1u << ((FROM) & 7))) { \
+ features |= (1ULL << TO); \
+ } \
+ } while (0)
+
+ CHECK_BIT(CAP_BIT_FEAT_FlagM, FEAT_FLAGM);
+ CHECK_BIT(CAP_BIT_FEAT_FlagM2, FEAT_FLAGM2);
+ CHECK_BIT(CAP_BIT_FEAT_FHM, FEAT_FP16FML);
+ CHECK_BIT(CAP_BIT_FEAT_DotProd, FEAT_DOTPROD);
+ CHECK_BIT(CAP_BIT_FEAT_SHA3, FEAT_SHA3);
+ CHECK_BIT(CAP_BIT_FEAT_RDM, FEAT_RDM);
+ CHECK_BIT(CAP_BIT_FEAT_LSE, FEAT_LSE);
+ CHECK_BIT(CAP_BIT_FEAT_SHA256, FEAT_SHA2);
+ CHECK_BIT(CAP_BIT_FEAT_SHA1, FEAT_SHA1);
+ CHECK_BIT(CAP_BIT_FEAT_AES, FEAT_AES);
+ CHECK_BIT(CAP_BIT_FEAT_PMULL, FEAT_PMULL);
+ CHECK_BIT(CAP_BIT_FEAT_SPECRES, FEAT_PREDRES);
+ CHECK_BIT(CAP_BIT_FEAT_SB, FEAT_SB);
+ CHECK_BIT(CAP_BIT_FEAT_FRINTTS, FEAT_FRINTTS);
+ CHECK_BIT(CAP_BIT_FEAT_LRCPC, FEAT_RCPC);
+ CHECK_BIT(CAP_BIT_FEAT_LRCPC2, FEAT_RCPC2);
+ CHECK_BIT(CAP_BIT_FEAT_FCMA, FEAT_FCMA);
+ CHECK_BIT(CAP_BIT_FEAT_JSCVT, FEAT_JSCVT);
+ CHECK_BIT(CAP_BIT_FEAT_DPB, FEAT_DPB);
+ CHECK_BIT(CAP_BIT_FEAT_DPB2, FEAT_DPB2);
+ CHECK_BIT(CAP_BIT_FEAT_BF16, FEAT_BF16);
+ CHECK_BIT(CAP_BIT_FEAT_I8MM, FEAT_I8MM);
+ CHECK_BIT(CAP_BIT_FEAT_DIT, FEAT_DIT);
+ CHECK_BIT(CAP_BIT_FEAT_FP16, FEAT_FP16);
+ CHECK_BIT(CAP_BIT_FEAT_SSBS, FEAT_SSBS2);
+ CHECK_BIT(CAP_BIT_FEAT_BTI, FEAT_BTI);
+ CHECK_BIT(CAP_BIT_AdvSIMD, FEAT_SIMD);
+ CHECK_BIT(CAP_BIT_CRC32, FEAT_CRC);
+ CHECK_BIT(CAP_BIT_FEAT_SME, FEAT_SME);
+ CHECK_BIT(CAP_BIT_FEAT_SME2, FEAT_SME2);
+ CHECK_BIT(CAP_BIT_FEAT_SME_F64F64, FEAT_SME_F64);
+ CHECK_BIT(CAP_BIT_FEAT_SME_I16I64, FEAT_SME_I64);
+
+ features = deriveImplicitFeatures(features);
__atomic_store(&__aarch64_cpu_features.features, &features,
__ATOMIC_RELAXED);
+ return;
}
+#endif
+
+ // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
+ static const struct {
+ const char *sysctl_name;
+ enum CPUFeatures feature;
+ } feature_checks[] = {
+ {"hw.optional.arm.FEAT_FlagM", FEAT_FLAGM},
+ {"hw.optional.arm.FEAT_FlagM2", FEAT_FLAGM2},
+ {"hw.optional.arm.FEAT_FHM", FEAT_FP16FML},
+ {"hw.optional.arm.FEAT_DotProd", FEAT_DOTPROD},
+ {"hw.optional.arm.FEAT_RDM", FEAT_RDM},
+ {"hw.optional.arm.FEAT_LSE", FEAT_LSE},
+ {"hw.optional.AdvSIMD", FEAT_SIMD},
+ {"hw.optional.armv8_crc32", FEAT_CRC},
+ {"hw.optional.arm.FEAT_SHA1", FEAT_SHA1},
+ {"hw.optional.arm.FEAT_SHA256", FEAT_SHA2},
+ {"hw.optional.arm.FEAT_SHA3", FEAT_SHA3},
+ {"hw.optional.arm.FEAT_AES", FEAT_AES},
+ {"hw.optional.arm.FEAT_PMULL", FEAT_PMULL},
+ {"hw.optional.arm.FEAT_FP16", FEAT_FP16},
+ {"hw.optional.arm.FEAT_DIT", FEAT_DIT},
+ {"hw.optional.arm.FEAT_DPB", FEAT_DPB},
+ {"hw.optional.arm.FEAT_DPB2", FEAT_DPB2},
+ {"hw.optional.arm.FEAT_JSCVT", FEAT_JSCVT},
+ {"hw.optional.arm.FEAT_FCMA", FEAT_FCMA},
+ {"hw.optional.arm.FEAT_LRCPC", FEAT_RCPC},
+ {"hw.optional.arm.FEAT_LRCPC2", FEAT_RCPC2},
+ {"hw.optional.arm.FEAT_FRINTTS", FEAT_FRINTTS},
+ {"hw.optional.arm.FEAT_I8MM", FEAT_I8MM},
+ {"hw.optional.arm.FEAT_BF16", FEAT_BF16},
+ {"hw.optional.arm.FEAT_SB", FEAT_SB},
+ {"hw.optional.arm.FEAT_SPECRES", FEAT_PREDRES},
+ {"hw.optional.arm.FEAT_SSBS", FEAT_SSBS2},
+ {"hw.optional.arm.FEAT_BTI", FEAT_BTI},
+ };
+
+ for (size_t I = 0, E = sizeof(feature_checks) / sizeof(feature_checks[0]);
+ I != E; ++I)
+ if (isKnownAndSupported(feature_checks[I].sysctl_name))
+ features |= (1ULL << feature_checks[I].feature);
+
+ features = deriveImplicitFeatures(features);
+
+ __atomic_store(&__aarch64_cpu_features.features, &features,
+ __ATOMIC_RELAXED);
}
#endif // TARGET_OS_OSX || TARGET_OS_IPHONE
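
Editor's note: the fast path reads every capability from one hw.optional.arm.caps bit vector; the fallback issues one sysctl per feature through isKnownAndSupported(). A standalone sketch of that probe, with the same "unknown or zero means unsupported" rule:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <sys/sysctl.h>

    // Mirrors isKnownAndSupported(): a missing sysctl name and a zero
    // value are both treated as "feature not available".
    static bool probe(const char *name) {
      int32_t val = 0;
      size_t size = sizeof(val);
      if (sysctlbyname(name, &val, &size, NULL, 0) != 0)
        return false;
      return val != 0;
    }
    // Usage: probe("hw.optional.arm.FEAT_LSE")
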
diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/freebsd.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/freebsd.inc
index 793adef44b9..aa975dc854f 100644
--- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/freebsd.inc
+++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/freebsd.inc
@@ -1,6 +1,6 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
__init_cpu_features_constructor(hwcap, arg);
@@ -10,7 +10,7 @@ void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
unsigned long hwcap = 0;
unsigned long hwcap2 = 0;
// CPU features already initialized.
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
int res = 0;
diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/fuchsia.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/fuchsia.inc
index 329b6b43a8a..fd0800dd11e 100644
--- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/fuchsia.inc
+++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/fuchsia.inc
@@ -2,7 +2,7 @@
#include <zircon/syscalls.h>
void __init_cpu_features_resolver() {
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
// This ensures the vDSO is a direct link-time dependency of anything that
@@ -13,8 +13,8 @@ void __init_cpu_features_resolver() {
if (status != ZX_OK)
return;
-#define setCPUFeature(cpu_feature) \
- __aarch64_cpu_features.features |= 1ULL << cpu_feature
+ unsigned long long feat = 0;
+#define setCPUFeature(cpu_feature) feat |= 1ULL << cpu_feature
if (features & ZX_ARM64_FEATURE_ISA_FP)
setCPUFeature(FEAT_FP);
@@ -48,4 +48,6 @@ void __init_cpu_features_resolver() {
setCPUFeature(FEAT_SVE);
setCPUFeature(FEAT_INIT);
+
+ __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
}
diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/mrs.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/mrs.inc
index 32a21a2fba9..e4d5e7f2bd7 100644
--- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/mrs.inc
+++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/mrs.inc
@@ -3,11 +3,10 @@
#define HAVE_SYS_AUXV_H
#endif
-
-
static void __init_cpu_features_constructor(unsigned long hwcap,
const __ifunc_arg_t *arg) {
-#define setCPUFeature(F) __aarch64_cpu_features.features |= 1ULL << F
+ unsigned long long feat = 0;
+#define setCPUFeature(F) feat |= 1ULL << F
#define getCPUFeature(id, ftr) __asm__("mrs %0, " #id : "=r"(ftr))
#define extractBits(val, start, number) \
(val & ((1ULL << number) - 1ULL) << start) >> start
@@ -20,26 +19,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_PMULL);
if (hwcap & HWCAP_FLAGM)
setCPUFeature(FEAT_FLAGM);
- if (hwcap2 & HWCAP2_FLAGM2) {
- setCPUFeature(FEAT_FLAGM);
+ if (hwcap2 & HWCAP2_FLAGM2)
setCPUFeature(FEAT_FLAGM2);
- }
- if (hwcap & HWCAP_SM3 && hwcap & HWCAP_SM4)
+ if (hwcap & HWCAP_SM4)
setCPUFeature(FEAT_SM4);
if (hwcap & HWCAP_ASIMDDP)
setCPUFeature(FEAT_DOTPROD);
if (hwcap & HWCAP_ASIMDFHM)
setCPUFeature(FEAT_FP16FML);
- if (hwcap & HWCAP_FPHP) {
+ if (hwcap & HWCAP_FPHP)
setCPUFeature(FEAT_FP16);
- setCPUFeature(FEAT_FP);
- }
if (hwcap & HWCAP_DIT)
setCPUFeature(FEAT_DIT);
if (hwcap & HWCAP_ASIMDRDM)
setCPUFeature(FEAT_RDM);
- if (hwcap & HWCAP_ILRCPC)
- setCPUFeature(FEAT_RCPC2);
if (hwcap & HWCAP_AES)
setCPUFeature(FEAT_AES);
if (hwcap & HWCAP_SHA1)
@@ -52,23 +45,20 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_FCMA);
if (hwcap & HWCAP_SB)
setCPUFeature(FEAT_SB);
- if (hwcap & HWCAP_SSBS)
+ if (hwcap & HWCAP_SSBS) {
+ setCPUFeature(FEAT_SSBS);
setCPUFeature(FEAT_SSBS2);
+ }
if (hwcap2 & HWCAP2_MTE) {
setCPUFeature(FEAT_MEMTAG);
setCPUFeature(FEAT_MEMTAG2);
}
- if (hwcap2 & HWCAP2_MTE3) {
- setCPUFeature(FEAT_MEMTAG);
- setCPUFeature(FEAT_MEMTAG2);
+ if (hwcap2 & HWCAP2_MTE3)
setCPUFeature(FEAT_MEMTAG3);
- }
if (hwcap2 & HWCAP2_SVEAES)
setCPUFeature(FEAT_SVE_AES);
- if (hwcap2 & HWCAP2_SVEPMULL) {
- setCPUFeature(FEAT_SVE_AES);
+ if (hwcap2 & HWCAP2_SVEPMULL)
setCPUFeature(FEAT_SVE_PMULL128);
- }
if (hwcap2 & HWCAP2_SVEBITPERM)
setCPUFeature(FEAT_SVE_BITPERM);
if (hwcap2 & HWCAP2_SVESHA3)
@@ -105,6 +95,8 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_WFXT);
if (hwcap2 & HWCAP2_SME)
setCPUFeature(FEAT_SME);
+ if (hwcap2 & HWCAP2_SME2)
+ setCPUFeature(FEAT_SME2);
if (hwcap2 & HWCAP2_SME_I16I64)
setCPUFeature(FEAT_SME_I64);
if (hwcap2 & HWCAP2_SME_F64F64)
@@ -113,86 +105,45 @@ static void __init_cpu_features_constructor(unsigned long hwcap,
setCPUFeature(FEAT_MOPS);
if (hwcap & HWCAP_CPUID) {
unsigned long ftr;
- getCPUFeature(ID_AA64PFR1_EL1, ftr);
- // ID_AA64PFR1_EL1.MTE >= 0b0001
- if (extractBits(ftr, 8, 4) >= 0x1)
- setCPUFeature(FEAT_MEMTAG);
- // ID_AA64PFR1_EL1.SSBS == 0b0001
- if (extractBits(ftr, 4, 4) == 0x1)
- setCPUFeature(FEAT_SSBS);
- // ID_AA64PFR1_EL1.SME == 0b0010
- if (extractBits(ftr, 24, 4) == 0x2)
- setCPUFeature(FEAT_SME2);
- getCPUFeature(ID_AA64PFR0_EL1, ftr);
- // ID_AA64PFR0_EL1.FP != 0b1111
- if (extractBits(ftr, 16, 4) != 0xF) {
- setCPUFeature(FEAT_FP);
- // ID_AA64PFR0_EL1.AdvSIMD has the same value as ID_AA64PFR0_EL1.FP
- setCPUFeature(FEAT_SIMD);
- }
- // ID_AA64PFR0_EL1.SVE != 0b0000
- if (extractBits(ftr, 32, 4) != 0x0) {
- // get ID_AA64ZFR0_EL1, that name supported
- // if sve enabled only
- getCPUFeature(S3_0_C0_C4_4, ftr);
- // ID_AA64ZFR0_EL1.SVEver == 0b0000
- if (extractBits(ftr, 0, 4) == 0x0)
- setCPUFeature(FEAT_SVE);
- // ID_AA64ZFR0_EL1.SVEver == 0b0001
- if (extractBits(ftr, 0, 4) == 0x1)
- setCPUFeature(FEAT_SVE2);
- // ID_AA64ZFR0_EL1.BF16 != 0b0000
- if (extractBits(ftr, 20, 4) != 0x0)
- setCPUFeature(FEAT_SVE_BF16);
- }
- getCPUFeature(ID_AA64ISAR0_EL1, ftr);
- // ID_AA64ISAR0_EL1.SHA3 != 0b0000
- if (extractBits(ftr, 32, 4) != 0x0)
- setCPUFeature(FEAT_SHA3);
+
getCPUFeature(ID_AA64ISAR1_EL1, ftr);
- // ID_AA64ISAR1_EL1.DPB >= 0b0001
- if (extractBits(ftr, 0, 4) >= 0x1)
- setCPUFeature(FEAT_DPB);
- // ID_AA64ISAR1_EL1.LRCPC != 0b0000
- if (extractBits(ftr, 20, 4) != 0x0)
- setCPUFeature(FEAT_RCPC);
- // ID_AA64ISAR1_EL1.LRCPC == 0b0011
- if (extractBits(ftr, 20, 4) == 0x3)
- setCPUFeature(FEAT_RCPC3);
- // ID_AA64ISAR1_EL1.SPECRES == 0b0001
- if (extractBits(ftr, 40, 4) == 0x2)
+ /* ID_AA64ISAR1_EL1.SPECRES >= 0b0001 */
+ if (extractBits(ftr, 40, 4) >= 0x1)
setCPUFeature(FEAT_PREDRES);
- // ID_AA64ISAR1_EL1.BF16 != 0b0000
- if (extractBits(ftr, 44, 4) != 0x0)
- setCPUFeature(FEAT_BF16);
- // ID_AA64ISAR1_EL1.LS64 >= 0b0001
+ /* ID_AA64ISAR1_EL1.LS64 >= 0b0001 */
if (extractBits(ftr, 60, 4) >= 0x1)
setCPUFeature(FEAT_LS64);
- // ID_AA64ISAR1_EL1.LS64 >= 0b0010
+ /* ID_AA64ISAR1_EL1.LS64 >= 0b0010 */
if (extractBits(ftr, 60, 4) >= 0x2)
setCPUFeature(FEAT_LS64_V);
- // ID_AA64ISAR1_EL1.LS64 >= 0b0011
+ /* ID_AA64ISAR1_EL1.LS64 >= 0b0011 */
if (extractBits(ftr, 60, 4) >= 0x3)
setCPUFeature(FEAT_LS64_ACCDATA);
- } else {
- // Set some features in case of no CPUID support
- if (hwcap & (HWCAP_FP | HWCAP_FPHP)) {
- setCPUFeature(FEAT_FP);
- // FP and AdvSIMD fields have the same value
- setCPUFeature(FEAT_SIMD);
- }
- if (hwcap & HWCAP_DCPOP || hwcap2 & HWCAP2_DCPODP)
- setCPUFeature(FEAT_DPB);
- if (hwcap & HWCAP_LRCPC || hwcap & HWCAP_ILRCPC)
- setCPUFeature(FEAT_RCPC);
- if (hwcap2 & HWCAP2_BF16 || hwcap2 & HWCAP2_EBF16)
- setCPUFeature(FEAT_BF16);
- if (hwcap2 & HWCAP2_SVEBF16)
- setCPUFeature(FEAT_SVE_BF16);
- if (hwcap2 & HWCAP2_SVE2 && hwcap & HWCAP_SVE)
- setCPUFeature(FEAT_SVE2);
- if (hwcap & HWCAP_SHA3)
- setCPUFeature(FEAT_SHA3);
}
+ if (hwcap & HWCAP_FP) {
+ setCPUFeature(FEAT_FP);
+ // FP and AdvSIMD fields have the same value
+ setCPUFeature(FEAT_SIMD);
+ }
+ if (hwcap & HWCAP_DCPOP)
+ setCPUFeature(FEAT_DPB);
+ if (hwcap & HWCAP_LRCPC)
+ setCPUFeature(FEAT_RCPC);
+ if (hwcap & HWCAP_ILRCPC)
+ setCPUFeature(FEAT_RCPC2);
+ if (hwcap2 & HWCAP2_LRCPC3)
+ setCPUFeature(FEAT_RCPC3);
+ if (hwcap2 & HWCAP2_BF16)
+ setCPUFeature(FEAT_BF16);
+ if (hwcap2 & HWCAP2_SVEBF16)
+ setCPUFeature(FEAT_SVE_BF16);
+ if (hwcap & HWCAP_SVE)
+ setCPUFeature(FEAT_SVE);
+ if (hwcap2 & HWCAP2_SVE2)
+ setCPUFeature(FEAT_SVE2);
+ if (hwcap & HWCAP_SHA3)
+ setCPUFeature(FEAT_SHA3);
setCPUFeature(FEAT_INIT);
+
+ __atomic_store_n(&__aarch64_cpu_features.features, feat, __ATOMIC_RELAXED);
}
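
Editor's note: extractBits(val, start, number) isolates a number-bit field of an ID register beginning at bit start. A small self-checking example of the LS64 decoding above; the register value is invented for illustration:

    #include <assert.h>

    #define extractBits(val, start, number) \
      ((val & (((1ULL << number) - 1ULL) << start)) >> start)

    int main(void) {
      // Pretend ID_AA64ISAR1_EL1 reports LS64 (bits 63:60) == 0b0010.
      unsigned long long ftr = 0x2ULL << 60;
      assert(extractBits(ftr, 60, 4) >= 0x1); // FEAT_LS64
      assert(extractBits(ftr, 60, 4) >= 0x2); // FEAT_LS64_V
      assert(extractBits(ftr, 60, 4) < 0x3);  // not FEAT_LS64_ACCDATA
      return 0;
    }
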
diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/sysauxv.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/sysauxv.inc
index fb5722c4306..486f77a1e4d 100644
--- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/sysauxv.inc
+++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/fmv/sysauxv.inc
@@ -1,13 +1,13 @@
void __init_cpu_features_resolver(unsigned long hwcap,
const __ifunc_arg_t *arg) {
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
__init_cpu_features_constructor(hwcap, arg);
}
void CONSTRUCTOR_ATTRIBUTE __init_cpu_features(void) {
// CPU features already initialized.
- if (__aarch64_cpu_features.features)
+ if (__atomic_load_n(&__aarch64_cpu_features.features, __ATOMIC_RELAXED))
return;
unsigned long hwcap = getauxval(AT_HWCAP);
diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/hwcap.inc b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/hwcap.inc
index 7ddc125b26d..41aba82ef95 100644
--- a/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/hwcap.inc
+++ b/contrib/libs/cxxsupp/builtins/cpu_model/aarch64/hwcap.inc
@@ -178,6 +178,12 @@
#ifndef HWCAP2_SVE_EBF16
#define HWCAP2_SVE_EBF16 (1ULL << 33)
#endif
+#ifndef HWCAP2_SME2
+#define HWCAP2_SME2 (1UL << 37)
+#endif
#ifndef HWCAP2_MOPS
#define HWCAP2_MOPS (1ULL << 43)
#endif
+#ifndef HWCAP2_LRCPC3
+#define HWCAP2_LRCPC3 (1UL << 46)
+#endif
diff --git a/contrib/libs/cxxsupp/builtins/cpu_model/x86.c b/contrib/libs/cxxsupp/builtins/cpu_model/x86.c
index 0750e29f989..b1c4abd9d11 100644
--- a/contrib/libs/cxxsupp/builtins/cpu_model/x86.c
+++ b/contrib/libs/cxxsupp/builtins/cpu_model/x86.c
@@ -59,6 +59,7 @@ enum ProcessorTypes {
INTEL_SIERRAFOREST,
INTEL_GRANDRIDGE,
INTEL_CLEARWATERFOREST,
+ AMDFAM1AH,
CPU_TYPE_MAX
};
@@ -97,6 +98,7 @@ enum ProcessorSubtypes {
INTEL_COREI7_ARROWLAKE,
INTEL_COREI7_ARROWLAKE_S,
INTEL_COREI7_PANTHERLAKE,
+ AMDFAM1AH_ZNVER5,
CPU_SUBTYPE_MAX
};
@@ -139,20 +141,88 @@ enum ProcessorFeatures {
FEATURE_AVX512BITALG,
FEATURE_AVX512BF16,
FEATURE_AVX512VP2INTERSECT,
-
- FEATURE_CMPXCHG16B = 46,
- FEATURE_F16C = 49,
+ // FIXME: Below Features has some missings comparing to gcc, it's because gcc
+ // has some not one-to-one mapped in llvm.
+ // FEATURE_3DNOW,
+ // FEATURE_3DNOWP,
+ FEATURE_ADX = 40,
+ // FEATURE_ABM,
+ FEATURE_CLDEMOTE = 42,
+ FEATURE_CLFLUSHOPT,
+ FEATURE_CLWB,
+ FEATURE_CLZERO,
+ FEATURE_CMPXCHG16B,
+ // FIXME: Not adding FEATURE_CMPXCHG8B is a workaround to make 'generic' as
+ // a cpu string with no X86_FEATURE_COMPAT features, which is required in
+ // current implementantion of cpu_specific/cpu_dispatch FMV feature.
+ // FEATURE_CMPXCHG8B,
+ FEATURE_ENQCMD = 48,
+ FEATURE_F16C,
+ FEATURE_FSGSBASE,
+ // FEATURE_FXSAVE,
+ // FEATURE_HLE,
+ // FEATURE_IBT,
FEATURE_LAHF_LM = 54,
FEATURE_LM,
- FEATURE_WP,
+ FEATURE_LWP,
FEATURE_LZCNT,
FEATURE_MOVBE,
-
- FEATURE_AVX512FP16 = 94,
+ FEATURE_MOVDIR64B,
+ FEATURE_MOVDIRI,
+ FEATURE_MWAITX,
+ // FEATURE_OSXSAVE,
+ FEATURE_PCONFIG = 63,
+ FEATURE_PKU,
+ FEATURE_PREFETCHWT1,
+ FEATURE_PRFCHW,
+ FEATURE_PTWRITE,
+ FEATURE_RDPID,
+ FEATURE_RDRND,
+ FEATURE_RDSEED,
+ FEATURE_RTM,
+ FEATURE_SERIALIZE,
+ FEATURE_SGX,
+ FEATURE_SHA,
+ FEATURE_SHSTK,
+ FEATURE_TBM,
+ FEATURE_TSXLDTRK,
+ FEATURE_VAES,
+ FEATURE_WAITPKG,
+ FEATURE_WBNOINVD,
+ FEATURE_XSAVE,
+ FEATURE_XSAVEC,
+ FEATURE_XSAVEOPT,
+ FEATURE_XSAVES,
+ FEATURE_AMX_TILE,
+ FEATURE_AMX_INT8,
+ FEATURE_AMX_BF16,
+ FEATURE_UINTR,
+ FEATURE_HRESET,
+ FEATURE_KL,
+ // FEATURE_AESKLE,
+ FEATURE_WIDEKL = 92,
+ FEATURE_AVXVNNI,
+ FEATURE_AVX512FP16,
FEATURE_X86_64_BASELINE,
FEATURE_X86_64_V2,
FEATURE_X86_64_V3,
FEATURE_X86_64_V4,
+ FEATURE_AVXIFMA,
+ FEATURE_AVXVNNIINT8,
+ FEATURE_AVXNECONVERT,
+ FEATURE_CMPCCXADD,
+ FEATURE_AMX_FP16,
+ FEATURE_PREFETCHI,
+ FEATURE_RAOINT,
+ FEATURE_AMX_COMPLEX,
+ FEATURE_AVXVNNIINT16,
+ FEATURE_SM3,
+ FEATURE_SHA512,
+ FEATURE_SM4,
+ FEATURE_APXF,
+ FEATURE_USERMSR,
+ FEATURE_AVX10_1_256,
+ FEATURE_AVX10_1_512,
CPU_FEATURE_MAX
};
@@ -299,13 +369,13 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family,
}
}
+#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
+
static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
unsigned Model,
const unsigned *Features,
unsigned *Type,
unsigned *Subtype) {
-#define testFeature(F) (Features[F / 32] & (1 << (F % 32))) != 0
-
// We select CPU strings to match the code in Host.cpp, but we don't use them
// in compiler-rt.
const char *CPU = 0;
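
Editor's note: hoisting testFeature to file scope lets getAMDProcessorTypeAndSubtype reuse it. Feature F occupies bit F%32 of 32-bit word F/32 in the Features array; a sketch of the layout with its setter (setFeature's definition in x86.c is assumed to mirror this):

    // Packed bit-array layout shared by setFeature/testFeature.
    unsigned Features[(CPU_FEATURE_MAX + 31) / 32] = {0};

    #define setFeature(F)  (Features[(F) / 32] |= 1U << ((F) % 32))
    #define testFeature(F) ((Features[(F) / 32] & (1U << ((F) % 32))) != 0)
    // setFeature(FEATURE_SSE3); then testFeature(FEATURE_SSE3) == 1
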
@@ -594,14 +664,48 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
const unsigned *Features,
unsigned *Type,
unsigned *Subtype) {
- // We select CPU strings to match the code in Host.cpp, but we don't use them
- // in compiler-rt.
const char *CPU = 0;
switch (Family) {
+ case 4:
+ CPU = "i486";
+ break;
+ case 5:
+ CPU = "pentium";
+ switch (Model) {
+ case 6:
+ case 7:
+ CPU = "k6";
+ break;
+ case 8:
+ CPU = "k6-2";
+ break;
+ case 9:
+ case 13:
+ CPU = "k6-3";
+ break;
+ case 10:
+ CPU = "geode";
+ break;
+ }
+ break;
+ case 6:
+ if (testFeature(FEATURE_SSE)) {
+ CPU = "athlon-xp";
+ break;
+ }
+ CPU = "athlon";
+ break;
+ case 15:
+ if (testFeature(FEATURE_SSE3)) {
+ CPU = "k8-sse3";
+ break;
+ }
+ CPU = "k8";
+ break;
case 16:
CPU = "amdfam10";
- *Type = AMDFAM10H;
+ *Type = AMDFAM10H; // "amdfam10"
switch (Model) {
case 2:
*Subtype = AMDFAM10H_BARCELONA;
@@ -677,7 +781,7 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
case 25:
CPU = "znver3";
*Type = AMDFAM19H;
- if ((Model <= 0x0f) || (Model >= 0x20 && Model <= 0x2f) ||
+ if (Model <= 0x0f || (Model >= 0x20 && Model <= 0x2f) ||
(Model >= 0x30 && Model <= 0x3f) || (Model >= 0x40 && Model <= 0x4f) ||
(Model >= 0x50 && Model <= 0x5f)) {
// Family 19h Models 00h-0Fh (Genesis, Chagall) Zen 3
@@ -701,6 +805,24 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
break; // "znver4"
}
break; // family 19h
+ case 26:
+ CPU = "znver5";
+ *Type = AMDFAM1AH;
+ if (Model <= 0x77) {
+ // Models 00h-0Fh (Breithorn).
+ // Models 10h-1Fh (Breithorn-Dense).
+ // Models 20h-2Fh (Strix 1).
+ // Models 30h-37h (Strix 2).
+ // Models 38h-3Fh (Strix 3).
+ // Models 40h-4Fh (Granite Ridge).
+ // Models 50h-5Fh (Weisshorn).
+ // Models 60h-6Fh (Krackan1).
+ // Models 70h-77h (Sarlak).
+ CPU = "znver5";
+ *Subtype = AMDFAM1AH_ZNVER5;
+ break; // "znver5"
+ }
+ break;
default:
break; // Unknown AMD CPU.
}
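
Editor's note: family 26 here is 0x1Ah, produced by CPUID's extended-family encoding. A hedged sketch of the decoding that detectX86FamilyModel() is documented to perform:

    // Decode family/model from CPUID leaf 1 EAX. For base family 0xF the
    // extended family is added and the extended model is prepended, which
    // is how AMD family 0x1A (Zen 5) is reported.
    static void decode(unsigned eax, unsigned *family, unsigned *model) {
      *family = (eax >> 8) & 0xF;
      *model = (eax >> 4) & 0xF;
      if (*family == 6 || *family == 0xF) {
        if (*family == 0xF)
          *family += (eax >> 20) & 0xFF;    // extended family
        *model += ((eax >> 16) & 0xF) << 4; // extended model
      }
    }
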
@@ -708,6 +830,8 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
return CPU;
}
+#undef testFeature
+
static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
unsigned *Features) {
unsigned EAX = 0, EBX = 0;
@@ -746,13 +870,15 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_AES);
if ((ECX >> 29) & 1)
setFeature(FEATURE_F16C);
+ if ((ECX >> 30) & 1)
+ setFeature(FEATURE_RDRND);
// If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV
// indicates that the AVX registers will be saved and restored on context
// switch, then we have full AVX support.
const unsigned AVXBits = (1 << 27) | (1 << 28);
- bool HasAVX = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
- ((EAX & 0x6) == 0x6);
+ bool HasAVXSave = ((ECX & AVXBits) == AVXBits) && !getX86XCR0(&EAX, &EDX) &&
+ ((EAX & 0x6) == 0x6);
#if defined(__APPLE__)
// Darwin lazily saves the AVX512 context on first use: trust that the OS will
// save the AVX512 context if we use AVX512 instructions, even the bit is not
@@ -760,45 +886,76 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
bool HasAVX512Save = true;
#else
// AVX512 requires additional context to be saved by the OS.
- bool HasAVX512Save = HasAVX && ((EAX & 0xe0) == 0xe0);
+ bool HasAVX512Save = HasAVXSave && ((EAX & 0xe0) == 0xe0);
#endif
+ // AMX requires additional context to be saved by the OS.
+ const unsigned AMXBits = (1 << 17) | (1 << 18);
+ bool HasXSave = ((ECX >> 27) & 1) && !getX86XCR0(&EAX, &EDX);
+ bool HasAMXSave = HasXSave && ((EAX & AMXBits) == AMXBits);
- if (HasAVX)
+ if (HasAVXSave)
setFeature(FEATURE_AVX);
+ if (((ECX >> 26) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVE);
+
bool HasLeaf7 =
MaxLeaf >= 0x7 && !getX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7 && ((EBX >> 0) & 1))
+ setFeature(FEATURE_FSGSBASE);
+ if (HasLeaf7 && ((EBX >> 2) & 1))
+ setFeature(FEATURE_SGX);
if (HasLeaf7 && ((EBX >> 3) & 1))
setFeature(FEATURE_BMI);
- if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVX)
+ if (HasLeaf7 && ((EBX >> 5) & 1) && HasAVXSave)
setFeature(FEATURE_AVX2);
if (HasLeaf7 && ((EBX >> 8) & 1))
setFeature(FEATURE_BMI2);
+ if (HasLeaf7 && ((EBX >> 11) & 1))
+ setFeature(FEATURE_RTM);
if (HasLeaf7 && ((EBX >> 16) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512F);
if (HasLeaf7 && ((EBX >> 17) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512DQ);
+ if (HasLeaf7 && ((EBX >> 18) & 1))
+ setFeature(FEATURE_RDSEED);
+ if (HasLeaf7 && ((EBX >> 19) & 1))
+ setFeature(FEATURE_ADX);
if (HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512IFMA);
+ if (HasLeaf7 && ((EBX >> 24) & 1))
+ setFeature(FEATURE_CLWB);
if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512PF);
if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512ER);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512CD);
+ if (HasLeaf7 && ((EBX >> 29) & 1))
+ setFeature(FEATURE_SHA);
if (HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512BW);
if (HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VL);
+ if (HasLeaf7 && ((ECX >> 0) & 1))
+ setFeature(FEATURE_PREFETCHWT1);
if (HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VBMI);
+ if (HasLeaf7 && ((ECX >> 4) & 1))
+ setFeature(FEATURE_PKU);
+ if (HasLeaf7 && ((ECX >> 5) & 1))
+ setFeature(FEATURE_WAITPKG);
if (HasLeaf7 && ((ECX >> 6) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VBMI2);
+ if (HasLeaf7 && ((ECX >> 7) & 1))
+ setFeature(FEATURE_SHSTK);
if (HasLeaf7 && ((ECX >> 8) & 1))
setFeature(FEATURE_GFNI);
- if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVX)
+ if (HasLeaf7 && ((ECX >> 9) & 1) && HasAVXSave)
+ setFeature(FEATURE_VAES);
+ if (HasLeaf7 && ((ECX >> 10) & 1) && HasAVXSave)
setFeature(FEATURE_VPCLMULQDQ);
if (HasLeaf7 && ((ECX >> 11) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VNNI);
@@ -806,23 +963,100 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_AVX512BITALG);
if (HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VPOPCNTDQ);
+ if (HasLeaf7 && ((ECX >> 22) & 1))
+ setFeature(FEATURE_RDPID);
+ if (HasLeaf7 && ((ECX >> 23) & 1))
+ setFeature(FEATURE_KL);
+ if (HasLeaf7 && ((ECX >> 25) & 1))
+ setFeature(FEATURE_CLDEMOTE);
+ if (HasLeaf7 && ((ECX >> 27) & 1))
+ setFeature(FEATURE_MOVDIRI);
+ if (HasLeaf7 && ((ECX >> 28) & 1))
+ setFeature(FEATURE_MOVDIR64B);
+ if (HasLeaf7 && ((ECX >> 29) & 1))
+ setFeature(FEATURE_ENQCMD);
if (HasLeaf7 && ((EDX >> 2) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX5124VNNIW);
if (HasLeaf7 && ((EDX >> 3) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX5124FMAPS);
+ if (HasLeaf7 && ((EDX >> 5) & 1))
+ setFeature(FEATURE_UINTR);
if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512VP2INTERSECT);
+ if (HasLeaf7 && ((EDX >> 14) & 1))
+ setFeature(FEATURE_SERIALIZE);
+ if (HasLeaf7 && ((EDX >> 16) & 1))
+ setFeature(FEATURE_TSXLDTRK);
+ if (HasLeaf7 && ((EDX >> 18) & 1))
+ setFeature(FEATURE_PCONFIG);
+ if (HasLeaf7 && ((EDX >> 22) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_BF16);
if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512FP16);
+ if (HasLeaf7 && ((EDX >> 24) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_TILE);
+ if (HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_INT8);
// EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't
  // return all 0s for invalid subleaves, so check the limit.
bool HasLeaf7Subleaf1 =
HasLeaf7 && EAX >= 1 &&
!getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7Subleaf1 && ((EAX >> 0) & 1))
+ setFeature(FEATURE_SHA512);
+ if (HasLeaf7Subleaf1 && ((EAX >> 1) & 1))
+ setFeature(FEATURE_SM3);
+ if (HasLeaf7Subleaf1 && ((EAX >> 2) & 1))
+ setFeature(FEATURE_SM4);
+ if (HasLeaf7Subleaf1 && ((EAX >> 3) & 1))
+ setFeature(FEATURE_RAOINT);
+ if (HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXVNNI);
if (HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save)
setFeature(FEATURE_AVX512BF16);
+ if (HasLeaf7Subleaf1 && ((EAX >> 7) & 1))
+ setFeature(FEATURE_CMPCCXADD);
+ if (HasLeaf7Subleaf1 && ((EAX >> 21) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_FP16);
+ if (HasLeaf7Subleaf1 && ((EAX >> 22) & 1))
+ setFeature(FEATURE_HRESET);
+ if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXIFMA);
+
+ if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXVNNIINT8);
+ if (HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXNECONVERT);
+ if (HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave)
+ setFeature(FEATURE_AMX_COMPLEX);
+ if (HasLeaf7Subleaf1 && ((EDX >> 10) & 1) && HasAVXSave)
+ setFeature(FEATURE_AVXVNNIINT16);
+ if (HasLeaf7Subleaf1 && ((EDX >> 14) & 1))
+ setFeature(FEATURE_PREFETCHI);
+ if (HasLeaf7Subleaf1 && ((EDX >> 15) & 1))
+ setFeature(FEATURE_USERMSR);
+ if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1))
+ setFeature(FEATURE_AVX10_1_256);
+ if (HasLeaf7Subleaf1 && ((EDX >> 21) & 1))
+ setFeature(FEATURE_APXF);
+
+ unsigned MaxLevel;
+ getX86CpuIDAndInfo(0, &MaxLevel, &EBX, &ECX, &EDX);
+ bool HasLeafD = MaxLevel >= 0xd &&
+ !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeafD && ((EAX >> 0) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVEOPT);
+ if (HasLeafD && ((EAX >> 1) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVEC);
+ if (HasLeafD && ((EAX >> 3) & 1) && HasAVXSave)
+ setFeature(FEATURE_XSAVES);
+
+ bool HasLeaf24 =
+ MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24 && ((EBX >> 18) & 1))
+ setFeature(FEATURE_AVX10_1_512);
unsigned MaxExtLevel;
getX86CpuIDAndInfo(0x80000000, &MaxExtLevel, &EBX, &ECX, &EDX);
@@ -836,14 +1070,40 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_LZCNT);
if (((ECX >> 6) & 1))
setFeature(FEATURE_SSE4_A);
+ if (((ECX >> 8) & 1))
+ setFeature(FEATURE_PRFCHW);
if (((ECX >> 11) & 1))
setFeature(FEATURE_XOP);
+ if (((ECX >> 15) & 1))
+ setFeature(FEATURE_LWP);
if (((ECX >> 16) & 1))
setFeature(FEATURE_FMA4);
+ if (((ECX >> 21) & 1))
+ setFeature(FEATURE_TBM);
+ if (((ECX >> 29) & 1))
+ setFeature(FEATURE_MWAITX);
+
if (((EDX >> 29) & 1))
setFeature(FEATURE_LM);
}
+ bool HasExtLeaf8 = MaxExtLevel >= 0x80000008 &&
+ !getX86CpuIDAndInfo(0x80000008, &EAX, &EBX, &ECX, &EDX);
+ if (HasExtLeaf8 && ((EBX >> 0) & 1))
+ setFeature(FEATURE_CLZERO);
+ if (HasExtLeaf8 && ((EBX >> 9) & 1))
+ setFeature(FEATURE_WBNOINVD);
+
+ bool HasLeaf14 = MaxLevel >= 0x14 &&
+ !getX86CpuIDAndInfoEx(0x14, 0x0, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf14 && ((EBX >> 4) & 1))
+ setFeature(FEATURE_PTWRITE);
+
+ bool HasLeaf19 =
+ MaxLevel >= 0x19 && !getX86CpuIDAndInfo(0x19, &EAX, &EBX, &ECX, &EDX);
+ if (HasLeaf7 && HasLeaf19 && ((EBX >> 2) & 1))
+ setFeature(FEATURE_WIDEKL);
+
if (hasFeature(FEATURE_LM) && hasFeature(FEATURE_SSE2)) {
setFeature(FEATURE_X86_64_BASELINE);
if (hasFeature(FEATURE_CMPXCHG16B) && hasFeature(FEATURE_POPCNT) &&
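Aside, on the HasAVXSave/HasAMXSave logic above: CPUID only reports what the CPU implements, so OS support for the extended register state is probed separately through XGETBV. A minimal standalone sketch of the same AVX check, assuming a GCC/Clang toolchain (the helper name is hypothetical):

#include <cpuid.h>
#include <stdbool.h>

static bool avx_usable(void) {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    return false;
  // ECX bit 27 = OSXSAVE (XGETBV is available), bit 28 = AVX in hardware.
  const unsigned avx_bits = (1u << 27) | (1u << 28);
  if ((ecx & avx_bits) != avx_bits)
    return false;
  // XCR0 bits 1 (SSE state) and 2 (AVX state) must both be set before the
  // OS is guaranteed to save/restore YMM registers on context switch.
  unsigned lo, hi;
  __asm__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0));
  return (lo & 0x6) == 0x6;
}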
diff --git a/contrib/libs/cxxsupp/builtins/divtc3.c b/contrib/libs/cxxsupp/builtins/divtc3.c
index 099de5802da..c393de81533 100644
--- a/contrib/libs/cxxsupp/builtins/divtc3.c
+++ b/contrib/libs/cxxsupp/builtins/divtc3.c
@@ -13,7 +13,7 @@
#define QUAD_PRECISION
#include "fp_lib.h"
-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
// Returns: the quotient of (a + ib) / (c + id)
diff --git a/contrib/libs/cxxsupp/builtins/extendbfsf2.c b/contrib/libs/cxxsupp/builtins/extendbfsf2.c
new file mode 100644
index 00000000000..e159d7997f6
--- /dev/null
+++ b/contrib/libs/cxxsupp/builtins/extendbfsf2.c
@@ -0,0 +1,13 @@
+//===-- lib/extendbfsf2.c - bfloat -> single conversion -----------*- C -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#define SRC_BFLOAT16
+#define DST_SINGLE
+#include "fp_extend_impl.inc"
+
+COMPILER_RT_ABI float __extendbfsf2(src_t a) { return __extendXfYf2__(a); }
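The new __extendbfsf2 goes through the generic __extendXfYf2__ machinery, but for this particular pair the conversion is exact: bf16 keeps float's sign bit and 8-bit exponent and merely truncates the fraction to 7 bits, so widening is a 16-bit left shift of the bit pattern. An equivalent standalone sketch (illustrative, not the shipped implementation):

#include <stdint.h>
#include <string.h>

static float bf16_bits_to_float(uint16_t bits) {
  uint32_t widened = (uint32_t)bits << 16; // same sign/exponent, zero-padded fraction
  float f;
  memcpy(&f, &widened, sizeof f); // type-pun without aliasing violations
  return f;
}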
diff --git a/contrib/libs/cxxsupp/builtins/fp_add_impl.inc b/contrib/libs/cxxsupp/builtins/fp_add_impl.inc
index 7133358df9b..d20599921e7 100644
--- a/contrib/libs/cxxsupp/builtins/fp_add_impl.inc
+++ b/contrib/libs/cxxsupp/builtins/fp_add_impl.inc
@@ -91,7 +91,7 @@ static __inline fp_t __addXf3__(fp_t a, fp_t b) {
// Shift the significand of b by the difference in exponents, with a sticky
// bottom bit to get rounding correct.
- const unsigned int align = aExponent - bExponent;
+ const unsigned int align = (unsigned int)(aExponent - bExponent);
if (align) {
if (align < typeWidth) {
const bool sticky = (bSignificand << (typeWidth - align)) != 0;
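The "sticky" bit named in the comment OR-collapses every bit shifted out of the significand, so the later rounding step can still tell an exact half from a value just above one. A minimal sketch of the idea (hypothetical helper, not from this file):

#include <stdint.h>

// Right-shift a significand, folding all shifted-out bits into bit 0.
static uint32_t shift_right_sticky(uint32_t sig, unsigned amount) {
  if (amount == 0)
    return sig;
  if (amount >= 32)
    return sig != 0; // everything shifted out collapses to one sticky bit
  uint32_t sticky = (sig << (32 - amount)) != 0; // were any bits lost?
  return (sig >> amount) | sticky;
}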
diff --git a/contrib/libs/cxxsupp/builtins/fp_extend.h b/contrib/libs/cxxsupp/builtins/fp_extend.h
index 95ea2a7ac4b..22bf2b2514e 100644
--- a/contrib/libs/cxxsupp/builtins/fp_extend.h
+++ b/contrib/libs/cxxsupp/builtins/fp_extend.h
@@ -37,16 +37,7 @@ static const int srcSigFracBits = 52;
// srcBits - srcSigFracBits - 1
static const int srcExpBits = 11;
-static inline int src_rep_t_clz_impl(src_rep_t a) {
-#if defined __LP64__
- return __builtin_clzl(a);
-#else
- if (a & REP_C(0xffffffff00000000))
- return clzsi(a >> 32);
- else
- return 32 + clzsi(a & REP_C(0xffffffff));
-#endif
-}
+static inline int src_rep_t_clz_impl(src_rep_t a) { return __builtin_clzll(a); }
#define src_rep_t_clz src_rep_t_clz_impl
#elif defined SRC_80
@@ -81,6 +72,21 @@ static inline int src_rep_t_clz_impl(src_rep_t a) {
#define src_rep_t_clz src_rep_t_clz_impl
+#elif defined SRC_BFLOAT16
+#ifdef COMPILER_RT_HAS_BFLOAT16
+typedef __bf16 src_t;
+#else
+typedef uint16_t src_t;
+#endif
+typedef uint16_t src_rep_t;
+#define SRC_REP_C UINT16_C
+static const int srcBits = sizeof(src_t) * CHAR_BIT;
+static const int srcSigFracBits = 7;
+// -1 accounts for the sign bit.
+// srcBits - srcSigFracBits - 1
+static const int srcExpBits = 8;
+#define src_rep_t_clz __builtin_clz
+
#else
#error Source should be half, single, or double precision!
#endif // end source precision
diff --git a/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc b/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc
index 3556bad9990..2f2f77ce781 100644
--- a/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc
+++ b/contrib/libs/cxxsupp/builtins/fp_fixint_impl.inc
@@ -34,7 +34,7 @@ static __inline fixint_t __fixint(fp_t a) {
// If 0 <= exponent < significandBits, right shift to get the result.
// Otherwise, shift left.
if (exponent < significandBits)
- return sign * (significand >> (significandBits - exponent));
+ return (fixint_t)(sign * (significand >> (significandBits - exponent)));
else
- return sign * ((fixuint_t)significand << (exponent - significandBits));
+ return (fixint_t)(sign * ((fixuint_t)significand << (exponent - significandBits)));
}
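The added casts silence implicit-conversion warnings; the truncation semantics are unchanged. A quick sanity check of both branches (on soft-float targets the compiler lowers such casts to exactly these __fix* routines):

#include <assert.h>

int main(void) {
  assert((int)2.75f == 2);   // exponent < significandBits: right-shift branch
  assert((int)-2.75f == -2); // truncation is toward zero, not -infinity
  // 2^40 is exactly representable in float; its exponent exceeds the 23
  // stored fraction bits, so the left-shift branch is taken.
  assert((long long)1099511627776.0f == 1099511627776LL);
  return 0;
}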
diff --git a/contrib/libs/cxxsupp/builtins/fp_lib.h b/contrib/libs/cxxsupp/builtins/fp_lib.h
index c4f0a5b9587..b2a89506135 100644
--- a/contrib/libs/cxxsupp/builtins/fp_lib.h
+++ b/contrib/libs/cxxsupp/builtins/fp_lib.h
@@ -43,8 +43,8 @@ static __inline int rep_clz(rep_t a) { return clzsi(a); }
// 32x32 --> 64 bit multiply
static __inline void wideMultiply(rep_t a, rep_t b, rep_t *hi, rep_t *lo) {
const uint64_t product = (uint64_t)a * b;
- *hi = product >> 32;
- *lo = product;
+ *hi = (rep_t)(product >> 32);
+ *lo = (rep_t)product;
}
COMPILER_RT_ABI fp_t __addsf3(fp_t a, fp_t b);
@@ -58,16 +58,7 @@ typedef double fp_t;
#define REP_C UINT64_C
#define significandBits 52
-static __inline int rep_clz(rep_t a) {
-#if defined __LP64__
- return __builtin_clzl(a);
-#else
- if (a & REP_C(0xffffffff00000000))
- return clzsi(a >> 32);
- else
- return 32 + clzsi(a & REP_C(0xffffffff));
-#endif
-}
+static inline int rep_clz(rep_t a) { return __builtin_clzll(a); }
#define loWord(a) (a & 0xffffffffU)
#define hiWord(a) (a >> 32)
@@ -239,7 +230,7 @@ static __inline int normalize(rep_t *significand) {
return 1 - shift;
}
-static __inline void wideLeftShift(rep_t *hi, rep_t *lo, int count) {
+static __inline void wideLeftShift(rep_t *hi, rep_t *lo, unsigned int count) {
*hi = *hi << count | *lo >> (typeWidth - count);
*lo = *lo << count;
}
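Both removed clz fallbacks (here and the matching one in fp_extend.h) existed because __builtin_clzl counts leading zeros of an unsigned long, which is only 32 bits on ILP32 targets; __builtin_clzll takes an unsigned long long, 64 bits on every target compiler-rt builds for. The surviving one-liner, with its inherited precondition spelled out (sketch):

#include <stdint.h>

static int clz64(uint64_t a) {
  // Callers guarantee a != 0; __builtin_clzll(0) is undefined behavior.
  return __builtin_clzll(a);
}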
diff --git a/contrib/libs/cxxsupp/builtins/int_types.h b/contrib/libs/cxxsupp/builtins/int_types.h
index ca97391fc28..48862f36421 100644
--- a/contrib/libs/cxxsupp/builtins/int_types.h
+++ b/contrib/libs/cxxsupp/builtins/int_types.h
@@ -107,8 +107,8 @@ typedef union {
static __inline ti_int make_ti(di_int h, di_int l) {
twords r;
- r.s.high = h;
- r.s.low = l;
+ r.s.high = (du_int)h;
+ r.s.low = (du_int)l;
return r.all;
}
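make_ti reassembles a 128-bit integer from two 64-bit halves; the new casts make the signed-to-unsigned conversion of each half explicit without changing the bit pattern. A rough standalone equivalent (illustrative only):

#ifdef __SIZEOF_INT128__
static __int128 make_ti_sketch(long long h, long long l) {
  // High half shifted into the top 64 bits, low half OR'd in unsigned.
  return (__int128)(((unsigned __int128)(unsigned long long)h << 64) |
                    (unsigned long long)l);
}
#endif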
diff --git a/contrib/libs/cxxsupp/builtins/multc3.c b/contrib/libs/cxxsupp/builtins/multc3.c
index 61a3f45e472..a89832f0e88 100644
--- a/contrib/libs/cxxsupp/builtins/multc3.c
+++ b/contrib/libs/cxxsupp/builtins/multc3.c
@@ -15,7 +15,7 @@
#include "int_lib.h"
#include "int_math.h"
-#if defined(CRT_HAS_F128)
+#if defined(CRT_HAS_128BIT) && defined(CRT_HAS_F128)
// Returns: the product of a + ib and c + id
diff --git a/contrib/libs/cxxsupp/builtins/os_version_check.c b/contrib/libs/cxxsupp/builtins/os_version_check.c
index 182eabe7a6a..01fae834ab2 100644
--- a/contrib/libs/cxxsupp/builtins/os_version_check.c
+++ b/contrib/libs/cxxsupp/builtins/os_version_check.c
@@ -316,8 +316,8 @@ int32_t __isOSVersionAtLeast(int32_t Major, int32_t Minor, int32_t Subminor) {
static pthread_once_t once = PTHREAD_ONCE_INIT;
pthread_once(&once, readSystemProperties);
- return SdkVersion >= Major ||
- (IsPreRelease && Major == __ANDROID_API_FUTURE__);
+ // Allow all on pre-release. Note that we still rely on compile-time checks.
+ return SdkVersion >= Major || IsPreRelease;
}
#else
diff --git a/contrib/libs/cxxsupp/builtins/riscv/restore.S b/contrib/libs/cxxsupp/builtins/riscv/restore.S
index 6f43842c8ca..d87dfc1ac71 100644
--- a/contrib/libs/cxxsupp/builtins/riscv/restore.S
+++ b/contrib/libs/cxxsupp/builtins/riscv/restore.S
@@ -22,7 +22,7 @@
#if __riscv_xlen == 32
-#ifndef __riscv_32e
+#ifndef __riscv_abi_rve
.globl __riscv_restore_12
.type __riscv_restore_12,@function
@@ -109,7 +109,7 @@ __riscv_restore_0:
#elif __riscv_xlen == 64
-#ifndef __riscv_64e
+#ifndef __riscv_abi_rve
.globl __riscv_restore_12
.type __riscv_restore_12,@function
diff --git a/contrib/libs/cxxsupp/builtins/riscv/save.S b/contrib/libs/cxxsupp/builtins/riscv/save.S
index 3e044179ff7..6324e05e971 100644
--- a/contrib/libs/cxxsupp/builtins/riscv/save.S
+++ b/contrib/libs/cxxsupp/builtins/riscv/save.S
@@ -18,7 +18,7 @@
#if __riscv_xlen == 32
-#ifndef __riscv_32e
+#ifndef __riscv_abi_rve
.globl __riscv_save_12
.type __riscv_save_12,@function
@@ -115,7 +115,7 @@ __riscv_save_0:
#elif __riscv_xlen == 64
-#ifndef __riscv_64e
+#ifndef __riscv_abi_rve
.globl __riscv_save_12
.type __riscv_save_12,@function
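The guard now keys off the ABI rather than the ISA: the ILP32E/LP64E calling conventions make only s0/s1 callee-saved (s2-s11 are not part of the register set), so the __riscv_save_12 .. __riscv_save_2 entry points have nothing to spill, and an RVE ABI can be selected even when compiling for a non-E core. A small compile-time probe (illustrative):

#include <stdio.h>

int main(void) {
#if defined(__riscv_abi_rve)
  puts("RVE ABI: only the __riscv_save_1/_0 entry points apply");
#elif defined(__riscv)
  puts("standard RISC-V ABI: full __riscv_save_12.._0 set is available");
#else
  puts("not a RISC-V target");
#endif
  return 0;
}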
diff --git a/contrib/libs/cxxsupp/builtins/trampoline_setup.c b/contrib/libs/cxxsupp/builtins/trampoline_setup.c
index 844eb279441..830e25e4c03 100644
--- a/contrib/libs/cxxsupp/builtins/trampoline_setup.c
+++ b/contrib/libs/cxxsupp/builtins/trampoline_setup.c
@@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
__clear_cache(trampOnStack, &trampOnStack[10]);
}
#endif // __powerpc__ && !defined(__powerpc64__)
+
+// The AArch64 compiler generates calls to __trampoline_setup() when creating
+// trampoline functions on the stack for use with nested functions.
+// This function creates a custom 36-byte trampoline function on the stack,
+// which loads x18 with a pointer to the outer function's locals
+// and then jumps to the target nested function.
+// Note: x18 is a reserved platform register on Windows and macOS.
+
+#if defined(__aarch64__) && defined(__ELF__)
+COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
+ int trampSizeAllocated,
+ const void *realFunc, void *localsPtr) {
+ // This should never happen, but if compiler did not allocate
+ // enough space on stack for the trampoline, abort.
+ if (trampSizeAllocated < 36)
+ compilerrt_abort();
+
+ // create trampoline
+ // Load realFunc into x17. mov/movk 16 bits at a time.
+ trampOnStack[0] =
+ 0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11;
+ trampOnStack[1] =
+ 0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11;
+ trampOnStack[2] =
+ 0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11;
+ trampOnStack[3] =
+ 0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11;
+ // Load localsPtr into x18
+ trampOnStack[4] =
+ 0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12;
+ trampOnStack[5] =
+ 0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12;
+ trampOnStack[6] =
+ 0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12;
+ trampOnStack[7] =
+ 0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12;
+ trampOnStack[8] = 0xd61f0220; // br x17
+
+ // Clear instruction cache.
+ __clear_cache(trampOnStack, &trampOnStack[9]);
+}
+#endif // defined(__aarch64__) && defined(__ELF__)
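The magic words hand-assemble MOVZ/MOVK/BR encodings: each base opcode already fixes which 16-bit slice is patched (0xd2800000 is MOVZ; 0xf2a00000, 0xf2c00000, 0xf2e00000 are MOVK with LSL #16/#32/#48), the immediate lands in bits [20:5], and the destination register number in bits [4:0] (0x11 = x17, 0x12 = x18). 0xd61f0220 is the fixed encoding of "br x17". A sketch of the shared pattern (hypothetical helper):

#include <stdint.h>

// Build one MOVZ/MOVK word: 16-bit immediate slice in bits [20:5],
// destination register number in bits [4:0]. The hw shift selecting the
// slice is baked into base_opcode, so slice must match it.
static uint32_t movz_movk(uint32_t base_opcode, uint64_t value,
                          unsigned slice, unsigned reg) {
  uint32_t imm16 = (uint32_t)((value >> (16 * slice)) & 0xffffu);
  return base_opcode | (imm16 << 5) | reg;
}
// e.g. movz_movk(0xd2800000u, (uint64_t)realFunc, 0, 17) reproduces
// trampOnStack[0] above.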
diff --git a/contrib/libs/cxxsupp/builtins/ya.make b/contrib/libs/cxxsupp/builtins/ya.make
index 5f9c60552aa..ae250c5db79 100644
--- a/contrib/libs/cxxsupp/builtins/ya.make
+++ b/contrib/libs/cxxsupp/builtins/ya.make
@@ -12,9 +12,9 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-VERSION(18.1.8)
+VERSION(19.1.3)
-ORIGINAL_SOURCE(https://github.com/llvm/llvm-project/releases/download/llvmorg-18.1.8/compiler-rt-18.1.8.src.tar.xz)
+ORIGINAL_SOURCE(https://github.com/llvm/llvm-project/releases/download/llvmorg-19.1.3/compiler-rt-19.1.3.src.tar.xz)
NO_COMPILER_WARNINGS()
@@ -65,7 +65,9 @@ IF (ARCH_AARCH64)
aarch64/chkstk.S
aarch64/fp_mode.c
aarch64/sme-abi-init.c
+ aarch64/sme-abi-vg.c
aarch64/sme-abi.S
+ aarch64/sme-libc-mem-routines.S
absvdi2.c
absvsi2.c
absvti2.c
@@ -117,6 +119,7 @@ IF (ARCH_AARCH64)
emutls.c
enable_execute_stack.c
eprintf.c
+ extendbfsf2.c
extenddftf2.c
extendhfsf2.c
extendhftf2.c
@@ -284,6 +287,7 @@ ELSEIF (ARCH_X86_64)
emutls.c
enable_execute_stack.c
eprintf.c
+ extendbfsf2.c
extenddftf2.c
extendhfsf2.c
extendhftf2.c
@@ -467,6 +471,7 @@ ELSE()
emutls.c
enable_execute_stack.c
eprintf.c
+ extendbfsf2.c
extenddftf2.c
extendhfsf2.c
extendhftf2.c