diff options
author | robot-contrib <[email protected]> | 2025-05-14 06:53:03 +0300 |
---|---|---|
committer | robot-contrib <[email protected]> | 2025-05-14 07:05:42 +0300 |
commit | 286dbc77293811055ff4f9303cd376eff9e50104 (patch) | |
tree | a50eea3eb2b824c7c68e15b4cc3e127731776d32 /contrib/restricted/aws/aws-c-common/source/arch/intel | |
parent | 0bf9db6399352012396e7791bcfd762e944b33c2 (diff) |
Update contrib/restricted/aws/aws-c-common to 0.12.2
commit_hash:fc6e67f9b12b0b888c90bb97bf2b1cbfcd74a044
Diffstat (limited to 'contrib/restricted/aws/aws-c-common/source/arch/intel')
3 files changed, 86 insertions, 106 deletions
diff --git a/contrib/restricted/aws/aws-c-common/source/arch/intel/asm/cpuid.c b/contrib/restricted/aws/aws-c-common/source/arch/intel/asm/cpuid.c index d2ceab01060..93657460e63 100644 --- a/contrib/restricted/aws/aws-c-common/source/arch/intel/asm/cpuid.c +++ b/contrib/restricted/aws/aws-c-common/source/arch/intel/asm/cpuid.c @@ -27,3 +27,14 @@ void aws_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd) { abcd[2] = ecx; abcd[3] = edx; } + +uint64_t aws_run_xgetbv(uint32_t xcr) { + /* NOTE: we could have used the _xgetbv() intrinsic in <immintrin.h>, but it's missing from GCC < 9.0: + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71659 */ + + /* xgetbv writes high and low of 64bit value to EDX:EAX */ + uint32_t xcrhigh; + uint32_t xcrlow; + __asm__ __volatile__("xgetbv" : "=a"(xcrlow), "=d"(xcrhigh) : "c"(xcr)); + return (((uint64_t)xcrhigh) << 32) | xcrlow; +} diff --git a/contrib/restricted/aws/aws-c-common/source/arch/intel/cpuid.c b/contrib/restricted/aws/aws-c-common/source/arch/intel/cpuid.c index 465fccd17a5..e50fa2cdf3d 100644 --- a/contrib/restricted/aws/aws-c-common/source/arch/intel/cpuid.c +++ b/contrib/restricted/aws/aws-c-common/source/arch/intel/cpuid.c @@ -13,57 +13,69 @@ #include <stdlib.h> extern void aws_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd); +extern uint64_t aws_run_xgetbv(uint32_t xcr); -typedef bool(has_feature_fn)(void); +static bool s_cpu_features[AWS_CPU_FEATURE_COUNT]; +static bool s_cpu_features_cached; -static bool s_has_clmul(void) { +static void s_cache_cpu_features(void) { + /*************************************************************************** + * First, find the max EAX value we can pass to CPUID without undefined behavior + * https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=0:_Highest_Function_Parameter_and_Manufacturer_ID + **************************************************************************/ uint32_t abcd[4]; - uint32_t clmul_mask = 0x00000002; - aws_run_cpuid(1, 0, abcd); - - if ((abcd[2] & clmul_mask) != clmul_mask) - return false; - - return true; -} - -static bool s_has_sse41(void) { - uint32_t abcd[4]; - uint32_t sse41_mask = 0x00080000; - aws_run_cpuid(1, 0, abcd); - - if ((abcd[2] & sse41_mask) != sse41_mask) - return false; - - return true; -} - -static bool s_has_sse42(void) { - uint32_t abcd[4]; - uint32_t sse42_mask = 0x00100000; - aws_run_cpuid(1, 0, abcd); - - if ((abcd[2] & sse42_mask) != sse42_mask) - return false; - - return true; -} + aws_run_cpuid(0x0, 0x0, abcd); + const uint32_t max_cpuid_eax_value = abcd[0]; /* max-value = EAX */ + + /************************************************************************** + * CPUID(EAX=1H, ECX=0H): Processor Info and Feature Bits + * https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=1:_Processor_Info_and_Feature_Bits + **************************************************************************/ + if (0x1 > max_cpuid_eax_value) { + return; + } + aws_run_cpuid(0x1, 0x0, abcd); + s_cpu_features[AWS_CPU_FEATURE_CLMUL] = abcd[2] & (1 << 1); /* pclmulqdq = ECX[bit 1] */ + s_cpu_features[AWS_CPU_FEATURE_SSE_4_1] = abcd[2] & (1 << 19); /* sse4.1 = ECX[bit 19] */ + s_cpu_features[AWS_CPU_FEATURE_SSE_4_2] = abcd[2] & (1 << 20); /* sse4.2 = ECX[bit 20] */ + + /* NOTE: Even if the AVX flag is set, it's not necessarily usable. + * We need to check that OSXSAVE is enabled, and check further capabilities via XGETBV. + * GCC had the same bug until 7.4: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100 */ + bool avx_usable = false; + bool avx512_usable = false; + bool feature_osxsave = abcd[2] & (1 << 27); /* osxsave = ECX[bit 27] */ + if (feature_osxsave) { + /* Check XCR0 (Extended Control Register 0) via XGETBV + * https://en.wikipedia.org/w/index.php?title=Control_register&oldid=1268423710#XCR0_and_XSS */ + uint64_t xcr0 = aws_run_xgetbv(0); + const uint64_t avx_mask = (1 << 1) /* SSE = XCR0[bit 1] */ + | (1 << 2) /* AVX = XCR0[bit 2] */; + avx_usable = (xcr0 & avx_mask) == avx_mask; + + const uint64_t avx512_mask = (1 << 5) /* OPMASK = XCR0[bit 5] */ + | (1 << 6) /* ZMM_Hi256 = XCR0[bit 6] */ + | (1 << 7) /* Hi16_ZMM = XCR0[bit 7] */ + | avx_mask; + avx512_usable = (xcr0 & avx512_mask) == avx512_mask; + } -static bool s_has_avx2(void) { - uint32_t abcd[4]; + bool feature_avx = false; + if (avx_usable) { + feature_avx = abcd[2] & (1 << 28); /* avx = ECX[bit 28] */ + } - /* Check AVX2: - * CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1 */ - uint32_t avx2_mask = (1 << 5); - aws_run_cpuid(7, 0, abcd); - if ((abcd[1] & avx2_mask) != avx2_mask) { - return false; + /*************************************************************************** + * CPUID(EAX=7H, ECX=0H): Extended Features + * https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=7,_ECX=0:_Extended_Features + **************************************************************************/ + if (0x7 > max_cpuid_eax_value) { + return; } + aws_run_cpuid(0x7, 0x0, abcd); + s_cpu_features[AWS_CPU_FEATURE_BMI2] = abcd[1] & (1 << 8); /* bmi2 = EBX[bit 8] */ - /* Also check AVX: - * CPUID.(EAX=01H, ECX=0H):ECX.AVX[bit 28]==1 - * - * NOTE: It SHOULD be impossible for a CPU to support AVX2 without supporting AVX. + /* NOTE: It SHOULD be impossible for a CPU to support AVX2 without supporting AVX. * But we've received crash reports where the AVX2 feature check passed * and then an AVX instruction caused an "invalid instruction" crash. * @@ -76,69 +88,26 @@ static bool s_has_avx2(void) { * * We don't know for sure what was up with those machines, but this extra * check should stop them from running our AVX/AVX2 code paths. */ - uint32_t avx1_mask = (1 << 28); - aws_run_cpuid(1, 0, abcd); - if ((abcd[2] & avx1_mask) != avx1_mask) { - return false; - } - - return true; -} - -static bool s_has_avx512(void) { - uint32_t abcd[4]; - - /* Check AVX512F: - * CPUID.(EAX=07H, ECX=0H):EBX.AVX512[bit 16]==1 */ - uint32_t avx512_mask = (1 << 16); - aws_run_cpuid(7, 0, abcd); - if ((abcd[1] & avx512_mask) != avx512_mask) { - return false; - } - - return true; -} - -static bool s_has_bmi2(void) { - uint32_t abcd[4]; - - /* Check BMI2: - * CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]==1 */ - uint32_t bmi2_mask = (1 << 8); - aws_run_cpuid(7, 0, abcd); - if ((abcd[1] & bmi2_mask) != bmi2_mask) { - return false; + if (feature_avx) { + if (avx_usable) { + s_cpu_features[AWS_CPU_FEATURE_AVX2] = abcd[1] & (1 << 5); /* AVX2 = EBX[bit 5] */ + s_cpu_features[AWS_CPU_FEATURE_VPCLMULQDQ] = abcd[2] & (1 << 10); /* vpclmulqdq = ECX[bit 10] */ + } + if (avx512_usable) { + s_cpu_features[AWS_CPU_FEATURE_AVX512] = abcd[1] & (1 << 16); /* AVX-512 Foundation = EBX[bit 16] */ + } } - - return true; } -static bool s_has_vpclmulqdq(void) { - uint32_t abcd[4]; - /* Check VPCLMULQDQ: - * CPUID.(EAX=07H, ECX=0H):ECX.VPCLMULQDQ[bit 10]==1 */ - uint32_t vpclmulqdq_mask = (1 << 10); - aws_run_cpuid(7, 0, abcd); - if ((abcd[2] & vpclmulqdq_mask) != vpclmulqdq_mask) { - return false; +bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) { + /* Look up and cache all hardware features the first time this is called */ + if (AWS_UNLIKELY(!s_cpu_features_cached)) { + s_cache_cpu_features(); + s_cpu_features_cached = true; } - return true; -} -has_feature_fn *s_check_cpu_feature[AWS_CPU_FEATURE_COUNT] = { - [AWS_CPU_FEATURE_CLMUL] = s_has_clmul, - [AWS_CPU_FEATURE_SSE_4_1] = s_has_sse41, - [AWS_CPU_FEATURE_SSE_4_2] = s_has_sse42, - [AWS_CPU_FEATURE_AVX2] = s_has_avx2, - [AWS_CPU_FEATURE_AVX512] = s_has_avx512, - [AWS_CPU_FEATURE_BMI2] = s_has_bmi2, - [AWS_CPU_FEATURE_VPCLMULQDQ] = s_has_vpclmulqdq, -}; - -bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) { - if (s_check_cpu_feature[feature_name]) - return s_check_cpu_feature[feature_name](); - return false; + AWS_ASSERT(feature_name >= 0 && feature_name < AWS_CPU_FEATURE_COUNT); + return s_cpu_features[feature_name]; } #define CPUID_AVAILABLE 0 diff --git a/contrib/restricted/aws/aws-c-common/source/arch/intel/encoding_avx2.c b/contrib/restricted/aws/aws-c-common/source/arch/intel/encoding_avx2.c index 439d6ddada9..92c4abdfa76 100644 --- a/contrib/restricted/aws/aws-c-common/source/arch/intel/encoding_avx2.c +++ b/contrib/restricted/aws/aws-c-common/source/arch/intel/encoding_avx2.c @@ -194,13 +194,13 @@ static inline bool decode(const unsigned char *in, unsigned char *out) { size_t aws_common_private_base64_decode_sse41(const unsigned char *in, unsigned char *out, size_t len) { if (len % 4) { - return (size_t)-1; + return SIZE_MAX; } size_t outlen = 0; while (len > 32) { if (!decode(in, out)) { - return (size_t)-1; + return SIZE_MAX; } len -= 32; in += 32; @@ -230,13 +230,13 @@ size_t aws_common_private_base64_decode_sse41(const unsigned char *in, unsigned } if (!decode(tmp_in, tmp_out)) { - return (size_t)-1; + return SIZE_MAX; } /* Check that there are no trailing ones bits */ for (size_t i = final_out; i < sizeof(tmp_out); i++) { if (tmp_out[i]) { - return (size_t)-1; + return SIZE_MAX; } } |