summaryrefslogtreecommitdiffstats
path: root/contrib/restricted/aws/aws-c-common/source/arch/intel
diff options
context:
space:
mode:
authorrobot-contrib <[email protected]>2025-05-14 06:53:03 +0300
committerrobot-contrib <[email protected]>2025-05-14 07:05:42 +0300
commit286dbc77293811055ff4f9303cd376eff9e50104 (patch)
treea50eea3eb2b824c7c68e15b4cc3e127731776d32 /contrib/restricted/aws/aws-c-common/source/arch/intel
parent0bf9db6399352012396e7791bcfd762e944b33c2 (diff)
Update contrib/restricted/aws/aws-c-common to 0.12.2
commit_hash:fc6e67f9b12b0b888c90bb97bf2b1cbfcd74a044
Diffstat (limited to 'contrib/restricted/aws/aws-c-common/source/arch/intel')
-rw-r--r--contrib/restricted/aws/aws-c-common/source/arch/intel/asm/cpuid.c11
-rw-r--r--contrib/restricted/aws/aws-c-common/source/arch/intel/cpuid.c173
-rw-r--r--contrib/restricted/aws/aws-c-common/source/arch/intel/encoding_avx2.c8
3 files changed, 86 insertions, 106 deletions
diff --git a/contrib/restricted/aws/aws-c-common/source/arch/intel/asm/cpuid.c b/contrib/restricted/aws/aws-c-common/source/arch/intel/asm/cpuid.c
index d2ceab01060..93657460e63 100644
--- a/contrib/restricted/aws/aws-c-common/source/arch/intel/asm/cpuid.c
+++ b/contrib/restricted/aws/aws-c-common/source/arch/intel/asm/cpuid.c
@@ -27,3 +27,14 @@ void aws_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd) {
abcd[2] = ecx;
abcd[3] = edx;
}
+
+uint64_t aws_run_xgetbv(uint32_t xcr) {
+ /* NOTE: we could have used the _xgetbv() intrinsic in <immintrin.h>, but it's missing from GCC < 9.0:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71659 */
+
+ /* xgetbv writes high and low of 64bit value to EDX:EAX */
+ uint32_t xcrhigh;
+ uint32_t xcrlow;
+ __asm__ __volatile__("xgetbv" : "=a"(xcrlow), "=d"(xcrhigh) : "c"(xcr));
+ return (((uint64_t)xcrhigh) << 32) | xcrlow;
+}
diff --git a/contrib/restricted/aws/aws-c-common/source/arch/intel/cpuid.c b/contrib/restricted/aws/aws-c-common/source/arch/intel/cpuid.c
index 465fccd17a5..e50fa2cdf3d 100644
--- a/contrib/restricted/aws/aws-c-common/source/arch/intel/cpuid.c
+++ b/contrib/restricted/aws/aws-c-common/source/arch/intel/cpuid.c
@@ -13,57 +13,69 @@
#include <stdlib.h>
extern void aws_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd);
+extern uint64_t aws_run_xgetbv(uint32_t xcr);
-typedef bool(has_feature_fn)(void);
+static bool s_cpu_features[AWS_CPU_FEATURE_COUNT];
+static bool s_cpu_features_cached;
-static bool s_has_clmul(void) {
+static void s_cache_cpu_features(void) {
+ /***************************************************************************
+ * First, find the max EAX value we can pass to CPUID without undefined behavior
+ * https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=0:_Highest_Function_Parameter_and_Manufacturer_ID
+ **************************************************************************/
uint32_t abcd[4];
- uint32_t clmul_mask = 0x00000002;
- aws_run_cpuid(1, 0, abcd);
-
- if ((abcd[2] & clmul_mask) != clmul_mask)
- return false;
-
- return true;
-}
-
-static bool s_has_sse41(void) {
- uint32_t abcd[4];
- uint32_t sse41_mask = 0x00080000;
- aws_run_cpuid(1, 0, abcd);
-
- if ((abcd[2] & sse41_mask) != sse41_mask)
- return false;
-
- return true;
-}
-
-static bool s_has_sse42(void) {
- uint32_t abcd[4];
- uint32_t sse42_mask = 0x00100000;
- aws_run_cpuid(1, 0, abcd);
-
- if ((abcd[2] & sse42_mask) != sse42_mask)
- return false;
-
- return true;
-}
+ aws_run_cpuid(0x0, 0x0, abcd);
+ const uint32_t max_cpuid_eax_value = abcd[0]; /* max-value = EAX */
+
+ /**************************************************************************
+ * CPUID(EAX=1H, ECX=0H): Processor Info and Feature Bits
+ * https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=1:_Processor_Info_and_Feature_Bits
+ **************************************************************************/
+ if (0x1 > max_cpuid_eax_value) {
+ return;
+ }
+ aws_run_cpuid(0x1, 0x0, abcd);
+ s_cpu_features[AWS_CPU_FEATURE_CLMUL] = abcd[2] & (1 << 1); /* pclmulqdq = ECX[bit 1] */
+ s_cpu_features[AWS_CPU_FEATURE_SSE_4_1] = abcd[2] & (1 << 19); /* sse4.1 = ECX[bit 19] */
+ s_cpu_features[AWS_CPU_FEATURE_SSE_4_2] = abcd[2] & (1 << 20); /* sse4.2 = ECX[bit 20] */
+
+ /* NOTE: Even if the AVX flag is set, it's not necessarily usable.
+ * We need to check that OSXSAVE is enabled, and check further capabilities via XGETBV.
+ * GCC had the same bug until 7.4: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100 */
+ bool avx_usable = false;
+ bool avx512_usable = false;
+ bool feature_osxsave = abcd[2] & (1 << 27); /* osxsave = ECX[bit 27] */
+ if (feature_osxsave) {
+ /* Check XCR0 (Extended Control Register 0) via XGETBV
+ * https://en.wikipedia.org/w/index.php?title=Control_register&oldid=1268423710#XCR0_and_XSS */
+ uint64_t xcr0 = aws_run_xgetbv(0);
+ const uint64_t avx_mask = (1 << 1) /* SSE = XCR0[bit 1] */
+ | (1 << 2) /* AVX = XCR0[bit 2] */;
+ avx_usable = (xcr0 & avx_mask) == avx_mask;
+
+ const uint64_t avx512_mask = (1 << 5) /* OPMASK = XCR0[bit 5] */
+ | (1 << 6) /* ZMM_Hi256 = XCR0[bit 6] */
+ | (1 << 7) /* Hi16_ZMM = XCR0[bit 7] */
+ | avx_mask;
+ avx512_usable = (xcr0 & avx512_mask) == avx512_mask;
+ }
-static bool s_has_avx2(void) {
- uint32_t abcd[4];
+ bool feature_avx = false;
+ if (avx_usable) {
+ feature_avx = abcd[2] & (1 << 28); /* avx = ECX[bit 28] */
+ }
- /* Check AVX2:
- * CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1 */
- uint32_t avx2_mask = (1 << 5);
- aws_run_cpuid(7, 0, abcd);
- if ((abcd[1] & avx2_mask) != avx2_mask) {
- return false;
+ /***************************************************************************
+ * CPUID(EAX=7H, ECX=0H): Extended Features
+ * https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=7,_ECX=0:_Extended_Features
+ **************************************************************************/
+ if (0x7 > max_cpuid_eax_value) {
+ return;
}
+ aws_run_cpuid(0x7, 0x0, abcd);
+ s_cpu_features[AWS_CPU_FEATURE_BMI2] = abcd[1] & (1 << 8); /* bmi2 = EBX[bit 8] */
- /* Also check AVX:
- * CPUID.(EAX=01H, ECX=0H):ECX.AVX[bit 28]==1
- *
- * NOTE: It SHOULD be impossible for a CPU to support AVX2 without supporting AVX.
+ /* NOTE: It SHOULD be impossible for a CPU to support AVX2 without supporting AVX.
* But we've received crash reports where the AVX2 feature check passed
* and then an AVX instruction caused an "invalid instruction" crash.
*
@@ -76,69 +88,26 @@ static bool s_has_avx2(void) {
*
* We don't know for sure what was up with those machines, but this extra
* check should stop them from running our AVX/AVX2 code paths. */
- uint32_t avx1_mask = (1 << 28);
- aws_run_cpuid(1, 0, abcd);
- if ((abcd[2] & avx1_mask) != avx1_mask) {
- return false;
- }
-
- return true;
-}
-
-static bool s_has_avx512(void) {
- uint32_t abcd[4];
-
- /* Check AVX512F:
- * CPUID.(EAX=07H, ECX=0H):EBX.AVX512[bit 16]==1 */
- uint32_t avx512_mask = (1 << 16);
- aws_run_cpuid(7, 0, abcd);
- if ((abcd[1] & avx512_mask) != avx512_mask) {
- return false;
- }
-
- return true;
-}
-
-static bool s_has_bmi2(void) {
- uint32_t abcd[4];
-
- /* Check BMI2:
- * CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]==1 */
- uint32_t bmi2_mask = (1 << 8);
- aws_run_cpuid(7, 0, abcd);
- if ((abcd[1] & bmi2_mask) != bmi2_mask) {
- return false;
+ if (feature_avx) {
+ if (avx_usable) {
+ s_cpu_features[AWS_CPU_FEATURE_AVX2] = abcd[1] & (1 << 5); /* AVX2 = EBX[bit 5] */
+ s_cpu_features[AWS_CPU_FEATURE_VPCLMULQDQ] = abcd[2] & (1 << 10); /* vpclmulqdq = ECX[bit 10] */
+ }
+ if (avx512_usable) {
+ s_cpu_features[AWS_CPU_FEATURE_AVX512] = abcd[1] & (1 << 16); /* AVX-512 Foundation = EBX[bit 16] */
+ }
}
-
- return true;
}
-static bool s_has_vpclmulqdq(void) {
- uint32_t abcd[4];
- /* Check VPCLMULQDQ:
- * CPUID.(EAX=07H, ECX=0H):ECX.VPCLMULQDQ[bit 10]==1 */
- uint32_t vpclmulqdq_mask = (1 << 10);
- aws_run_cpuid(7, 0, abcd);
- if ((abcd[2] & vpclmulqdq_mask) != vpclmulqdq_mask) {
- return false;
+bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) {
+ /* Look up and cache all hardware features the first time this is called */
+ if (AWS_UNLIKELY(!s_cpu_features_cached)) {
+ s_cache_cpu_features();
+ s_cpu_features_cached = true;
}
- return true;
-}
-has_feature_fn *s_check_cpu_feature[AWS_CPU_FEATURE_COUNT] = {
- [AWS_CPU_FEATURE_CLMUL] = s_has_clmul,
- [AWS_CPU_FEATURE_SSE_4_1] = s_has_sse41,
- [AWS_CPU_FEATURE_SSE_4_2] = s_has_sse42,
- [AWS_CPU_FEATURE_AVX2] = s_has_avx2,
- [AWS_CPU_FEATURE_AVX512] = s_has_avx512,
- [AWS_CPU_FEATURE_BMI2] = s_has_bmi2,
- [AWS_CPU_FEATURE_VPCLMULQDQ] = s_has_vpclmulqdq,
-};
-
-bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) {
- if (s_check_cpu_feature[feature_name])
- return s_check_cpu_feature[feature_name]();
- return false;
+ AWS_ASSERT(feature_name >= 0 && feature_name < AWS_CPU_FEATURE_COUNT);
+ return s_cpu_features[feature_name];
}
#define CPUID_AVAILABLE 0
diff --git a/contrib/restricted/aws/aws-c-common/source/arch/intel/encoding_avx2.c b/contrib/restricted/aws/aws-c-common/source/arch/intel/encoding_avx2.c
index 439d6ddada9..92c4abdfa76 100644
--- a/contrib/restricted/aws/aws-c-common/source/arch/intel/encoding_avx2.c
+++ b/contrib/restricted/aws/aws-c-common/source/arch/intel/encoding_avx2.c
@@ -194,13 +194,13 @@ static inline bool decode(const unsigned char *in, unsigned char *out) {
size_t aws_common_private_base64_decode_sse41(const unsigned char *in, unsigned char *out, size_t len) {
if (len % 4) {
- return (size_t)-1;
+ return SIZE_MAX;
}
size_t outlen = 0;
while (len > 32) {
if (!decode(in, out)) {
- return (size_t)-1;
+ return SIZE_MAX;
}
len -= 32;
in += 32;
@@ -230,13 +230,13 @@ size_t aws_common_private_base64_decode_sse41(const unsigned char *in, unsigned
}
if (!decode(tmp_in, tmp_out)) {
- return (size_t)-1;
+ return SIZE_MAX;
}
/* Check that there are no trailing ones bits */
for (size_t i = final_out; i < sizeof(tmp_out); i++) {
if (tmp_out[i]) {
- return (size_t)-1;
+ return SIZE_MAX;
}
}