about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/libs/base64/avx2/dec_avx2.c
diff options
context:
space:
mode:
author: yazevnul <yazevnul@yandex-team.ru> 2022-02-10 16:46:46 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:46 +0300
commit: 8cbc307de0221f84c80c42dcbe07d40727537e2c (patch)
tree: 625d5a673015d1df891e051033e9fcde5c7be4e5 /contrib/libs/base64/avx2/dec_avx2.c
parent: 30d1ef3941e0dc835be7609de5ebee66958f215a (diff)
download: ydb-8cbc307de0221f84c80c42dcbe07d40727537e2c.tar.gz
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/base64/avx2/dec_avx2.c')
-rw-r--r-- contrib/libs/base64/avx2/dec_avx2.c 116
1 files changed, 58 insertions, 58 deletions
diff --git a/contrib/libs/base64/avx2/dec_avx2.c b/contrib/libs/base64/avx2/dec_avx2.c
index cd3cafd30a..90dde140fb 100644
--- a/contrib/libs/base64/avx2/dec_avx2.c
+++ b/contrib/libs/base64/avx2/dec_avx2.c
@@ -1,43 +1,43 @@
-// If we have AVX2 support, pick off 32 bytes at a time for as long as we can,
-// but make sure that we quit before seeing any == markers at the end of the
-// string. Also, because we write 8 zeroes at the end of the output, ensure
-// that there are at least 11 valid bytes of input data remaining to close the
-// gap. 32 + 2 + 11 = 45 bytes:
-while (srclen >= 45)
-{
- // Load string:
- __m256i str = _mm256_loadu_si256((__m256i *)c);
-
- // The input consists of six character sets in the Base64 alphabet,
- // which we need to map back to the 6-bit values they represent.
- // There are three ranges, two singles, and then there's the rest.
- //
- // # From To Add Characters
- // 1 [43] [62] +19 +
- // 2 [47] [63] +16 /
- // 3 [48..57] [52..61] +4 0..9
- // 4 [65..90] [0..25] -65 A..Z
- // 5 [97..122] [26..51] -71 a..z
- // (6) Everything else => invalid input
-
- const __m256i set1 = CMPEQ(str, '+');
- const __m256i set2 = CMPEQ(str, '/');
- const __m256i set3 = RANGE(str, '0', '9');
- const __m256i set4 = RANGE(str, 'A', 'Z');
- const __m256i set5 = RANGE(str, 'a', 'z');
- const __m256i set6 = CMPEQ(str, '-');
- const __m256i set7 = CMPEQ(str, '_');
-
- __m256i delta = REPLACE(set1, 19);
- delta = _mm256_or_si256(delta, REPLACE(set2, 16));
- delta = _mm256_or_si256(delta, REPLACE(set3, 4));
- delta = _mm256_or_si256(delta, REPLACE(set4, -65));
- delta = _mm256_or_si256(delta, REPLACE(set5, -71));
- delta = _mm256_or_si256(delta, REPLACE(set6, 17));
- delta = _mm256_or_si256(delta, REPLACE(set7, -32));
-
- // Check for invalid input: if any of the delta values are zero,
- // fall back on bytewise code to do error checking and reporting:
+// If we have AVX2 support, pick off 32 bytes at a time for as long as we can,
+// but make sure that we quit before seeing any == markers at the end of the
+// string. Also, because we write 8 zeroes at the end of the output, ensure
+// that there are at least 11 valid bytes of input data remaining to close the
+// gap. 32 + 2 + 11 = 45 bytes:
+while (srclen >= 45)
+{
+ // Load string:
+ __m256i str = _mm256_loadu_si256((__m256i *)c);
+
+ // The input consists of six character sets in the Base64 alphabet,
+ // which we need to map back to the 6-bit values they represent.
+ // There are three ranges, two singles, and then there's the rest.
+ //
+ // # From To Add Characters
+ // 1 [43] [62] +19 +
+ // 2 [47] [63] +16 /
+ // 3 [48..57] [52..61] +4 0..9
+ // 4 [65..90] [0..25] -65 A..Z
+ // 5 [97..122] [26..51] -71 a..z
+ // (6) Everything else => invalid input
+
+ const __m256i set1 = CMPEQ(str, '+');
+ const __m256i set2 = CMPEQ(str, '/');
+ const __m256i set3 = RANGE(str, '0', '9');
+ const __m256i set4 = RANGE(str, 'A', 'Z');
+ const __m256i set5 = RANGE(str, 'a', 'z');
+ const __m256i set6 = CMPEQ(str, '-');
+ const __m256i set7 = CMPEQ(str, '_');
+
+ __m256i delta = REPLACE(set1, 19);
+ delta = _mm256_or_si256(delta, REPLACE(set2, 16));
+ delta = _mm256_or_si256(delta, REPLACE(set3, 4));
+ delta = _mm256_or_si256(delta, REPLACE(set4, -65));
+ delta = _mm256_or_si256(delta, REPLACE(set5, -71));
+ delta = _mm256_or_si256(delta, REPLACE(set6, 17));
+ delta = _mm256_or_si256(delta, REPLACE(set7, -32));
+
+ // Check for invalid input: if any of the delta values are zero,
+ // fall back on bytewise code to do error checking and reporting:
#ifdef _MSC_VER
// Hack for MSVC miscompilation - it inserts vzeroupper for the break
// (we need to clear YMM registers before exiting the function)
@@ -45,21 +45,21 @@ while (srclen >= 45)
// Save delta/str in memory manually.
_mm256_zeroupper();
#endif
- if (_mm256_movemask_epi8(CMPEQ(delta, 0))) {
- break;
- }
-
- // Now simply add the delta values to the input:
- str = _mm256_add_epi8(str, delta);
-
- // Reshuffle the input to packed 12-byte output format:
- str = dec_reshuffle(str);
-
- // Store back:
- _mm256_storeu_si256((__m256i *)o, str);
-
- c += 32;
- o += 24;
- outl += 24;
- srclen -= 32;
-}
+ if (_mm256_movemask_epi8(CMPEQ(delta, 0))) {
+ break;
+ }
+
+ // Now simply add the delta values to the input:
+ str = _mm256_add_epi8(str, delta);
+
+ // Reshuffle the input to packed 12-byte output format:
+ str = dec_reshuffle(str);
+
+ // Store back:
+ _mm256_storeu_si256((__m256i *)o, str);
+
+ c += 32;
+ o += 24;
+ outl += 24;
+ srclen -= 32;
+}