diff options
author | yazevnul <yazevnul@yandex-team.ru> | 2022-02-10 16:46:48 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:48 +0300 |
commit | 9abfb1a53b7f7b791444d1378e645d8fad9b06ed (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /contrib/libs/base64/avx2/dec_avx2.c | |
parent | 8cbc307de0221f84c80c42dcbe07d40727537e2c (diff) | |
download | ydb-9abfb1a53b7f7b791444d1378e645d8fad9b06ed.tar.gz |
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/base64/avx2/dec_avx2.c')
-rw-r--r-- | contrib/libs/base64/avx2/dec_avx2.c | 116 |
1 files changed, 58 insertions, 58 deletions
diff --git a/contrib/libs/base64/avx2/dec_avx2.c b/contrib/libs/base64/avx2/dec_avx2.c index 90dde140fb..cd3cafd30a 100644 --- a/contrib/libs/base64/avx2/dec_avx2.c +++ b/contrib/libs/base64/avx2/dec_avx2.c @@ -1,43 +1,43 @@ -// If we have AVX2 support, pick off 32 bytes at a time for as long as we can, -// but make sure that we quit before seeing any == markers at the end of the -// string. Also, because we write 8 zeroes at the end of the output, ensure -// that there are at least 11 valid bytes of input data remaining to close the -// gap. 32 + 2 + 11 = 45 bytes: -while (srclen >= 45) -{ - // Load string: - __m256i str = _mm256_loadu_si256((__m256i *)c); - - // The input consists of six character sets in the Base64 alphabet, - // which we need to map back to the 6-bit values they represent. - // There are three ranges, two singles, and then there's the rest. - // - // # From To Add Characters - // 1 [43] [62] +19 + - // 2 [47] [63] +16 / - // 3 [48..57] [52..61] +4 0..9 - // 4 [65..90] [0..25] -65 A..Z - // 5 [97..122] [26..51] -71 a..z - // (6) Everything else => invalid input - - const __m256i set1 = CMPEQ(str, '+'); - const __m256i set2 = CMPEQ(str, '/'); - const __m256i set3 = RANGE(str, '0', '9'); - const __m256i set4 = RANGE(str, 'A', 'Z'); - const __m256i set5 = RANGE(str, 'a', 'z'); - const __m256i set6 = CMPEQ(str, '-'); - const __m256i set7 = CMPEQ(str, '_'); - - __m256i delta = REPLACE(set1, 19); - delta = _mm256_or_si256(delta, REPLACE(set2, 16)); - delta = _mm256_or_si256(delta, REPLACE(set3, 4)); - delta = _mm256_or_si256(delta, REPLACE(set4, -65)); - delta = _mm256_or_si256(delta, REPLACE(set5, -71)); - delta = _mm256_or_si256(delta, REPLACE(set6, 17)); - delta = _mm256_or_si256(delta, REPLACE(set7, -32)); - - // Check for invalid input: if any of the delta values are zero, - // fall back on bytewise code to do error checking and reporting: +// If we have AVX2 support, pick off 32 bytes at a time for as long as we can, +// but make sure that we quit before seeing any == markers at the end of the +// string. Also, because we write 8 zeroes at the end of the output, ensure +// that there are at least 11 valid bytes of input data remaining to close the +// gap. 32 + 2 + 11 = 45 bytes: +while (srclen >= 45) +{ + // Load string: + __m256i str = _mm256_loadu_si256((__m256i *)c); + + // The input consists of six character sets in the Base64 alphabet, + // which we need to map back to the 6-bit values they represent. + // There are three ranges, two singles, and then there's the rest. + // + // # From To Add Characters + // 1 [43] [62] +19 + + // 2 [47] [63] +16 / + // 3 [48..57] [52..61] +4 0..9 + // 4 [65..90] [0..25] -65 A..Z + // 5 [97..122] [26..51] -71 a..z + // (6) Everything else => invalid input + + const __m256i set1 = CMPEQ(str, '+'); + const __m256i set2 = CMPEQ(str, '/'); + const __m256i set3 = RANGE(str, '0', '9'); + const __m256i set4 = RANGE(str, 'A', 'Z'); + const __m256i set5 = RANGE(str, 'a', 'z'); + const __m256i set6 = CMPEQ(str, '-'); + const __m256i set7 = CMPEQ(str, '_'); + + __m256i delta = REPLACE(set1, 19); + delta = _mm256_or_si256(delta, REPLACE(set2, 16)); + delta = _mm256_or_si256(delta, REPLACE(set3, 4)); + delta = _mm256_or_si256(delta, REPLACE(set4, -65)); + delta = _mm256_or_si256(delta, REPLACE(set5, -71)); + delta = _mm256_or_si256(delta, REPLACE(set6, 17)); + delta = _mm256_or_si256(delta, REPLACE(set7, -32)); + + // Check for invalid input: if any of the delta values are zero, + // fall back on bytewise code to do error checking and reporting: #ifdef _MSC_VER // Hack for MSVC miscompilation - it inserts vzeroupper for the break // (we need to clear YMM registers before exiting the function) @@ -45,21 +45,21 @@ while (srclen >= 45) // Save delta/str in memory manually. _mm256_zeroupper(); #endif - if (_mm256_movemask_epi8(CMPEQ(delta, 0))) { - break; - } - - // Now simply add the delta values to the input: - str = _mm256_add_epi8(str, delta); - - // Reshuffle the input to packed 12-byte output format: - str = dec_reshuffle(str); - - // Store back: - _mm256_storeu_si256((__m256i *)o, str); - - c += 32; - o += 24; - outl += 24; - srclen -= 32; -} + if (_mm256_movemask_epi8(CMPEQ(delta, 0))) { + break; + } + + // Now simply add the delta values to the input: + str = _mm256_add_epi8(str, delta); + + // Reshuffle the input to packed 12-byte output format: + str = dec_reshuffle(str); + + // Store back: + _mm256_storeu_si256((__m256i *)o, str); + + c += 32; + o += 24; + outl += 24; + srclen -= 32; +} |