diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/base64/ssse3/dec_ssse3.c | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/base64/ssse3/dec_ssse3.c')
-rw-r--r-- | contrib/libs/base64/ssse3/dec_ssse3.c | 58 |
1 files changed, 58 insertions, 0 deletions
diff --git a/contrib/libs/base64/ssse3/dec_ssse3.c b/contrib/libs/base64/ssse3/dec_ssse3.c new file mode 100644 index 0000000000..ce11b3cb1b --- /dev/null +++ b/contrib/libs/base64/ssse3/dec_ssse3.c @@ -0,0 +1,58 @@ +// If we have SSSE3 support, pick off 16 bytes at a time for as long as we can, +// but make sure that we quit before seeing any == markers at the end of the +// string. Also, because we write four zeroes at the end of the output, ensure +// that there are at least 6 valid bytes of input data remaining to close the +// gap. 16 + 2 + 6 = 24 bytes: +while (srclen >= 24) +{ + // Load string: + __m128i str = _mm_loadu_si128((__m128i *)c); + + // The input consists of six character sets in the Base64 alphabet, + // which we need to map back to the 6-bit values they represent. + // There are three ranges, two singles, and then there's the rest. + // + // # From To Add Characters + // 1 [43] [62] +19 + + // 2 [47] [63] +16 / + // 3 [48..57] [52..61] +4 0..9 + // 4 [65..90] [0..25] -65 A..Z + // 5 [97..122] [26..51] -71 a..z + // (6) Everything else => invalid input + + const __m128i set1 = CMPEQ(str, '+'); + const __m128i set2 = CMPEQ(str, '/'); + const __m128i set3 = RANGE(str, '0', '9'); + const __m128i set4 = RANGE(str, 'A', 'Z'); + const __m128i set5 = RANGE(str, 'a', 'z'); + const __m128i set6 = CMPEQ(str, '-'); + const __m128i set7 = CMPEQ(str, '_'); + + __m128i delta = REPLACE(set1, 19); + delta = _mm_or_si128(delta, REPLACE(set2, 16)); + delta = _mm_or_si128(delta, REPLACE(set3, 4)); + delta = _mm_or_si128(delta, REPLACE(set4, -65)); + delta = _mm_or_si128(delta, REPLACE(set5, -71)); + delta = _mm_or_si128(delta, REPLACE(set6, 17)); + delta = _mm_or_si128(delta, REPLACE(set7, -32)); + + // Check for invalid input: if any of the delta values are zero, + // fall back on bytewise code to do error checking and reporting: + if (_mm_movemask_epi8(CMPEQ(delta, 0))) { + break; + } + + // Now simply add the delta values to the input: + str = _mm_add_epi8(str, delta); + + // Reshuffle the input to packed 12-byte output format: + str = dec_reshuffle(str); + + // Store back: + _mm_storeu_si128((__m128i *)o, str); + + c += 16; + o += 12; + outl += 12; + srclen -= 16; +} |