diff options
author | yazevnul <yazevnul@yandex-team.ru> | 2022-02-10 16:46:48 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:48 +0300 |
commit | 9abfb1a53b7f7b791444d1378e645d8fad9b06ed (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /contrib/libs/base64/ssse3/codec_ssse3.c | |
parent | 8cbc307de0221f84c80c42dcbe07d40727537e2c (diff) | |
download | ydb-9abfb1a53b7f7b791444d1378e645d8fad9b06ed.tar.gz |
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/base64/ssse3/codec_ssse3.c')
-rw-r--r-- | contrib/libs/base64/ssse3/codec_ssse3.c | 336 |
1 files changed, 168 insertions, 168 deletions
diff --git a/contrib/libs/base64/ssse3/codec_ssse3.c b/contrib/libs/base64/ssse3/codec_ssse3.c index c8c9c72c3f..acf4109e7a 100644 --- a/contrib/libs/base64/ssse3/codec_ssse3.c +++ b/contrib/libs/base64/ssse3/codec_ssse3.c @@ -1,168 +1,168 @@ -#include <stdint.h> -#include <stddef.h> -#include <stdlib.h> - -#include "libbase64.h" -#include "codecs.h" - -#ifdef __SSSE3__ -#include <tmmintrin.h> - -#define CMPGT(s,n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n)) -#define CMPEQ(s,n) _mm_cmpeq_epi8((s), _mm_set1_epi8(n)) -#define REPLACE(s,n) _mm_and_si128((s), _mm_set1_epi8(n)) -#define RANGE(s,a,b) _mm_andnot_si128(CMPGT((s), (b)), CMPGT((s), (a) - 1)) - -static inline __m128i -_mm_bswap_epi32 (const __m128i in) -{ - return _mm_shuffle_epi8(in, _mm_setr_epi8( - 3, 2, 1, 0, - 7, 6, 5, 4, - 11, 10, 9, 8, - 15, 14, 13, 12)); -} - -static inline __m128i -enc_reshuffle (__m128i in) -{ - // Slice into 32-bit chunks and operate on all chunks in parallel. - // All processing is done within the 32-bit chunk. First, shuffle: - // before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb] - // after: [00000000|aaaaaabb|bbbbcccc|ccdddddd] - in = _mm_shuffle_epi8(in, _mm_set_epi8( - -1, 9, 10, 11, - -1, 6, 7, 8, - -1, 3, 4, 5, - -1, 0, 1, 2)); - - // cd = [00000000|00000000|0000cccc|ccdddddd] - const __m128i cd = _mm_and_si128(in, _mm_set1_epi32(0x00000FFF)); - - // ab = [0000aaaa|aabbbbbb|00000000|00000000] - const __m128i ab = _mm_and_si128(_mm_slli_epi32(in, 4), _mm_set1_epi32(0x0FFF0000)); - - // merged = [0000aaaa|aabbbbbb|0000cccc|ccdddddd] - const __m128i merged = _mm_or_si128(ab, cd); - - // bd = [00000000|00bbbbbb|00000000|00dddddd] - const __m128i bd = _mm_and_si128(merged, _mm_set1_epi32(0x003F003F)); - - // ac = [00aaaaaa|00000000|00cccccc|00000000] - const __m128i ac = _mm_and_si128(_mm_slli_epi32(merged, 2), _mm_set1_epi32(0x3F003F00)); - - // indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] - const __m128i indices = _mm_or_si128(ac, bd); - - // return = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] - return _mm_bswap_epi32(indices); -} - -static inline __m128i -enc_translate (const __m128i in) -{ - // Translate values 0..63 to the Base64 alphabet. There are five sets: - // # From To Abs Delta Characters - // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ - // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz - // 2 [52..61] [48..57] -4 -75 0123456789 - // 3 [62] [43] -19 -15 + - // 4 [63] [47] -16 +3 / - - // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], - // [3,4], and [4]: - const __m128i mask1 = CMPGT(in, 25); - const __m128i mask2 = CMPGT(in, 51); - const __m128i mask3 = CMPGT(in, 61); - const __m128i mask4 = CMPEQ(in, 63); - - // All characters are at least in cumulative set 0, so add 'A': - __m128i out = _mm_add_epi8(in, _mm_set1_epi8(65)); - - // For inputs which are also in any of the other cumulative sets, - // add delta values against the previous set(s) to correct the shift: - out = _mm_add_epi8(out, REPLACE(mask1, 6)); - out = _mm_sub_epi8(out, REPLACE(mask2, 75)); - out = _mm_sub_epi8(out, REPLACE(mask3, 15)); - out = _mm_add_epi8(out, REPLACE(mask4, 3)); - - return out; -} - -static inline __m128i -dec_reshuffle (__m128i in) -{ - // Shuffle bytes to 32-bit bigendian: - in = _mm_bswap_epi32(in); - - // Mask in a single byte per shift: - __m128i mask = _mm_set1_epi32(0x3F000000); - - // Pack bytes together: - __m128i out = _mm_slli_epi32(_mm_and_si128(in, mask), 2); - mask = _mm_srli_epi32(mask, 8); - - out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 4)); - mask = _mm_srli_epi32(mask, 8); - - out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 6)); - mask = _mm_srli_epi32(mask, 8); - - out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 8)); - - // Reshuffle and repack into 12-byte output format: - return _mm_shuffle_epi8(out, _mm_setr_epi8( - 3, 2, 1, - 7, 6, 5, - 11, 10, 9, - 15, 14, 13, - -1, -1, -1, -1)); -} - -#endif // __SSSE3__ - -void -ssse3_base64_stream_encode - ( struct ssse3_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#ifdef __SSSE3__ - #include "enc_head.c" - #include "enc_ssse3.c" - #include "enc_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} - -int -ssse3_base64_stream_decode - ( struct ssse3_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#ifdef __SSSE3__ - #include "dec_head.c" - #include "dec_ssse3.c" - #include "dec_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> + +#include "libbase64.h" +#include "codecs.h" + +#ifdef __SSSE3__ +#include <tmmintrin.h> + +#define CMPGT(s,n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n)) +#define CMPEQ(s,n) _mm_cmpeq_epi8((s), _mm_set1_epi8(n)) +#define REPLACE(s,n) _mm_and_si128((s), _mm_set1_epi8(n)) +#define RANGE(s,a,b) _mm_andnot_si128(CMPGT((s), (b)), CMPGT((s), (a) - 1)) + +static inline __m128i +_mm_bswap_epi32 (const __m128i in) +{ + return _mm_shuffle_epi8(in, _mm_setr_epi8( + 3, 2, 1, 0, + 7, 6, 5, 4, + 11, 10, 9, 8, + 15, 14, 13, 12)); +} + +static inline __m128i +enc_reshuffle (__m128i in) +{ + // Slice into 32-bit chunks and operate on all chunks in parallel. + // All processing is done within the 32-bit chunk. First, shuffle: + // before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb] + // after: [00000000|aaaaaabb|bbbbcccc|ccdddddd] + in = _mm_shuffle_epi8(in, _mm_set_epi8( + -1, 9, 10, 11, + -1, 6, 7, 8, + -1, 3, 4, 5, + -1, 0, 1, 2)); + + // cd = [00000000|00000000|0000cccc|ccdddddd] + const __m128i cd = _mm_and_si128(in, _mm_set1_epi32(0x00000FFF)); + + // ab = [0000aaaa|aabbbbbb|00000000|00000000] + const __m128i ab = _mm_and_si128(_mm_slli_epi32(in, 4), _mm_set1_epi32(0x0FFF0000)); + + // merged = [0000aaaa|aabbbbbb|0000cccc|ccdddddd] + const __m128i merged = _mm_or_si128(ab, cd); + + // bd = [00000000|00bbbbbb|00000000|00dddddd] + const __m128i bd = _mm_and_si128(merged, _mm_set1_epi32(0x003F003F)); + + // ac = [00aaaaaa|00000000|00cccccc|00000000] + const __m128i ac = _mm_and_si128(_mm_slli_epi32(merged, 2), _mm_set1_epi32(0x3F003F00)); + + // indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] + const __m128i indices = _mm_or_si128(ac, bd); + + // return = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] + return _mm_bswap_epi32(indices); +} + +static inline __m128i +enc_translate (const __m128i in) +{ + // Translate values 0..63 to the Base64 alphabet. There are five sets: + // # From To Abs Delta Characters + // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ + // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz + // 2 [52..61] [48..57] -4 -75 0123456789 + // 3 [62] [43] -19 -15 + + // 4 [63] [47] -16 +3 / + + // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], + // [3,4], and [4]: + const __m128i mask1 = CMPGT(in, 25); + const __m128i mask2 = CMPGT(in, 51); + const __m128i mask3 = CMPGT(in, 61); + const __m128i mask4 = CMPEQ(in, 63); + + // All characters are at least in cumulative set 0, so add 'A': + __m128i out = _mm_add_epi8(in, _mm_set1_epi8(65)); + + // For inputs which are also in any of the other cumulative sets, + // add delta values against the previous set(s) to correct the shift: + out = _mm_add_epi8(out, REPLACE(mask1, 6)); + out = _mm_sub_epi8(out, REPLACE(mask2, 75)); + out = _mm_sub_epi8(out, REPLACE(mask3, 15)); + out = _mm_add_epi8(out, REPLACE(mask4, 3)); + + return out; +} + +static inline __m128i +dec_reshuffle (__m128i in) +{ + // Shuffle bytes to 32-bit bigendian: + in = _mm_bswap_epi32(in); + + // Mask in a single byte per shift: + __m128i mask = _mm_set1_epi32(0x3F000000); + + // Pack bytes together: + __m128i out = _mm_slli_epi32(_mm_and_si128(in, mask), 2); + mask = _mm_srli_epi32(mask, 8); + + out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 4)); + mask = _mm_srli_epi32(mask, 8); + + out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 6)); + mask = _mm_srli_epi32(mask, 8); + + out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 8)); + + // Reshuffle and repack into 12-byte output format: + return _mm_shuffle_epi8(out, _mm_setr_epi8( + 3, 2, 1, + 7, 6, 5, + 11, 10, 9, + 15, 14, 13, + -1, -1, -1, -1)); +} + +#endif // __SSSE3__ + +void +ssse3_base64_stream_encode + ( struct ssse3_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#ifdef __SSSE3__ + #include "enc_head.c" + #include "enc_ssse3.c" + #include "enc_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} + +int +ssse3_base64_stream_decode + ( struct ssse3_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#ifdef __SSSE3__ + #include "dec_head.c" + #include "dec_ssse3.c" + #include "dec_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} |