diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/base64/avx2 | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/base64/avx2')
-rw-r--r-- | contrib/libs/base64/avx2/.yandex_meta/licenses.list.txt | 62 | ||||
-rw-r--r-- | contrib/libs/base64/avx2/codec_avx2.c | 191 | ||||
-rw-r--r-- | contrib/libs/base64/avx2/codecs.h | 35 | ||||
-rw-r--r-- | contrib/libs/base64/avx2/dec_avx2.c | 65 | ||||
-rw-r--r-- | contrib/libs/base64/avx2/dec_head.c | 29 | ||||
-rw-r--r-- | contrib/libs/base64/avx2/dec_tail.c | 65 | ||||
-rw-r--r-- | contrib/libs/base64/avx2/enc_avx2.c | 22 | ||||
-rw-r--r-- | contrib/libs/base64/avx2/enc_head.c | 23 | ||||
-rw-r--r-- | contrib/libs/base64/avx2/enc_tail.c | 28 | ||||
-rw-r--r-- | contrib/libs/base64/avx2/lib.c | 121 | ||||
-rw-r--r-- | contrib/libs/base64/avx2/libbase64.h | 89 | ||||
-rw-r--r-- | contrib/libs/base64/avx2/ya.make | 36 |
12 files changed, 766 insertions, 0 deletions
diff --git a/contrib/libs/base64/avx2/.yandex_meta/licenses.list.txt b/contrib/libs/base64/avx2/.yandex_meta/licenses.list.txt new file mode 100644 index 0000000000..6308eed322 --- /dev/null +++ b/contrib/libs/base64/avx2/.yandex_meta/licenses.list.txt @@ -0,0 +1,62 @@ +====================BSD-2-Clause==================== +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +====================BSD-2-Clause AND MIT==================== +## License + +This repository is licensed under the +[BSD 2-clause License](http://opensource.org/licenses/BSD-2-Clause). See the +LICENSE file. + +====================COPYRIGHT==================== +Copyright (c) 2013-2015, Alfred Klomp +All rights reserved. 
+ + +====================File: LICENSE==================== +Copyright (c) 2013-2015, Alfred Klomp +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/contrib/libs/base64/avx2/codec_avx2.c b/contrib/libs/base64/avx2/codec_avx2.c new file mode 100644 index 0000000000..46c351c539 --- /dev/null +++ b/contrib/libs/base64/avx2/codec_avx2.c @@ -0,0 +1,191 @@ +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> + +#include "libbase64.h" +#include "codecs.h" + +#ifdef __AVX2__ +#include <immintrin.h> + +#define CMPGT(s,n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n)) +#define CMPEQ(s,n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n)) +#define REPLACE(s,n) _mm256_and_si256((s), _mm256_set1_epi8(n)) +#define RANGE(s,a,b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) - 1)) + +static inline __m256i +_mm256_bswap_epi32 (const __m256i in) +{ + // Reverse the bytes of each 32-bit word. _mm256_shuffle_epi8() works on + // two 128-bit lanes separately, so the same pattern is given for each lane: + return _mm256_shuffle_epi8(in, _mm256_setr_epi8( + 3, 2, 1, 0, + 7, 6, 5, 4, + 11, 10, 9, 8, + 15, 14, 13, 12, + 3, 2, 1, 0, + 7, 6, 5, 4, + 11, 10, 9, 8, + 15, 14, 13, 12)); +} + +// Turn the first 24 bytes of `in` into 32 bytes that each hold one 6-bit +// index in their low bits (see the bit diagrams below): +static inline __m256i +enc_reshuffle (__m256i in) +{ + // Spread out 32-bit words over both halves of the input register: + in = _mm256_permutevar8x32_epi32(in, _mm256_setr_epi32( + 0, 1, 2, -1, + 3, 4, 5, -1)); + + // Slice into 32-bit chunks and operate on all chunks in parallel. + // All processing is done within the 32-bit chunk. 
First, shuffle: + // before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb] + // after: [00000000|aaaaaabb|bbbbcccc|ccdddddd] + in = _mm256_shuffle_epi8(in, _mm256_set_epi8( + -1, 9, 10, 11, + -1, 6, 7, 8, + -1, 3, 4, 5, + -1, 0, 1, 2, + -1, 9, 10, 11, + -1, 6, 7, 8, + -1, 3, 4, 5, + -1, 0, 1, 2)); + + // cd = [00000000|00000000|0000cccc|ccdddddd] + const __m256i cd = _mm256_and_si256(in, _mm256_set1_epi32(0x00000FFF)); + + // ab = [0000aaaa|aabbbbbb|00000000|00000000] + const __m256i ab = _mm256_and_si256(_mm256_slli_epi32(in, 4), _mm256_set1_epi32(0x0FFF0000)); + + // merged = [0000aaaa|aabbbbbb|0000cccc|ccdddddd] + const __m256i merged = _mm256_or_si256(ab, cd); + + // bd = [00000000|00bbbbbb|00000000|00dddddd] + const __m256i bd = _mm256_and_si256(merged, _mm256_set1_epi32(0x003F003F)); + + // ac = [00aaaaaa|00000000|00cccccc|00000000] + const __m256i ac = _mm256_and_si256(_mm256_slli_epi32(merged, 2), _mm256_set1_epi32(0x3F003F00)); + + // indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] + const __m256i indices = _mm256_or_si256(ac, bd); + + // return = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] + return _mm256_bswap_epi32(indices); +} + +// Convert 32 bytes of 6-bit values into the matching Base64 characters: +static inline __m256i +enc_translate (const __m256i in) +{ + // Translate values 0..63 to the Base64 alphabet. 
There are five sets: + // # From To Abs Delta Characters + // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ + // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz + // 2 [52..61] [48..57] -4 -75 0123456789 + // 3 [62] [43] -19 -15 + + // 4 [63] [47] -16 +3 / + + // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], + // [3,4], and [4]: + const __m256i mask1 = CMPGT(in, 25); + const __m256i mask2 = CMPGT(in, 51); + const __m256i mask3 = CMPGT(in, 61); + const __m256i mask4 = CMPEQ(in, 63); + + // All characters are at least in cumulative set 0, so add 'A': + __m256i out = _mm256_add_epi8(in, _mm256_set1_epi8(65)); + + // For inputs which are also in any of the other cumulative sets, + // add delta values against the previous set(s) to correct the shift: + out = _mm256_add_epi8(out, REPLACE(mask1, 6)); + out = _mm256_sub_epi8(out, REPLACE(mask2, 75)); + out = _mm256_sub_epi8(out, REPLACE(mask3, 15)); + out = _mm256_add_epi8(out, REPLACE(mask4, 3)); + + return out; +} + +// Pack 32 bytes holding 6-bit values into 24 bytes of decoded output; the +// last 8 bytes of the result are zero: +static inline __m256i +dec_reshuffle (__m256i in) +{ + // Shuffle bytes to 32-bit bigendian: + in = _mm256_bswap_epi32(in); + + // Mask in a single byte per shift: + __m256i mask = _mm256_set1_epi32(0x3F000000); + + // Pack bytes together: + __m256i out = _mm256_slli_epi32(_mm256_and_si256(in, mask), 2); + mask = _mm256_srli_epi32(mask, 8); + + out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 4)); + mask = _mm256_srli_epi32(mask, 8); + + out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 6)); + mask = _mm256_srli_epi32(mask, 8); + + out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 8)); + + // Pack bytes together within 32-bit words, discarding words 3 and 7: + out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( + 3, 2, 1, + 7, 6, 5, + 11, 10, 9, + 15, 14, 13, + -1, -1, -1, -1, + 3, 2, 1, + 7, 6, 5, + 11, 10, 9, + 15, 14, 13, + -1, -1, -1, -1)); + + // Pack 32-bit words together, squashing empty words 3 and 
7: + return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32( + 0, 1, 2, 4, 5, 6, -1, -1)); +} + +#endif // __AVX2__ + +void +avx2_base64_stream_encode + ( struct avx2_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if defined(__AVX2__) + // The body is assembled from three textually included fragments: + #include "enc_head.c" + #include "enc_avx2.c" + #include "enc_tail.c" +#else + // Built without AVX2 support: this codec must not be called, fail hard: + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} + +int +avx2_base64_stream_decode + ( struct avx2_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if defined(__AVX2__) + // The body is assembled from three textually included fragments: + #include "dec_head.c" + #include "dec_avx2.c" + #include "dec_tail.c" +#else + // Built without AVX2 support: no error code is returned, abort() is called: + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} diff --git a/contrib/libs/base64/avx2/codecs.h b/contrib/libs/base64/avx2/codecs.h new file mode 100644 index 0000000000..16a285ac0b --- /dev/null +++ b/contrib/libs/base64/avx2/codecs.h @@ -0,0 +1,35 @@ +#pragma once + +// Define machine endianness. 
This is for GCC: +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define BASE64_AVX2_LITTLE_ENDIAN 1 +#else + #define BASE64_AVX2_LITTLE_ENDIAN 0 +#endif + +// This is for Clang: +#ifdef __LITTLE_ENDIAN__ + #define BASE64_AVX2_LITTLE_ENDIAN 1 +#endif + +#ifdef __BIG_ENDIAN__ + #define BASE64_AVX2_LITTLE_ENDIAN 0 +#endif + +// Endian conversion functions +#if BASE64_AVX2_LITTLE_ENDIAN + #define cpu_to_be32(x) __builtin_bswap32(x) + #define cpu_to_be64(x) __builtin_bswap64(x) + #define be32_to_cpu(x) __builtin_bswap32(x) + #define be64_to_cpu(x) __builtin_bswap64(x) +#else + #define cpu_to_be32(x) (x) + #define cpu_to_be64(x) (x) + #define be32_to_cpu(x) (x) + #define be64_to_cpu(x) (x) +#endif + +// These tables are used by all codecs +// for fallback plain encoding/decoding. +// NOTE(review): uint8_t needs <stdint.h>, which this header does not include +// itself; the including file has to include it first: +extern const uint8_t avx2_base64_table_enc[]; +extern const uint8_t avx2_base64_table_dec[]; diff --git a/contrib/libs/base64/avx2/dec_avx2.c b/contrib/libs/base64/avx2/dec_avx2.c new file mode 100644 index 0000000000..cd3cafd30a --- /dev/null +++ b/contrib/libs/base64/avx2/dec_avx2.c @@ -0,0 +1,65 @@ +// If we have AVX2 support, pick off 32 bytes at a time for as long as we can, +// but make sure that we quit before seeing any == markers at the end of the +// string. Also, because we write 8 zeroes at the end of the output, ensure +// that there are at least 11 valid bytes of input data remaining to close the +// gap. 32 + 2 + 11 = 45 bytes: +while (srclen >= 45) +{ + // Load string: + __m256i str = _mm256_loadu_si256((__m256i *)c); + + // The input consists of eight character sets (the standard Base64 + // alphabet plus the URL-safe '-' and '_'), which we need to map back to + // the 6-bit values they represent. + // There are three ranges, four singles, and then there's the rest. 
+ // + // # From To Add Characters + // 1 [43] [62] +19 + + // 2 [47] [63] +16 / + // 3 [48..57] [52..61] +4 0..9 + // 4 [65..90] [0..25] -65 A..Z + // 5 [97..122] [26..51] -71 a..z + // 6 [45] [62] +17 - + // 7 [95] [63] -32 _ + // (8) Everything else => invalid input + + const __m256i set1 = CMPEQ(str, '+'); + const __m256i set2 = CMPEQ(str, '/'); + const __m256i set3 = RANGE(str, '0', '9'); + const __m256i set4 = RANGE(str, 'A', 'Z'); + const __m256i set5 = RANGE(str, 'a', 'z'); + const __m256i set6 = CMPEQ(str, '-'); + const __m256i set7 = CMPEQ(str, '_'); + + __m256i delta = REPLACE(set1, 19); + delta = _mm256_or_si256(delta, REPLACE(set2, 16)); + delta = _mm256_or_si256(delta, REPLACE(set3, 4)); + delta = _mm256_or_si256(delta, REPLACE(set4, -65)); + delta = _mm256_or_si256(delta, REPLACE(set5, -71)); + delta = _mm256_or_si256(delta, REPLACE(set6, 17)); + delta = _mm256_or_si256(delta, REPLACE(set7, -32)); + + // Check for invalid input: if any of the delta values are zero, + // fall back on bytewise code to do error checking and reporting: +#ifdef _MSC_VER + // Hack for MSVC miscompilation - it inserts vzeroupper for the break + // (we need to clear YMM registers before exiting the function) + // while delta and str are still in the registers. + // Save delta/str in memory manually. 
+ _mm256_zeroupper(); +#endif + if (_mm256_movemask_epi8(CMPEQ(delta, 0))) { + break; + } + + // Now simply add the delta values to the input: + str = _mm256_add_epi8(str, delta); + + // Reshuffle the input to packed output format (12 bytes per 128-bit lane): + str = dec_reshuffle(str); + + // Store back (32 bytes are written; only the first 24 count as output): + _mm256_storeu_si256((__m256i *)o, str); + + c += 32; + o += 24; + outl += 24; + srclen -= 32; +} diff --git a/contrib/libs/base64/avx2/dec_head.c b/contrib/libs/base64/avx2/dec_head.c new file mode 100644 index 0000000000..8bbd46ebc4 --- /dev/null +++ b/contrib/libs/base64/avx2/dec_head.c @@ -0,0 +1,29 @@ +int ret = 0; +const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; +uint8_t q; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct avx2_base64_state st; +st.eof = state->eof; +st.bytes = state->bytes; +st.carry = state->carry; + +// If we previously saw an EOF or an invalid character, bail out: +if (st.eof) { + *outlen = 0; + return 0; +} + +// Turn four 6-bit numbers into three bytes: +// out[0] = 11111122 +// out[1] = 22223333 +// out[2] = 33444444 + +// Duff's device again: +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/avx2/dec_tail.c b/contrib/libs/base64/avx2/dec_tail.c new file mode 100644 index 0000000000..b472f91b33 --- /dev/null +++ b/contrib/libs/base64/avx2/dec_tail.c @@ -0,0 +1,65 @@ + if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = avx2_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 0: + break; + } + st.carry = q << 2; + st.bytes++; + + case 1: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = avx2_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 1: + break; + } + *o++ = st.carry | (q >> 4); + st.carry = q << 4; + st.bytes++; + outl++; + + case 2: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = avx2_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input 
char is '='. Return 1 and EOF. + // Technically, should check if next byte is also '=', but never mind. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | (q >> 2); + st.carry = q << 6; + st.bytes++; + outl++; + + case 3: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = avx2_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | q; + st.carry = 0; + st.bytes = 0; + outl++; + } +} +state->eof = st.eof; +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; +return ret; diff --git a/contrib/libs/base64/avx2/enc_avx2.c b/contrib/libs/base64/avx2/enc_avx2.c new file mode 100644 index 0000000000..25e36f40b4 --- /dev/null +++ b/contrib/libs/base64/avx2/enc_avx2.c @@ -0,0 +1,22 @@ +// If we have AVX2 support, pick off 24 bytes at a time for as long as we can. +// But because we read 32 bytes at a time, ensure we have enough room to do a +// full 32-byte read without segfaulting: +while (srclen >= 32) +{ + // Load 32 input bytes (only the first 24 are consumed per iteration): + __m256i str = _mm256_loadu_si256((__m256i *)c); + + // Reshuffle: + str = enc_reshuffle(str); + + // Translate reshuffled bytes to the Base64 alphabet: + str = enc_translate(str); + + // Store: + _mm256_storeu_si256((__m256i *)o, str); + + c += 24; // 6 * 4 bytes of input + o += 32; // 8 * 4 bytes of output + outl += 32; + srclen -= 24; +} diff --git a/contrib/libs/base64/avx2/enc_head.c b/contrib/libs/base64/avx2/enc_head.c new file mode 100644 index 0000000000..3d05b0bd04 --- /dev/null +++ b/contrib/libs/base64/avx2/enc_head.c @@ -0,0 +1,23 @@ +// Assume that *out is large enough to contain the output. +// Theoretically it should be 4/3 the length of src. 
+const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct avx2_base64_state st; +st.bytes = state->bytes; +st.carry = state->carry; + +// Turn three bytes into four 6-bit numbers: +// in[0] = 00111111 +// in[1] = 00112222 +// in[2] = 00222233 +// in[3] = 00333333 + +// Duff's device, a for() loop inside a switch() statement. Legal! +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/avx2/enc_tail.c b/contrib/libs/base64/avx2/enc_tail.c new file mode 100644 index 0000000000..e4362db594 --- /dev/null +++ b/contrib/libs/base64/avx2/enc_tail.c @@ -0,0 +1,28 @@ + if (srclen-- == 0) { + break; + } + *o++ = avx2_base64_table_enc[*c >> 2]; + st.carry = (*c++ << 4) & 0x30; + st.bytes++; + outl += 1; + + case 1: if (srclen-- == 0) { + break; + } + *o++ = avx2_base64_table_enc[st.carry | (*c >> 4)]; + st.carry = (*c++ << 2) & 0x3C; + st.bytes++; + outl += 1; + + case 2: if (srclen-- == 0) { + break; + } + *o++ = avx2_base64_table_enc[st.carry | (*c >> 6)]; + *o++ = avx2_base64_table_enc[*c++ & 0x3F]; + st.bytes = 0; + outl += 2; + } +} +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; diff --git a/contrib/libs/base64/avx2/lib.c b/contrib/libs/base64/avx2/lib.c new file mode 100644 index 0000000000..b0671c8cae --- /dev/null +++ b/contrib/libs/base64/avx2/lib.c @@ -0,0 +1,121 @@ +#include <stdint.h> +#include <stddef.h> + +#include "libbase64.h" +#include "codecs.h" + +const uint8_t +avx2_base64_table_enc[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +// In the lookup table below, note that the value for '=' (character 61) is +// 254, not 255. This character is used for in-band signaling of the end of +// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 +// and + / are mapped to their "decoded" values; '-' and '_' (the URL-safe +// alphabet) also map to 62 and 63, and ',' (character 44) maps to 254 too. 
The other bytes all map to +// the value 255, which flags them as "invalid input". + +const uint8_t +avx2_base64_table_dec[] = +{ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +}; + +void +avx2_base64_stream_encode_init (struct avx2_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +// Write the trailer for input whose length is not a multiple of three bytes: +void +avx2_base64_stream_encode_final + ( struct avx2_base64_state *state + , char *out + , size_t *outlen + ) +{ + uint8_t *o = (uint8_t *)out; + + if (state->bytes == 1) { + // One input byte is pending: emit its leftover bits and pad with "==": + *o++ = avx2_base64_table_enc[state->carry]; + *o++ = '='; + *o++ = '='; + *outlen = 3; + return; + } + if (state->bytes == 2) { + // Two input bytes are pending: emit the leftover bits and pad with "=": + *o++ = avx2_base64_table_enc[state->carry]; + *o++ = '='; + *outlen = 2; + return; + } + 
*outlen = 0; +} + +void +avx2_base64_stream_decode_init (struct avx2_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +avx2_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + size_t s; + size_t t; + struct avx2_base64_state state; + + // Init the stream reader: + avx2_base64_stream_encode_init(&state); + + // Feed the whole string to the stream reader: + avx2_base64_stream_encode(&state, src, srclen, out, &s); + + // Finalize the stream by writing trailer if any: + avx2_base64_stream_encode_final(&state, out + s, &t); + + // Final output length is stream length plus tail: + *outlen = s + t; +} + +int +avx2_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + struct avx2_base64_state state; + + // Init the stream reader: + avx2_base64_stream_decode_init(&state); + + // Feed the whole string to the stream reader: + return avx2_base64_stream_decode(&state, src, srclen, out, outlen); +} diff --git a/contrib/libs/base64/avx2/libbase64.h b/contrib/libs/base64/avx2/libbase64.h new file mode 100644 index 0000000000..91a8ab1ed2 --- /dev/null +++ b/contrib/libs/base64/avx2/libbase64.h @@ -0,0 +1,89 @@ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/* NOTE(review): size_t is used below but <stddef.h> is not included by this + * header; the including file has to include it first. */ + +/* Codec state carried between stream calls: `eof` is set once the decoder has + * seen '=' or an invalid character, `bytes` is the position inside the current + * input group, and `carry` holds the bits left over from the previous byte. */ +struct avx2_base64_state { + int eof; + int bytes; + unsigned char carry; +}; + +/* Wrapper function to encode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 4/3 the + * size of the input, rounded up to a whole number of 4-character groups: */ +void avx2_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling avx2_base64_stream_encode() to init the state. 
See above + * for `flags`; set to 0 for default operation: */ +void avx2_base64_stream_encode_init + ( struct avx2_base64_state *state + ) ; + +/* Encodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 4/3 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate or finalize the output. Calls + * abort() when the library was built without AVX2 support (__AVX2__ not + * defined). */ +void avx2_base64_stream_encode + ( struct avx2_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Finalizes the output begun by previous calls to + * `avx2_base64_stream_encode()`. + * Adds the required end-of-stream markers if appropriate. `outlen` is modified + * and will contain the number of new bytes written at `out` (which will quite + * often be zero). */ +void avx2_base64_stream_encode_final + ( struct avx2_base64_state *state + , char *out + , size_t *outlen + ) ; + +/* Wrapper function to decode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 3/4 the + * size of the input. Returns the result of avx2_base64_stream_decode(): */ +int avx2_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling avx2_base64_stream_decode() to init the state. It + * sets the eof, bytes and carry fields to zero: */ +void avx2_base64_stream_decode_init + ( struct avx2_base64_state *state + ) ; + +/* Decodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 3/4 the size of the in-buffer, but take some margin. 
Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate the output. Returns 1 if all is + * well, and 0 if a decoding error was found, such as an invalid character. + * When the library was built without AVX2 support (__AVX2__ not defined) no + * error code is returned: the function calls abort() instead. */ +int avx2_base64_stream_decode + ( struct avx2_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +#ifdef __cplusplus +} +#endif + +diff --git a/contrib/libs/base64/avx2/ya.make b/contrib/libs/base64/avx2/ya.make new file mode 100644 index 0000000000..b0dc5ce772 --- /dev/null +++ b/contrib/libs/base64/avx2/ya.make @@ -0,0 +1,36 @@ +OWNER( + yazevnul + g:contrib + g:cpp-contrib +) + +LIBRARY() + +LICENSE( + BSD-2-Clause AND + MIT +) + +LICENSE_TEXTS(.yandex_meta/licenses.list.txt) + +NO_UTIL() + +SRCS( + codec_avx2.c + lib.c +) + +IF (ARCH_X86_64 OR ARCH_I386) + IF (MSVC AND NOT CLANG_CL) + CONLYFLAGS(/D__AVX2__=1) + ELSEIF (CLANG_CL) + CONLYFLAGS(-mavx2) + ELSE() + CONLYFLAGS( + -mavx2 + -std=c11 + ) + ENDIF() +ENDIF() + +END() |