aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/base64/avx2
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/libs/base64/avx2
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/libs/base64/avx2')
-rw-r--r--contrib/libs/base64/avx2/.yandex_meta/licenses.list.txt62
-rw-r--r--contrib/libs/base64/avx2/codec_avx2.c191
-rw-r--r--contrib/libs/base64/avx2/codecs.h35
-rw-r--r--contrib/libs/base64/avx2/dec_avx2.c65
-rw-r--r--contrib/libs/base64/avx2/dec_head.c29
-rw-r--r--contrib/libs/base64/avx2/dec_tail.c65
-rw-r--r--contrib/libs/base64/avx2/enc_avx2.c22
-rw-r--r--contrib/libs/base64/avx2/enc_head.c23
-rw-r--r--contrib/libs/base64/avx2/enc_tail.c28
-rw-r--r--contrib/libs/base64/avx2/lib.c121
-rw-r--r--contrib/libs/base64/avx2/libbase64.h89
-rw-r--r--contrib/libs/base64/avx2/ya.make36
12 files changed, 766 insertions, 0 deletions
diff --git a/contrib/libs/base64/avx2/.yandex_meta/licenses.list.txt b/contrib/libs/base64/avx2/.yandex_meta/licenses.list.txt
new file mode 100644
index 0000000000..6308eed322
--- /dev/null
+++ b/contrib/libs/base64/avx2/.yandex_meta/licenses.list.txt
@@ -0,0 +1,62 @@
+====================BSD-2-Clause====================
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+====================BSD-2-Clause AND MIT====================
+## License
+
+This repository is licensed under the
+[BSD 2-clause License](http://opensource.org/licenses/BSD-2-Clause). See the
+LICENSE file.
+
+====================COPYRIGHT====================
+Copyright (c) 2013-2015, Alfred Klomp
+All rights reserved.
+
+
+====================File: LICENSE====================
+Copyright (c) 2013-2015, Alfred Klomp
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/contrib/libs/base64/avx2/codec_avx2.c b/contrib/libs/base64/avx2/codec_avx2.c
new file mode 100644
index 0000000000..46c351c539
--- /dev/null
+++ b/contrib/libs/base64/avx2/codec_avx2.c
@@ -0,0 +1,191 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "libbase64.h"
+#include "codecs.h"
+
+#ifdef __AVX2__
+#include <immintrin.h>
+
+// Byte-wise helpers on a 256-bit vector `s`; the scalar `n` is broadcast to
+// all 32 byte lanes before use.
+// CMPGT: lanes where s > n (signed 8-bit compare) become 0xFF, others 0x00.
+#define CMPGT(s,n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n))
+// CMPEQ: lanes where s == n become 0xFF, others 0x00.
+#define CMPEQ(s,n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n))
+// REPLACE: given a 0xFF/0x00 lane mask `s`, yields n in the selected lanes
+// and 0 in all other lanes.
+#define REPLACE(s,n) _mm256_and_si256((s), _mm256_set1_epi8(n))
+// RANGE: lane mask for a <= s <= b, built as (s > a - 1) AND NOT (s > b).
+#define RANGE(s,a,b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) - 1))
+
+// Reverse the byte order inside each of the eight 32-bit words of `in`.
+static inline __m256i
+_mm256_bswap_epi32 (const __m256i in)
+{
+    // _mm256_shuffle_epi8() shuffles each 128-bit lane on its own, so the
+    // same 16-entry index pattern is written out once per lane:
+    const __m256i reverse_each_word = _mm256_setr_epi8(
+        3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12,
+        3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12);
+
+    return _mm256_shuffle_epi8(in, reverse_each_word);
+}
+
+// Expand the low 24 bytes of `in` into 32 six-bit values, one per output
+// byte. Input bytes 24..31 do not influence the result.
+static inline __m256i
+enc_reshuffle (__m256i in)
+{
+ // Spread out 32-bit words over both halves of the input register:
+ // (the -1 slots are don't-cares: the byte shuffle below only reads
+ // bytes 0..11 of each 128-bit lane)
+ in = _mm256_permutevar8x32_epi32(in, _mm256_setr_epi32(
+ 0, 1, 2, -1,
+ 3, 4, 5, -1));
+
+ // Slice into 32-bit chunks and operate on all chunks in parallel.
+ // All processing is done within the 32-bit chunk. First, shuffle:
+ // before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb]
+ // after: [00000000|aaaaaabb|bbbbcccc|ccdddddd]
+ // (a shuffle index of -1 writes a zero byte)
+ in = _mm256_shuffle_epi8(in, _mm256_set_epi8(
+ -1, 9, 10, 11,
+ -1, 6, 7, 8,
+ -1, 3, 4, 5,
+ -1, 0, 1, 2,
+ -1, 9, 10, 11,
+ -1, 6, 7, 8,
+ -1, 3, 4, 5,
+ -1, 0, 1, 2));
+
+ // cd = [00000000|00000000|0000cccc|ccdddddd]
+ const __m256i cd = _mm256_and_si256(in, _mm256_set1_epi32(0x00000FFF));
+
+ // ab = [0000aaaa|aabbbbbb|00000000|00000000]
+ const __m256i ab = _mm256_and_si256(_mm256_slli_epi32(in, 4), _mm256_set1_epi32(0x0FFF0000));
+
+ // merged = [0000aaaa|aabbbbbb|0000cccc|ccdddddd]
+ const __m256i merged = _mm256_or_si256(ab, cd);
+
+ // bd = [00000000|00bbbbbb|00000000|00dddddd]
+ const __m256i bd = _mm256_and_si256(merged, _mm256_set1_epi32(0x003F003F));
+
+ // ac = [00aaaaaa|00000000|00cccccc|00000000]
+ const __m256i ac = _mm256_and_si256(_mm256_slli_epi32(merged, 2), _mm256_set1_epi32(0x3F003F00));
+
+ // indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd]
+ const __m256i indices = _mm256_or_si256(ac, bd);
+
+ // return = [00dddddd|00cccccc|00bbbbbb|00aaaaaa]
+ return _mm256_bswap_epi32(indices);
+}
+
+// Map 32 six-bit values (0..63, one per byte of `in`) to the corresponding
+// ASCII characters of the standard Base64 alphabet.
+static inline __m256i
+enc_translate (const __m256i in)
+{
+ // Translate values 0..63 to the Base64 alphabet. There are five sets:
+ // # From To Abs Delta Characters
+ // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ
+ // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz
+ // 2 [52..61] [48..57] -4 -75 0123456789
+ // 3 [62] [43] -19 -15 +
+ // 4 [63] [47] -16 +3 /
+ // "Abs" is the total offset for the set; "Delta" is the offset relative
+ // to the previous set, which is what gets applied cumulatively below.
+
+ // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4],
+ // [3,4], and [4]:
+ const __m256i mask1 = CMPGT(in, 25);
+ const __m256i mask2 = CMPGT(in, 51);
+ const __m256i mask3 = CMPGT(in, 61);
+ const __m256i mask4 = CMPEQ(in, 63);
+
+ // All characters are at least in cumulative set 0, so add 'A':
+ __m256i out = _mm256_add_epi8(in, _mm256_set1_epi8(65));
+
+ // For inputs which are also in any of the other cumulative sets,
+ // add delta values against the previous set(s) to correct the shift:
+ out = _mm256_add_epi8(out, REPLACE(mask1, 6));
+ out = _mm256_sub_epi8(out, REPLACE(mask2, 75));
+ out = _mm256_sub_epi8(out, REPLACE(mask3, 15));
+ out = _mm256_add_epi8(out, REPLACE(mask4, 3));
+
+ return out;
+}
+
+// Pack 32 decoded six-bit values (one per byte of `in`) into 24 contiguous
+// bytes at the low end of the result. The top 8 bytes of the result are zero.
+static inline __m256i
+dec_reshuffle (__m256i in)
+{
+ // Shuffle bytes to 32-bit bigendian:
+ in = _mm256_bswap_epi32(in);
+
+ // Mask in a single byte per shift:
+ __m256i mask = _mm256_set1_epi32(0x3F000000);
+
+ // Pack bytes together:
+ // (each step selects the next lower byte of every word and shifts it
+ // left by 2 more bits, closing the 2-bit gap above each 6-bit value)
+ __m256i out = _mm256_slli_epi32(_mm256_and_si256(in, mask), 2);
+ mask = _mm256_srli_epi32(mask, 8);
+
+ out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 4));
+ mask = _mm256_srli_epi32(mask, 8);
+
+ out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 6));
+ mask = _mm256_srli_epi32(mask, 8);
+
+ out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 8));
+
+ // Pack bytes together within 32-bit words, discarding words 3 and 7:
+ // (the -1 indices zero bytes 12..15 of each 128-bit lane)
+ out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
+ 3, 2, 1,
+ 7, 6, 5,
+ 11, 10, 9,
+ 15, 14, 13,
+ -1, -1, -1, -1,
+ 3, 2, 1,
+ 7, 6, 5,
+ 11, 10, 9,
+ 15, 14, 13,
+ -1, -1, -1, -1));
+
+ // Pack 32-bit words together, squashing empty words 3 and 7:
+ // (the two -1 slots pick up word 7, which was zeroed by the shuffle above)
+ return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(
+ 0, 1, 2, 4, 5, 6, -1, -1));
+}
+
+#endif // __AVX2__
+
+// Streaming encoder entry point.
+//
+// When __AVX2__ is defined the body is assembled textually from three
+// fragments: enc_head.c (locals, state copy and the opening of the
+// switch/for construct), enc_avx2.c (the vectorized loop) and enc_tail.c
+// (the bytewise remainder, state write-back and *outlen assignment).
+// The include order therefore must not be changed.
+//
+// When __AVX2__ is not defined the function ignores its arguments and calls
+// abort().
+void
+avx2_base64_stream_encode
+ ( struct avx2_base64_state *state
+ , const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ )
+{
+#if defined(__AVX2__)
+ #include "enc_head.c"
+ #include "enc_avx2.c"
+ #include "enc_tail.c"
+#else
+ (void)state;
+ (void)src;
+ (void)srclen;
+ (void)out;
+ (void)outlen;
+ abort();
+#endif
+}
+
+// Streaming decoder entry point.
+//
+// When __AVX2__ is defined the body is assembled textually from three
+// fragments: dec_head.c (locals, state copy, early exit on a previous EOF and
+// the opening of the switch/for construct), dec_avx2.c (the vectorized loop)
+// and dec_tail.c (the bytewise remainder, state write-back, *outlen
+// assignment and the return statement). The include order therefore must not
+// be changed.
+//
+// When __AVX2__ is not defined the function ignores its arguments and calls
+// abort(), so no value is ever returned on that path.
+int
+avx2_base64_stream_decode
+ ( struct avx2_base64_state *state
+ , const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ )
+{
+#if defined(__AVX2__)
+ #include "dec_head.c"
+ #include "dec_avx2.c"
+ #include "dec_tail.c"
+#else
+ (void)state;
+ (void)src;
+ (void)srclen;
+ (void)out;
+ (void)outlen;
+ abort();
+#endif
+}
diff --git a/contrib/libs/base64/avx2/codecs.h b/contrib/libs/base64/avx2/codecs.h
new file mode 100644
index 0000000000..16a285ac0b
--- /dev/null
+++ b/contrib/libs/base64/avx2/codecs.h
@@ -0,0 +1,35 @@
+#pragma once
+
+// Define machine endianness. This is for GCC:
+// (If the compiler predefines neither __BYTE_ORDER__ nor
+// __ORDER_LITTLE_ENDIAN__, both identifiers evaluate to 0 inside #if, the
+// comparison holds, and little-endian is selected.)
+#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+ #define BASE64_AVX2_LITTLE_ENDIAN 1
+#else
+ #define BASE64_AVX2_LITTLE_ENDIAN 0
+#endif
+
+// This is for Clang:
+// (These redefine the macro set above; a redefinition with the same value
+// is permitted by the preprocessor.)
+#ifdef __LITTLE_ENDIAN__
+ #define BASE64_AVX2_LITTLE_ENDIAN 1
+#endif
+
+#ifdef __BIG_ENDIAN__
+ #define BASE64_AVX2_LITTLE_ENDIAN 0
+#endif
+
+// Endian conversion functions
+// On little-endian hosts a byte swap converts to/from big-endian; on
+// big-endian hosts the value is passed through unchanged.
+#if BASE64_AVX2_LITTLE_ENDIAN
+ #define cpu_to_be32(x) __builtin_bswap32(x)
+ #define cpu_to_be64(x) __builtin_bswap64(x)
+ #define be32_to_cpu(x) __builtin_bswap32(x)
+ #define be64_to_cpu(x) __builtin_bswap64(x)
+#else
+ #define cpu_to_be32(x) (x)
+ #define cpu_to_be64(x) (x)
+ #define be32_to_cpu(x) (x)
+ #define be64_to_cpu(x) (x)
+#endif
+
+// These tables are used by all codecs
+// for fallback plain encoding/decoding:
+extern const uint8_t avx2_base64_table_enc[];
+extern const uint8_t avx2_base64_table_dec[];
diff --git a/contrib/libs/base64/avx2/dec_avx2.c b/contrib/libs/base64/avx2/dec_avx2.c
new file mode 100644
index 0000000000..cd3cafd30a
--- /dev/null
+++ b/contrib/libs/base64/avx2/dec_avx2.c
@@ -0,0 +1,65 @@
+// If we have AVX2 support, pick off 32 bytes at a time for as long as we can,
+// but make sure that we quit before seeing any == markers at the end of the
+// string. Also, because we write 8 zeroes at the end of the output, ensure
+// that there are at least 11 valid bytes of input data remaining to close the
+// gap. 32 + 2 + 11 = 45 bytes:
+while (srclen >= 45)
+{
+ // Load string:
+ __m256i str = _mm256_loadu_si256((__m256i *)c);
+
+ // The input consists of eight character sets: the five sets of the
+ // standard Base64 alphabet, the two URL-safe substitutes '-' and '_',
+ // and then there's the rest. Each valid set is mapped back to the 6-bit
+ // values it represents by adding a per-set delta.
+ //
+ // # From To Add Characters
+ // 1 [43] [62] +19 +
+ // 2 [47] [63] +16 /
+ // 3 [48..57] [52..61] +4 0..9
+ // 4 [65..90] [0..25] -65 A..Z
+ // 5 [97..122] [26..51] -71 a..z
+ // 6 [45] [62] +17 -
+ // 7 [95] [63] -32 _
+ // (8) Everything else => invalid input
+
+ const __m256i set1 = CMPEQ(str, '+');
+ const __m256i set2 = CMPEQ(str, '/');
+ const __m256i set3 = RANGE(str, '0', '9');
+ const __m256i set4 = RANGE(str, 'A', 'Z');
+ const __m256i set5 = RANGE(str, 'a', 'z');
+ const __m256i set6 = CMPEQ(str, '-');
+ const __m256i set7 = CMPEQ(str, '_');
+
+ // The sets are disjoint, so OR-ing the per-set deltas gives each byte
+ // exactly one delta, or zero if the byte is in none of the sets:
+ __m256i delta = REPLACE(set1, 19);
+ delta = _mm256_or_si256(delta, REPLACE(set2, 16));
+ delta = _mm256_or_si256(delta, REPLACE(set3, 4));
+ delta = _mm256_or_si256(delta, REPLACE(set4, -65));
+ delta = _mm256_or_si256(delta, REPLACE(set5, -71));
+ delta = _mm256_or_si256(delta, REPLACE(set6, 17));
+ delta = _mm256_or_si256(delta, REPLACE(set7, -32));
+
+ // Check for invalid input: if any of the delta values are zero,
+ // fall back on bytewise code to do error checking and reporting:
+#ifdef _MSC_VER
+ // Hack for MSVC miscompilation - it inserts vzeroupper for the break
+ // (we need to clear YMM registers before exiting the function)
+ // while delta and str are still in the registers.
+ // Save delta/str in memory manually.
+ _mm256_zeroupper();
+#endif
+ if (_mm256_movemask_epi8(CMPEQ(delta, 0))) {
+ break;
+ }
+
+ // Now simply add the delta values to the input:
+ str = _mm256_add_epi8(str, delta);
+
+ // Reshuffle the input to the packed 24-byte output format
+ // (the remaining 8 bytes of the register are zero):
+ str = dec_reshuffle(str);
+
+ // Store back. This writes all 32 bytes of the register; only the first
+ // 24 are counted as output, and the 8 zero bytes are overwritten by
+ // whatever is decoded next:
+ _mm256_storeu_si256((__m256i *)o, str);
+
+ c += 32;
+ o += 24;
+ outl += 24;
+ srclen -= 32;
+}
diff --git a/contrib/libs/base64/avx2/dec_head.c b/contrib/libs/base64/avx2/dec_head.c
new file mode 100644
index 0000000000..8bbd46ebc4
--- /dev/null
+++ b/contrib/libs/base64/avx2/dec_head.c
@@ -0,0 +1,29 @@
+int ret = 0;
+const uint8_t *c = (const uint8_t *)src;
+uint8_t *o = (uint8_t *)out;
+uint8_t q;
+
+// Use local temporaries to avoid cache thrashing:
+size_t outl = 0;
+struct avx2_base64_state st;
+st.eof = state->eof;
+st.bytes = state->bytes;
+st.carry = state->carry;
+
+// If we previously saw an EOF or an invalid character, bail out:
+if (st.eof) {
+ *outlen = 0;
+ return 0;
+}
+
+// Turn four 6-bit numbers into three bytes:
+// out[0] = 11111122
+// out[1] = 22223333
+// out[2] = 33444444
+
+// Duff's device again:
+switch (st.bytes)
+{
+ for (;;)
+ {
+ case 0:
diff --git a/contrib/libs/base64/avx2/dec_tail.c b/contrib/libs/base64/avx2/dec_tail.c
new file mode 100644
index 0000000000..b472f91b33
--- /dev/null
+++ b/contrib/libs/base64/avx2/dec_tail.c
@@ -0,0 +1,65 @@
+ if (srclen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = avx2_base64_table_dec[*c++]) >= 254) {
+ st.eof = 1;
+ // Treat character '=' as invalid for byte 0:
+ break;
+ }
+ st.carry = q << 2;
+ st.bytes++;
+
+ case 1: if (srclen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = avx2_base64_table_dec[*c++]) >= 254) {
+ st.eof = 1;
+ // Treat character '=' as invalid for byte 1:
+ break;
+ }
+ *o++ = st.carry | (q >> 4);
+ st.carry = q << 4;
+ st.bytes++;
+ outl++;
+
+ case 2: if (srclen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = avx2_base64_table_dec[*c++]) >= 254) {
+ st.eof = 1;
+ // When q == 254, the input char is '='. Return 1 and EOF.
+ // Technically, should check if next byte is also '=', but never mind.
+ // When q == 255, the input char is invalid. Return 0 and EOF.
+ ret = (q == 254) ? 1 : 0;
+ break;
+ }
+ *o++ = st.carry | (q >> 2);
+ st.carry = q << 6;
+ st.bytes++;
+ outl++;
+
+ case 3: if (srclen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = avx2_base64_table_dec[*c++]) >= 254) {
+ st.eof = 1;
+ // When q == 254, the input char is '='. Return 1 and EOF.
+ // When q == 255, the input char is invalid. Return 0 and EOF.
+ ret = (q == 254) ? 1 : 0;
+ break;
+ }
+ *o++ = st.carry | q;
+ st.carry = 0;
+ st.bytes = 0;
+ outl++;
+ }
+}
+state->eof = st.eof;
+state->bytes = st.bytes;
+state->carry = st.carry;
+*outlen = outl;
+return ret;
diff --git a/contrib/libs/base64/avx2/enc_avx2.c b/contrib/libs/base64/avx2/enc_avx2.c
new file mode 100644
index 0000000000..25e36f40b4
--- /dev/null
+++ b/contrib/libs/base64/avx2/enc_avx2.c
@@ -0,0 +1,22 @@
+// If we have AVX2 support, pick off 24 bytes at a time for as long as we can.
+// But because we read 32 bytes at a time, ensure we have enough room to do a
+// full 32-byte read without segfaulting:
+while (srclen >= 32)
+{
+ // Load string:
+ // (32 bytes are read, but only the first 24 are consumed per iteration)
+ __m256i str = _mm256_loadu_si256((__m256i *)c);
+
+ // Reshuffle:
+ str = enc_reshuffle(str);
+
+ // Translate reshuffled bytes to the Base64 alphabet:
+ str = enc_translate(str);
+
+ // Store:
+ // (all 32 stored bytes are valid output characters)
+ _mm256_storeu_si256((__m256i *)o, str);
+
+ c += 24; // 6 * 4 bytes of input
+ o += 32; // 8 * 4 bytes of output
+ outl += 32;
+ srclen -= 24;
+}
diff --git a/contrib/libs/base64/avx2/enc_head.c b/contrib/libs/base64/avx2/enc_head.c
new file mode 100644
index 0000000000..3d05b0bd04
--- /dev/null
+++ b/contrib/libs/base64/avx2/enc_head.c
@@ -0,0 +1,23 @@
+// Assume that *out is large enough to contain the output.
+// Theoretically it should be 4/3 the length of src.
+const uint8_t *c = (const uint8_t *)src;
+uint8_t *o = (uint8_t *)out;
+
+// Use local temporaries to avoid cache thrashing:
+size_t outl = 0;
+struct avx2_base64_state st;
+st.bytes = state->bytes;
+st.carry = state->carry;
+
+// Turn three bytes into four 6-bit numbers:
+// in[0] = 00111111
+// in[1] = 00112222
+// in[2] = 00222233
+// in[3] = 00333333
+
+// Duff's device, a for() loop inside a switch() statement. Legal!
+switch (st.bytes)
+{
+ for (;;)
+ {
+ case 0:
diff --git a/contrib/libs/base64/avx2/enc_tail.c b/contrib/libs/base64/avx2/enc_tail.c
new file mode 100644
index 0000000000..e4362db594
--- /dev/null
+++ b/contrib/libs/base64/avx2/enc_tail.c
@@ -0,0 +1,28 @@
+ if (srclen-- == 0) {
+ break;
+ }
+ *o++ = avx2_base64_table_enc[*c >> 2];
+ st.carry = (*c++ << 4) & 0x30;
+ st.bytes++;
+ outl += 1;
+
+ case 1: if (srclen-- == 0) {
+ break;
+ }
+ *o++ = avx2_base64_table_enc[st.carry | (*c >> 4)];
+ st.carry = (*c++ << 2) & 0x3C;
+ st.bytes++;
+ outl += 1;
+
+ case 2: if (srclen-- == 0) {
+ break;
+ }
+ *o++ = avx2_base64_table_enc[st.carry | (*c >> 6)];
+ *o++ = avx2_base64_table_enc[*c++ & 0x3F];
+ st.bytes = 0;
+ outl += 2;
+ }
+}
+state->bytes = st.bytes;
+state->carry = st.carry;
+*outlen = outl;
diff --git a/contrib/libs/base64/avx2/lib.c b/contrib/libs/base64/avx2/lib.c
new file mode 100644
index 0000000000..b0671c8cae
--- /dev/null
+++ b/contrib/libs/base64/avx2/lib.c
@@ -0,0 +1,121 @@
+#include <stdint.h>
+#include <stddef.h>
+
+#include "libbase64.h"
+#include "codecs.h"
+
+// Encoding table: index is a 6-bit value (0..63), element is the matching
+// character of the standard Base64 alphabet. Because the initializer is a
+// string literal, the array also holds a terminating zero byte at index 64.
+const uint8_t
+avx2_base64_table_enc[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789+/";
+
+// In the lookup table below, note that the value for '=' (character 61) is
+// 254, not 255. This character is used for in-band signaling of the end of
+// the datastream, and we will use that later. The character ',' (44) is
+// mapped to 254 as well, so it is treated the same way as '='.
+// The characters A-Z, a-z, 0-9 and + / are mapped to their "decoded" values;
+// in addition '-' (45) decodes to 62 like '+', and '_' (95) decodes to 63
+// like '/'. The other bytes all map to the value 255, which flags them as
+// "invalid input".
+
+const uint8_t
+avx2_base64_table_dec[] =
+{
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63
+ 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95
+ 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 144..159
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 160..175
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 176..191
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 192..207
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 208..223
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 224..239
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 240..255
+};
+
+// Reset `state` so that a new stream can be encoded with it.
+void
+avx2_base64_stream_encode_init (struct avx2_base64_state *state)
+{
+    state->carry = 0;
+    state->eof = state->bytes = 0;
+}
+
+// Write the end-of-stream trailer for the encoder state in `state` to `out`
+// and store the number of bytes written (0, 2 or 3) in *outlen.
+void
+avx2_base64_stream_encode_final
+    ( struct avx2_base64_state *state
+    , char *out
+    , size_t *outlen
+    )
+{
+    uint8_t *dst = (uint8_t *)out;
+
+    switch (state->bytes) {
+    case 1:
+        // One input byte pending: flush its leftover bits, pad twice.
+        dst[0] = avx2_base64_table_enc[state->carry];
+        dst[1] = '=';
+        dst[2] = '=';
+        *outlen = 3;
+        break;
+
+    case 2:
+        // Two input bytes pending: flush the leftover bits, pad once.
+        dst[0] = avx2_base64_table_enc[state->carry];
+        dst[1] = '=';
+        *outlen = 2;
+        break;
+
+    default:
+        // Input ended on a group boundary: nothing to write.
+        *outlen = 0;
+        break;
+    }
+}
+
+// Reset `state` so that a new stream can be decoded with it.
+void
+avx2_base64_stream_decode_init (struct avx2_base64_state *state)
+{
+    state->carry = 0;
+    state->eof = state->bytes = 0;
+}
+
+// One-shot encoder: runs a fresh stream over the whole of `src`, appends the
+// trailer, and stores the total number of bytes written to `out` in *outlen.
+void
+avx2_base64_encode
+    ( const char *src
+    , size_t srclen
+    , char *out
+    , size_t *outlen
+    )
+{
+    struct avx2_base64_state stream;
+    size_t body_len;
+    size_t trailer_len;
+
+    // Start from a clean state, then push the entire input through:
+    avx2_base64_stream_encode_init(&stream);
+    avx2_base64_stream_encode(&stream, src, srclen, out, &body_len);
+
+    // The trailer (if any) goes directly behind the streamed output:
+    avx2_base64_stream_encode_final(&stream, out + body_len, &trailer_len);
+
+    *outlen = body_len + trailer_len;
+}
+
+// One-shot decoder: runs a fresh stream over the whole of `src` and passes
+// through the result of avx2_base64_stream_decode().
+int
+avx2_base64_decode
+    ( const char *src
+    , size_t srclen
+    , char *out
+    , size_t *outlen
+    )
+{
+    struct avx2_base64_state stream;
+    int status;
+
+    // Start from a clean state, then push the entire input through:
+    avx2_base64_stream_decode_init(&stream);
+    status = avx2_base64_stream_decode(&stream, src, srclen, out, outlen);
+
+    return status;
+}
diff --git a/contrib/libs/base64/avx2/libbase64.h b/contrib/libs/base64/avx2/libbase64.h
new file mode 100644
index 0000000000..91a8ab1ed2
--- /dev/null
+++ b/contrib/libs/base64/avx2/libbase64.h
@@ -0,0 +1,89 @@
+#pragma once
+
+// Every prototype in this header uses size_t; include its definition so the
+// header can be included on its own, from C or C++:
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* State carried between successive stream encode/decode calls. Initialize it
+ * with the matching *_init() function before first use. */
+struct avx2_base64_state {
+ /* Decoder only: set to 1 once an end-of-stream marker or an invalid
+  * character has been seen; later decode calls then return immediately. */
+ int eof;
+ /* Position inside the current group: number of input bytes (encoder,
+  * 0..2) or input characters (decoder, 0..3) already consumed. */
+ int bytes;
+ /* Bits left over from the previous input unit, waiting to be combined
+  * with the next one. */
+ unsigned char carry;
+};
+
+/* Wrapper function to encode a plain string of given length. Output is written
+ * to *out without trailing zero. Output length in bytes is written to *outlen.
+ * The buffer in `out` has been allocated by the caller and must hold at least
+ * 4 * ceil(srclen / 3) bytes (four output characters per started group of
+ * three input bytes, padding included). */
+void avx2_base64_encode
+ ( const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ ) ;
+
+/* Call this before calling avx2_base64_stream_encode() to init the state.
+ * All fields of `state` are reset to zero. */
+void avx2_base64_stream_encode_init
+ ( struct avx2_base64_state *state
+ ) ;
+
+/* Encodes the block of data of given length at `src`, into the buffer at
+ * `out`. Caller is responsible for allocating a large enough out-buffer; it
+ * must be at least 4/3 the size of the in-buffer, but take some margin. Places
+ * the number of new bytes written by this call into `outlen`. Does not
+ * zero-terminate or finalize the output; call
+ * avx2_base64_stream_encode_final() after the last block.
+ * If the library was built without AVX2 support, this function calls
+ * abort(). */
+void avx2_base64_stream_encode
+ ( struct avx2_base64_state *state
+ , const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ ) ;
+
+/* Finalizes the output begun by previous calls to
+ * avx2_base64_stream_encode(). Adds the remaining output character and the
+ * required '=' end-of-stream markers if appropriate. `outlen` is modified and
+ * will contain the number of new bytes written at `out`: 3, 2, or (quite
+ * often) zero. */
+void avx2_base64_stream_encode_final
+ ( struct avx2_base64_state *state
+ , char *out
+ , size_t *outlen
+ ) ;
+
+/* Wrapper function to decode a plain string of given length. Output is written
+ * to *out without trailing zero. Output length in bytes is written to *outlen.
+ * The buffer in `out` has been allocated by the caller and is at least 3/4 the
+ * size of the input. Returns the result of avx2_base64_stream_decode() for
+ * the whole input: 1 on success, 0 on a decoding error. */
+int avx2_base64_decode
+ ( const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ ) ;
+
+/* Call this before calling avx2_base64_stream_decode() to init the state.
+ * All fields of `state` are reset to zero. */
+void avx2_base64_stream_decode_init
+ ( struct avx2_base64_state *state
+ ) ;
+
+/* Decodes the block of data of given length at `src`, into the buffer at
+ * `out`. Caller is responsible for allocating a large enough out-buffer; it
+ * must be at least 3/4 the size of the in-buffer, but take some margin. Places
+ * the number of new bytes written by this call into `outlen`. Does not
+ * zero-terminate the output.
+ * Returns 1 if the input was consumed without error, or if an end-of-stream
+ * marker was found in the third or fourth position of a group. Returns 0 if
+ * an invalid character was found, if an end-of-stream marker appeared in the
+ * first or second position of a group, or if a previous call on the same
+ * state already hit the end of the stream (in which case nothing is written
+ * and `outlen` is set to zero).
+ * If the library was built without AVX2 support, this function calls abort()
+ * instead of returning. */
+int avx2_base64_stream_decode
+ ( struct avx2_base64_state *state
+ , const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ ) ;
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/contrib/libs/base64/avx2/ya.make b/contrib/libs/base64/avx2/ya.make
new file mode 100644
index 0000000000..b0dc5ce772
--- /dev/null
+++ b/contrib/libs/base64/avx2/ya.make
@@ -0,0 +1,36 @@
+OWNER(
+ yazevnul
+ g:contrib
+ g:cpp-contrib
+)
+
+LIBRARY()
+
+LICENSE(
+ BSD-2-Clause AND
+ MIT
+)
+
+LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
+
+NO_UTIL()
+
+SRCS(
+ codec_avx2.c
+ lib.c
+)
+
+IF (ARCH_X86_64 OR ARCH_I386)
+ IF (MSVC AND NOT CLANG_CL)
+ CONLYFLAGS(/D__AVX2__=1)
+ ELSEIF (CLANG_CL)
+ CONLYFLAGS(-mavx2)
+ ELSE()
+ CONLYFLAGS(
+ -mavx2
+ -std=c11
+ )
+ ENDIF()
+ENDIF()
+
+END()