about summary refs log tree commit diff stats
path: root/library/cpp/string_utils/base64
diff options
context:
space:
mode:
author Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/string_utils/base64
download ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/string_utils/base64')
-rw-r--r-- library/cpp/string_utils/base64/base64.cpp 268
-rw-r--r-- library/cpp/string_utils/base64/base64.h 130
-rw-r--r-- library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp 46
-rw-r--r-- library/cpp/string_utils/base64/base64_ut.cpp 497
-rw-r--r-- library/cpp/string_utils/base64/bench/main.cpp 326
-rw-r--r-- library/cpp/string_utils/base64/bench/metrics/main.py 5
-rw-r--r-- library/cpp/string_utils/base64/bench/metrics/ya.make 20
-rw-r--r-- library/cpp/string_utils/base64/bench/ya.make 16
-rw-r--r-- library/cpp/string_utils/base64/fuzz/generic/ya.make 12
-rw-r--r-- library/cpp/string_utils/base64/fuzz/lib/main.cpp 13
-rw-r--r-- library/cpp/string_utils/base64/fuzz/lib/ya.make 16
-rw-r--r-- library/cpp/string_utils/base64/fuzz/uneven/main.cpp 10
-rw-r--r-- library/cpp/string_utils/base64/fuzz/uneven/ya.make 15
-rw-r--r-- library/cpp/string_utils/base64/fuzz/ya.make 10
-rw-r--r-- library/cpp/string_utils/base64/ut/ya.make 22
-rw-r--r-- library/cpp/string_utils/base64/ya.make 23
16 files changed, 1429 insertions, 0 deletions
diff --git a/library/cpp/string_utils/base64/base64.cpp b/library/cpp/string_utils/base64/base64.cpp
new file mode 100644
index 0000000000..05c201f0de
--- /dev/null
+++ b/library/cpp/string_utils/base64/base64.cpp
@@ -0,0 +1,268 @@
+#include "base64.h"
+
+#include <contrib/libs/base64/avx2/libbase64.h>
+#include <contrib/libs/base64/ssse3/libbase64.h>
+#include <contrib/libs/base64/neon32/libbase64.h>
+#include <contrib/libs/base64/neon64/libbase64.h>
+#include <contrib/libs/base64/plain32/libbase64.h>
+#include <contrib/libs/base64/plain64/libbase64.h>
+
+#include <util/generic/yexception.h>
+#include <util/system/cpu_id.h>
+#include <util/system/platform.h>
+
+#include <cstdlib>
+
+namespace {
+ // Pair of encode/decode routines selected once, in the constructor, for the machine
+ // the process is running on. Both pointers take an input range plus an output buffer
+ // and report the number of bytes produced through `outlen`.
+ struct TImpl {
+ void (*Encode)(const char* src, size_t srclen, char* out, size_t* outlen);
+ int (*Decode)(const char* src, size_t srclen, char* out, size_t* outlen);
+
+ // Picks the first applicable backend in this order: AVX2 (never on Windows), SSSE3,
+ // NEON64, NEON32, plain 64-bit, plain 32-bit. Aborts the process if none applies.
+ TImpl() {
+#if defined(_arm32_)
+ const bool haveNEON32 = true;
+#else
+ const bool haveNEON32 = false;
+#endif
+
+#if defined(_arm64_)
+ const bool haveNEON64 = true;
+#else
+ const bool haveNEON64 = false;
+#endif
+
+# ifdef _windows_
+ // MSVC miscompiles this code in release builds, so the AVX2 branch is temporarily disabled on Windows:
+ // https://developercommunity.visualstudio.com/content/problem/334085/release-build-has-made-wrong-optimizaion-in-base64.html
+ const bool isWin = true;
+# else
+ const bool isWin = false;
+# endif
+ // x86 SIMD backends are checked at run time through NX86::Have*().
+ if (!isWin && NX86::HaveAVX() && NX86::HaveAVX2()) {
+ Encode = avx2_base64_encode;
+ Decode = avx2_base64_decode;
+ } else if (NX86::HaveSSSE3()) {
+ Encode = ssse3_base64_encode;
+ Decode = ssse3_base64_decode;
+ } else if (haveNEON64) {
+ // NEON availability is a compile-time property of the target (see the #if blocks above).
+ Encode = neon64_base64_encode;
+ Decode = neon64_base64_decode;
+ } else if (haveNEON32) {
+ Encode = neon32_base64_encode;
+ Decode = neon32_base64_decode;
+ } else if (sizeof(void*) == 8) {
+ // running on a 64 bit platform
+ Encode = plain64_base64_encode;
+ Decode = plain64_base64_decode;
+ } else if (sizeof(void*) == 4) {
+ // running on a 32 bit platform (actually impossible in Arcadia)
+ Encode = plain32_base64_encode;
+ Decode = plain32_base64_decode;
+ } else {
+ // failed to find appropriate implementation
+ std::abort();
+ }
+ }
+ };
+
+ // Returns the backend table shared by the whole translation unit. The table is a
+ // function-local static, so it is constructed on the first call only.
+ // A const reference is returned: a by-value `const TImpl` would copy the table on
+ // every call and its top-level `const` carries no meaning for callers.
+ const TImpl& GetImpl() {
+     static const TImpl IMPL;
+     return IMPL;
+ }
+}
+
+// Standard encoding alphabet: symbol for each 6-bit value 0..63.
+static const char base64_etab_std[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+// Reverse (decoding) table indexed by the byte value of an input character.
+// Both alphabets are covered: '+' and '-' give 62, '/' and '_' give 63.
+// Every other byte, including the padding characters ',' and '=', gives 0,
+// so this table cannot tell invalid input from a valid 'A'.
+static const char base64_bkw[] = {
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 0..15
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 16..31
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\76', '\0', '\76', '\0', '\77', // 32..47
+ '\64', '\65', '\66', '\67', '\70', '\71', '\72', '\73', '\74', '\75', '\0', '\0', '\0', '\0', '\0', '\0', // 48..63
+ '\0', '\0', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\10', '\11', '\12', '\13', '\14', '\15', '\16', // 64..79
+ '\17', '\20', '\21', '\22', '\23', '\24', '\25', '\26', '\27', '\30', '\31', '\0', '\0', '\0', '\0', '\77', // 80..95
+ '\0', '\32', '\33', '\34', '\35', '\36', '\37', '\40', '\41', '\42', '\43', '\44', '\45', '\46', '\47', '\50', // 96..111
+ '\51', '\52', '\53', '\54', '\55', '\56', '\57', '\60', '\61', '\62', '\63', '\0', '\0', '\0', '\0', '\0', // 112..127
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 128..143
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 144..159
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 160..175
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 176..191
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 192..207
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 208..223
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 224..239
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0'}; // 240..255
+
+// One entry per possible byte value, so any unsigned char is a safe index.
+static_assert(Y_ARRAY_SIZE(base64_bkw) == 256, "wrong size");
+
+// Base64 for url encoding, RFC3548
+static const char base64_etab_url[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
+// Alphabet index of the first output symbol: the six most significant bits of `octet0`.
+static inline unsigned char GetBase64EncodedIndex0(unsigned char octet0) {
+    const unsigned topSixBits = octet0 / 4u;
+    return static_cast<unsigned char>(topSixBits);
+}
+
+// Alphabet index of the second output symbol: the two lowest bits of `octet0`
+// followed by the four highest bits of `octet1`.
+static inline unsigned char GetBase64EncodedIndex1(unsigned char octet0, unsigned char octet1) {
+    const unsigned lowTwoOfFirst = octet0 & 0x03u;
+    const unsigned highFourOfSecond = (octet1 >> 4) & 0x0fu;
+    return static_cast<unsigned char>((lowTwoOfFirst << 4) | highFourOfSecond);
+}
+
+// Alphabet index of the third output symbol: the four lowest bits of `octet1`
+// followed by the two highest bits of `octet2`.
+static inline unsigned char GetBase64EncodedIndex2(unsigned char octet1, unsigned char octet2) {
+    const unsigned lowFourOfSecond = octet1 & 0x0fu;
+    const unsigned highTwoOfThird = (octet2 >> 6) & 0x03u;
+    return static_cast<unsigned char>((lowFourOfSecond << 2) | highTwoOfThird);
+}
+
+// Alphabet index of the fourth output symbol: the six least significant bits of `octet2`.
+static inline unsigned char GetBase64EncodedIndex3(unsigned char octet2) {
+    constexpr unsigned lowSixMask = 0x3fu;
+    return static_cast<unsigned char>(octet2 & lowSixMask);
+}
+
+// Encodes `len` bytes starting at `instr` as base64 text stored at `outstr`.
+// `urlVersion` selects base64_etab_url with ',' as the padding character; otherwise
+// base64_etab_std with '=' is used. A zero byte is stored right after the text and a
+// pointer to that zero byte is returned.
+template <bool urlVersion>
+static inline char* Base64EncodeImpl(char* outstr, const unsigned char* instr, size_t len) {
+    const char* const alphabet = (urlVersion ? base64_etab_url : base64_etab_std);
+    const char padChar = (urlVersion ? ',' : '=');
+
+    // Complete 3-byte groups: each one yields four symbols.
+    size_t pos = 0;
+    for (; pos + 2 < len; pos += 3) {
+        const unsigned char first = instr[pos];
+        const unsigned char second = instr[pos + 1];
+        const unsigned char third = instr[pos + 2];
+        outstr[0] = alphabet[GetBase64EncodedIndex0(first)];
+        outstr[1] = alphabet[GetBase64EncodedIndex1(first, second)];
+        outstr[2] = alphabet[GetBase64EncodedIndex2(second, third)];
+        outstr[3] = alphabet[GetBase64EncodedIndex3(third)];
+        outstr += 4;
+    }
+
+    // One or two leftover bytes: absent input bits count as zero and the group is
+    // completed with padding characters.
+    if (pos < len) {
+        const bool hasSecond = pos + 1 < len;
+        const unsigned char first = instr[pos];
+        const unsigned char second = hasSecond ? instr[pos + 1] : '\0';
+        outstr[0] = alphabet[GetBase64EncodedIndex0(first)];
+        outstr[1] = alphabet[GetBase64EncodedIndex1(first, second)];
+        outstr[2] = hasSecond ? alphabet[GetBase64EncodedIndex2(second, '\0')] : padChar;
+        outstr[3] = padChar;
+        outstr += 4;
+    }
+
+    *outstr = 0;
+    return outstr;
+}
+
+// Scalar encoder for the standard alphabet ('+', '/', padding '=').
+// Returns a pointer to the zero byte written after the encoded text.
+static char* Base64EncodePlain(char* outstr, const unsigned char* instr, size_t len) {
+ return Base64EncodeImpl<false>(outstr, instr, len);
+}
+
+// Scalar encoder for the URL alphabet ('-', '_', padding ',').
+// Returns a pointer to the zero byte written after the encoded text.
+char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len) {
+ return Base64EncodeImpl<true>(outstr, instr, len);
+}
+
+// Decodes one group of four base64 characters at `src` into three bytes at `dst`.
+// Each character is translated through base64_bkw; all three output bytes are always
+// written, even when the group ends in padding (the caller trims the byte count).
+//
+// The helper is file-local (`static`): a plain `inline` function with such a generic
+// name would have external linkage and could clash with another definition elsewhere.
+// `src` is only read, hence the pointer-to-const.
+static inline void uudecode_1(char* dst, const unsigned char* src) {
+    // All four symbols are read before anything is stored.
+    const char s0 = base64_bkw[src[0]];
+    const char s1 = base64_bkw[src[1]];
+    const char s2 = base64_bkw[src[2]];
+    const char s3 = base64_bkw[src[3]];
+
+    dst[0] = char((s0 << 2) | (s1 >> 4));
+    dst[1] = char((s1 << 4) | (s2 >> 2));
+    dst[2] = char((s2 << 6) | s3);
+}
+
+// Scalar decoder: processes [b, e) four characters at a time with uudecode_1 and
+// returns the number of decoded bytes. One byte is subtracted from the count when the
+// last character is a padding character (',' or '='), two when the last two are.
+// No validation of the characters is performed.
+static size_t Base64DecodePlain(void* dst, const char* b, const char* e) {
+    char* const out = (char*)dst;
+    size_t produced = 0;
+    for (; b < e; b += 4, produced += 3) {
+        uudecode_1(out + produced, (unsigned char*)b);
+    }
+
+    if (produced == 0) {
+        return 0;
+    }
+
+    // `b` now points just past the last processed group.
+    const auto isPadding = [](char c) { return c == ',' || c == '='; };
+    if (!isPadding(b[-1])) {
+        return produced;
+    }
+    return isPadding(b[-2]) ? produced - 2 : produced - 1;
+}
+
+// Table for Base64StrictDecode, indexed by the byte value of an input character.
+// Alphabet characters of both variants map to their 6-bit value (0..63), the padding
+// characters ',' and '=' map to 65 (\101) and every other byte maps to 64 (\100),
+// which lets the decoder tell valid symbols, padding and garbage apart.
+static const char base64_bkw_strict[] =
+ "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100"
+ "\100\100\100\100\100\100\100\100\100\100\100\76\101\76\100\77\64\65\66\67\70\71\72\73\74\75\100\100\100\101\100\100"
+ "\100\0\1\2\3\4\5\6\7\10\11\12\13\14\15\16\17\20\21\22\23\24\25\26\27\30\31\100\100\100\100\77"
+ "\100\32\33\34\35\36\37\40\41\42\43\44\45\46\47\50\51\52\53\54\55\56\57\60\61\62\63\100\100\100\100\100"
+ "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100"
+ "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100"
+ "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100"
+ "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100";
+
+// Decodes [b, e) into `out`, validating every character through base64_bkw_strict.
+// Throws yexception when the length is not a multiple of 4, when a group contains a
+// character outside both alphabets, when padding appears in the first two positions
+// of a group, or when the third position is padding while the fourth is not.
+// Padded groups are accepted anywhere in the input, not only at its end.
+// Returns the number of bytes written to `out`.
+size_t Base64StrictDecode(void* out, const char* b, const char* e) {
+    Y_ENSURE(!((e - b) % 4), "incorrect input length for base64 decode");
+
+    constexpr char invalid = 64;
+    constexpr char padding = 65;
+    const auto isPadChar = [](unsigned char c) { return c == ',' || c == '='; };
+
+    char* dst = (char*)out;
+    const unsigned char* const end = (unsigned char*)e;
+
+    for (const unsigned char* src = (unsigned char*)b; src < end; src += 4) {
+        const char q0 = base64_bkw_strict[src[0]];
+        const char q1 = base64_bkw_strict[src[1]];
+        const char q2 = base64_bkw_strict[src[2]];
+        const char q3 = base64_bkw_strict[src[3]];
+
+        const bool hasGarbage = q0 == invalid || q1 == invalid || q2 == invalid || q3 == invalid;
+        const bool padsTooEarly = q0 == padding || q1 == padding;
+        if (Y_UNLIKELY(hasGarbage || padsTooEarly)) {
+            ythrow yexception() << "invalid character in input";
+        }
+
+        dst[0] = char((q0 << 2) | (q1 >> 4));
+        dst[1] = char((q1 << 4) | (q2 >> 2));
+        dst[2] = char((q2 << 6) | q3);
+        dst += 3;
+
+        // Trailing padding of the group shortens the output by one or two bytes.
+        const bool lastIsPad = isPadChar(src[3]);
+        const bool thirdIsPad = isPadChar(src[2]);
+        if (lastIsPad) {
+            dst -= thirdIsPad ? 2 : 1;
+        } else if (Y_UNLIKELY(thirdIsPad)) {
+            ythrow yexception() << "incorrect padding";
+        }
+    }
+
+    return dst - (char*)out;
+}
+
+// Decodes [b, e) into `dst` and returns the number of bytes produced.
+// Throws yexception when the input length is not a multiple of 4. Inputs shorter
+// than 8 characters go through the scalar decoder; longer ones are handed to the
+// backend chosen by TImpl, whose int return value is not inspected here.
+size_t Base64Decode(void* dst, const char* b, const char* e) {
+    static const TImpl IMPL = GetImpl();
+
+    const auto inputLen = e - b;
+    Y_ENSURE(!(inputLen % 4), "incorrect input length for base64 decode");
+
+    const bool isShort = inputLen < 8;
+    if (Y_LIKELY(isShort)) {
+        return Base64DecodePlain(dst, b, e);
+    }
+
+    size_t decodedLen;
+    IMPL.Decode(b, inputLen, (char*)dst, &decodedLen);
+    return decodedLen;
+}
+
+// Decodes input whose length need not be a multiple of 4: the missing characters
+// are supplied as '=' before the regular Base64Decode is applied.
+TString Base64DecodeUneven(const TStringBuf s) {
+    const size_t remainder = s.length() % 4;
+    if (remainder != 0) {
+        // padding to 4
+        return Base64Decode(TString(s) + TString(4 - remainder, '='));
+    }
+
+    return Base64Decode(s);
+}
+
+// Encodes `len` bytes from `instr` into `outstr` with the standard alphabet, stores
+// a terminating zero after the text and returns a pointer to that zero byte.
+// Inputs shorter than 8 bytes use the scalar encoder; longer ones use the backend
+// chosen by TImpl.
+char* Base64Encode(char* outstr, const unsigned char* instr, size_t len) {
+    static const TImpl IMPL = GetImpl();
+
+    const bool isShort = len < 8;
+    if (Y_LIKELY(isShort)) {
+        return Base64EncodePlain(outstr, instr, len);
+    }
+
+    size_t written;
+    IMPL.Encode((char*)instr, len, outstr, &written);
+
+    char* const terminator = outstr + written;
+    *terminator = '\0';
+    return terminator;
+}
diff --git a/library/cpp/string_utils/base64/base64.h b/library/cpp/string_utils/base64/base64.h
new file mode 100644
index 0000000000..f778a6425a
--- /dev/null
+++ b/library/cpp/string_utils/base64/base64.h
@@ -0,0 +1,130 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+
+/* @return Size of the buffer required to decode Base64 encoded data of size `len`:
+ *         three bytes for every started group of four input characters.
+ */
+constexpr size_t Base64DecodeBufSize(const size_t len) noexcept {
+    return 3 * ((len + 3) / 4);
+}
+
+/* Decode Base64 encoded data. Can decode both regular Base64 and Base64URL encoded data. Can decode
+ * only valid Base64[URL] data, behaviour for invalid data is unspecified.
+ *
+ * @throws yexception when the input length (`e - b`) is not a multiple of 4.
+ *
+ * @param dst memory for writing output; Base64DecodeBufSize(e - b) bytes are sufficient.
+ * @param b pointer to the beginning of base64 encoded string.
+ * @param e pointer to the end of base64 encoded string.
+ *
+ * @return Return number of bytes decoded.
+ */
+size_t Base64Decode(void* dst, const char* b, const char* e);
+
+/* Decodes `src` into the caller-provided memory `dst`.
+ *
+ * @return View over the decoded bytes stored in `dst`.
+ */
+inline TStringBuf Base64Decode(const TStringBuf src, void* dst) {
+ return TStringBuf((const char*)dst, Base64Decode(dst, src.begin(), src.end()));
+}
+
+/* Decodes `src` into `dst`, replacing its previous contents. */
+inline void Base64Decode(const TStringBuf src, TString& dst) {
+ // Size for the worst case first, then shrink to the number of bytes actually decoded.
+ dst.ReserveAndResize(Base64DecodeBufSize(src.size()));
+ dst.resize(Base64Decode(src, dst.begin()).size());
+}
+
+// WARNING: may silently process only part of the input; use Base64StrictDecode instead of this function
+// when the input has to be validated.
+// Returns the decoded data as a new string.
+inline TString Base64Decode(const TStringBuf s) {
+ TString ret;
+ Base64Decode(s, ret);
+ return ret;
+}
+
+///
+/// @brief Decodes Base64 string with strict verification
+/// of invalid symbols, also tries to decode Base64 string with padding
+/// inside.
+///
+/// @throws Throws exceptions on inputs which contain invalid symbols
+/// or incorrect padding, and on inputs whose length is not a multiple of 4.
+/// @{
+///
+/// @param dst memory for writing output.
+/// @param b a pointer to the beginning of base64 encoded string.
+/// @param e a pointer to the end of base64 encoded string.
+///
+/// @return Returns number of bytes decoded.
+///
+size_t Base64StrictDecode(void* dst, const char* b, const char* e);
+
+///
+/// @param src a base64 encoded string.
+/// @param dst a pointer to allocated memory
+/// for writing result.
+///
+/// @return Returns a TStringBuf over the decoded bytes stored in dst.
+///
+inline TStringBuf Base64StrictDecode(const TStringBuf src, void* dst) {
+ return TStringBuf((const char*)dst, Base64StrictDecode(dst, src.begin(), src.end()));
+}
+
+///
+/// @param src a base64 encoded string.
+/// @param dst a decoded string; its previous contents are replaced.
+///
+inline void Base64StrictDecode(const TStringBuf src, TString& dst) {
+ // Size for the worst case first, then shrink to the number of bytes actually decoded.
+ dst.ReserveAndResize(Base64DecodeBufSize(src.size()));
+ dst.resize(Base64StrictDecode(src, dst.begin()).size());
+}
+
+///
+/// @param src a base64 encoded string.
+///
+/// @returns a decoded string (a new TString).
+///
+inline TString Base64StrictDecode(const TStringBuf src) {
+ TString ret;
+ Base64StrictDecode(src, ret);
+ return ret;
+}
+/// @}
+
+/// Decodes strings whose length is not divisible by 4 (for example, with the
+/// trailing padding dropped); inputs with a length divisible by 4 are decoded as is.
+TString Base64DecodeUneven(const TStringBuf s);
+
+// encode
+/* @return Size of the buffer required to encode `len` bytes: four characters for
+ *         every started group of three input bytes plus one byte for the terminating zero.
+ */
+constexpr size_t Base64EncodeBufSize(const size_t len) noexcept {
+    return 1 + 4 * ((len + 2) / 3);
+}
+
+/* Encode `len` bytes from `instr` into `outstr` and store a terminating zero after the text.
+ * Base64Encode uses the standard alphabet with '=' padding, Base64EncodeUrl uses the
+ * URL alphabet ('-', '_') with ',' padding.
+ *
+ * @param outstr memory for writing output; Base64EncodeBufSize(len) bytes are sufficient.
+ * @param instr pointer to the data to encode.
+ * @param len number of bytes to encode.
+ *
+ * @return Pointer to the terminating zero, i.e. to the end of the encoded text.
+ */
+char* Base64Encode(char* outstr, const unsigned char* instr, size_t len);
+char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len);
+
+/* Encodes `src` (standard alphabet) into the caller-provided memory `tmp`.
+ *
+ * @return View over the encoded text in `tmp`, not including the terminating zero.
+ */
+inline TStringBuf Base64Encode(const TStringBuf src, void* tmp) {
+ return TStringBuf((const char*)tmp, Base64Encode((char*)tmp, (const unsigned char*)src.data(), src.size()));
+}
+
+/* Encodes `src` (URL alphabet) into the caller-provided memory `tmp`.
+ *
+ * @return View over the encoded text in `tmp`, not including the terminating zero.
+ */
+inline TStringBuf Base64EncodeUrl(const TStringBuf src, void* tmp) {
+ return TStringBuf((const char*)tmp, Base64EncodeUrl((char*)tmp, (const unsigned char*)src.data(), src.size()));
+}
+
+/* Encodes `src` (standard alphabet) into `dst`, replacing its previous contents. */
+inline void Base64Encode(const TStringBuf src, TString& dst) {
+ // Make room for the text plus the terminating zero, then cut the string to the text length.
+ dst.ReserveAndResize(Base64EncodeBufSize(src.size()));
+ dst.resize(Base64Encode(src, dst.begin()).size());
+}
+
+/* Encodes `src` (URL alphabet) into `dst`, replacing its previous contents. */
+inline void Base64EncodeUrl(const TStringBuf src, TString& dst) {
+ // Make room for the text plus the terminating zero, then cut the string to the text length.
+ dst.ReserveAndResize(Base64EncodeBufSize(src.size()));
+ dst.resize(Base64EncodeUrl(src, dst.begin()).size());
+}
+
+/* @return `s` encoded with the standard alphabet, as a new string. */
+inline TString Base64Encode(const TStringBuf s) {
+ TString ret;
+ Base64Encode(s, ret);
+ return ret;
+}
+
+/* @return `s` encoded with the URL alphabet, as a new string. */
+inline TString Base64EncodeUrl(const TStringBuf s) {
+ TString ret;
+ Base64EncodeUrl(s, ret);
+ return ret;
+}
diff --git a/library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp b/library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp
new file mode 100644
index 0000000000..c3ed068a37
--- /dev/null
+++ b/library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp
@@ -0,0 +1,46 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <library/cpp/string_utils/base64/base64.h>
+
+// Checks Base64DecodeUneven on inputs with standard padding, URL padding and no padding.
+Y_UNIT_TEST_SUITE(TBase64DecodeUneven) {
+ Y_UNIT_TEST(Base64DecodeUneven) {
+ const TString wikipedia_slogan =
+ "Man is distinguished, not only by his reason, "
+ "but by this singular passion from other animals, which is a lust of the "
+ "mind, that by a perseverance of delight in the continued and "
+ "indefatigable generation of knowledge, exceeds the short "
+ "vehemence of any carnal pleasure.";
+ // Standard encoding of the text above, ending with one '=' padding character.
+ const TString encoded =
+ "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0"
+ "aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1"
+ "c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0"
+ "aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdl"
+ "LCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=";
+
+ UNIT_ASSERT_VALUES_EQUAL(encoded, Base64Encode(wikipedia_slogan));
+ UNIT_ASSERT_VALUES_EQUAL(wikipedia_slogan, Base64DecodeUneven(encoded));
+
+ // Same payload ending with the URL padding character ','.
+ const TString encoded_url1 =
+ "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0"
+ "aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1"
+ "c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0"
+ "aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdl"
+ "LCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4,";
+ // Same payload with the padding dropped, so the length is not a multiple of 4.
+ const TString encoded_url2 =
+ "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0"
+ "aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1"
+ "c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0"
+ "aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdl"
+ "LCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4";
+ UNIT_ASSERT_VALUES_EQUAL(wikipedia_slogan, Base64DecodeUneven(encoded_url1));
+ UNIT_ASSERT_VALUES_EQUAL(wikipedia_slogan, Base64DecodeUneven(encoded_url2));
+
+ // Round trips through both encoders.
+ const TString lp = "Linkin Park";
+ UNIT_ASSERT_VALUES_EQUAL(lp, Base64DecodeUneven(Base64Encode(lp)));
+ UNIT_ASSERT_VALUES_EQUAL(lp, Base64DecodeUneven(Base64EncodeUrl(lp)));
+
+ // Text with a line break and a non-ASCII character.
+ const TString dp = "ADP GmbH\nAnalyse Design & Programmierung\nGesellschaft mit beschränkter Haftung";
+ UNIT_ASSERT_VALUES_EQUAL(dp, Base64DecodeUneven(Base64Encode(dp)));
+ UNIT_ASSERT_VALUES_EQUAL(dp, Base64DecodeUneven(Base64EncodeUrl(dp)));
+ }
+}
diff --git a/library/cpp/string_utils/base64/base64_ut.cpp b/library/cpp/string_utils/base64/base64_ut.cpp
new file mode 100644
index 0000000000..bcc1e65879
--- /dev/null
+++ b/library/cpp/string_utils/base64/base64_ut.cpp
@@ -0,0 +1,497 @@
+#include "base64.h"
+
+#include <contrib/libs/base64/avx2/libbase64.h>
+#include <contrib/libs/base64/neon32/libbase64.h>
+#include <contrib/libs/base64/neon64/libbase64.h>
+#include <contrib/libs/base64/plain32/libbase64.h>
+#include <contrib/libs/base64/plain64/libbase64.h>
+#include <contrib/libs/base64/ssse3/libbase64.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/vector.h>
+#include <util/random/fast.h>
+#include <util/system/cpu_id.h>
+#include <util/system/platform.h>
+
+#include <array>
+
+using namespace std::string_view_literals;
+
+// Generates, for one backend, the same family of overloads that base64.h offers:
+// <prefix>Base64Decode / <prefix>Base64Encode taking raw pointers, (TStringBuf, void*),
+// (TStringBuf, TString&) and (TStringBuf). `encFunction` / `decFunction` are the backend
+// routines called by the raw-pointer overloads. The generated code refers to itself
+// through ::NB64Etalon, so the macro has to be expanded inside that namespace.
+#define BASE64_UT_DECLARE_BASE64_IMPL(prefix, encFunction, decFunction) \
+ Y_DECLARE_UNUSED \
+ static size_t prefix##Base64Decode(void* dst, const char* b, const char* e) { \
+ const auto size = e - b; \
+ Y_ENSURE(!(size % 4), "incorrect input length for base64 decode"); \
+ \
+ size_t outLen; \
+ decFunction(b, size, (char*)dst, &outLen); \
+ return outLen; \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline TStringBuf prefix##Base64Decode(const TStringBuf& src, void* dst) { \
+ return TStringBuf((const char*)dst, ::NB64Etalon::prefix##Base64Decode(dst, src.begin(), src.end())); \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline void prefix##Base64Decode(const TStringBuf& src, TString& dst) { \
+ dst.ReserveAndResize(Base64DecodeBufSize(src.size())); \
+ dst.resize(::NB64Etalon::prefix##Base64Decode(src, dst.begin()).size()); \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline TString prefix##Base64Decode(const TStringBuf& s) { \
+ TString ret; \
+ prefix##Base64Decode(s, ret); \
+ return ret; \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static char* prefix##Base64Encode(char* outstr, const unsigned char* instr, size_t len) { \
+ size_t outLen; \
+ encFunction((char*)instr, len, outstr, &outLen); \
+ *(outstr + outLen) = '\0'; \
+ return outstr + outLen; \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline TStringBuf prefix##Base64Encode(const TStringBuf& src, void* tmp) { \
+ return TStringBuf((const char*)tmp, ::NB64Etalon::prefix##Base64Encode((char*)tmp, (const unsigned char*)src.data(), src.size())); \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline void prefix##Base64Encode(const TStringBuf& src, TString& dst) { \
+ dst.ReserveAndResize(Base64EncodeBufSize(src.size())); \
+ dst.resize(::NB64Etalon::prefix##Base64Encode(src, dst.begin()).size()); \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline TString prefix##Base64Encode(const TStringBuf& s) { \
+ TString ret; \
+ prefix##Base64Encode(s, ret); \
+ return ret; \
+ }
+
+namespace NB64Etalon {
+ BASE64_UT_DECLARE_BASE64_IMPL(PLAIN32, plain32_base64_encode, plain32_base64_decode);
+ BASE64_UT_DECLARE_BASE64_IMPL(PLAIN64, plain64_base64_encode, plain64_base64_decode);
+ BASE64_UT_DECLARE_BASE64_IMPL(NEON32, neon32_base64_encode, neon32_base64_decode);
+ BASE64_UT_DECLARE_BASE64_IMPL(NEON64, neon64_base64_encode, neon64_base64_decode);
+ BASE64_UT_DECLARE_BASE64_IMPL(AVX2, avx2_base64_encode, avx2_base64_decode);
+ BASE64_UT_DECLARE_BASE64_IMPL(SSSE3, ssse3_base64_encode, ssse3_base64_decode);
+
+#undef BASE64_UT_DECLARE_BASE64_IMPL
+
+ // Table of the backends that can be exercised on the current platform, indexed by EImpl.
+ // Entries for backends that are not available keep null Encode/Decode pointers.
+ struct TImpls {
+ enum EImpl : size_t {
+ PLAIN32_IMPL,
+ PLAIN64_IMPL,
+ NEON32_IMPL,
+ NEON64_IMPL,
+ AVX2_IMPL,
+ SSSE3_IMPL,
+ MAX_IMPL
+ };
+
+ // Both aliases name the (TStringBuf, TString&) overloads generated by the macro.
+ using TEncodeF = void (*)(const TStringBuf&, TString&);
+ using TDecodeF = void (*)(const TStringBuf&, TString&);
+
+ struct TImpl {
+ TEncodeF Encode = nullptr;
+ TDecodeF Decode = nullptr;
+ };
+
+ std::array<TImpl, MAX_IMPL> Impl;
+
+ // The plain backends are always registered; exactly one SIMD family is considered,
+ // depending on the target architecture. On x86_64 the SIMD entries are additionally
+ // guarded by run-time CPU feature checks.
+ TImpls() {
+ Impl[PLAIN32_IMPL].Encode = PLAIN32Base64Encode;
+ Impl[PLAIN32_IMPL].Decode = PLAIN32Base64Decode;
+ Impl[PLAIN64_IMPL].Encode = PLAIN64Base64Encode;
+ Impl[PLAIN64_IMPL].Decode = PLAIN64Base64Decode;
+#if defined(_arm32_)
+ Impl[NEON32_IMPL].Encode = NEON32Base64Encode;
+ Impl[NEON32_IMPL].Decode = NEON32Base64Decode;
+#elif defined(_arm64_)
+ Impl[NEON64_IMPL].Encode = NEON64Base64Encode;
+ Impl[NEON64_IMPL].Decode = NEON64Base64Decode;
+#elif defined(_x86_64_)
+ if (NX86::HaveSSSE3()) {
+ Impl[SSSE3_IMPL].Encode = SSSE3Base64Encode;
+ Impl[SSSE3_IMPL].Decode = SSSE3Base64Decode;
+ }
+
+ if (NX86::HaveAVX2()) {
+ Impl[AVX2_IMPL].Encode = AVX2Base64Encode;
+ Impl[AVX2_IMPL].Decode = AVX2Base64Decode;
+ }
+#else
+ ythrow yexception() << "Failed to identify the platform";
+#endif
+ }
+ };
+
+ // Returns the backend table used by the tests. The table is a function-local
+ // static, so it is built on the first call only. A const reference is handed out
+ // so that a call does not copy the whole array of function pointers.
+ const TImpls& GetImpls() {
+     static const TImpls IMPLS;
+     return IMPLS;
+ }
+}
+
+// Stream output for NB64Etalon::TImpls::EImpl: writes the backend name.
+// Throws yexception for a value that does not name a backend (including MAX_IMPL).
+template <>
+void Out<NB64Etalon::TImpls::EImpl>(IOutputStream& o, typename TTypeTraits<NB64Etalon::TImpls::EImpl>::TFuncParam v) {
+    TStringBuf name;
+    switch (v) {
+        case NB64Etalon::TImpls::PLAIN32_IMPL:
+            name = TStringBuf{"PLAIN32"};
+            break;
+        case NB64Etalon::TImpls::PLAIN64_IMPL:
+            name = TStringBuf{"PLAIN64"};
+            break;
+        case NB64Etalon::TImpls::NEON64_IMPL:
+            name = TStringBuf{"NEON64"};
+            break;
+        case NB64Etalon::TImpls::NEON32_IMPL:
+            name = TStringBuf{"NEON32"};
+            break;
+        case NB64Etalon::TImpls::SSSE3_IMPL:
+            name = TStringBuf{"SSSE3"};
+            break;
+        case NB64Etalon::TImpls::AVX2_IMPL:
+            name = TStringBuf{"AVX2"};
+            break;
+        default:
+            ythrow yexception() << "invalid";
+    }
+    o << name;
+}
+
+// Checks that `plain` encodes to `encoded` / `encodedUrl` with the standard / URL
+// encoder and that Base64Decode restores `plain` from either form.
+static void TestEncodeDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) {
+ TString a, b;
+
+ Base64Encode(plain, a);
+ UNIT_ASSERT_VALUES_EQUAL(a, encoded);
+
+ Base64Decode(a, b);
+ UNIT_ASSERT_VALUES_EQUAL(b, plain);
+
+ // `a` and `b` are reused for the URL variant.
+ Base64EncodeUrl(plain, a);
+ UNIT_ASSERT_VALUES_EQUAL(a, encodedUrl);
+
+ Base64Decode(a, b);
+ UNIT_ASSERT_VALUES_EQUAL(b, plain);
+}
+
+// Same as TestEncodeDecodeIntoString, but decoding goes through Base64StrictDecode.
+static void TestEncodeStrictDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) {
+ TString a, b;
+
+ Base64Encode(plain, a);
+ UNIT_ASSERT_VALUES_EQUAL(a, encoded);
+
+ Base64StrictDecode(a, b);
+ UNIT_ASSERT_VALUES_EQUAL(b, plain);
+
+ // `a` and `b` are reused for the URL variant.
+ Base64EncodeUrl(plain, a);
+ UNIT_ASSERT_VALUES_EQUAL(a, encodedUrl);
+
+ Base64StrictDecode(a, b);
+ UNIT_ASSERT_VALUES_EQUAL(b, plain);
+}
+
+Y_UNIT_TEST_SUITE(TBase64) {
+ // Encoding of 3-, 2- and 1-byte inputs: no padding, one '=' and two '='.
+ Y_UNIT_TEST(TestEncode) {
+ UNIT_ASSERT_VALUES_EQUAL(Base64Encode("12z"), "MTJ6");
+ UNIT_ASSERT_VALUES_EQUAL(Base64Encode("123"), "MTIz");
+ UNIT_ASSERT_VALUES_EQUAL(Base64Encode("12"), "MTI=");
+ UNIT_ASSERT_VALUES_EQUAL(Base64Encode("1"), "MQ==");
+ }
+
+ // Round trips through the TString overloads for both alphabets and both decoders.
+ Y_UNIT_TEST(TestIntoString) {
+ {
+ // Every byte value 0..255 exactly once.
+ TString str;
+ for (size_t i = 0; i < 256; ++i)
+ str += char(i);
+
+ const TString base64 =
+ "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJy"
+ "gpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9Q"
+ "UVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eH"
+ "l6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6Ch"
+ "oqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIyc"
+ "rLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy"
+ "8/T19vf4+fr7/P3+/w==";
+ // Same data in the URL alphabet: '-' for '+', '_' for '/', ',' for '='.
+ const TString base64Url =
+ "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJy"
+ "gpKissLS4vMDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9Q"
+ "UVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eH"
+ "l6e3x9fn-AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6Ch"
+ "oqOkpaanqKmqq6ytrq-wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIyc"
+ "rLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy"
+ "8_T19vf4-fr7_P3-_w,,";
+
+ TestEncodeDecodeIntoString(str, base64, base64Url);
+ TestEncodeStrictDecodeIntoString(str, base64, base64Url);
+ }
+
+ {
+ // A URL whose encoding differs between the two alphabets ('/' vs '_').
+ const TString str = "http://yandex.ru:1234/request?param=value&lll=fff#fragment";
+
+ const TString base64 = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q/cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA==";
+ const TString base64Url = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q_cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA,,";
+
+ TestEncodeDecodeIntoString(str, base64, base64Url);
+ TestEncodeStrictDecodeIntoString(str, base64, base64Url);
+ }
+ }
+
+ // Decoding of short inputs with both decoders, plus the inputs the strict decoder must reject.
+ Y_UNIT_TEST(TestDecode) {
+ // Length that is not a multiple of 4 is rejected by both decoders.
+ UNIT_ASSERT_EXCEPTION(Base64Decode("a"), yexception);
+ UNIT_ASSERT_EXCEPTION(Base64StrictDecode("a"), yexception);
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64Decode(""), "");
+ UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode(""), "");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64Decode("MTI="), "12");
+ UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("MTI="), "12");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64Decode("QQ=="), "A");
+ UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("QQ=="), "A");
+
+ // Padding in the second position of a group.
+ UNIT_ASSERT_EXCEPTION(Base64StrictDecode("M=I="), yexception);
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64Decode("dnluZHg="), "vyndx");
+ UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("dnluZHg="), "vyndx");
+
+ // Padding inside the input: two padded strings glued together.
+ UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("dnluZHg=dmlkZW8="), "vyndxvideo");
+
+ // Padding in the third position followed by a regular character.
+ UNIT_ASSERT_EXCEPTION(Base64StrictDecode("aHR0cDovL2ltZy5tZWdhLXBvcm5vLnJ1Lw=a"), yexception);
+
+ UNIT_ASSERT_EXCEPTION(Base64StrictDecode("aHh=="), yexception);
+ // Bytes outside both alphabets.
+ UNIT_ASSERT_EXCEPTION(Base64StrictDecode("\1\1\1\2"), yexception);
+ }
+
+ // Base64DecodeUneven on each payload with '=' padding, ',' padding and no padding.
+ Y_UNIT_TEST(TestDecodeUneven) {
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven(""), "");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("YWFh"), "aaa");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("MTI="), "12");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("MTI,"), "12");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("MTI"), "12");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("QQ=="), "A");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("QQ,,"), "A");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("QQ"), "A");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("dnluZHg="), "vyndx");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("dnluZHg,"), "vyndx");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("dnluZHg"), "vyndx");
+ }
+
+ // Round-trips a large buffer of rand() bytes through the encoder and both decoders.
+ // (The unused local `output` of the previous version has been dropped.)
+ Y_UNIT_TEST(TestDecodeRandom) {
+     constexpr size_t testSize = 240000;
+     TString input;
+     input.reserve(testSize); // final size is known up front
+     for (size_t i = 0; i < testSize; ++i) {
+         input.push_back(rand() % 256);
+     }
+     const TString encoded = Base64Encode(input);
+     UNIT_ASSERT_VALUES_EQUAL(Base64Decode(encoded), input);
+     UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode(encoded), input);
+ }
+
+ // Encode/decode of a string holding every byte value from 0x00 to 0x7F once.
+ Y_UNIT_TEST(TestAllPossibleOctets) {
+ // The `sv` literal keeps the leading zero byte in the string.
+ const TString x("\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"sv);
+ const TString xEnc = "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8=";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // One input byte: the encoded form ends with two padding characters.
+ Y_UNIT_TEST(TestTwoPaddingCharacters) {
+ const TString x("a");
+ const TString xEnc = "YQ==";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // Two input bytes: the encoded form ends with one padding character.
+ Y_UNIT_TEST(TestOnePaddingCharacter) {
+ const TString x("aa");
+ const TString xEnc = "YWE=";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // Three input bytes: the encoded form needs no padding.
+ Y_UNIT_TEST(TestNoPaddingCharacters) {
+ const TString x("aaa");
+ const TString xEnc = "YWFh";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // A zero byte at the end of the data is part of the payload and survives the round trip.
+ Y_UNIT_TEST(TestTrailingZero) {
+ const TString x("foo\0"sv);
+ const TString xEnc = "Zm9vAA==";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // Two zero bytes at the end of the data survive the round trip.
+ Y_UNIT_TEST(TestTwoTrailingZeroes) {
+ const TString x("foo\0\0"sv);
+ const TString xEnc = "Zm9vAAA=";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // Data consisting of a single zero byte.
+ Y_UNIT_TEST(TestZero) {
+ const TString x("\0"sv);
+ const TString xEnc = "AA==";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // A zero byte at the start must not cut off the data that follows it.
+ Y_UNIT_TEST(TestSymbolsAfterZero) {
+ const TString x("\0a"sv);
+ const TString xEnc = "AGE=";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // Empty input encodes and decodes to an empty string.
+ Y_UNIT_TEST(TestEmptyString) {
+ const TString x = "";
+ const TString xEnc = "";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // Compares every pair of available backends on the same pseudo-random inputs:
+ // each must round-trip the data, and both must produce identical encoded and decoded results.
+ Y_UNIT_TEST(TestBackendsConsistencyOnRandomData) {
+ constexpr size_t TEST_CASES_COUNT = 1000;
+ constexpr size_t MAX_DATA_SIZE = 1000;
+ // Fixed seed: the generated inputs are the same on every run.
+ TFastRng<ui32> prng{42};
+ TVector<TString> xs{TEST_CASES_COUNT};
+ TString xEnc;
+ TString xDec;
+ TString yEnc;
+ TString yDec;
+
+ for (auto& x : xs) {
+ const size_t size = prng() % MAX_DATA_SIZE;
+ for (size_t j = 0; j < size; ++j) {
+ x += static_cast<char>(prng() % 256);
+ }
+ }
+
+ static const auto IMPLS = NB64Etalon::GetImpls();
+ for (size_t i = 0; i < static_cast<size_t>(NB64Etalon::TImpls::MAX_IMPL); ++i) {
+ for (size_t j = 0; j < static_cast<size_t>(NB64Etalon::TImpls::MAX_IMPL); ++j) {
+ const auto ei = static_cast<NB64Etalon::TImpls::EImpl>(i);
+ const auto ej = static_cast<NB64Etalon::TImpls::EImpl>(j);
+ const auto impl = IMPLS.Impl[i];
+ const auto otherImpl = IMPLS.Impl[j];
+ // Skip the pair when either backend has neither function registered.
+ if (!impl.Encode && !impl.Decode || !otherImpl.Encode && !otherImpl.Decode) {
+ continue;
+ }
+
+ for (const auto& x : xs) {
+ impl.Encode(x, xEnc);
+ impl.Decode(xEnc, xDec);
+ Y_ENSURE(x == xDec, "something is wrong with " << ei << " implementation");
+
+ otherImpl.Encode(x, yEnc);
+ // Note: the other backend decodes xEnc (the first backend's output), not yEnc;
+ // the two encoded strings are asserted equal right below.
+ otherImpl.Decode(xEnc, yDec);
+ Y_ENSURE(x == yDec, "something is wrong with " << ej << " implementation");
+
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ UNIT_ASSERT_VALUES_EQUAL(xDec, yDec);
+ }
+ }
+ }
+ }
+
+ // Checks that the pointer returned by the raw-pointer Base64Encode points at a zero byte.
+ Y_UNIT_TEST(TestIfEncodedDataIsZeroTerminatedOnRandomData) {
+ constexpr size_t TEST_CASES_COUNT = 1000;
+ constexpr size_t MAX_DATA_SIZE = 1000;
+ // Fixed seed: the generated inputs are the same on every run.
+ TFastRng<ui32> prng{42};
+ TString x;
+ TVector<char> buf;
+ for (size_t i = 0; i < TEST_CASES_COUNT; ++i) {
+ const size_t size = prng() % MAX_DATA_SIZE;
+ x.clear();
+ for (size_t j = 0; j < size; ++j) {
+ x += static_cast<char>(prng() % 256);
+ }
+
+ // Pre-fill the buffer with a non-zero value so that a missing terminator is detected.
+ buf.assign(Base64EncodeBufSize(x.size()), Max<char>());
+ const auto* const xEncEnd = Base64Encode(buf.data(), (const unsigned char*)x.data(), x.size());
+ UNIT_ASSERT_VALUES_EQUAL(*xEncEnd, '\0');
+ }
+ }
+
+ // Short input (scalar decoder path) without padding.
+ Y_UNIT_TEST(TestDecodeURLEncodedNoPadding) {
+ const auto x = "123";
+ const auto xDec = Base64Decode("MTIz");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ // Short input ending with one URL padding character ','.
+ Y_UNIT_TEST(TestDecodeURLEncodedOnePadding) {
+ const auto x = "12";
+ const auto xDec = Base64Decode("MTI,");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ // Short input ending with two URL padding characters ','.
+ Y_UNIT_TEST(TestDecodeURLEncodedTwoPadding) {
+ const auto x = "1";
+ const auto xDec = Base64Decode("MQ,,");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ // Input of 8 or more characters (backend decoder path) without padding.
+ Y_UNIT_TEST(TestDecodeNoPaddingLongString) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?a";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9h");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ // Input of 8 or more characters ending with one '=' padding character.
+ Y_UNIT_TEST(TestDecodeOnePaddingLongString) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz8=");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ // Input of 8 or more characters ending with two '=' padding characters.
+ Y_UNIT_TEST(TestDecodeTwoPaddingLongString) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?aa";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9hYQ==");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ // Same input as TestDecodeNoPaddingLongString: without padding the URL and standard forms coincide here.
+ Y_UNIT_TEST(TestDecodeURLEncodedNoPaddingLongString) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?a";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9h");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ // Input of 8 or more characters ending with one URL padding character ','.
+ Y_UNIT_TEST(TestDecodeURLEncodedOnePaddingLongString) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz8,");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ // Input of 8 or more characters ending with two URL padding characters ','.
+ Y_UNIT_TEST(TestDecodeURLEncodedTwoPaddingLongString) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?aa";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9hYQ,,");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+}
diff --git a/library/cpp/string_utils/base64/bench/main.cpp b/library/cpp/string_utils/base64/bench/main.cpp
new file mode 100644
index 0000000000..10e09bc1c7
--- /dev/null
+++ b/library/cpp/string_utils/base64/bench/main.cpp
@@ -0,0 +1,326 @@
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <library/cpp/testing/benchmark/bench.h>
+
+#include <util/generic/buffer.h>
+#include <util/generic/singleton.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/generic/xrange.h>
+#include <util/generic/yexception.h>
+#include <util/random/random.h>
+
+#include <array>
+
+// Builds a string of random bytes whose length lies in [minSize, maxSize].
+//
+// The first minSize bytes are always present; the number of additional bytes is
+// RandomNumber<size_t>() % (maxSize - minSize + 1). When minSize == maxSize no
+// extra random draw is made and the result has exactly minSize bytes.
+//
+// Fails via Y_ENSURE when minSize > maxSize.
+static TString GenerateRandomData(const size_t minSize, const size_t maxSize) {
+    Y_ENSURE(minSize <= maxSize, "minSize must not exceed maxSize");
+
+    // Decide the final length up front so the buffer is allocated once
+    // instead of growing one push_back at a time.
+    const size_t extra = (minSize == maxSize)
+                             ? 0
+                             : RandomNumber<size_t>() % (maxSize - minSize + 1);
+    const size_t total = minSize + extra;
+
+    TString r;
+    r.reserve(total);
+    for (size_t i = 0; i < total; ++i) {
+        r.push_back(RandomNumber<char>());
+    }
+
+    return r;
+}
+
+// Produces N independently generated random strings, each with a length in
+// [minSize, maxSize] as chosen by GenerateRandomData.
+template <size_t N>
+static std::array<TString, N> GenerateRandomDataVector(const size_t minSize, const size_t maxSize) {
+    std::array<TString, N> samples;
+    for (auto& sample : samples) {
+        sample = GenerateRandomData(minSize, maxSize);
+    }
+
+    return samples;
+}
+
+// Returns an array whose i-th element is Base64Encode(d[i]).
+template <size_t N>
+static std::array<TString, N> Encode(const std::array<TString, N>& d) {
+    std::array<TString, N> encoded;
+    for (size_t idx = 0; idx != N; ++idx) {
+        encoded[idx] = Base64Encode(d[idx]);
+    }
+
+    return encoded;
+}
+
+namespace {
+    // Benchmark fixture: N random strings with sizes in [MinSize, MaxSize], their
+    // Base64 encodings, and per-item output buffers resized to the values reported
+    // by Base64EncodeBufSize / Base64DecodeBufSize.
+    template <size_t N, size_t MinSize, size_t MaxSize>
+    struct TRandomDataHolder {
+        TRandomDataHolder()
+            : Data(GenerateRandomDataVector<N>(MinSize, MaxSize))
+            , DataEncoded(Encode<N>(Data))
+        {
+            for (size_t idx = 0; idx != N; ++idx) {
+                PlaceToEncode[idx].Resize(Base64EncodeBufSize(Data[idx].size()));
+                PlaceToDecode[idx].Resize(Base64DecodeBufSize(DataEncoded[idx].size()));
+            }
+        }
+
+        // Number of samples held; read by the benchmark loops as d.Size.
+        static constexpr size_t Size = N;
+
+        // Declaration order matters: DataEncoded is initialized from Data.
+        const std::array<TString, N> Data;
+        const std::array<TString, N> DataEncoded;
+
+        // Destination buffers for encoding and decoding, one per sample.
+        std::array<TBuffer, N> PlaceToEncode;
+        std::array<TBuffer, N> PlaceToDecode;
+    };
+
+    // Holder whose samples all have exactly Size bytes (MinSize == MaxSize).
+    template <size_t N, size_t Size>
+    using TFixedSizeRandomDataHolder = TRandomDataHolder<N, Size, Size>;
+
+    // Ten samples per holder; the suffix is the sample size in bytes.
+    using FSRDH_1 = TFixedSizeRandomDataHolder<10, 1>;
+    using FSRDH_2 = TFixedSizeRandomDataHolder<10, 2>;
+    using FSRDH_4 = TFixedSizeRandomDataHolder<10, 4>;
+    using FSRDH_8 = TFixedSizeRandomDataHolder<10, 8>;
+    using FSRDH_16 = TFixedSizeRandomDataHolder<10, 16>;
+    using FSRDH_32 = TFixedSizeRandomDataHolder<10, 32>;
+    using FSRDH_64 = TFixedSizeRandomDataHolder<10, 64>;
+    using FSRDH_128 = TFixedSizeRandomDataHolder<10, 128>;
+    using FSRDH_1024 = TFixedSizeRandomDataHolder<10, 1024>;
+    using FSRDH_10240 = TFixedSizeRandomDataHolder<10, 10240>;
+    using FSRDH_102400 = TFixedSizeRandomDataHolder<10, 102400>;
+    using FSRDH_1048576 = TFixedSizeRandomDataHolder<10, 1048576>;
+    using FSRDH_10485760 = TFixedSizeRandomDataHolder<10, 10485760>;
+}
+
+// Runs iface.Iterations() passes; each pass Base64-encodes every sample of `d`
+// into its matching PlaceToEncode buffer.
+template <typename T>
+static inline void BenchEncode(T& d, const NBench::NCpu::TParams& iface) {
+    for (const auto pass : xrange(iface.Iterations())) {
+        Y_UNUSED(pass);
+        for (size_t idx = 0; idx != d.Size; ++idx) {
+            const auto& input = d.Data[idx];
+            auto* const output = d.PlaceToEncode[idx].data();
+
+            NBench::Escape(output);
+            Y_DO_NOT_OPTIMIZE_AWAY(
+                Base64Encode(output, reinterpret_cast<const unsigned char*>(input.data()), input.size()));
+            NBench::Clobber();
+        }
+    }
+}
+
+// Runs iface.Iterations() passes; each pass encodes every sample of `d` with
+// Base64EncodeUrl into its matching PlaceToEncode buffer.
+template <typename T>
+static inline void BenchEncodeUrl(T& d, const NBench::NCpu::TParams& iface) {
+    for (const auto pass : xrange(iface.Iterations())) {
+        Y_UNUSED(pass);
+        for (size_t idx = 0; idx != d.Size; ++idx) {
+            const auto& input = d.Data[idx];
+            auto* const output = d.PlaceToEncode[idx].data();
+
+            NBench::Escape(output);
+            Y_DO_NOT_OPTIMIZE_AWAY(
+                Base64EncodeUrl(output, reinterpret_cast<const unsigned char*>(input.data()), input.size()));
+            NBench::Clobber();
+        }
+    }
+}
+
+// Runs iface.Iterations() passes; each pass decodes every pre-encoded sample of
+// `d` into its matching PlaceToDecode buffer.
+template <typename T>
+static inline void BenchDecode(T& d, const NBench::NCpu::TParams& iface) {
+    for (const auto pass : xrange(iface.Iterations())) {
+        Y_UNUSED(pass);
+        for (size_t idx = 0; idx != d.Size; ++idx) {
+            const auto& encoded = d.DataEncoded[idx];
+            auto* const output = d.PlaceToDecode[idx].data();
+            const char* const first = encoded.data();
+            const char* const last = first + encoded.size();
+
+            NBench::Escape(output);
+            Y_DO_NOT_OPTIMIZE_AWAY(Base64Decode(output, first, last));
+            NBench::Clobber();
+        }
+    }
+}
+
+Y_CPU_BENCHMARK(EncodeF1, iface) {
+    // Base64Encode over the shared set of ten 1-byte samples.
+    BenchEncode(*Singleton<FSRDH_1>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF1, iface) {
+    // Base64Decode over the encodings of ten 1-byte samples.
+    BenchDecode(*Singleton<FSRDH_1>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF2, iface) {
+    // Base64Encode over the shared set of ten 2-byte samples.
+    BenchEncode(*Singleton<FSRDH_2>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF2, iface) {
+    // Base64Decode over the encodings of ten 2-byte samples.
+    BenchDecode(*Singleton<FSRDH_2>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF4, iface) {
+    // Base64Encode over the shared set of ten 4-byte samples.
+    BenchEncode(*Singleton<FSRDH_4>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF4, iface) {
+    // Base64Decode over the encodings of ten 4-byte samples.
+    BenchDecode(*Singleton<FSRDH_4>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF8, iface) {
+    // Base64Encode over the shared set of ten 8-byte samples.
+    BenchEncode(*Singleton<FSRDH_8>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF8, iface) {
+    // Base64Decode over the encodings of ten 8-byte samples.
+    BenchDecode(*Singleton<FSRDH_8>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF16, iface) {
+    // Base64Encode over the shared set of ten 16-byte samples.
+    BenchEncode(*Singleton<FSRDH_16>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF16, iface) {
+    // Base64Decode over the encodings of ten 16-byte samples.
+    BenchDecode(*Singleton<FSRDH_16>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF32, iface) {
+    // Base64Encode over the shared set of ten 32-byte samples.
+    BenchEncode(*Singleton<FSRDH_32>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF32, iface) {
+    // Base64Decode over the encodings of ten 32-byte samples.
+    BenchDecode(*Singleton<FSRDH_32>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF64, iface) {
+    // Base64Encode over the shared set of ten 64-byte samples.
+    BenchEncode(*Singleton<FSRDH_64>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF64, iface) {
+    // Base64Decode over the encodings of ten 64-byte samples.
+    BenchDecode(*Singleton<FSRDH_64>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF128, iface) {
+    // Base64Encode over the shared set of ten 128-byte samples.
+    BenchEncode(*Singleton<FSRDH_128>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF128, iface) {
+    // Base64Decode over the encodings of ten 128-byte samples.
+    BenchDecode(*Singleton<FSRDH_128>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF1024, iface) {
+    // Base64Encode over the shared set of ten 1024-byte samples.
+    BenchEncode(*Singleton<FSRDH_1024>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF1024, iface) {
+    // Base64Decode over the encodings of ten 1024-byte samples.
+    BenchDecode(*Singleton<FSRDH_1024>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF10240, iface) {
+    // Base64Encode over the shared set of ten 10240-byte samples.
+    BenchEncode(*Singleton<FSRDH_10240>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF10240, iface) {
+    // Base64Decode over the encodings of ten 10240-byte samples.
+    BenchDecode(*Singleton<FSRDH_10240>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF102400, iface) {
+    // Base64Encode over the shared set of ten 102400-byte samples.
+    BenchEncode(*Singleton<FSRDH_102400>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF102400, iface) {
+    // Base64Decode over the encodings of ten 102400-byte samples.
+    BenchDecode(*Singleton<FSRDH_102400>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF1048576, iface) {
+    // Base64Encode over the shared set of ten 1048576-byte samples.
+    BenchEncode(*Singleton<FSRDH_1048576>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF1048576, iface) {
+    // Base64Decode over the encodings of ten 1048576-byte samples.
+    BenchDecode(*Singleton<FSRDH_1048576>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF10485760, iface) {
+    // Base64Encode over the shared set of ten 10485760-byte samples.
+    BenchEncode(*Singleton<FSRDH_10485760>(), iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF10485760, iface) {
+    // Base64Decode over the encodings of ten 10485760-byte samples.
+    BenchDecode(*Singleton<FSRDH_10485760>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF1, iface) {
+    // Base64EncodeUrl over the shared set of ten 1-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_1>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF2, iface) {
+    // Base64EncodeUrl over the shared set of ten 2-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_2>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF4, iface) {
+    // Base64EncodeUrl over the shared set of ten 4-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_4>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF8, iface) {
+    // Base64EncodeUrl over the shared set of ten 8-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_8>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF16, iface) {
+    // Base64EncodeUrl over the shared set of ten 16-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_16>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF32, iface) {
+    // Base64EncodeUrl over the shared set of ten 32-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_32>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF64, iface) {
+    // Base64EncodeUrl over the shared set of ten 64-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_64>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF128, iface) {
+    // Base64EncodeUrl over the shared set of ten 128-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_128>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF1024, iface) {
+    // Base64EncodeUrl over the shared set of ten 1024-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_1024>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF10240, iface) {
+    // Base64EncodeUrl over the shared set of ten 10240-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_10240>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF102400, iface) {
+    // Base64EncodeUrl over the shared set of ten 102400-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_102400>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF1048576, iface) {
+    // Base64EncodeUrl over the shared set of ten 1048576-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_1048576>(), iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF10485760, iface) {
+    // Base64EncodeUrl over the shared set of ten 10485760-byte samples.
+    BenchEncodeUrl(*Singleton<FSRDH_10485760>(), iface);
+}
diff --git a/library/cpp/string_utils/base64/bench/metrics/main.py b/library/cpp/string_utils/base64/bench/metrics/main.py
new file mode 100644
index 0000000000..c35fd6d8cd
--- /dev/null
+++ b/library/cpp/string_utils/base64/bench/metrics/main.py
@@ -0,0 +1,5 @@
+import yatest.common as yc
+
+
+def test_export_metrics(metrics):
+ metrics.set_benchmark(yc.execute_benchmark('library/cpp/string_utils/base64/bench/bench'))
diff --git a/library/cpp/string_utils/base64/bench/metrics/ya.make b/library/cpp/string_utils/base64/bench/metrics/ya.make
new file mode 100644
index 0000000000..b0406516c3
--- /dev/null
+++ b/library/cpp/string_utils/base64/bench/metrics/ya.make
@@ -0,0 +1,20 @@
+OWNER(
+ yazevnul
+ g:util
+)
+
+PY2TEST()
+
+SIZE(LARGE)
+
+TAG(
+ ya:force_sandbox
+ sb:intel_e5_2660v1
+ ya:fat
+)
+
+TEST_SRCS(main.py)
+
+DEPENDS(library/cpp/string_utils/base64/bench)
+
+END()
diff --git a/library/cpp/string_utils/base64/bench/ya.make b/library/cpp/string_utils/base64/bench/ya.make
new file mode 100644
index 0000000000..5ac5f3d6ce
--- /dev/null
+++ b/library/cpp/string_utils/base64/bench/ya.make
@@ -0,0 +1,16 @@
+OWNER(
+ yazevnul
+ g:util
+)
+
+Y_BENCHMARK()
+
+SRCS(
+ main.cpp
+)
+
+PEERDIR(
+ library/cpp/string_utils/base64
+)
+
+END()
diff --git a/library/cpp/string_utils/base64/fuzz/generic/ya.make b/library/cpp/string_utils/base64/fuzz/generic/ya.make
new file mode 100644
index 0000000000..d155e2b0a0
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/generic/ya.make
@@ -0,0 +1,12 @@
+OWNER(
+ yazevnul
+ g:util
+)
+
+FUZZ()
+
+PEERDIR(
+ library/cpp/string_utils/base64/fuzz/lib
+)
+
+END()
diff --git a/library/cpp/string_utils/base64/fuzz/lib/main.cpp b/library/cpp/string_utils/base64/fuzz/lib/main.cpp
new file mode 100644
index 0000000000..28547ae7a5
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/lib/main.cpp
@@ -0,0 +1,13 @@
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <util/system/types.h>
+#include <util/system/yassert.h>
+
+// Fuzz entry point: checks that encoding the input bytes and decoding the
+// result reproduces the input exactly.
+extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) {
+    const TStringBuf example(reinterpret_cast<const char*>(data), size);
+
+    const TString encoded = Base64Encode(example);
+    const TString converted = Base64Decode(encoded);
+
+    // The round trip must be lossless for every input.
+    Y_VERIFY(example == converted);
+
+    return 0;
+}
diff --git a/library/cpp/string_utils/base64/fuzz/lib/ya.make b/library/cpp/string_utils/base64/fuzz/lib/ya.make
new file mode 100644
index 0000000000..7b981b86a3
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/lib/ya.make
@@ -0,0 +1,16 @@
+OWNER(
+ yazevnul
+ g:util
+)
+
+LIBRARY()
+
+SRCS(
+ main.cpp
+)
+
+PEERDIR(
+ library/cpp/string_utils/base64
+)
+
+END()
diff --git a/library/cpp/string_utils/base64/fuzz/uneven/main.cpp b/library/cpp/string_utils/base64/fuzz/uneven/main.cpp
new file mode 100644
index 0000000000..915e81a7e5
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/uneven/main.cpp
@@ -0,0 +1,10 @@
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <util/system/types.h>
+#include <util/system/yassert.h>
+
+// Fuzz entry point: passes the raw input bytes to Base64DecodeUneven and
+// discards whatever it returns.
+extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) {
+    const char* const bytes = reinterpret_cast<const char*>(data);
+    const TStringBuf input(bytes, size);
+
+    Y_UNUSED(Base64DecodeUneven(input));
+
+    return 0;
+}
diff --git a/library/cpp/string_utils/base64/fuzz/uneven/ya.make b/library/cpp/string_utils/base64/fuzz/uneven/ya.make
new file mode 100644
index 0000000000..18cb18ef52
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/uneven/ya.make
@@ -0,0 +1,15 @@
+FUZZ()
+
+OWNER(
+ g:util
+)
+
+SRCS(
+ main.cpp
+)
+
+PEERDIR(
+ library/cpp/string_utils/base64
+)
+
+END()
diff --git a/library/cpp/string_utils/base64/fuzz/ya.make b/library/cpp/string_utils/base64/fuzz/ya.make
new file mode 100644
index 0000000000..bef82061c4
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/ya.make
@@ -0,0 +1,10 @@
+OWNER(
+ yazevnul
+ g:util
+)
+
+RECURSE(
+ generic
+ lib
+ uneven
+)
diff --git a/library/cpp/string_utils/base64/ut/ya.make b/library/cpp/string_utils/base64/ut/ya.make
new file mode 100644
index 0000000000..9b61241f0e
--- /dev/null
+++ b/library/cpp/string_utils/base64/ut/ya.make
@@ -0,0 +1,22 @@
+OWNER(
+ g:util
+ yazevnul
+)
+
+UNITTEST_FOR(library/cpp/string_utils/base64)
+
+SRCS(
+ base64_ut.cpp
+ base64_decode_uneven_ut.cpp
+)
+
+PEERDIR(
+ contrib/libs/base64/avx2
+ contrib/libs/base64/ssse3
+ contrib/libs/base64/neon32
+ contrib/libs/base64/neon64
+ contrib/libs/base64/plain32
+ contrib/libs/base64/plain64
+)
+
+END()
diff --git a/library/cpp/string_utils/base64/ya.make b/library/cpp/string_utils/base64/ya.make
new file mode 100644
index 0000000000..f5258c446c
--- /dev/null
+++ b/library/cpp/string_utils/base64/ya.make
@@ -0,0 +1,23 @@
+OWNER(
+ g:util
+ yazevnul
+)
+
+LIBRARY()
+
+SRCS(
+ base64.cpp
+)
+
+PEERDIR(
+ contrib/libs/base64/avx2
+ contrib/libs/base64/ssse3
+ contrib/libs/base64/neon32
+ contrib/libs/base64/neon64
+ contrib/libs/base64/plain32
+ contrib/libs/base64/plain64
+)
+
+END()
+
+RECURSE_FOR_TESTS(ut)