diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/string_utils/base64 | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/string_utils/base64')
-rw-r--r-- | library/cpp/string_utils/base64/base64.cpp | 268 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/base64.h | 130 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp | 46 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/base64_ut.cpp | 497 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/bench/main.cpp | 326 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/bench/metrics/main.py | 5 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/bench/metrics/ya.make | 20 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/bench/ya.make | 16 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/fuzz/generic/ya.make | 12 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/fuzz/lib/main.cpp | 13 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/fuzz/lib/ya.make | 16 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/fuzz/uneven/main.cpp | 10 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/fuzz/uneven/ya.make | 15 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/fuzz/ya.make | 10 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/ut/ya.make | 22 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/ya.make | 23 |
16 files changed, 1429 insertions, 0 deletions
diff --git a/library/cpp/string_utils/base64/base64.cpp b/library/cpp/string_utils/base64/base64.cpp new file mode 100644 index 0000000000..05c201f0de --- /dev/null +++ b/library/cpp/string_utils/base64/base64.cpp @@ -0,0 +1,268 @@ +#include "base64.h" + +#include <contrib/libs/base64/avx2/libbase64.h> +#include <contrib/libs/base64/ssse3/libbase64.h> +#include <contrib/libs/base64/neon32/libbase64.h> +#include <contrib/libs/base64/neon64/libbase64.h> +#include <contrib/libs/base64/plain32/libbase64.h> +#include <contrib/libs/base64/plain64/libbase64.h> + +#include <util/generic/yexception.h> +#include <util/system/cpu_id.h> +#include <util/system/platform.h> + +#include <cstdlib> + +namespace { + struct TImpl { + void (*Encode)(const char* src, size_t srclen, char* out, size_t* outlen); + int (*Decode)(const char* src, size_t srclen, char* out, size_t* outlen); + + TImpl() { +#if defined(_arm32_) + const bool haveNEON32 = true; +#else + const bool haveNEON32 = false; +#endif + +#if defined(_arm64_) + const bool haveNEON64 = true; +#else + const bool haveNEON64 = false; +#endif + +# ifdef _windows_ + // msvc does something wrong in release-build, so we temprorary disable this branch on windows + // https://developercommunity.visualstudio.com/content/problem/334085/release-build-has-made-wrong-optimizaion-in-base64.html + const bool isWin = true; +# else + const bool isWin = false; +# endif + if (!isWin && NX86::HaveAVX() && NX86::HaveAVX2()) { + Encode = avx2_base64_encode; + Decode = avx2_base64_decode; + } else if (NX86::HaveSSSE3()) { + Encode = ssse3_base64_encode; + Decode = ssse3_base64_decode; + } else if (haveNEON64) { + Encode = neon64_base64_encode; + Decode = neon64_base64_decode; + } else if (haveNEON32) { + Encode = neon32_base64_encode; + Decode = neon32_base64_decode; + } else if (sizeof(void*) == 8) { + // running on a 64 bit platform + Encode = plain64_base64_encode; + Decode = plain64_base64_decode; + } else if (sizeof(void*) == 4) { + // running 
on a 32 bit platform (actually impossible in Arcadia) + Encode = plain32_base64_encode; + Decode = plain32_base64_decode; + } else { + // failed to find appropriate implementation + std::abort(); + } + } + }; + + const TImpl GetImpl() { + static const TImpl IMPL; + return IMPL; + } +} + +static const char base64_etab_std[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const char base64_bkw[] = { + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 0..15 + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 16..31 + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\76', '\0', '\76', '\0', '\77', // 32.47 + '\64', '\65', '\66', '\67', '\70', '\71', '\72', '\73', '\74', '\75', '\0', '\0', '\0', '\0', '\0', '\0', // 48..63 + '\0', '\0', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\10', '\11', '\12', '\13', '\14', '\15', '\16', // 64..79 + '\17', '\20', '\21', '\22', '\23', '\24', '\25', '\26', '\27', '\30', '\31', '\0', '\0', '\0', '\0', '\77', // 80..95 + '\0', '\32', '\33', '\34', '\35', '\36', '\37', '\40', '\41', '\42', '\43', '\44', '\45', '\46', '\47', '\50', // 96..111 + '\51', '\52', '\53', '\54', '\55', '\56', '\57', '\60', '\61', '\62', '\63', '\0', '\0', '\0', '\0', '\0', // 112..127 + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 128..143 + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', 
'\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', + '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0'}; + +static_assert(Y_ARRAY_SIZE(base64_bkw) == 256, "wrong size"); + +// Base64 for url encoding, RFC3548 +static const char base64_etab_url[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + +static inline unsigned char GetBase64EncodedIndex0(unsigned char octet0) { + return (octet0 >> 2); +} + +static inline unsigned char GetBase64EncodedIndex1(unsigned char octet0, unsigned char octet1) { + return (((octet0 << 4) & 0x30) | ((octet1 >> 4) & 0x0f)); +} + +static inline unsigned char GetBase64EncodedIndex2(unsigned char octet1, unsigned char octet2) { + return (((octet1 << 2) & 0x3c) | ((octet2 >> 6) & 0x03)); +} + +static inline unsigned char GetBase64EncodedIndex3(unsigned char octet2) { + return (octet2 & 0x3f); +} + +template <bool urlVersion> +static inline char* Base64EncodeImpl(char* outstr, const unsigned char* instr, size_t len) { + const char* const base64_etab = (urlVersion ? base64_etab_url : base64_etab_std); + const char pad = (urlVersion ? 
',' : '='); + + size_t idx = 0; + + while (idx + 2 < len) { + *outstr++ = base64_etab[GetBase64EncodedIndex0(instr[idx])]; + *outstr++ = base64_etab[GetBase64EncodedIndex1(instr[idx], instr[idx + 1])]; + *outstr++ = base64_etab[GetBase64EncodedIndex2(instr[idx + 1], instr[idx + 2])]; + *outstr++ = base64_etab[GetBase64EncodedIndex3(instr[idx + 2])]; + idx += 3; + } + if (idx < len) { + *outstr++ = base64_etab[GetBase64EncodedIndex0(instr[idx])]; + if (idx + 1 < len) { + *outstr++ = base64_etab[GetBase64EncodedIndex1(instr[idx], instr[idx + 1])]; + *outstr++ = base64_etab[GetBase64EncodedIndex2(instr[idx + 1], '\0')]; + } else { + *outstr++ = base64_etab[GetBase64EncodedIndex1(instr[idx], '\0')]; + *outstr++ = pad; + } + *outstr++ = pad; + } + *outstr = 0; + + return outstr; +} + +static char* Base64EncodePlain(char* outstr, const unsigned char* instr, size_t len) { + return Base64EncodeImpl<false>(outstr, instr, len); +} + +char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len) { + return Base64EncodeImpl<true>(outstr, instr, len); +} + +inline void uudecode_1(char* dst, unsigned char* src) { + dst[0] = char((base64_bkw[src[0]] << 2) | (base64_bkw[src[1]] >> 4)); + dst[1] = char((base64_bkw[src[1]] << 4) | (base64_bkw[src[2]] >> 2)); + dst[2] = char((base64_bkw[src[2]] << 6) | base64_bkw[src[3]]); +} + +static size_t Base64DecodePlain(void* dst, const char* b, const char* e) { + size_t n = 0; + while (b < e) { + uudecode_1((char*)dst + n, (unsigned char*)b); + + b += 4; + n += 3; + } + + if (n > 0) { + if (b[-1] == ',' || b[-1] == '=') { + n--; + + if (b[-2] == ',' || b[-2] == '=') { + n--; + } + } + } + + return n; +} + +// Table for Base64StrictDecode +static const char base64_bkw_strict[] = + "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100" + "\100\100\100\100\100\100\100\100\100\100\100\76\101\76\100\77\64\65\66\67\70\71\72\73\74\75\100\100\100\101\100\100" + 
"\100\0\1\2\3\4\5\6\7\10\11\12\13\14\15\16\17\20\21\22\23\24\25\26\27\30\31\100\100\100\100\77" + "\100\32\33\34\35\36\37\40\41\42\43\44\45\46\47\50\51\52\53\54\55\56\57\60\61\62\63\100\100\100\100\100" + "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100" + "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100" + "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100" + "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100"; + +size_t Base64StrictDecode(void* out, const char* b, const char* e) { + char* dst = (char*)out; + const unsigned char* src = (unsigned char*)b; + const unsigned char* const end = (unsigned char*)e; + + Y_ENSURE(!((e - b) % 4), "incorrect input length for base64 decode"); + + while (src < end) { + const char zeroth = base64_bkw_strict[src[0]]; + const char first = base64_bkw_strict[src[1]]; + const char second = base64_bkw_strict[src[2]]; + const char third = base64_bkw_strict[src[3]]; + + constexpr char invalid = 64; + constexpr char padding = 65; + if (Y_UNLIKELY(zeroth == invalid || first == invalid || + second == invalid || third == invalid || + zeroth == padding || first == padding)) + { + ythrow yexception() << "invalid character in input"; + } + + dst[0] = char((zeroth << 2) | (first >> 4)); + dst[1] = char((first << 4) | (second >> 2)); + dst[2] = char((second << 6) | third); + + src += 4; + dst += 3; + + if (src[-1] == ',' || src[-1] == '=') { + --dst; + + if (src[-2] == ',' || src[-2] == '=') { + --dst; + } + } else if (Y_UNLIKELY(src[-2] == ',' || src[-2] == '=')) { + ythrow yexception() << "incorrect padding"; + } + } + + return dst - (char*)out; +} + +size_t Base64Decode(void* dst, const char* b, const char* e) { + static const 
TImpl IMPL = GetImpl(); + const auto size = e - b; + Y_ENSURE(!(size % 4), "incorrect input length for base64 decode"); + if (Y_LIKELY(size < 8)) { + return Base64DecodePlain(dst, b, e); + } + + size_t outLen; + IMPL.Decode(b, size, (char*)dst, &outLen); + + return outLen; +} + +TString Base64DecodeUneven(const TStringBuf s) { + if (s.length() % 4 == 0) { + return Base64Decode(s); + } + + // padding to 4 + return Base64Decode(TString(s) + TString(4 - (s.length() % 4), '=')); +} + +char* Base64Encode(char* outstr, const unsigned char* instr, size_t len) { + static const TImpl IMPL = GetImpl(); + if (Y_LIKELY(len < 8)) { + return Base64EncodePlain(outstr, instr, len); + } + + size_t outLen; + IMPL.Encode((char*)instr, len, outstr, &outLen); + + *(outstr + outLen) = '\0'; + return outstr + outLen; +} diff --git a/library/cpp/string_utils/base64/base64.h b/library/cpp/string_utils/base64/base64.h new file mode 100644 index 0000000000..f778a6425a --- /dev/null +++ b/library/cpp/string_utils/base64/base64.h @@ -0,0 +1,130 @@ +#pragma once + +#include <util/system/defaults.h> +#include <util/generic/strbuf.h> +#include <util/generic/string.h> + +/* @return Size of the buffer required to decode Base64 encoded data of size `len`. + */ +constexpr size_t Base64DecodeBufSize(const size_t len) noexcept { + return (len + 3) / 4 * 3; +} + +/* Decode Base64 encoded data. Can decode both regular Base64 and Base64URL encoded data. Can decode + * only valid Base64[URL] data, behaviour for invalid data is unspecified. + * + * @throws Throws exception in case of incorrect padding. + * + * @param dst memory for writing output. + * @param b pointer to the beginning of base64 encoded string. + * @param e pointer to the end of base64 encoded string + * + * @return Return number of bytes decoded.
+ */ +size_t Base64Decode(void* dst, const char* b, const char* e); + +inline TStringBuf Base64Decode(const TStringBuf src, void* dst) { + return TStringBuf((const char*)dst, Base64Decode(dst, src.begin(), src.end())); +} + +inline void Base64Decode(const TStringBuf src, TString& dst) { + dst.ReserveAndResize(Base64DecodeBufSize(src.size())); + dst.resize(Base64Decode(src, dst.begin()).size()); +} + +//WARNING: can process not whole input silently, use Base64StrictDecode instead of this function +inline TString Base64Decode(const TStringBuf s) { + TString ret; + Base64Decode(s, ret); + return ret; +} + +/// +/// @brief Decodes Base64 string with strict verification +/// of invalid symbols, also tries to decode Base64 string with padding +/// inside. +// +/// @throws Throws exceptions on inputs which contain invalid symbols +/// or incorrect padding. +/// @{ +/// +/// @param b a pointer to the beginning of base64 encoded string. +/// @param e a pointer to the end of base64 encoded string. +/// @param dst memory for writing output. +/// +/// @return Returns number of bytes decoded. +/// +size_t Base64StrictDecode(void* dst, const char* b, const char* e); + +/// +/// @param src a base64 encoded string. +/// @param dst a pointer to allocated memory +/// for writing result. +/// +/// @return Returns dst wrapped into TStringBuf. +/// +inline TStringBuf Base64StrictDecode(const TStringBuf src, void* dst) { + return TStringBuf((const char*)dst, Base64StrictDecode(dst, src.begin(), src.end())); +} + +/// +/// @param src a base64 encoded string. +/// @param dst a decoded string. +/// +inline void Base64StrictDecode(const TStringBuf src, TString& dst) { + dst.ReserveAndResize(Base64DecodeBufSize(src.size())); + dst.resize(Base64StrictDecode(src, dst.begin()).size()); +} + +/// +/// @param src a base64 encoded string. +/// +/// @returns a decoded string.
+/// +inline TString Base64StrictDecode(const TStringBuf src) { + TString ret; + Base64StrictDecode(src, ret); + return ret; +} +/// @} + +/// Works with strings which length is not divisible by 4. +TString Base64DecodeUneven(const TStringBuf s); + +//encode +constexpr size_t Base64EncodeBufSize(const size_t len) noexcept { + return (len + 2) / 3 * 4 + 1; +} + +char* Base64Encode(char* outstr, const unsigned char* instr, size_t len); +char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len); + +inline TStringBuf Base64Encode(const TStringBuf src, void* tmp) { + return TStringBuf((const char*)tmp, Base64Encode((char*)tmp, (const unsigned char*)src.data(), src.size())); +} + +inline TStringBuf Base64EncodeUrl(const TStringBuf src, void* tmp) { + return TStringBuf((const char*)tmp, Base64EncodeUrl((char*)tmp, (const unsigned char*)src.data(), src.size())); +} + +inline void Base64Encode(const TStringBuf src, TString& dst) { + dst.ReserveAndResize(Base64EncodeBufSize(src.size())); + dst.resize(Base64Encode(src, dst.begin()).size()); +} + +inline void Base64EncodeUrl(const TStringBuf src, TString& dst) { + dst.ReserveAndResize(Base64EncodeBufSize(src.size())); + dst.resize(Base64EncodeUrl(src, dst.begin()).size()); +} + +inline TString Base64Encode(const TStringBuf s) { + TString ret; + Base64Encode(s, ret); + return ret; +} + +inline TString Base64EncodeUrl(const TStringBuf s) { + TString ret; + Base64EncodeUrl(s, ret); + return ret; +} diff --git a/library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp b/library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp new file mode 100644 index 0000000000..c3ed068a37 --- /dev/null +++ b/library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp @@ -0,0 +1,46 @@ +#include <library/cpp/testing/unittest/registar.h> + +#include <library/cpp/string_utils/base64/base64.h> + +Y_UNIT_TEST_SUITE(TBase64DecodeUneven) { + Y_UNIT_TEST(Base64DecodeUneven) { + const TString wikipedia_slogan = + "Man is 
distinguished, not only by his reason, " + "but by this singular passion from other animals, which is a lust of the " + "mind, that by a perseverance of delight in the continued and " + "indefatigable generation of knowledge, exceeds the short " + "vehemence of any carnal pleasure."; + const TString encoded = + "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0" + "aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1" + "c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0" + "aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdl" + "LCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4="; + + UNIT_ASSERT_VALUES_EQUAL(encoded, Base64Encode(wikipedia_slogan)); + UNIT_ASSERT_VALUES_EQUAL(wikipedia_slogan, Base64DecodeUneven(encoded)); + + const TString encoded_url1 = + "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0" + "aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1" + "c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0" + "aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdl" + "LCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4,"; + const TString encoded_url2 = + "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0" + "aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1" + "c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0" + "aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdl" + "LCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4"; + UNIT_ASSERT_VALUES_EQUAL(wikipedia_slogan, Base64DecodeUneven(encoded_url1)); + UNIT_ASSERT_VALUES_EQUAL(wikipedia_slogan, Base64DecodeUneven(encoded_url2)); + + const TString lp = "Linkin Park"; + UNIT_ASSERT_VALUES_EQUAL(lp, Base64DecodeUneven(Base64Encode(lp))); + UNIT_ASSERT_VALUES_EQUAL(lp, 
Base64DecodeUneven(Base64EncodeUrl(lp))); + + const TString dp = "ADP GmbH\nAnalyse Design & Programmierung\nGesellschaft mit beschränkter Haftung"; + UNIT_ASSERT_VALUES_EQUAL(dp, Base64DecodeUneven(Base64Encode(dp))); + UNIT_ASSERT_VALUES_EQUAL(dp, Base64DecodeUneven(Base64EncodeUrl(dp))); + } +} diff --git a/library/cpp/string_utils/base64/base64_ut.cpp b/library/cpp/string_utils/base64/base64_ut.cpp new file mode 100644 index 0000000000..bcc1e65879 --- /dev/null +++ b/library/cpp/string_utils/base64/base64_ut.cpp @@ -0,0 +1,497 @@ +#include "base64.h" + +#include <contrib/libs/base64/avx2/libbase64.h> +#include <contrib/libs/base64/neon32/libbase64.h> +#include <contrib/libs/base64/neon64/libbase64.h> +#include <contrib/libs/base64/plain32/libbase64.h> +#include <contrib/libs/base64/plain64/libbase64.h> +#include <contrib/libs/base64/ssse3/libbase64.h> + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/vector.h> +#include <util/random/fast.h> +#include <util/system/cpu_id.h> +#include <util/system/platform.h> + +#include <array> + +using namespace std::string_view_literals; + +#define BASE64_UT_DECLARE_BASE64_IMPL(prefix, encFunction, decFunction) \ + Y_DECLARE_UNUSED \ + static size_t prefix##Base64Decode(void* dst, const char* b, const char* e) { \ + const auto size = e - b; \ + Y_ENSURE(!(size % 4), "incorrect input length for base64 decode"); \ + \ + size_t outLen; \ + decFunction(b, size, (char*)dst, &outLen); \ + return outLen; \ + } \ + \ + Y_DECLARE_UNUSED \ + static inline TStringBuf prefix##Base64Decode(const TStringBuf& src, void* dst) { \ + return TStringBuf((const char*)dst, ::NB64Etalon::prefix##Base64Decode(dst, src.begin(), src.end())); \ + } \ + \ + Y_DECLARE_UNUSED \ + static inline void prefix##Base64Decode(const TStringBuf& src, TString& dst) { \ + dst.ReserveAndResize(Base64DecodeBufSize(src.size())); \ + dst.resize(::NB64Etalon::prefix##Base64Decode(src, dst.begin()).size()); \ + } \ + \ + Y_DECLARE_UNUSED \ + 
static inline TString prefix##Base64Decode(const TStringBuf& s) { \ + TString ret; \ + prefix##Base64Decode(s, ret); \ + return ret; \ + } \ + \ + Y_DECLARE_UNUSED \ + static char* prefix##Base64Encode(char* outstr, const unsigned char* instr, size_t len) { \ + size_t outLen; \ + encFunction((char*)instr, len, outstr, &outLen); \ + *(outstr + outLen) = '\0'; \ + return outstr + outLen; \ + } \ + \ + Y_DECLARE_UNUSED \ + static inline TStringBuf prefix##Base64Encode(const TStringBuf& src, void* tmp) { \ + return TStringBuf((const char*)tmp, ::NB64Etalon::prefix##Base64Encode((char*)tmp, (const unsigned char*)src.data(), src.size())); \ + } \ + \ + Y_DECLARE_UNUSED \ + static inline void prefix##Base64Encode(const TStringBuf& src, TString& dst) { \ + dst.ReserveAndResize(Base64EncodeBufSize(src.size())); \ + dst.resize(::NB64Etalon::prefix##Base64Encode(src, dst.begin()).size()); \ + } \ + \ + Y_DECLARE_UNUSED \ + static inline TString prefix##Base64Encode(const TStringBuf& s) { \ + TString ret; \ + prefix##Base64Encode(s, ret); \ + return ret; \ + } + +namespace NB64Etalon { + BASE64_UT_DECLARE_BASE64_IMPL(PLAIN32, plain32_base64_encode, plain32_base64_decode); + BASE64_UT_DECLARE_BASE64_IMPL(PLAIN64, plain64_base64_encode, plain64_base64_decode); + BASE64_UT_DECLARE_BASE64_IMPL(NEON32, neon32_base64_encode, neon32_base64_decode); + BASE64_UT_DECLARE_BASE64_IMPL(NEON64, neon64_base64_encode, neon64_base64_decode); + BASE64_UT_DECLARE_BASE64_IMPL(AVX2, avx2_base64_encode, avx2_base64_decode); + BASE64_UT_DECLARE_BASE64_IMPL(SSSE3, ssse3_base64_encode, ssse3_base64_decode); + +#undef BASE64_UT_DECLARE_BASE64_IMPL + + struct TImpls { + enum EImpl : size_t { + PLAIN32_IMPL, + PLAIN64_IMPL, + NEON32_IMPL, + NEON64_IMPL, + AVX2_IMPL, + SSSE3_IMPL, + MAX_IMPL + }; + + using TEncodeF = void (*)(const TStringBuf&, TString&); + using TDecodeF = void (*)(const TStringBuf&, TString&); + + struct TImpl { + TEncodeF Encode = nullptr; + TDecodeF Decode = nullptr; + }; + + 
std::array<TImpl, MAX_IMPL> Impl; + + TImpls() { + Impl[PLAIN32_IMPL].Encode = PLAIN32Base64Encode; + Impl[PLAIN32_IMPL].Decode = PLAIN32Base64Decode; + Impl[PLAIN64_IMPL].Encode = PLAIN64Base64Encode; + Impl[PLAIN64_IMPL].Decode = PLAIN64Base64Decode; +#if defined(_arm32_) + Impl[NEON32_IMPL].Encode = NEON32Base64Encode; + Impl[NEON32_IMPL].Decode = NEON32Base64Decode; +#elif defined(_arm64_) + Impl[NEON64_IMPL].Encode = NEON64Base64Encode; + Impl[NEON64_IMPL].Decode = NEON64Base64Decode; +#elif defined(_x86_64_) + if (NX86::HaveSSSE3()) { + Impl[SSSE3_IMPL].Encode = SSSE3Base64Encode; + Impl[SSSE3_IMPL].Decode = SSSE3Base64Decode; + } + + if (NX86::HaveAVX2()) { + Impl[AVX2_IMPL].Encode = AVX2Base64Encode; + Impl[AVX2_IMPL].Decode = AVX2Base64Decode; + } +#else + ythrow yexception() << "Failed to identify the platform"; +#endif + } + }; + + TImpls GetImpls() { + static const TImpls IMPLS; + return IMPLS; + } +} + +template <> +void Out<NB64Etalon::TImpls::EImpl>(IOutputStream& o, typename TTypeTraits<NB64Etalon::TImpls::EImpl>::TFuncParam v) { + switch (v) { + case NB64Etalon::TImpls::PLAIN32_IMPL: + o << TStringBuf{"PLAIN32"}; + return; + case NB64Etalon::TImpls::PLAIN64_IMPL: + o << TStringBuf{"PLAIN64"}; + return; + case NB64Etalon::TImpls::NEON64_IMPL: + o << TStringBuf{"NEON64"}; + return; + case NB64Etalon::TImpls::NEON32_IMPL: + o << TStringBuf{"NEON32"}; + return; + case NB64Etalon::TImpls::SSSE3_IMPL: + o << TStringBuf{"SSSE3"}; + return; + case NB64Etalon::TImpls::AVX2_IMPL: + o << TStringBuf{"AVX2"}; + return; + default: + ythrow yexception() << "invalid"; + } +} + +static void TestEncodeDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) { + TString a, b; + + Base64Encode(plain, a); + UNIT_ASSERT_VALUES_EQUAL(a, encoded); + + Base64Decode(a, b); + UNIT_ASSERT_VALUES_EQUAL(b, plain); + + Base64EncodeUrl(plain, a); + UNIT_ASSERT_VALUES_EQUAL(a, encodedUrl); + + Base64Decode(a, b); + UNIT_ASSERT_VALUES_EQUAL(b, 
plain); +} + +static void TestEncodeStrictDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) { + TString a, b; + + Base64Encode(plain, a); + UNIT_ASSERT_VALUES_EQUAL(a, encoded); + + Base64StrictDecode(a, b); + UNIT_ASSERT_VALUES_EQUAL(b, plain); + + Base64EncodeUrl(plain, a); + UNIT_ASSERT_VALUES_EQUAL(a, encodedUrl); + + Base64StrictDecode(a, b); + UNIT_ASSERT_VALUES_EQUAL(b, plain); +} + +Y_UNIT_TEST_SUITE(TBase64) { + Y_UNIT_TEST(TestEncode) { + UNIT_ASSERT_VALUES_EQUAL(Base64Encode("12z"), "MTJ6"); + UNIT_ASSERT_VALUES_EQUAL(Base64Encode("123"), "MTIz"); + UNIT_ASSERT_VALUES_EQUAL(Base64Encode("12"), "MTI="); + UNIT_ASSERT_VALUES_EQUAL(Base64Encode("1"), "MQ=="); + } + + Y_UNIT_TEST(TestIntoString) { + { + TString str; + for (size_t i = 0; i < 256; ++i) + str += char(i); + + const TString base64 = + "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJy" + "gpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9Q" + "UVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eH" + "l6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6Ch" + "oqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIyc" + "rLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy" + "8/T19vf4+fr7/P3+/w=="; + const TString base64Url = + "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJy" + "gpKissLS4vMDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9Q" + "UVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eH" + "l6e3x9fn-AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6Ch" + "oqOkpaanqKmqq6ytrq-wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIyc" + "rLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy" + "8_T19vf4-fr7_P3-_w,,"; + + TestEncodeDecodeIntoString(str, base64, base64Url); + TestEncodeStrictDecodeIntoString(str, base64, base64Url); + } + + { + const TString str = "http://yandex.ru:1234/request?param=value&lll=fff#fragment"; + + const TString base64 = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q/cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA=="; + const TString base64Url = 
"aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q_cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA,,"; + + TestEncodeDecodeIntoString(str, base64, base64Url); + TestEncodeStrictDecodeIntoString(str, base64, base64Url); + } + } + + Y_UNIT_TEST(TestDecode) { + UNIT_ASSERT_EXCEPTION(Base64Decode("a"), yexception); + UNIT_ASSERT_EXCEPTION(Base64StrictDecode("a"), yexception); + + UNIT_ASSERT_VALUES_EQUAL(Base64Decode(""), ""); + UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode(""), ""); + + UNIT_ASSERT_VALUES_EQUAL(Base64Decode("MTI="), "12"); + UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("MTI="), "12"); + + UNIT_ASSERT_VALUES_EQUAL(Base64Decode("QQ=="), "A"); + UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("QQ=="), "A"); + + UNIT_ASSERT_EXCEPTION(Base64StrictDecode("M=I="), yexception); + + UNIT_ASSERT_VALUES_EQUAL(Base64Decode("dnluZHg="), "vyndx"); + UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("dnluZHg="), "vyndx"); + + UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("dnluZHg=dmlkZW8="), "vyndxvideo"); + + UNIT_ASSERT_EXCEPTION(Base64StrictDecode("aHR0cDovL2ltZy5tZWdhLXBvcm5vLnJ1Lw=a"), yexception); + + UNIT_ASSERT_EXCEPTION(Base64StrictDecode("aHh=="), yexception); + UNIT_ASSERT_EXCEPTION(Base64StrictDecode("\1\1\1\2"), yexception); + } + + Y_UNIT_TEST(TestDecodeUneven) { + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven(""), ""); + + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("YWFh"), "aaa"); + + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("MTI="), "12"); + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("MTI,"), "12"); + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("MTI"), "12"); + + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("QQ=="), "A"); + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("QQ,,"), "A"); + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("QQ"), "A"); + + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("dnluZHg="), "vyndx"); + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("dnluZHg,"), "vyndx"); + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("dnluZHg"), "vyndx"); + } + + Y_UNIT_TEST(TestDecodeRandom) 
{ + TString input; + constexpr size_t testSize = 240000; + for (size_t i = 0; i < testSize; ++i) { + input.push_back(rand() % 256); + } + TString output; + TString encoded = Base64Encode(input); + UNIT_ASSERT_VALUES_EQUAL(Base64Decode(encoded), input); + UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode(encoded), input); + } + + Y_UNIT_TEST(TestAllPossibleOctets) { + const TString x("\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"sv); + const TString xEnc = "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8="; + const TString y = Base64Decode(xEnc); + const TString yEnc = Base64Encode(x); + UNIT_ASSERT_VALUES_EQUAL(x, y); + UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc); + } + + Y_UNIT_TEST(TestTwoPaddingCharacters) { + const TString x("a"); + const TString xEnc = "YQ=="; + const TString y = Base64Decode(xEnc); + const TString yEnc = Base64Encode(x); + UNIT_ASSERT_VALUES_EQUAL(x, y); + UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc); + } + + Y_UNIT_TEST(TestOnePaddingCharacter) { + const TString x("aa"); + const TString xEnc = "YWE="; + const TString y = Base64Decode(xEnc); + const TString yEnc = Base64Encode(x); + UNIT_ASSERT_VALUES_EQUAL(x, y); + UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc); + } + + Y_UNIT_TEST(TestNoPaddingCharacters) { + const TString x("aaa"); + const TString xEnc = "YWFh"; + const TString y = Base64Decode(xEnc); + const TString yEnc = Base64Encode(x); + UNIT_ASSERT_VALUES_EQUAL(x, y); + UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc); + } + + Y_UNIT_TEST(TestTrailingZero) { + const TString x("foo\0"sv); + const TString xEnc = "Zm9vAA=="; + const TString y = Base64Decode(xEnc); + const TString yEnc = Base64Encode(x); + UNIT_ASSERT_VALUES_EQUAL(x, y); + UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc); + } + + 
Y_UNIT_TEST(TestTwoTrailingZeroes) { + const TString x("foo\0\0"sv); + const TString xEnc = "Zm9vAAA="; + const TString y = Base64Decode(xEnc); + const TString yEnc = Base64Encode(x); + UNIT_ASSERT_VALUES_EQUAL(x, y); + UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc); + } + + Y_UNIT_TEST(TestZero) { + const TString x("\0"sv); + const TString xEnc = "AA=="; + const TString y = Base64Decode(xEnc); + const TString yEnc = Base64Encode(x); + UNIT_ASSERT_VALUES_EQUAL(x, y); + UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc); + } + + Y_UNIT_TEST(TestSymbolsAfterZero) { + const TString x("\0a"sv); + const TString xEnc = "AGE="; + const TString y = Base64Decode(xEnc); + const TString yEnc = Base64Encode(x); + UNIT_ASSERT_VALUES_EQUAL(x, y); + UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc); + } + + Y_UNIT_TEST(TestEmptyString) { + const TString x = ""; + const TString xEnc = ""; + const TString y = Base64Decode(xEnc); + const TString yEnc = Base64Encode(x); + UNIT_ASSERT_VALUES_EQUAL(x, y); + UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc); + } + + Y_UNIT_TEST(TestBackendsConsistencyOnRandomData) { + constexpr size_t TEST_CASES_COUNT = 1000; + constexpr size_t MAX_DATA_SIZE = 1000; + TFastRng<ui32> prng{42}; + TVector<TString> xs{TEST_CASES_COUNT}; + TString xEnc; + TString xDec; + TString yEnc; + TString yDec; + + for (auto& x : xs) { + const size_t size = prng() % MAX_DATA_SIZE; + for (size_t j = 0; j < size; ++j) { + x += static_cast<char>(prng() % 256); + } + } + + static const auto IMPLS = NB64Etalon::GetImpls(); + for (size_t i = 0; i < static_cast<size_t>(NB64Etalon::TImpls::MAX_IMPL); ++i) { + for (size_t j = 0; j < static_cast<size_t>(NB64Etalon::TImpls::MAX_IMPL); ++j) { + const auto ei = static_cast<NB64Etalon::TImpls::EImpl>(i); + const auto ej = static_cast<NB64Etalon::TImpls::EImpl>(j); + const auto impl = IMPLS.Impl[i]; + const auto otherImpl = IMPLS.Impl[j]; + if (!impl.Encode && !impl.Decode || !otherImpl.Encode && !otherImpl.Decode) { + continue; + } + + for (const auto& x : xs) { + impl.Encode(x, 
xEnc); + impl.Decode(xEnc, xDec); + Y_ENSURE(x == xDec, "something is wrong with " << ei << " implementation"); + + otherImpl.Encode(x, yEnc); + otherImpl.Decode(xEnc, yDec); + Y_ENSURE(x == yDec, "something is wrong with " << ej << " implementation"); + + UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc); + UNIT_ASSERT_VALUES_EQUAL(xDec, yDec); + } + } + } + } + + Y_UNIT_TEST(TestIfEncodedDataIsZeroTerminatedOnRandomData) { + constexpr size_t TEST_CASES_COUNT = 1000; + constexpr size_t MAX_DATA_SIZE = 1000; + TFastRng<ui32> prng{42}; + TString x; + TVector<char> buf; + for (size_t i = 0; i < TEST_CASES_COUNT; ++i) { + const size_t size = prng() % MAX_DATA_SIZE; + x.clear(); + for (size_t j = 0; j < size; ++j) { + x += static_cast<char>(prng() % 256); + } + + buf.assign(Base64EncodeBufSize(x.size()), Max<char>()); + const auto* const xEncEnd = Base64Encode(buf.data(), (const unsigned char*)x.data(), x.size()); + UNIT_ASSERT_VALUES_EQUAL(*xEncEnd, '\0'); + } + } + + Y_UNIT_TEST(TestDecodeURLEncodedNoPadding) { + const auto x = "123"; + const auto xDec = Base64Decode("MTIz"); + UNIT_ASSERT_VALUES_EQUAL(x, xDec); + } + + Y_UNIT_TEST(TestDecodeURLEncodedOnePadding) { + const auto x = "12"; + const auto xDec = Base64Decode("MTI,"); + UNIT_ASSERT_VALUES_EQUAL(x, xDec); + } + + Y_UNIT_TEST(TestDecodeURLEncodedTwoPadding) { + const auto x = "1"; + const auto xDec = Base64Decode("MQ,,"); + UNIT_ASSERT_VALUES_EQUAL(x, xDec); + } + + Y_UNIT_TEST(TestDecodeNoPaddingLongString) { + const auto x = "How do I convert between big-endian and little-endian values in C++?a"; + const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9h"); + UNIT_ASSERT_VALUES_EQUAL(x, xDec); + } + + Y_UNIT_TEST(TestDecodeOnePaddingLongString) { + const auto x = "How do I convert between big-endian and little-endian values in C++?"; + const auto xDec = 
Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz8="); + UNIT_ASSERT_VALUES_EQUAL(x, xDec); + } + + Y_UNIT_TEST(TestDecodeTwoPaddingLongString) { + const auto x = "How do I convert between big-endian and little-endian values in C++?aa"; + const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9hYQ=="); + UNIT_ASSERT_VALUES_EQUAL(x, xDec); + } + + Y_UNIT_TEST(TestDecodeURLEncodedNoPaddingLongString) { + const auto x = "How do I convert between big-endian and little-endian values in C++?a"; + const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9h"); + UNIT_ASSERT_VALUES_EQUAL(x, xDec); + } + + Y_UNIT_TEST(TestDecodeURLEncodedOnePaddingLongString) { + const auto x = "How do I convert between big-endian and little-endian values in C++?"; + const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz8,"); + UNIT_ASSERT_VALUES_EQUAL(x, xDec); + } + + Y_UNIT_TEST(TestDecodeURLEncodedTwoPaddingLongString) { + const auto x = "How do I convert between big-endian and little-endian values in C++?aa"; + const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9hYQ,,"); + UNIT_ASSERT_VALUES_EQUAL(x, xDec); + } +} diff --git a/library/cpp/string_utils/base64/bench/main.cpp b/library/cpp/string_utils/base64/bench/main.cpp new file mode 100644 index 0000000000..10e09bc1c7 --- /dev/null +++ b/library/cpp/string_utils/base64/bench/main.cpp @@ -0,0 +1,326 @@ +#include <library/cpp/string_utils/base64/base64.h> + +#include <library/cpp/testing/benchmark/bench.h> + +#include <util/generic/buffer.h> +#include <util/generic/singleton.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/generic/xrange.h> +#include <util/generic/yexception.h> 
+#include <util/random/random.h> + +#include <array> + +static TString GenerateRandomData(const size_t minSize, const size_t maxSize) { + Y_ENSURE(minSize <= maxSize, "wow"); + TString r; + for (size_t i = 0; i < minSize; ++i) { + r.push_back(RandomNumber<char>()); + } + + if (minSize == maxSize) { + return r; + } + + const size_t size = RandomNumber<size_t>() % (maxSize - minSize + 1); + for (size_t i = 0; i < size; ++i) { + r.push_back(RandomNumber<char>()); + } + + return r; +} + +template <size_t N> +static std::array<TString, N> GenerateRandomDataVector(const size_t minSize, const size_t maxSize) { + std::array<TString, N> r; + for (size_t i = 0; i < N; ++i) { + r[i] = GenerateRandomData(minSize, maxSize); + } + + return r; +} + +template <size_t N> +static std::array<TString, N> Encode(const std::array<TString, N>& d) { + std::array<TString, N> r; + for (size_t i = 0, iEnd = d.size(); i < iEnd; ++i) { + r[i] = Base64Encode(d[i]); + } + + return r; +} + +namespace { + template <size_t N, size_t MinSize, size_t MaxSize> + struct TRandomDataHolder { + TRandomDataHolder() + : Data(GenerateRandomDataVector<N>(MinSize, MaxSize)) + , DataEncoded(Encode<N>(Data)) + { + for (size_t i = 0; i < N; ++i) { + const size_t size = Data[i].size(); + const size_t sizeEnc = DataEncoded[i].size(); + PlaceToEncode[i].Resize(Base64EncodeBufSize(size)); + PlaceToDecode[i].Resize(Base64DecodeBufSize(sizeEnc)); + } + } + + static constexpr size_t Size = N; + const std::array<TString, N> Data; + const std::array<TString, N> DataEncoded; + std::array<TBuffer, N> PlaceToEncode; + std::array<TBuffer, N> PlaceToDecode; + }; + + template <size_t N, size_t Size> + using TFixedSizeRandomDataHolder = TRandomDataHolder<N, Size, Size>; + + using FSRDH_1 = TFixedSizeRandomDataHolder<10, 1>; + using FSRDH_2 = TFixedSizeRandomDataHolder<10, 2>; + using FSRDH_4 = TFixedSizeRandomDataHolder<10, 4>; + using FSRDH_8 = TFixedSizeRandomDataHolder<10, 8>; + using FSRDH_16 = TFixedSizeRandomDataHolder<10, 
16>; + using FSRDH_32 = TFixedSizeRandomDataHolder<10, 32>; + using FSRDH_64 = TFixedSizeRandomDataHolder<10, 64>; + using FSRDH_128 = TFixedSizeRandomDataHolder<10, 128>; + using FSRDH_1024 = TFixedSizeRandomDataHolder<10, 1024>; + using FSRDH_10240 = TFixedSizeRandomDataHolder<10, 10240>; + using FSRDH_102400 = TFixedSizeRandomDataHolder<10, 102400>; + using FSRDH_1048576 = TFixedSizeRandomDataHolder<10, 1048576>; + using FSRDH_10485760 = TFixedSizeRandomDataHolder<10, 10485760>; +} + +template <typename T> +static inline void BenchEncode(T& d, const NBench::NCpu::TParams& iface) { + for (const auto it : xrange(iface.Iterations())) { + Y_UNUSED(it); + for (size_t i = 0; i < d.Size; ++i) { + NBench::Escape(d.PlaceToEncode[i].data()); + Y_DO_NOT_OPTIMIZE_AWAY( + Base64Encode(d.PlaceToEncode[i].data(), (const unsigned char*)d.Data[i].data(), d.Data[i].size())); + NBench::Clobber(); + } + } +} + +template <typename T> +static inline void BenchEncodeUrl(T& d, const NBench::NCpu::TParams& iface) { + for (const auto it : xrange(iface.Iterations())) { + Y_UNUSED(it); + for (size_t i = 0; i < d.Size; ++i) { + NBench::Escape(d.PlaceToEncode[i].data()); + Y_DO_NOT_OPTIMIZE_AWAY( + Base64EncodeUrl(d.PlaceToEncode[i].data(), (const unsigned char*)d.Data[i].data(), d.Data[i].size())); + NBench::Clobber(); + } + } +} + +template <typename T> +static inline void BenchDecode(T& d, const NBench::NCpu::TParams& iface) { + for (const auto it : xrange(iface.Iterations())) { + Y_UNUSED(it); + for (size_t i = 0; i < d.Size; ++i) { + NBench::Escape(d.PlaceToDecode[i].data()); + Y_DO_NOT_OPTIMIZE_AWAY( + Base64Decode(d.PlaceToDecode[i].data(), (const char*)d.DataEncoded[i].data(), (const char*)(d.DataEncoded[i].data() + d.DataEncoded[i].size()))); + NBench::Clobber(); + } + } +} + +Y_CPU_BENCHMARK(EncodeF1, iface) { + auto& d = *Singleton<FSRDH_1>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF1, iface) { + auto& d = *Singleton<FSRDH_1>(); + BenchDecode(d, iface); +} + 
+Y_CPU_BENCHMARK(EncodeF2, iface) { + auto& d = *Singleton<FSRDH_2>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF2, iface) { + auto& d = *Singleton<FSRDH_2>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeF4, iface) { + auto& d = *Singleton<FSRDH_4>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF4, iface) { + auto& d = *Singleton<FSRDH_4>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeF8, iface) { + auto& d = *Singleton<FSRDH_8>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF8, iface) { + auto& d = *Singleton<FSRDH_8>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeF16, iface) { + auto& d = *Singleton<FSRDH_16>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF16, iface) { + auto& d = *Singleton<FSRDH_16>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeF32, iface) { + auto& d = *Singleton<FSRDH_32>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF32, iface) { + auto& d = *Singleton<FSRDH_32>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeF64, iface) { + auto& d = *Singleton<FSRDH_64>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF64, iface) { + auto& d = *Singleton<FSRDH_64>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeF128, iface) { + auto& d = *Singleton<FSRDH_128>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF128, iface) { + auto& d = *Singleton<FSRDH_128>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeF1024, iface) { + auto& d = *Singleton<FSRDH_1024>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF1024, iface) { + auto& d = *Singleton<FSRDH_1024>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeF10240, iface) { + auto& d = *Singleton<FSRDH_10240>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF10240, iface) { + auto& d = *Singleton<FSRDH_10240>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeF102400, iface) { + auto& d = *Singleton<FSRDH_102400>(); + BenchEncode(d, iface); +} + 
+Y_CPU_BENCHMARK(DecodeF102400, iface) { + auto& d = *Singleton<FSRDH_102400>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeF1048576, iface) { + auto& d = *Singleton<FSRDH_1048576>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF1048576, iface) { + auto& d = *Singleton<FSRDH_1048576>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeF10485760, iface) { + auto& d = *Singleton<FSRDH_10485760>(); + BenchEncode(d, iface); +} + +Y_CPU_BENCHMARK(DecodeF10485760, iface) { + auto& d = *Singleton<FSRDH_10485760>(); + BenchDecode(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF1, iface) { + auto& d = *Singleton<FSRDH_1>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF2, iface) { + auto& d = *Singleton<FSRDH_2>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF4, iface) { + auto& d = *Singleton<FSRDH_4>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF8, iface) { + auto& d = *Singleton<FSRDH_8>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF16, iface) { + auto& d = *Singleton<FSRDH_16>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF32, iface) { + auto& d = *Singleton<FSRDH_32>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF64, iface) { + auto& d = *Singleton<FSRDH_64>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF128, iface) { + auto& d = *Singleton<FSRDH_128>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF1024, iface) { + auto& d = *Singleton<FSRDH_1024>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF10240, iface) { + auto& d = *Singleton<FSRDH_10240>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF102400, iface) { + auto& d = *Singleton<FSRDH_102400>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF1048576, iface) { + auto& d = *Singleton<FSRDH_1048576>(); + BenchEncodeUrl(d, iface); +} + +Y_CPU_BENCHMARK(EncodeUrlF10485760, iface) { + auto& d = *Singleton<FSRDH_10485760>(); + 
BenchEncodeUrl(d, iface); +} diff --git a/library/cpp/string_utils/base64/bench/metrics/main.py b/library/cpp/string_utils/base64/bench/metrics/main.py new file mode 100644 index 0000000000..c35fd6d8cd --- /dev/null +++ b/library/cpp/string_utils/base64/bench/metrics/main.py @@ -0,0 +1,5 @@ +import yatest.common as yc + + +def test_export_metrics(metrics): + metrics.set_benchmark(yc.execute_benchmark('library/cpp/string_utils/base64/bench/bench')) diff --git a/library/cpp/string_utils/base64/bench/metrics/ya.make b/library/cpp/string_utils/base64/bench/metrics/ya.make new file mode 100644 index 0000000000..b0406516c3 --- /dev/null +++ b/library/cpp/string_utils/base64/bench/metrics/ya.make @@ -0,0 +1,20 @@ +OWNER( + yazevnul + g:util +) + +PY2TEST() + +SIZE(LARGE) + +TAG( + ya:force_sandbox + sb:intel_e5_2660v1 + ya:fat +) + +TEST_SRCS(main.py) + +DEPENDS(library/cpp/string_utils/base64/bench) + +END() diff --git a/library/cpp/string_utils/base64/bench/ya.make b/library/cpp/string_utils/base64/bench/ya.make new file mode 100644 index 0000000000..5ac5f3d6ce --- /dev/null +++ b/library/cpp/string_utils/base64/bench/ya.make @@ -0,0 +1,16 @@ +OWNER( + yazevnul + g:util +) + +Y_BENCHMARK() + +SRCS( + main.cpp +) + +PEERDIR( + library/cpp/string_utils/base64 +) + +END() diff --git a/library/cpp/string_utils/base64/fuzz/generic/ya.make b/library/cpp/string_utils/base64/fuzz/generic/ya.make new file mode 100644 index 0000000000..d155e2b0a0 --- /dev/null +++ b/library/cpp/string_utils/base64/fuzz/generic/ya.make @@ -0,0 +1,12 @@ +OWNER( + yazevnul + g:util +) + +FUZZ() + +PEERDIR( + library/cpp/string_utils/base64/fuzz/lib +) + +END() diff --git a/library/cpp/string_utils/base64/fuzz/lib/main.cpp b/library/cpp/string_utils/base64/fuzz/lib/main.cpp new file mode 100644 index 0000000000..28547ae7a5 --- /dev/null +++ b/library/cpp/string_utils/base64/fuzz/lib/main.cpp @@ -0,0 +1,13 @@ +#include <library/cpp/string_utils/base64/base64.h> + +#include <util/system/types.h> 
+#include <util/system/yassert.h> + +extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) { + const TStringBuf example{reinterpret_cast<const char*>(data), size}; + const auto converted = Base64Decode(Base64Encode(example)); + + Y_VERIFY(example == converted); + + return 0; +} diff --git a/library/cpp/string_utils/base64/fuzz/lib/ya.make b/library/cpp/string_utils/base64/fuzz/lib/ya.make new file mode 100644 index 0000000000..7b981b86a3 --- /dev/null +++ b/library/cpp/string_utils/base64/fuzz/lib/ya.make @@ -0,0 +1,16 @@ +OWNER( + yazevnul + g:util +) + +LIBRARY() + +SRCS( + main.cpp +) + +PEERDIR( + library/cpp/string_utils/base64 +) + +END() diff --git a/library/cpp/string_utils/base64/fuzz/uneven/main.cpp b/library/cpp/string_utils/base64/fuzz/uneven/main.cpp new file mode 100644 index 0000000000..915e81a7e5 --- /dev/null +++ b/library/cpp/string_utils/base64/fuzz/uneven/main.cpp @@ -0,0 +1,10 @@ +#include <library/cpp/string_utils/base64/base64.h> + +#include <util/system/types.h> +#include <util/system/yassert.h> + +extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) { + const TStringBuf example{reinterpret_cast<const char*>(data), size}; + Y_UNUSED(Base64DecodeUneven(example)); + return 0; +} diff --git a/library/cpp/string_utils/base64/fuzz/uneven/ya.make b/library/cpp/string_utils/base64/fuzz/uneven/ya.make new file mode 100644 index 0000000000..18cb18ef52 --- /dev/null +++ b/library/cpp/string_utils/base64/fuzz/uneven/ya.make @@ -0,0 +1,15 @@ +FUZZ() + +OWNER( + g:util +) + +SRCS( + main.cpp +) + +PEERDIR( + library/cpp/string_utils/base64 +) + +END() diff --git a/library/cpp/string_utils/base64/fuzz/ya.make b/library/cpp/string_utils/base64/fuzz/ya.make new file mode 100644 index 0000000000..bef82061c4 --- /dev/null +++ b/library/cpp/string_utils/base64/fuzz/ya.make @@ -0,0 +1,10 @@ +OWNER( + yazevnul + g:util +) + +RECURSE( + generic + lib + uneven +) diff --git a/library/cpp/string_utils/base64/ut/ya.make 
b/library/cpp/string_utils/base64/ut/ya.make new file mode 100644 index 0000000000..9b61241f0e --- /dev/null +++ b/library/cpp/string_utils/base64/ut/ya.make @@ -0,0 +1,22 @@ +OWNER( + g:util + yazevnul +) + +UNITTEST_FOR(library/cpp/string_utils/base64) + +SRCS( + base64_ut.cpp + base64_decode_uneven_ut.cpp +) + +PEERDIR( + contrib/libs/base64/avx2 + contrib/libs/base64/ssse3 + contrib/libs/base64/neon32 + contrib/libs/base64/neon64 + contrib/libs/base64/plain32 + contrib/libs/base64/plain64 +) + +END() diff --git a/library/cpp/string_utils/base64/ya.make b/library/cpp/string_utils/base64/ya.make new file mode 100644 index 0000000000..f5258c446c --- /dev/null +++ b/library/cpp/string_utils/base64/ya.make @@ -0,0 +1,23 @@ +OWNER( + g:util + yazevnul +) + +LIBRARY() + +SRCS( + base64.cpp +) + +PEERDIR( + contrib/libs/base64/avx2 + contrib/libs/base64/ssse3 + contrib/libs/base64/neon32 + contrib/libs/base64/neon64 + contrib/libs/base64/plain32 + contrib/libs/base64/plain64 +) + +END() + +RECURSE_FOR_TESTS(ut) |