aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/string_utils
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/string_utils
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/string_utils')
-rw-r--r--library/cpp/string_utils/base64/base64.cpp268
-rw-r--r--library/cpp/string_utils/base64/base64.h130
-rw-r--r--library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp46
-rw-r--r--library/cpp/string_utils/base64/base64_ut.cpp497
-rw-r--r--library/cpp/string_utils/base64/bench/main.cpp326
-rw-r--r--library/cpp/string_utils/base64/bench/metrics/main.py5
-rw-r--r--library/cpp/string_utils/base64/bench/metrics/ya.make20
-rw-r--r--library/cpp/string_utils/base64/bench/ya.make16
-rw-r--r--library/cpp/string_utils/base64/fuzz/generic/ya.make12
-rw-r--r--library/cpp/string_utils/base64/fuzz/lib/main.cpp13
-rw-r--r--library/cpp/string_utils/base64/fuzz/lib/ya.make16
-rw-r--r--library/cpp/string_utils/base64/fuzz/uneven/main.cpp10
-rw-r--r--library/cpp/string_utils/base64/fuzz/uneven/ya.make15
-rw-r--r--library/cpp/string_utils/base64/fuzz/ya.make10
-rw-r--r--library/cpp/string_utils/base64/ut/ya.make22
-rw-r--r--library/cpp/string_utils/base64/ya.make23
-rw-r--r--library/cpp/string_utils/indent_text/indent_text.cpp25
-rw-r--r--library/cpp/string_utils/indent_text/indent_text.h6
-rw-r--r--library/cpp/string_utils/indent_text/ya.make9
-rw-r--r--library/cpp/string_utils/levenshtein_diff/levenshtein_diff.cpp1
-rw-r--r--library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h192
-rw-r--r--library/cpp/string_utils/levenshtein_diff/levenshtein_diff_ut.cpp190
-rw-r--r--library/cpp/string_utils/levenshtein_diff/ut/ya.make9
-rw-r--r--library/cpp/string_utils/levenshtein_diff/ya.make13
-rw-r--r--library/cpp/string_utils/parse_size/parse_size.cpp95
-rw-r--r--library/cpp/string_utils/parse_size/parse_size.h33
-rw-r--r--library/cpp/string_utils/parse_size/parse_size_ut.cpp63
-rw-r--r--library/cpp/string_utils/parse_size/ut/ya.make9
-rw-r--r--library/cpp/string_utils/parse_size/ya.make10
-rw-r--r--library/cpp/string_utils/quote/quote.cpp311
-rw-r--r--library/cpp/string_utils/quote/quote.h72
-rw-r--r--library/cpp/string_utils/quote/quote_ut.cpp319
-rw-r--r--library/cpp/string_utils/quote/ut/ya.make9
-rw-r--r--library/cpp/string_utils/quote/ya.make10
-rw-r--r--library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp1
-rw-r--r--library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h208
-rw-r--r--library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp66
-rw-r--r--library/cpp/string_utils/relaxed_escaper/ut/ya.make9
-rw-r--r--library/cpp/string_utils/relaxed_escaper/ya.make9
-rw-r--r--library/cpp/string_utils/scan/scan.cpp1
-rw-r--r--library/cpp/string_utils/scan/scan.h22
-rw-r--r--library/cpp/string_utils/scan/ya.make11
-rw-r--r--library/cpp/string_utils/url/url.cpp421
-rw-r--r--library/cpp/string_utils/url/url.h170
-rw-r--r--library/cpp/string_utils/url/url_ut.cpp281
-rw-r--r--library/cpp/string_utils/url/ut/ya.make9
-rw-r--r--library/cpp/string_utils/url/ya.make10
-rw-r--r--library/cpp/string_utils/ya.make37
-rw-r--r--library/cpp/string_utils/ztstrbuf/ya.make9
-rw-r--r--library/cpp/string_utils/ztstrbuf/ztstrbuf.cpp8
-rw-r--r--library/cpp/string_utils/ztstrbuf/ztstrbuf.h36
51 files changed, 4113 insertions, 0 deletions
diff --git a/library/cpp/string_utils/base64/base64.cpp b/library/cpp/string_utils/base64/base64.cpp
new file mode 100644
index 0000000000..05c201f0de
--- /dev/null
+++ b/library/cpp/string_utils/base64/base64.cpp
@@ -0,0 +1,268 @@
+#include "base64.h"
+
+#include <contrib/libs/base64/avx2/libbase64.h>
+#include <contrib/libs/base64/ssse3/libbase64.h>
+#include <contrib/libs/base64/neon32/libbase64.h>
+#include <contrib/libs/base64/neon64/libbase64.h>
+#include <contrib/libs/base64/plain32/libbase64.h>
+#include <contrib/libs/base64/plain64/libbase64.h>
+
+#include <util/generic/yexception.h>
+#include <util/system/cpu_id.h>
+#include <util/system/platform.h>
+
+#include <cstdlib>
+
+namespace {
+ // Holds the encode/decode entry points of the libbase64 backend selected for
+ // the current build target and CPU. Both pointers are assigned once, in the
+ // constructor.
+ struct TImpl {
+ // Encoder of the selected backend.
+ void (*Encode)(const char* src, size_t srclen, char* out, size_t* outlen);
+ // Decoder of the selected backend.
+ int (*Decode)(const char* src, size_t srclen, char* out, size_t* outlen);
+
+ // Picks the first matching backend in this order: AVX2, SSSE3, NEON64,
+ // NEON32, plain64, plain32. Aborts the process if none matches.
+ TImpl() {
+#if defined(_arm32_)
+ const bool haveNEON32 = true;
+#else
+ const bool haveNEON32 = false;
+#endif
+
+#if defined(_arm64_)
+ const bool haveNEON64 = true;
+#else
+ const bool haveNEON64 = false;
+#endif
+
+# ifdef _windows_
+ // msvc does something wrong in release-build, so we temporarily disable this branch on windows
+ // https://developercommunity.visualstudio.com/content/problem/334085/release-build-has-made-wrong-optimizaion-in-base64.html
+ const bool isWin = true;
+# else
+ const bool isWin = false;
+# endif
+ // AVX2 is used only when both the AVX and AVX2 runtime checks pass and
+ // the build is not for Windows.
+ if (!isWin && NX86::HaveAVX() && NX86::HaveAVX2()) {
+ Encode = avx2_base64_encode;
+ Decode = avx2_base64_decode;
+ } else if (NX86::HaveSSSE3()) {
+ Encode = ssse3_base64_encode;
+ Decode = ssse3_base64_decode;
+ } else if (haveNEON64) {
+ Encode = neon64_base64_encode;
+ Decode = neon64_base64_decode;
+ } else if (haveNEON32) {
+ Encode = neon32_base64_encode;
+ Decode = neon32_base64_decode;
+ } else if (sizeof(void*) == 8) {
+ // running on a 64 bit platform
+ Encode = plain64_base64_encode;
+ Decode = plain64_base64_decode;
+ } else if (sizeof(void*) == 4) {
+ // running on a 32 bit platform (actually impossible in Arcadia)
+ Encode = plain32_base64_encode;
+ Decode = plain32_base64_decode;
+ } else {
+ // failed to find appropriate implementation
+ std::abort();
+ }
+ }
+ };
+
+ // Returns a copy of a function-local static TImpl, which is constructed on
+ // the first call.
+ const TImpl GetImpl() {
+ static const TImpl IMPL;
+ return IMPL;
+ }
+}
+
+// Standard Base64 alphabet, indexed by 6-bit value.
+static const char base64_etab_std[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+// Reverse table used by uudecode_1: maps an input byte to its 6-bit value.
+// '+' and '-' both map to 62 and '/' and '_' both map to 63, so the same table
+// serves the standard and the URL alphabet. Every other byte outside the
+// alphabets (including ',' and '=') maps to 0.
+static const char base64_bkw[] = {
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 0..15
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 16..31
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\76', '\0', '\76', '\0', '\77', // 32..47
+ '\64', '\65', '\66', '\67', '\70', '\71', '\72', '\73', '\74', '\75', '\0', '\0', '\0', '\0', '\0', '\0', // 48..63
+ '\0', '\0', '\1', '\2', '\3', '\4', '\5', '\6', '\7', '\10', '\11', '\12', '\13', '\14', '\15', '\16', // 64..79
+ '\17', '\20', '\21', '\22', '\23', '\24', '\25', '\26', '\27', '\30', '\31', '\0', '\0', '\0', '\0', '\77', // 80..95
+ '\0', '\32', '\33', '\34', '\35', '\36', '\37', '\40', '\41', '\42', '\43', '\44', '\45', '\46', '\47', '\50', // 96..111
+ '\51', '\52', '\53', '\54', '\55', '\56', '\57', '\60', '\61', '\62', '\63', '\0', '\0', '\0', '\0', '\0', // 112..127
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 128..143
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
+ '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0'};
+
+// The table is indexed by an unsigned char, so it must cover all 256 values.
+static_assert(Y_ARRAY_SIZE(base64_bkw) == 256, "wrong size");
+
+// Base64 for url encoding, RFC3548
+static const char base64_etab_url[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
+// Alphabet index of the first output symbol: the six most significant bits of octet0.
+static inline unsigned char GetBase64EncodedIndex0(unsigned char octet0) {
+    const unsigned topSixBits = octet0 >> 2;
+    return static_cast<unsigned char>(topSixBits);
+}
+
+// Alphabet index of the second output symbol: the two low bits of octet0
+// followed by the four high bits of octet1.
+static inline unsigned char GetBase64EncodedIndex1(unsigned char octet0, unsigned char octet1) {
+    const unsigned lowTwoOfFirst = octet0 & 0x03u;
+    const unsigned highFourOfSecond = (octet1 >> 4) & 0x0fu;
+    return static_cast<unsigned char>((lowTwoOfFirst << 4) | highFourOfSecond);
+}
+
+// Alphabet index of the third output symbol: the four low bits of octet1
+// followed by the two high bits of octet2.
+static inline unsigned char GetBase64EncodedIndex2(unsigned char octet1, unsigned char octet2) {
+    const unsigned lowFourOfSecond = octet1 & 0x0fu;
+    const unsigned highTwoOfThird = (octet2 >> 6) & 0x03u;
+    return static_cast<unsigned char>((lowFourOfSecond << 2) | highTwoOfThird);
+}
+
+// Alphabet index of the fourth output symbol: the six low bits of octet2.
+static inline unsigned char GetBase64EncodedIndex3(unsigned char octet2) {
+    const unsigned lowSixBits = octet2 & 0x3fu;
+    return static_cast<unsigned char>(lowSixBits);
+}
+
+// Table-driven Base64 encoder.
+//
+// Writes the encoding of instr[0, len) to outstr, followed by a terminating
+// zero byte, and returns a pointer to that terminator. urlVersion selects the
+// URL alphabet with ',' as the padding character; otherwise the standard
+// alphabet with '=' is used.
+template <bool urlVersion>
+static inline char* Base64EncodeImpl(char* outstr, const unsigned char* instr, size_t len) {
+    const char* const alphabet = urlVersion ? base64_etab_url : base64_etab_std;
+    const char padChar = urlVersion ? ',' : '=';
+
+    const size_t tailLen = len % 3;
+    const size_t fullLen = len - tailLen;
+
+    // Complete three-byte groups: each one yields four symbols.
+    for (size_t pos = 0; pos < fullLen; pos += 3) {
+        const unsigned char first = instr[pos];
+        const unsigned char second = instr[pos + 1];
+        const unsigned char third = instr[pos + 2];
+
+        *outstr++ = alphabet[GetBase64EncodedIndex0(first)];
+        *outstr++ = alphabet[GetBase64EncodedIndex1(first, second)];
+        *outstr++ = alphabet[GetBase64EncodedIndex2(second, third)];
+        *outstr++ = alphabet[GetBase64EncodedIndex3(third)];
+    }
+
+    // Remaining one or two bytes: missing octets are treated as zero and the
+    // quad is completed with padding characters.
+    if (tailLen == 2) {
+        const unsigned char first = instr[fullLen];
+        const unsigned char second = instr[fullLen + 1];
+
+        *outstr++ = alphabet[GetBase64EncodedIndex0(first)];
+        *outstr++ = alphabet[GetBase64EncodedIndex1(first, second)];
+        *outstr++ = alphabet[GetBase64EncodedIndex2(second, '\0')];
+        *outstr++ = padChar;
+    } else if (tailLen == 1) {
+        const unsigned char first = instr[fullLen];
+
+        *outstr++ = alphabet[GetBase64EncodedIndex0(first)];
+        *outstr++ = alphabet[GetBase64EncodedIndex1(first, '\0')];
+        *outstr++ = padChar;
+        *outstr++ = padChar;
+    }
+
+    *outstr = 0;
+    return outstr;
+}
+
+// Table-driven encoder using the standard alphabet and '=' padding.
+static char* Base64EncodePlain(char* outstr, const unsigned char* instr, size_t len) {
+    constexpr bool useUrlAlphabet = false;
+    return Base64EncodeImpl<useUrlAlphabet>(outstr, instr, len);
+}
+
+// Table-driven encoder using the URL alphabet and ',' padding.
+char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len) {
+    constexpr bool useUrlAlphabet = true;
+    return Base64EncodeImpl<useUrlAlphabet>(outstr, instr, len);
+}
+
+// Decodes the four characters at src into three bytes at dst through the
+// base64_bkw table. No validation is done here.
+inline void uudecode_1(char* dst, unsigned char* src) {
+    const char sextet0 = base64_bkw[src[0]];
+    const char sextet1 = base64_bkw[src[1]];
+    const char sextet2 = base64_bkw[src[2]];
+    const char sextet3 = base64_bkw[src[3]];
+
+    dst[0] = char((sextet0 << 2) | (sextet1 >> 4));
+    dst[1] = char((sextet1 << 4) | (sextet2 >> 2));
+    dst[2] = char((sextet2 << 6) | sextet3);
+}
+
+// Table-driven decoder for [b, e); the caller guarantees the length is a
+// multiple of 4. Returns the number of decoded bytes, which excludes one byte
+// for each of up to two trailing padding characters (',' or '=').
+static size_t Base64DecodePlain(void* dst, const char* b, const char* e) {
+    char* const out = (char*)dst;
+    size_t written = 0;
+
+    for (; b < e; b += 4, written += 3) {
+        uudecode_1(out + written, (unsigned char*)b);
+    }
+
+    if (written == 0) {
+        return written;
+    }
+
+    const auto isPadding = [](const char c) {
+        return c == ',' || c == '=';
+    };
+
+    // b now points just past the last quad, so b[-1] and b[-2] are its tail.
+    if (isPadding(b[-1])) {
+        --written;
+        if (isPadding(b[-2])) {
+            --written;
+        }
+    }
+
+    return written;
+}
+
+// Table for Base64StrictDecode
+// Maps an input byte to its 6-bit value. Bytes outside both alphabets map to
+// \100 (64, "invalid"); the padding characters ',' and '=' map to \101
+// (65, "padding"). '+'/'-' map to 62 and '/'/'_' map to 63.
+static const char base64_bkw_strict[] =
+ "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100"
+ "\100\100\100\100\100\100\100\100\100\100\100\76\101\76\100\77\64\65\66\67\70\71\72\73\74\75\100\100\100\101\100\100"
+ "\100\0\1\2\3\4\5\6\7\10\11\12\13\14\15\16\17\20\21\22\23\24\25\26\27\30\31\100\100\100\100\77"
+ "\100\32\33\34\35\36\37\40\41\42\43\44\45\46\47\50\51\52\53\54\55\56\57\60\61\62\63\100\100\100\100\100"
+ "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100"
+ "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100"
+ "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100"
+ "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100";
+
+// Decodes the Base64 text in [b, e) into `out`, checking every input byte
+// against base64_bkw_strict.
+//
+// Throws yexception when the input length is not a multiple of 4, when a quad
+// contains a byte the table marks as invalid, when either of the first two
+// bytes of a quad is a padding character, or when the third byte of a quad is
+// padding but the fourth is not. Padding is evaluated per quad, so a padded
+// quad is accepted anywhere in the input, not only at its end.
+//
+// Returns the number of bytes written to `out`.
+size_t Base64StrictDecode(void* out, const char* b, const char* e) {
+ char* dst = (char*)out;
+ const unsigned char* src = (unsigned char*)b;
+ const unsigned char* const end = (unsigned char*)e;
+
+ Y_ENSURE(!((e - b) % 4), "incorrect input length for base64 decode");
+
+ while (src < end) {
+ const char zeroth = base64_bkw_strict[src[0]];
+ const char first = base64_bkw_strict[src[1]];
+ const char second = base64_bkw_strict[src[2]];
+ const char third = base64_bkw_strict[src[3]];
+
+ // Table value for bytes that belong to neither alphabet.
+ constexpr char invalid = 64;
+ // Table value for the padding characters ',' and '='.
+ constexpr char padding = 65;
+ if (Y_UNLIKELY(zeroth == invalid || first == invalid ||
+ second == invalid || third == invalid ||
+ zeroth == padding || first == padding))
+ {
+ ythrow yexception() << "invalid character in input";
+ }
+
+ // All three bytes are written unconditionally; bytes that came from
+ // padding are dropped below by moving dst back.
+ dst[0] = char((zeroth << 2) | (first >> 4));
+ dst[1] = char((first << 4) | (second >> 2));
+ dst[2] = char((second << 6) | third);
+
+ src += 4;
+ dst += 3;
+
+ // src now points past the quad, so src[-1] and src[-2] are its last two
+ // characters.
+ if (src[-1] == ',' || src[-1] == '=') {
+ --dst;
+
+ if (src[-2] == ',' || src[-2] == '=') {
+ --dst;
+ }
+ } else if (Y_UNLIKELY(src[-2] == ',' || src[-2] == '=')) {
+ // Padding in the third position must be followed by padding.
+ ythrow yexception() << "incorrect padding";
+ }
+ }
+
+ return dst - (char*)out;
+}
+
+// Decodes the Base64 text in [b, e) into `dst` and returns the number of bytes
+// produced. Throws (via Y_ENSURE) when the input length is not a multiple of 4.
+// Inputs shorter than 8 characters are handled by the table-based
+// Base64DecodePlain; longer inputs go to the backend selected by TImpl.
+size_t Base64Decode(void* dst, const char* b, const char* e) {
+ static const TImpl IMPL = GetImpl();
+ const auto size = e - b;
+ Y_ENSURE(!(size % 4), "incorrect input length for base64 decode");
+ if (Y_LIKELY(size < 8)) {
+ return Base64DecodePlain(dst, b, e);
+ }
+
+ size_t outLen;
+ // The backend's int return value is not inspected; only outLen is used.
+ IMPL.Decode(b, size, (char*)dst, &outLen);
+
+ return outLen;
+}
+
+// Decodes input whose length need not be a multiple of 4: the missing tail is
+// filled with '=' characters before the input is passed to Base64Decode.
+TString Base64DecodeUneven(const TStringBuf s) {
+    const size_t remainder = s.length() % 4;
+    if (remainder == 0) {
+        return Base64Decode(s);
+    }
+
+    // padding to 4
+    const TString missingPadding(4 - remainder, '=');
+    return Base64Decode(TString(s) + missingPadding);
+}
+
+// Encodes instr[0, len) into outstr using the standard alphabet, appends a
+// terminating zero byte and returns a pointer to that terminator.
+// Inputs shorter than 8 bytes are handled by the table-based
+// Base64EncodePlain; longer inputs go to the backend selected by TImpl.
+char* Base64Encode(char* outstr, const unsigned char* instr, size_t len) {
+ static const TImpl IMPL = GetImpl();
+ if (Y_LIKELY(len < 8)) {
+ return Base64EncodePlain(outstr, instr, len);
+ }
+
+ size_t outLen;
+ IMPL.Encode((char*)instr, len, outstr, &outLen);
+
+ // The terminator is written here, after the backend call.
+ *(outstr + outLen) = '\0';
+ return outstr + outLen;
+}
diff --git a/library/cpp/string_utils/base64/base64.h b/library/cpp/string_utils/base64/base64.h
new file mode 100644
index 0000000000..f778a6425a
--- /dev/null
+++ b/library/cpp/string_utils/base64/base64.h
@@ -0,0 +1,130 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+
+/* @return Size of the buffer required to decode Base64 encoded data of size `len`:
+ * three bytes for every (possibly incomplete) group of four input characters.
+ */
+constexpr size_t Base64DecodeBufSize(const size_t len) noexcept {
+    const size_t quads = (len + 3) / 4;
+    return quads * 3;
+}
+
+/* Decode Base64 encoded data. Can decode both regular Base64 and Base64URL encoded data. Can decode
+ * only valid Base64[URL] data, behaviour for invalid data is unspecified.
+ *
+ * @throws Throws exception if the input length is not a multiple of 4.
+ *
+ * @param dst memory for writing output.
+ * @param b pointer to the beginning of base64 encoded string.
+ * @param e pointer to the end of base64 encoded string.
+ *
+ * @return Return number of bytes decoded.
+ */
+size_t Base64Decode(void* dst, const char* b, const char* e);
+
+// Decodes src into the caller-provided buffer dst and returns a view over the
+// decoded bytes in that buffer.
+inline TStringBuf Base64Decode(const TStringBuf src, void* dst) {
+    const size_t decodedSize = Base64Decode(dst, src.begin(), src.end());
+    return TStringBuf((const char*)dst, decodedSize);
+}
+
+// Decodes src into dst: dst is first sized to the Base64DecodeBufSize bound,
+// then shrunk to the number of bytes actually decoded.
+inline void Base64Decode(const TStringBuf src, TString& dst) {
+    const size_t upperBound = Base64DecodeBufSize(src.size());
+    dst.ReserveAndResize(upperBound);
+
+    const TStringBuf decoded = Base64Decode(src, dst.begin());
+    dst.resize(decoded.size());
+}
+
+// WARNING: may silently process only part of the input; use Base64StrictDecode
+// instead of this function.
+inline TString Base64Decode(const TStringBuf s) {
+    TString decoded;
+    Base64Decode(s, decoded);
+    return decoded;
+}
+
+///
+/// @brief Decodes Base64 string with strict verification
+/// of invalid symbols, also tries to decode Base64 string with padding
+/// inside.
+//
+/// @throws Throws exceptions on inputs which contain invalid symbols
+/// or incorrect padding.
+/// @{
+///
+/// @param b a pointer to the beginning of base64 encoded string.
+/// @param e a pointer to the end of base64 encoded string.
+/// @param dst memory for writing output.
+///
+/// @return Returns number of bytes decoded.
+///
+size_t Base64StrictDecode(void* dst, const char* b, const char* e);
+
+///
+/// @param src a base64 encoded string.
+/// @param dst a pointer to allocated memory
+/// for writing result.
+///
+/// @return Returns dst wrapped into TStringBuf.
+///
+inline TStringBuf Base64StrictDecode(const TStringBuf src, void* dst) {
+    const size_t decodedSize = Base64StrictDecode(dst, src.begin(), src.end());
+    return TStringBuf((const char*)dst, decodedSize);
+}
+
+///
+/// @param src a base64 encoded string.
+/// @param dst a decoded string; sized to the Base64DecodeBufSize bound first,
+/// then shrunk to the decoded length.
+///
+inline void Base64StrictDecode(const TStringBuf src, TString& dst) {
+    const size_t upperBound = Base64DecodeBufSize(src.size());
+    dst.ReserveAndResize(upperBound);
+
+    const TStringBuf decoded = Base64StrictDecode(src, dst.begin());
+    dst.resize(decoded.size());
+}
+
+///
+/// @param src a base64 encoded string.
+///
+/// @returns a decoded string.
+///
+inline TString Base64StrictDecode(const TStringBuf src) {
+    TString decoded;
+    Base64StrictDecode(src, decoded);
+    return decoded;
+}
+/// @}
+
+/// Works with strings whose length is not divisible by 4.
+TString Base64DecodeUneven(const TStringBuf s);
+
+//encode
+// Buffer size needed to encode `len` bytes: four characters for every
+// (possibly incomplete) group of three input bytes, plus one extra byte.
+constexpr size_t Base64EncodeBufSize(const size_t len) noexcept {
+    const size_t triplets = (len + 2) / 3;
+    return triplets * 4 + 1;
+}
+
+char* Base64Encode(char* outstr, const unsigned char* instr, size_t len);
+char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len);
+
+// Encodes src into the caller-provided buffer tmp and returns a view over the
+// encoded text (from tmp up to the pointer returned by the encoder).
+inline TStringBuf Base64Encode(const TStringBuf src, void* tmp) {
+    char* const out = (char*)tmp;
+    const char* const encodedEnd = Base64Encode(out, (const unsigned char*)src.data(), src.size());
+    return TStringBuf((const char*)tmp, encodedEnd);
+}
+
+// URL-alphabet counterpart of Base64Encode(src, tmp): encodes src into tmp and
+// returns a view over the encoded text.
+inline TStringBuf Base64EncodeUrl(const TStringBuf src, void* tmp) {
+    char* const out = (char*)tmp;
+    const char* const encodedEnd = Base64EncodeUrl(out, (const unsigned char*)src.data(), src.size());
+    return TStringBuf((const char*)tmp, encodedEnd);
+}
+
+// Encodes src into dst: dst is first sized to Base64EncodeBufSize, then shrunk
+// to the length of the encoded text.
+inline void Base64Encode(const TStringBuf src, TString& dst) {
+    const size_t bufSize = Base64EncodeBufSize(src.size());
+    dst.ReserveAndResize(bufSize);
+
+    const TStringBuf encoded = Base64Encode(src, dst.begin());
+    dst.resize(encoded.size());
+}
+
+// URL-alphabet counterpart of Base64Encode(src, dst): dst is first sized to
+// Base64EncodeBufSize, then shrunk to the length of the encoded text.
+inline void Base64EncodeUrl(const TStringBuf src, TString& dst) {
+    const size_t bufSize = Base64EncodeBufSize(src.size());
+    dst.ReserveAndResize(bufSize);
+
+    const TStringBuf encoded = Base64EncodeUrl(src, dst.begin());
+    dst.resize(encoded.size());
+}
+
+// Returns the standard-alphabet Base64 encoding of s as a new string.
+inline TString Base64Encode(const TStringBuf s) {
+    TString encoded;
+    Base64Encode(s, encoded);
+    return encoded;
+}
+
+// Returns the URL-alphabet Base64 encoding of s as a new string.
+inline TString Base64EncodeUrl(const TStringBuf s) {
+    TString encoded;
+    Base64EncodeUrl(s, encoded);
+    return encoded;
+}
diff --git a/library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp b/library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp
new file mode 100644
index 0000000000..c3ed068a37
--- /dev/null
+++ b/library/cpp/string_utils/base64/base64_decode_uneven_ut.cpp
@@ -0,0 +1,46 @@
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <library/cpp/string_utils/base64/base64.h>
+
+// Checks Base64DecodeUneven on padded ('=' and ','), unpadded and round-tripped
+// inputs.
+Y_UNIT_TEST_SUITE(TBase64DecodeUneven) {
+ Y_UNIT_TEST(Base64DecodeUneven) {
+ const TString wikipedia_slogan =
+ "Man is distinguished, not only by his reason, "
+ "but by this singular passion from other animals, which is a lust of the "
+ "mind, that by a perseverance of delight in the continued and "
+ "indefatigable generation of knowledge, exceeds the short "
+ "vehemence of any carnal pleasure.";
+ // Standard encoding of the text above, ending with one '=' padding character.
+ const TString encoded =
+ "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0"
+ "aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1"
+ "c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0"
+ "aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdl"
+ "LCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4=";
+
+ UNIT_ASSERT_VALUES_EQUAL(encoded, Base64Encode(wikipedia_slogan));
+ UNIT_ASSERT_VALUES_EQUAL(wikipedia_slogan, Base64DecodeUneven(encoded));
+
+ // Same payload with ',' as the padding character.
+ const TString encoded_url1 =
+ "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0"
+ "aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1"
+ "c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0"
+ "aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdl"
+ "LCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4,";
+ // Same payload with the padding character omitted (length not divisible by 4).
+ const TString encoded_url2 =
+ "TWFuIGlzIGRpc3Rpbmd1aXNoZWQsIG5vdCBvbmx5IGJ5IGhpcyByZWFzb24sIGJ1dCBieSB0"
+ "aGlzIHNpbmd1bGFyIHBhc3Npb24gZnJvbSBvdGhlciBhbmltYWxzLCB3aGljaCBpcyBhIGx1"
+ "c3Qgb2YgdGhlIG1pbmQsIHRoYXQgYnkgYSBwZXJzZXZlcmFuY2Ugb2YgZGVsaWdodCBpbiB0"
+ "aGUgY29udGludWVkIGFuZCBpbmRlZmF0aWdhYmxlIGdlbmVyYXRpb24gb2Yga25vd2xlZGdl"
+ "LCBleGNlZWRzIHRoZSBzaG9ydCB2ZWhlbWVuY2Ugb2YgYW55IGNhcm5hbCBwbGVhc3VyZS4";
+ UNIT_ASSERT_VALUES_EQUAL(wikipedia_slogan, Base64DecodeUneven(encoded_url1));
+ UNIT_ASSERT_VALUES_EQUAL(wikipedia_slogan, Base64DecodeUneven(encoded_url2));
+
+ // Round trips through both encoders.
+ const TString lp = "Linkin Park";
+ UNIT_ASSERT_VALUES_EQUAL(lp, Base64DecodeUneven(Base64Encode(lp)));
+ UNIT_ASSERT_VALUES_EQUAL(lp, Base64DecodeUneven(Base64EncodeUrl(lp)));
+
+ // Input containing a newline, '&' and a non-ASCII character.
+ const TString dp = "ADP GmbH\nAnalyse Design & Programmierung\nGesellschaft mit beschränkter Haftung";
+ UNIT_ASSERT_VALUES_EQUAL(dp, Base64DecodeUneven(Base64Encode(dp)));
+ UNIT_ASSERT_VALUES_EQUAL(dp, Base64DecodeUneven(Base64EncodeUrl(dp)));
+ }
+}
diff --git a/library/cpp/string_utils/base64/base64_ut.cpp b/library/cpp/string_utils/base64/base64_ut.cpp
new file mode 100644
index 0000000000..bcc1e65879
--- /dev/null
+++ b/library/cpp/string_utils/base64/base64_ut.cpp
@@ -0,0 +1,497 @@
+#include "base64.h"
+
+#include <contrib/libs/base64/avx2/libbase64.h>
+#include <contrib/libs/base64/neon32/libbase64.h>
+#include <contrib/libs/base64/neon64/libbase64.h>
+#include <contrib/libs/base64/plain32/libbase64.h>
+#include <contrib/libs/base64/plain64/libbase64.h>
+#include <contrib/libs/base64/ssse3/libbase64.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/vector.h>
+#include <util/random/fast.h>
+#include <util/system/cpu_id.h>
+#include <util/system/platform.h>
+
+#include <array>
+
+using namespace std::string_view_literals;
+
+// Generates, for one backend, a family of functions named
+// <prefix>Base64Decode / <prefix>Base64Encode that mirror the overloads in
+// base64.h but always call the given encFunction / decFunction directly.
+// The generated overloads refer to each other through ::NB64Etalon::, so the
+// macro has to be expanded inside namespace NB64Etalon.
+#define BASE64_UT_DECLARE_BASE64_IMPL(prefix, encFunction, decFunction) \
+ Y_DECLARE_UNUSED \
+ static size_t prefix##Base64Decode(void* dst, const char* b, const char* e) { \
+ const auto size = e - b; \
+ Y_ENSURE(!(size % 4), "incorrect input length for base64 decode"); \
+ \
+ size_t outLen; \
+ decFunction(b, size, (char*)dst, &outLen); \
+ return outLen; \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline TStringBuf prefix##Base64Decode(const TStringBuf& src, void* dst) { \
+ return TStringBuf((const char*)dst, ::NB64Etalon::prefix##Base64Decode(dst, src.begin(), src.end())); \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline void prefix##Base64Decode(const TStringBuf& src, TString& dst) { \
+ dst.ReserveAndResize(Base64DecodeBufSize(src.size())); \
+ dst.resize(::NB64Etalon::prefix##Base64Decode(src, dst.begin()).size()); \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline TString prefix##Base64Decode(const TStringBuf& s) { \
+ TString ret; \
+ prefix##Base64Decode(s, ret); \
+ return ret; \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static char* prefix##Base64Encode(char* outstr, const unsigned char* instr, size_t len) { \
+ size_t outLen; \
+ encFunction((char*)instr, len, outstr, &outLen); \
+ *(outstr + outLen) = '\0'; \
+ return outstr + outLen; \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline TStringBuf prefix##Base64Encode(const TStringBuf& src, void* tmp) { \
+ return TStringBuf((const char*)tmp, ::NB64Etalon::prefix##Base64Encode((char*)tmp, (const unsigned char*)src.data(), src.size())); \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline void prefix##Base64Encode(const TStringBuf& src, TString& dst) { \
+ dst.ReserveAndResize(Base64EncodeBufSize(src.size())); \
+ dst.resize(::NB64Etalon::prefix##Base64Encode(src, dst.begin()).size()); \
+ } \
+ \
+ Y_DECLARE_UNUSED \
+ static inline TString prefix##Base64Encode(const TStringBuf& s) { \
+ TString ret; \
+ prefix##Base64Encode(s, ret); \
+ return ret; \
+ }
+
+// Reference ("etalon") wrappers around every libbase64 backend, plus a table
+// of the backends that are filled in for the current platform.
+namespace NB64Etalon {
+ BASE64_UT_DECLARE_BASE64_IMPL(PLAIN32, plain32_base64_encode, plain32_base64_decode);
+ BASE64_UT_DECLARE_BASE64_IMPL(PLAIN64, plain64_base64_encode, plain64_base64_decode);
+ BASE64_UT_DECLARE_BASE64_IMPL(NEON32, neon32_base64_encode, neon32_base64_decode);
+ BASE64_UT_DECLARE_BASE64_IMPL(NEON64, neon64_base64_encode, neon64_base64_decode);
+ BASE64_UT_DECLARE_BASE64_IMPL(AVX2, avx2_base64_encode, avx2_base64_decode);
+ BASE64_UT_DECLARE_BASE64_IMPL(SSSE3, ssse3_base64_encode, ssse3_base64_decode);
+
+#undef BASE64_UT_DECLARE_BASE64_IMPL
+
+ // Table of per-backend encode/decode functions, indexed by EImpl.
+ struct TImpls {
+ // Backend identifiers; MAX_IMPL is the number of backends and the size
+ // of the Impl array.
+ enum EImpl : size_t {
+ PLAIN32_IMPL,
+ PLAIN64_IMPL,
+ NEON32_IMPL,
+ NEON64_IMPL,
+ AVX2_IMPL,
+ SSSE3_IMPL,
+ MAX_IMPL
+ };
+
+ using TEncodeF = void (*)(const TStringBuf&, TString&);
+ using TDecodeF = void (*)(const TStringBuf&, TString&);
+
+ // Entry for one backend; both pointers stay nullptr unless the
+ // constructor below fills them in.
+ struct TImpl {
+ TEncodeF Encode = nullptr;
+ TDecodeF Decode = nullptr;
+ };
+
+ std::array<TImpl, MAX_IMPL> Impl;
+
+ // The plain backends are always registered; exactly one platform branch
+ // below adds the SIMD backends.
+ TImpls() {
+ Impl[PLAIN32_IMPL].Encode = PLAIN32Base64Encode;
+ Impl[PLAIN32_IMPL].Decode = PLAIN32Base64Decode;
+ Impl[PLAIN64_IMPL].Encode = PLAIN64Base64Encode;
+ Impl[PLAIN64_IMPL].Decode = PLAIN64Base64Decode;
+#if defined(_arm32_)
+ Impl[NEON32_IMPL].Encode = NEON32Base64Encode;
+ Impl[NEON32_IMPL].Decode = NEON32Base64Decode;
+#elif defined(_arm64_)
+ Impl[NEON64_IMPL].Encode = NEON64Base64Encode;
+ Impl[NEON64_IMPL].Decode = NEON64Base64Decode;
+#elif defined(_x86_64_)
+ // On x86_64 each SIMD backend is registered only if its runtime CPU
+ // check passes.
+ if (NX86::HaveSSSE3()) {
+ Impl[SSSE3_IMPL].Encode = SSSE3Base64Encode;
+ Impl[SSSE3_IMPL].Decode = SSSE3Base64Decode;
+ }
+
+ if (NX86::HaveAVX2()) {
+ Impl[AVX2_IMPL].Encode = AVX2Base64Encode;
+ Impl[AVX2_IMPL].Decode = AVX2Base64Decode;
+ }
+#else
+ ythrow yexception() << "Failed to identify the platform";
+#endif
+ }
+ };
+
+ // Returns a copy of a function-local static TImpls, which is constructed on
+ // the first call.
+ TImpls GetImpls() {
+ static const TImpls IMPLS;
+ return IMPLS;
+ }
+}
+
+// Stream-output specialization that prints the backend name of an EImpl value.
+// Throws yexception for any value without a name (MAX_IMPL included).
+template <>
+void Out<NB64Etalon::TImpls::EImpl>(IOutputStream& o, typename TTypeTraits<NB64Etalon::TImpls::EImpl>::TFuncParam v) {
+    using TImpls = NB64Etalon::TImpls;
+
+    TStringBuf name;
+    switch (v) {
+        case TImpls::PLAIN32_IMPL:
+            name = TStringBuf{"PLAIN32"};
+            break;
+        case TImpls::PLAIN64_IMPL:
+            name = TStringBuf{"PLAIN64"};
+            break;
+        case TImpls::NEON64_IMPL:
+            name = TStringBuf{"NEON64"};
+            break;
+        case TImpls::NEON32_IMPL:
+            name = TStringBuf{"NEON32"};
+            break;
+        case TImpls::SSSE3_IMPL:
+            name = TStringBuf{"SSSE3"};
+            break;
+        case TImpls::AVX2_IMPL:
+            name = TStringBuf{"AVX2"};
+            break;
+        default:
+            ythrow yexception() << "invalid";
+    }
+
+    o << name;
+}
+
+// Checks the TString-output overloads: `plain` must encode to `encoded`
+// (standard alphabet) and to `encodedUrl` (URL alphabet), and Base64Decode
+// must turn either encoding back into `plain`.
+static void TestEncodeDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) {
+ TString a, b;
+
+ Base64Encode(plain, a);
+ UNIT_ASSERT_VALUES_EQUAL(a, encoded);
+
+ Base64Decode(a, b);
+ UNIT_ASSERT_VALUES_EQUAL(b, plain);
+
+ // a and b are reused, so this also covers writing into non-empty strings.
+ Base64EncodeUrl(plain, a);
+ UNIT_ASSERT_VALUES_EQUAL(a, encodedUrl);
+
+ Base64Decode(a, b);
+ UNIT_ASSERT_VALUES_EQUAL(b, plain);
+}
+
+// Same checks as TestEncodeDecodeIntoString, but decoding goes through
+// Base64StrictDecode.
+static void TestEncodeStrictDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) {
+ TString a, b;
+
+ Base64Encode(plain, a);
+ UNIT_ASSERT_VALUES_EQUAL(a, encoded);
+
+ Base64StrictDecode(a, b);
+ UNIT_ASSERT_VALUES_EQUAL(b, plain);
+
+ // a and b are reused, so this also covers writing into non-empty strings.
+ Base64EncodeUrl(plain, a);
+ UNIT_ASSERT_VALUES_EQUAL(a, encodedUrl);
+
+ Base64StrictDecode(a, b);
+ UNIT_ASSERT_VALUES_EQUAL(b, plain);
+}
+
+Y_UNIT_TEST_SUITE(TBase64) {
+ // Encoding of 3-, 2- and 1-byte inputs: no padding, one '=' and two '='.
+ Y_UNIT_TEST(TestEncode) {
+ UNIT_ASSERT_VALUES_EQUAL(Base64Encode("12z"), "MTJ6");
+ UNIT_ASSERT_VALUES_EQUAL(Base64Encode("123"), "MTIz");
+ UNIT_ASSERT_VALUES_EQUAL(Base64Encode("12"), "MTI=");
+ UNIT_ASSERT_VALUES_EQUAL(Base64Encode("1"), "MQ==");
+ }
+
+ // Runs the encode/decode and encode/strict-decode helpers on two inputs: all
+ // 256 byte values in order, and a URL whose encodings differ between the
+ // standard and the URL alphabet.
+ Y_UNIT_TEST(TestIntoString) {
+ {
+ TString str;
+ for (size_t i = 0; i < 256; ++i)
+ str += char(i);
+
+ const TString base64 =
+ "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJy"
+ "gpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9Q"
+ "UVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eH"
+ "l6e3x9fn+AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6Ch"
+ "oqOkpaanqKmqq6ytrq+wsbKztLW2t7i5uru8vb6/wMHCw8TFxsfIyc"
+ "rLzM3Oz9DR0tPU1dbX2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy"
+ "8/T19vf4+fr7/P3+/w==";
+ const TString base64Url =
+ "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJy"
+ "gpKissLS4vMDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9Q"
+ "UVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eH"
+ "l6e3x9fn-AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6Ch"
+ "oqOkpaanqKmqq6ytrq-wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIyc"
+ "rLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy"
+ "8_T19vf4-fr7_P3-_w,,";
+
+ TestEncodeDecodeIntoString(str, base64, base64Url);
+ TestEncodeStrictDecodeIntoString(str, base64, base64Url);
+ }
+
+ {
+ const TString str = "http://yandex.ru:1234/request?param=value&lll=fff#fragment";
+
+ const TString base64 = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q/cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA==";
+ const TString base64Url = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q_cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA,,";
+
+ TestEncodeDecodeIntoString(str, base64, base64Url);
+ TestEncodeStrictDecodeIntoString(str, base64, base64Url);
+ }
+ }
+
+ // Decoding of valid inputs by both decoders, plus the error cases: wrong
+ // length for both, and misplaced padding or invalid bytes for the strict one.
+ Y_UNIT_TEST(TestDecode) {
+ // Length not divisible by 4.
+ UNIT_ASSERT_EXCEPTION(Base64Decode("a"), yexception);
+ UNIT_ASSERT_EXCEPTION(Base64StrictDecode("a"), yexception);
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64Decode(""), "");
+ UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode(""), "");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64Decode("MTI="), "12");
+ UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("MTI="), "12");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64Decode("QQ=="), "A");
+ UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("QQ=="), "A");
+
+ // Padding in the second position of a quad.
+ UNIT_ASSERT_EXCEPTION(Base64StrictDecode("M=I="), yexception);
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64Decode("dnluZHg="), "vyndx");
+ UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("dnluZHg="), "vyndx");
+
+ // A padded quad in the middle of the input is accepted by the strict decoder.
+ UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("dnluZHg=dmlkZW8="), "vyndxvideo");
+
+ // Padding in the third position followed by a non-padding character.
+ UNIT_ASSERT_EXCEPTION(Base64StrictDecode("aHR0cDovL2ltZy5tZWdhLXBvcm5vLnJ1Lw=a"), yexception);
+
+ UNIT_ASSERT_EXCEPTION(Base64StrictDecode("aHh=="), yexception);
+ UNIT_ASSERT_EXCEPTION(Base64StrictDecode("\1\1\1\2"), yexception);
+ }
+
+ // Base64DecodeUneven must give the same result for '=' padding, ',' padding
+ // and omitted padding.
+ Y_UNIT_TEST(TestDecodeUneven) {
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven(""), "");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("YWFh"), "aaa");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("MTI="), "12");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("MTI,"), "12");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("MTI"), "12");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("QQ=="), "A");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("QQ,,"), "A");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("QQ"), "A");
+
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("dnluZHg="), "vyndx");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("dnluZHg,"), "vyndx");
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven("dnluZHg"), "vyndx");
+ }
+
+ // Round-trips 240000 bytes produced by rand() through Base64Encode and both
+ // decoders.
+ Y_UNIT_TEST(TestDecodeRandom) {
+     TString input;
+     constexpr size_t testSize = 240000;
+     for (size_t i = 0; i < testSize; ++i) {
+         input.push_back(rand() % 256);
+     }
+     // The unused local `output` from the original test has been removed.
+     TString encoded = Base64Encode(input);
+     UNIT_ASSERT_VALUES_EQUAL(Base64Decode(encoded), input);
+     UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode(encoded), input);
+ }
+
+ // Round-trips a string holding every byte value from 0x00 to 0x7F against its
+ // known encoding.
+ Y_UNIT_TEST(TestAllPossibleOctets) {
+ const TString x("\0\x01\x02\x03\x04\x05\x06\x07\b\t\n\x0B\f\r\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7F"sv);
+ const TString xEnc = "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZHSElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eHl6e3x9fn8=";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // One-byte input: the encoding ends with two '=' characters.
+ Y_UNIT_TEST(TestTwoPaddingCharacters) {
+ const TString x("a");
+ const TString xEnc = "YQ==";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // Two-byte input: the encoding ends with one '=' character.
+ Y_UNIT_TEST(TestOnePaddingCharacter) {
+ const TString x("aa");
+ const TString xEnc = "YWE=";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ // Three-byte input: the encoding has no padding.
+ Y_UNIT_TEST(TestNoPaddingCharacters) {
+ const TString x("aaa");
+ const TString xEnc = "YWFh";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ Y_UNIT_TEST(TestTrailingZero) {
+ const TString x("foo\0"sv);
+ const TString xEnc = "Zm9vAA==";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ Y_UNIT_TEST(TestTwoTrailingZeroes) {
+ const TString x("foo\0\0"sv);
+ const TString xEnc = "Zm9vAAA=";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ Y_UNIT_TEST(TestZero) {
+ const TString x("\0"sv);
+ const TString xEnc = "AA==";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ Y_UNIT_TEST(TestSymbolsAfterZero) {
+ const TString x("\0a"sv);
+ const TString xEnc = "AGE=";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ Y_UNIT_TEST(TestEmptyString) {
+ const TString x = "";
+ const TString xEnc = "";
+ const TString y = Base64Decode(xEnc);
+ const TString yEnc = Base64Encode(x);
+ UNIT_ASSERT_VALUES_EQUAL(x, y);
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ }
+
+ Y_UNIT_TEST(TestBackendsConsistencyOnRandomData) { // every pair of usable backends must encode and decode the same random inputs identically
+ constexpr size_t TEST_CASES_COUNT = 1000;
+ constexpr size_t MAX_DATA_SIZE = 1000;
+ TFastRng<ui32> prng{42}; // constant seed (42)
+ TVector<TString> xs{TEST_CASES_COUNT};
+ TString xEnc;
+ TString xDec;
+ TString yEnc;
+ TString yDec;
+
+ for (auto& x : xs) { // fill every test case with 0..MAX_DATA_SIZE-1 random bytes
+ const size_t size = prng() % MAX_DATA_SIZE;
+ for (size_t j = 0; j < size; ++j) {
+ x += static_cast<char>(prng() % 256);
+ }
+ }
+
+ static const auto IMPLS = NB64Etalon::GetImpls();
+ for (size_t i = 0; i < static_cast<size_t>(NB64Etalon::TImpls::MAX_IMPL); ++i) {
+ for (size_t j = 0; j < static_cast<size_t>(NB64Etalon::TImpls::MAX_IMPL); ++j) {
+ const auto ei = static_cast<NB64Etalon::TImpls::EImpl>(i);
+ const auto ej = static_cast<NB64Etalon::TImpls::EImpl>(j);
+ const auto impl = IMPLS.Impl[i];
+ const auto otherImpl = IMPLS.Impl[j];
+ if (!impl.Encode || !impl.Decode || !otherImpl.Encode || !otherImpl.Decode) { // both callbacks of both backends are invoked below, so skip the pair if any one is missing
+ continue;
+ }
+
+ for (const auto& x : xs) {
+ impl.Encode(x, xEnc);
+ impl.Decode(xEnc, xDec);
+ Y_ENSURE(x == xDec, "something is wrong with " << ei << " implementation");
+
+ otherImpl.Encode(x, yEnc);
+ otherImpl.Decode(xEnc, yDec); // NOTE(review): decodes xEnc rather than yEnc - presumably an intentional cross-backend check; confirm
+ Y_ENSURE(x == yDec, "something is wrong with " << ej << " implementation");
+
+ UNIT_ASSERT_VALUES_EQUAL(xEnc, yEnc);
+ UNIT_ASSERT_VALUES_EQUAL(xDec, yDec);
+ }
+ }
+ }
+ }
+
+ Y_UNIT_TEST(TestIfEncodedDataIsZeroTerminatedOnRandomData) { // the raw-buffer Base64Encode overload must leave a '\0' at the position it returns
+ constexpr size_t TEST_CASES_COUNT = 1000;
+ constexpr size_t MAX_DATA_SIZE = 1000;
+ TFastRng<ui32> prng{42}; // constant seed (42)
+ TString x;
+ TVector<char> buf;
+ for (size_t i = 0; i < TEST_CASES_COUNT; ++i) {
+ const size_t size = prng() % MAX_DATA_SIZE;
+ x.clear(); // x is reused across iterations
+ for (size_t j = 0; j < size; ++j) {
+ x += static_cast<char>(prng() % 256);
+ }
+
+ buf.assign(Base64EncodeBufSize(x.size()), Max<char>()); // pre-fill with Max<char>() so a '\0' found later was written by the encoder
+ const auto* const xEncEnd = Base64Encode(buf.data(), (const unsigned char*)x.data(), x.size());
+ UNIT_ASSERT_VALUES_EQUAL(*xEncEnd, '\0');
+ }
+ }
+
+ Y_UNIT_TEST(TestDecodeURLEncodedNoPadding) {
+ const auto x = "123";
+ const auto xDec = Base64Decode("MTIz");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ Y_UNIT_TEST(TestDecodeURLEncodedOnePadding) {
+ const auto x = "12";
+ const auto xDec = Base64Decode("MTI,");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ Y_UNIT_TEST(TestDecodeURLEncodedTwoPadding) {
+ const auto x = "1";
+ const auto xDec = Base64Decode("MQ,,");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ Y_UNIT_TEST(TestDecodeNoPaddingLongString) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?a";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9h");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ Y_UNIT_TEST(TestDecodeOnePaddingLongString) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz8=");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ Y_UNIT_TEST(TestDecodeTwoPaddingLongString) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?aa";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9hYQ==");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ Y_UNIT_TEST(TestDecodeURLEncodedNoPaddingLongString) { // NOTE(review): input and expectation are identical to TestDecodeNoPaddingLongString; without padding there is no ',' to exercise
+ const auto x = "How do I convert between big-endian and little-endian values in C++?a";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9h");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ Y_UNIT_TEST(TestDecodeURLEncodedOnePaddingLongString) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz8,");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
+ Y_UNIT_TEST(TestDecodeURLEncodedTwoPaddingLongString) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?aa";
+ const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9hYQ,,");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+}
diff --git a/library/cpp/string_utils/base64/bench/main.cpp b/library/cpp/string_utils/base64/bench/main.cpp
new file mode 100644
index 0000000000..10e09bc1c7
--- /dev/null
+++ b/library/cpp/string_utils/base64/bench/main.cpp
@@ -0,0 +1,326 @@
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <library/cpp/testing/benchmark/bench.h>
+
+#include <util/generic/buffer.h>
+#include <util/generic/singleton.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/generic/xrange.h>
+#include <util/generic/yexception.h>
+#include <util/random/random.h>
+
+#include <array>
+
+static TString GenerateRandomData(const size_t minSize, const size_t maxSize) { // returns a string of random chars whose length lies in [minSize, maxSize]
+ Y_ENSURE(minSize <= maxSize, "wow"); // the size range must not be inverted
+ TString r;
+ for (size_t i = 0; i < minSize; ++i) { // mandatory part: exactly minSize characters
+ r.push_back(RandomNumber<char>());
+ }
+
+ if (minSize == maxSize) { // fixed-size request: no random tail to add
+ return r;
+ }
+
+ const size_t size = RandomNumber<size_t>() % (maxSize - minSize + 1); // tail length in [0, maxSize - minSize]
+ for (size_t i = 0; i < size; ++i) {
+ r.push_back(RandomNumber<char>());
+ }
+
+ return r;
+}
+
+template <size_t N> // N: number of strings to generate
+static std::array<TString, N> GenerateRandomDataVector(const size_t minSize, const size_t maxSize) { // returns N independent GenerateRandomData(minSize, maxSize) results
+ std::array<TString, N> r;
+ for (size_t i = 0; i < N; ++i) {
+ r[i] = GenerateRandomData(minSize, maxSize);
+ }
+
+ return r;
+}
+
+template <size_t N> // N: number of strings in the array
+static std::array<TString, N> Encode(const std::array<TString, N>& d) { // returns the element-wise Base64Encode of d, in the same order
+ std::array<TString, N> r;
+ for (size_t i = 0, iEnd = d.size(); i < iEnd; ++i) {
+ r[i] = Base64Encode(d[i]);
+ }
+
+ return r;
+}
+
+namespace {
+ template <size_t N, size_t MinSize, size_t MaxSize>
+ struct TRandomDataHolder { // N random inputs of length [MinSize, MaxSize], their Base64 encodings, and output buffers sized for the benchmarks
+ TRandomDataHolder()
+ : Data(GenerateRandomDataVector<N>(MinSize, MaxSize))
+ , DataEncoded(Encode<N>(Data)) // relies on Data being declared, and therefore initialized, before DataEncoded
+ {
+ for (size_t i = 0; i < N; ++i) {
+ const size_t size = Data[i].size();
+ const size_t sizeEnc = DataEncoded[i].size();
+ PlaceToEncode[i].Resize(Base64EncodeBufSize(size)); // output buffers are sized once here; the Bench* loops only use .data()
+ PlaceToDecode[i].Resize(Base64DecodeBufSize(sizeEnc));
+ }
+ }
+
+ static constexpr size_t Size = N; // element count, read by the Bench* helpers as d.Size
+ const std::array<TString, N> Data; // raw inputs
+ const std::array<TString, N> DataEncoded; // Base64Encode(Data[i])
+ std::array<TBuffer, N> PlaceToEncode; // destination buffers for encoding Data[i]
+ std::array<TBuffer, N> PlaceToDecode; // destination buffers for decoding DataEncoded[i]
+ };
+
+ template <size_t N, size_t Size>
+ using TFixedSizeRandomDataHolder = TRandomDataHolder<N, Size, Size>;
+
+ using FSRDH_1 = TFixedSizeRandomDataHolder<10, 1>;
+ using FSRDH_2 = TFixedSizeRandomDataHolder<10, 2>;
+ using FSRDH_4 = TFixedSizeRandomDataHolder<10, 4>;
+ using FSRDH_8 = TFixedSizeRandomDataHolder<10, 8>;
+ using FSRDH_16 = TFixedSizeRandomDataHolder<10, 16>;
+ using FSRDH_32 = TFixedSizeRandomDataHolder<10, 32>;
+ using FSRDH_64 = TFixedSizeRandomDataHolder<10, 64>;
+ using FSRDH_128 = TFixedSizeRandomDataHolder<10, 128>;
+ using FSRDH_1024 = TFixedSizeRandomDataHolder<10, 1024>;
+ using FSRDH_10240 = TFixedSizeRandomDataHolder<10, 10240>;
+ using FSRDH_102400 = TFixedSizeRandomDataHolder<10, 102400>;
+ using FSRDH_1048576 = TFixedSizeRandomDataHolder<10, 1048576>;
+ using FSRDH_10485760 = TFixedSizeRandomDataHolder<10, 10485760>;
+}
+
+template <typename T> // T: a data holder exposing Size, Data and PlaceToEncode (see TRandomDataHolder)
+static inline void BenchEncode(T& d, const NBench::NCpu::TParams& iface) { // per iteration, encodes every d.Data[i] into d.PlaceToEncode[i]
+ for (const auto it : xrange(iface.Iterations())) {
+ Y_UNUSED(it); // the iteration index itself is not needed
+ for (size_t i = 0; i < d.Size; ++i) {
+ NBench::Escape(d.PlaceToEncode[i].data()); // presumably marks the buffer as observable to the optimizer - confirm in bench.h
+ Y_DO_NOT_OPTIMIZE_AWAY(
+ Base64Encode(d.PlaceToEncode[i].data(), (const unsigned char*)d.Data[i].data(), d.Data[i].size()));
+ NBench::Clobber(); // presumably a compiler memory barrier - confirm in bench.h
+ }
+ }
+}
+
+template <typename T> // T: a data holder exposing Size, Data and PlaceToEncode (see TRandomDataHolder)
+static inline void BenchEncodeUrl(T& d, const NBench::NCpu::TParams& iface) { // same loop as BenchEncode, but calls Base64EncodeUrl
+ for (const auto it : xrange(iface.Iterations())) {
+ Y_UNUSED(it); // the iteration index itself is not needed
+ for (size_t i = 0; i < d.Size; ++i) {
+ NBench::Escape(d.PlaceToEncode[i].data()); // presumably marks the buffer as observable to the optimizer - confirm in bench.h
+ Y_DO_NOT_OPTIMIZE_AWAY(
+ Base64EncodeUrl(d.PlaceToEncode[i].data(), (const unsigned char*)d.Data[i].data(), d.Data[i].size()));
+ NBench::Clobber(); // presumably a compiler memory barrier - confirm in bench.h
+ }
+ }
+}
+
+template <typename T> // T: a data holder exposing Size, DataEncoded and PlaceToDecode (see TRandomDataHolder)
+static inline void BenchDecode(T& d, const NBench::NCpu::TParams& iface) { // per iteration, decodes every d.DataEncoded[i] into d.PlaceToDecode[i]
+ for (const auto it : xrange(iface.Iterations())) {
+ Y_UNUSED(it); // the iteration index itself is not needed
+ for (size_t i = 0; i < d.Size; ++i) {
+ NBench::Escape(d.PlaceToDecode[i].data()); // presumably marks the buffer as observable to the optimizer - confirm in bench.h
+ Y_DO_NOT_OPTIMIZE_AWAY(
+ Base64Decode(d.PlaceToDecode[i].data(), (const char*)d.DataEncoded[i].data(), (const char*)(d.DataEncoded[i].data() + d.DataEncoded[i].size()))); // input is passed as a [begin, end) pointer pair
+ NBench::Clobber(); // presumably a compiler memory barrier - confirm in bench.h
+ }
+ }
+}
+
+Y_CPU_BENCHMARK(EncodeF1, iface) {
+ auto& d = *Singleton<FSRDH_1>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF1, iface) {
+ auto& d = *Singleton<FSRDH_1>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF2, iface) {
+ auto& d = *Singleton<FSRDH_2>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF2, iface) {
+ auto& d = *Singleton<FSRDH_2>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF4, iface) {
+ auto& d = *Singleton<FSRDH_4>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF4, iface) {
+ auto& d = *Singleton<FSRDH_4>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF8, iface) {
+ auto& d = *Singleton<FSRDH_8>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF8, iface) {
+ auto& d = *Singleton<FSRDH_8>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF16, iface) {
+ auto& d = *Singleton<FSRDH_16>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF16, iface) {
+ auto& d = *Singleton<FSRDH_16>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF32, iface) {
+ auto& d = *Singleton<FSRDH_32>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF32, iface) {
+ auto& d = *Singleton<FSRDH_32>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF64, iface) {
+ auto& d = *Singleton<FSRDH_64>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF64, iface) {
+ auto& d = *Singleton<FSRDH_64>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF128, iface) {
+ auto& d = *Singleton<FSRDH_128>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF128, iface) {
+ auto& d = *Singleton<FSRDH_128>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF1024, iface) {
+ auto& d = *Singleton<FSRDH_1024>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF1024, iface) {
+ auto& d = *Singleton<FSRDH_1024>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF10240, iface) {
+ auto& d = *Singleton<FSRDH_10240>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF10240, iface) {
+ auto& d = *Singleton<FSRDH_10240>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF102400, iface) {
+ auto& d = *Singleton<FSRDH_102400>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF102400, iface) {
+ auto& d = *Singleton<FSRDH_102400>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF1048576, iface) {
+ auto& d = *Singleton<FSRDH_1048576>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF1048576, iface) {
+ auto& d = *Singleton<FSRDH_1048576>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeF10485760, iface) {
+ auto& d = *Singleton<FSRDH_10485760>();
+ BenchEncode(d, iface);
+}
+
+Y_CPU_BENCHMARK(DecodeF10485760, iface) {
+ auto& d = *Singleton<FSRDH_10485760>();
+ BenchDecode(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF1, iface) {
+ auto& d = *Singleton<FSRDH_1>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF2, iface) {
+ auto& d = *Singleton<FSRDH_2>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF4, iface) {
+ auto& d = *Singleton<FSRDH_4>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF8, iface) {
+ auto& d = *Singleton<FSRDH_8>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF16, iface) {
+ auto& d = *Singleton<FSRDH_16>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF32, iface) {
+ auto& d = *Singleton<FSRDH_32>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF64, iface) {
+ auto& d = *Singleton<FSRDH_64>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF128, iface) {
+ auto& d = *Singleton<FSRDH_128>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF1024, iface) {
+ auto& d = *Singleton<FSRDH_1024>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF10240, iface) {
+ auto& d = *Singleton<FSRDH_10240>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF102400, iface) {
+ auto& d = *Singleton<FSRDH_102400>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF1048576, iface) {
+ auto& d = *Singleton<FSRDH_1048576>();
+ BenchEncodeUrl(d, iface);
+}
+
+Y_CPU_BENCHMARK(EncodeUrlF10485760, iface) {
+ auto& d = *Singleton<FSRDH_10485760>();
+ BenchEncodeUrl(d, iface);
+}
diff --git a/library/cpp/string_utils/base64/bench/metrics/main.py b/library/cpp/string_utils/base64/bench/metrics/main.py
new file mode 100644
index 0000000000..c35fd6d8cd
--- /dev/null
+++ b/library/cpp/string_utils/base64/bench/metrics/main.py
@@ -0,0 +1,5 @@
+import yatest.common as yc
+
+
+def test_export_metrics(metrics):
+ metrics.set_benchmark(yc.execute_benchmark('library/cpp/string_utils/base64/bench/bench'))
diff --git a/library/cpp/string_utils/base64/bench/metrics/ya.make b/library/cpp/string_utils/base64/bench/metrics/ya.make
new file mode 100644
index 0000000000..b0406516c3
--- /dev/null
+++ b/library/cpp/string_utils/base64/bench/metrics/ya.make
@@ -0,0 +1,20 @@
+OWNER(
+ yazevnul
+ g:util
+)
+
+PY2TEST()
+
+SIZE(LARGE)
+
+TAG(
+ ya:force_sandbox
+ sb:intel_e5_2660v1
+ ya:fat
+)
+
+TEST_SRCS(main.py)
+
+DEPENDS(library/cpp/string_utils/base64/bench)
+
+END()
diff --git a/library/cpp/string_utils/base64/bench/ya.make b/library/cpp/string_utils/base64/bench/ya.make
new file mode 100644
index 0000000000..5ac5f3d6ce
--- /dev/null
+++ b/library/cpp/string_utils/base64/bench/ya.make
@@ -0,0 +1,16 @@
+OWNER(
+ yazevnul
+ g:util
+)
+
+Y_BENCHMARK()
+
+SRCS(
+ main.cpp
+)
+
+PEERDIR(
+ library/cpp/string_utils/base64
+)
+
+END()
diff --git a/library/cpp/string_utils/base64/fuzz/generic/ya.make b/library/cpp/string_utils/base64/fuzz/generic/ya.make
new file mode 100644
index 0000000000..d155e2b0a0
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/generic/ya.make
@@ -0,0 +1,12 @@
+OWNER(
+ yazevnul
+ g:util
+)
+
+FUZZ()
+
+PEERDIR(
+ library/cpp/string_utils/base64/fuzz/lib
+)
+
+END()
diff --git a/library/cpp/string_utils/base64/fuzz/lib/main.cpp b/library/cpp/string_utils/base64/fuzz/lib/main.cpp
new file mode 100644
index 0000000000..28547ae7a5
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/lib/main.cpp
@@ -0,0 +1,13 @@
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <util/system/types.h>
+#include <util/system/yassert.h>
+
+extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) { // fuzz entry point: encode-then-decode must reproduce the input bytes
+ const TStringBuf example{reinterpret_cast<const char*>(data), size}; // view over the fuzzer-provided bytes
+ const auto converted = Base64Decode(Base64Encode(example));
+
+ Y_VERIFY(example == converted); // a round-trip mismatch is the failure this target looks for
+
+ return 0;
+}
diff --git a/library/cpp/string_utils/base64/fuzz/lib/ya.make b/library/cpp/string_utils/base64/fuzz/lib/ya.make
new file mode 100644
index 0000000000..7b981b86a3
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/lib/ya.make
@@ -0,0 +1,16 @@
+OWNER(
+ yazevnul
+ g:util
+)
+
+LIBRARY()
+
+SRCS(
+ main.cpp
+)
+
+PEERDIR(
+ library/cpp/string_utils/base64
+)
+
+END()
diff --git a/library/cpp/string_utils/base64/fuzz/uneven/main.cpp b/library/cpp/string_utils/base64/fuzz/uneven/main.cpp
new file mode 100644
index 0000000000..915e81a7e5
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/uneven/main.cpp
@@ -0,0 +1,10 @@
+#include <library/cpp/string_utils/base64/base64.h>
+
+#include <util/system/types.h>
+#include <util/system/yassert.h>
+
+extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) { // fuzz entry point: feeds arbitrary bytes to Base64DecodeUneven
+ const TStringBuf example{reinterpret_cast<const char*>(data), size}; // view over the fuzzer-provided bytes
+ Y_UNUSED(Base64DecodeUneven(example)); // the decoded value is discarded; no property of the output is checked
+ return 0;
+}
diff --git a/library/cpp/string_utils/base64/fuzz/uneven/ya.make b/library/cpp/string_utils/base64/fuzz/uneven/ya.make
new file mode 100644
index 0000000000..18cb18ef52
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/uneven/ya.make
@@ -0,0 +1,15 @@
+FUZZ()
+
+OWNER(
+ g:util
+)
+
+SRCS(
+ main.cpp
+)
+
+PEERDIR(
+ library/cpp/string_utils/base64
+)
+
+END()
diff --git a/library/cpp/string_utils/base64/fuzz/ya.make b/library/cpp/string_utils/base64/fuzz/ya.make
new file mode 100644
index 0000000000..bef82061c4
--- /dev/null
+++ b/library/cpp/string_utils/base64/fuzz/ya.make
@@ -0,0 +1,10 @@
+OWNER(
+ yazevnul
+ g:util
+)
+
+RECURSE(
+ generic
+ lib
+ uneven
+)
diff --git a/library/cpp/string_utils/base64/ut/ya.make b/library/cpp/string_utils/base64/ut/ya.make
new file mode 100644
index 0000000000..9b61241f0e
--- /dev/null
+++ b/library/cpp/string_utils/base64/ut/ya.make
@@ -0,0 +1,22 @@
+OWNER(
+ g:util
+ yazevnul
+)
+
+UNITTEST_FOR(library/cpp/string_utils/base64)
+
+SRCS(
+ base64_ut.cpp
+ base64_decode_uneven_ut.cpp
+)
+
+PEERDIR(
+ contrib/libs/base64/avx2
+ contrib/libs/base64/ssse3
+ contrib/libs/base64/neon32
+ contrib/libs/base64/neon64
+ contrib/libs/base64/plain32
+ contrib/libs/base64/plain64
+)
+
+END()
diff --git a/library/cpp/string_utils/base64/ya.make b/library/cpp/string_utils/base64/ya.make
new file mode 100644
index 0000000000..f5258c446c
--- /dev/null
+++ b/library/cpp/string_utils/base64/ya.make
@@ -0,0 +1,23 @@
+OWNER(
+ g:util
+ yazevnul
+)
+
+LIBRARY()
+
+SRCS(
+ base64.cpp
+)
+
+PEERDIR(
+ contrib/libs/base64/avx2
+ contrib/libs/base64/ssse3
+ contrib/libs/base64/neon32
+ contrib/libs/base64/neon64
+ contrib/libs/base64/plain32
+ contrib/libs/base64/plain64
+)
+
+END()
+
+RECURSE_FOR_TESTS(ut)
diff --git a/library/cpp/string_utils/indent_text/indent_text.cpp b/library/cpp/string_utils/indent_text/indent_text.cpp
new file mode 100644
index 0000000000..09a4f6bca8
--- /dev/null
+++ b/library/cpp/string_utils/indent_text/indent_text.cpp
@@ -0,0 +1,25 @@
+#include "indent_text.h"
+
+#include <util/stream/str.h>
+
+TString IndentText(TStringBuf text, TStringBuf indent) {
+ if (text.empty())
+ return TString(); // nothing to indent; no trailing newline is added either
+
+ TStringStream out;
+ out.Reserve(text.size() + 20);
+
+ // Set whenever the next character starts a new line and therefore needs the indent.
+ bool atLineStart = true;
+ for (const char ch : text) {
+ if (atLineStart)
+ out << indent;
+ out << ch;
+ atLineStart = (ch == '\n');
+ }
+ // Make sure the result always ends with a line break.
+ if (!atLineStart)
+ out << '\n';
+
+ return out.Str();
+}
diff --git a/library/cpp/string_utils/indent_text/indent_text.h b/library/cpp/string_utils/indent_text/indent_text.h
new file mode 100644
index 0000000000..7117d6c0ee
--- /dev/null
+++ b/library/cpp/string_utils/indent_text/indent_text.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/generic/strbuf.h>
+
+TString IndentText(TStringBuf text, TStringBuf indent = TStringBuf(" "));
diff --git a/library/cpp/string_utils/indent_text/ya.make b/library/cpp/string_utils/indent_text/ya.make
new file mode 100644
index 0000000000..cd0ed9ec61
--- /dev/null
+++ b/library/cpp/string_utils/indent_text/ya.make
@@ -0,0 +1,9 @@
+LIBRARY()
+
+OWNER(nga)
+
+SRCS(
+ indent_text.cpp
+)
+
+END()
diff --git a/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.cpp b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.cpp
new file mode 100644
index 0000000000..8883d7df07
--- /dev/null
+++ b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.cpp
@@ -0,0 +1 @@
+#include "levenshtein_diff.h"
diff --git a/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h
new file mode 100644
index 0000000000..8a240bfed8
--- /dev/null
+++ b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h
@@ -0,0 +1,192 @@
+#pragma once
+
+#include <util/draft/matrix.h>
+#include <util/generic/algorithm.h>
+#include <util/generic/vector.h>
+#include <util/system/yassert.h>
+
+#include <type_traits>
+#include <utility>
+
+namespace NLevenshtein {
+ enum EEditMoveType {
+ EMT_SPECIAL,
+ EMT_PRESERVE,
+ EMT_REPLACE,
+ EMT_DELETE,
+ EMT_INSERT
+ };
+
+ inline bool IsImportantEditMove(EEditMoveType p) { // true for moves that change the text: replace, delete, insert
+ return (p != EMT_SPECIAL && p != EMT_PRESERVE);
+ }
+
+ inline void MakeMove(EEditMoveType t, int& p1, int& p2) { // advances position p1 and/or p2 according to move t
+ switch (t) {
+ case EMT_PRESERVE:
+ case EMT_REPLACE: // both moves advance both positions
+ p1++;
+ p2++;
+ break;
+ case EMT_DELETE: // advances the first position only
+ p1++;
+ break;
+ case EMT_INSERT: // advances the second position only
+ p2++;
+ break;
+ default: // EMT_SPECIAL: neither position changes
+ break;
+ }
+ }
+
+ using TEditChain = TVector<EEditMoveType>;
+
+ template <typename TArgType>
+ struct TWeightOneUnaryGetter {
+ int operator()(const TArgType&) const {
+ return 1;
+ }
+ };
+
+ template <typename TArgType>
+ struct TWeightOneBinaryGetter {
+ int operator()(const TArgType&, const TArgType&) const {
+ return 1;
+ }
+ };
+
+ template <typename TStringType> // element type obtained by indexing a const TStringType, with references and cv-qualifiers stripped
+ using TCharType = typename std::decay_t<decltype(std::declval<const TStringType&>()[0])>; // declval instead of TStringType(): the string type need not be default-constructible
+
+ /// Finds a sequence of "edit moves" turning str1 into str2: the chain is written to res in start-to-end order and, if weight is non-null, its total weight to *weight
+ template <class TStringType, class TWeightType = int,
+ class TReplaceWeigher = TWeightOneBinaryGetter<TCharType<TStringType>>,
+ class TDeleteWeigher = TWeightOneUnaryGetter<TCharType<TStringType>>,
+ class TInsertWeigher = TWeightOneUnaryGetter<TCharType<TStringType>>
+ >
+ void GetEditChain(const TStringType& str1, const TStringType& str2, TEditChain& res, TWeightType* weight = nullptr,
+ const TReplaceWeigher& replaceWeigher = TReplaceWeigher(),
+ const TDeleteWeigher& deleteWeigher = TDeleteWeigher(),
+ const TInsertWeigher& insertWeigher = TInsertWeigher())
+ {
+ int l1 = (int)str1.size(); // NOTE(review): sizes are narrowed to int; assumes both strings fit
+ int l2 = (int)str2.size();
+
+ TMatrix<std::pair<TWeightType, EEditMoveType>> ma(l1 + 1, l2 + 1); /// ma[i][j].first = diff(str1[0..i-1], str2[0..j-1]); .second = the last move on that path
+ ma[0][0] = std::make_pair(0, EMT_SPECIAL); // starting point
+ for (int i = 1; i <= l1; i++) { // first column: a prefix of str1 against the empty string is deletions only
+ ma[i][0] = std::make_pair(ma[i - 1][0].first + deleteWeigher(str1[i - 1]), EMT_DELETE);
+ }
+ for (int i = 1; i <= l2; i++) { // first row: the empty string against a prefix of str2 is insertions only
+ ma[0][i] = std::make_pair(ma[0][i - 1].first + insertWeigher(str2[i - 1]), EMT_INSERT);
+ }
+ // Here goes the basic Levenshtein algorithm
+ for (int i = 1; i <= l1; i++) {
+ for (int j = 1; j <= l2; j++) {
+ if (str1[i - 1] == str2[j - 1]) { // diagonal step: equal characters are preserved at no cost
+ ma[i][j] = std::make_pair(ma[i - 1][j - 1].first, EMT_PRESERVE);
+ } else {
+ const TWeightType replaceWeight = replaceWeigher(str1[i - 1], str2[j - 1]);
+ Y_ASSERT(replaceWeight >= 0);
+ ma[i][j] = std::make_pair(ma[i - 1][j - 1].first + replaceWeight, EMT_REPLACE);
+ }
+
+ if (ma[i][j].first > ma[i - 1][j].first) { // with non-negative weights a delete can only help if the cell above is strictly cheaper
+ const TWeightType deleteWeight = deleteWeigher(str1[i - 1]);
+ Y_ASSERT(deleteWeight >= 0);
+ const TWeightType deletePathWeight = ma[i - 1][j].first + deleteWeight;
+ if (deletePathWeight <= ma[i][j].first) { // '<=': on a tie the delete replaces the diagonal move
+ ma[i][j] = std::make_pair(deletePathWeight, EMT_DELETE);
+ }
+ }
+
+ if (ma[i][j].first > ma[i][j - 1].first) { // same pruning for the insert, against the cell to the left
+ const TWeightType insertWeight = insertWeigher(str2[j - 1]);
+ Y_ASSERT(insertWeight >= 0);
+ const TWeightType insertPathWeight = ma[i][j - 1].first + insertWeight;
+ if (insertPathWeight <= ma[i][j].first) { // '<=': on a tie the insert replaces the current move
+ ma[i][j] = std::make_pair(insertPathWeight, EMT_INSERT);
+ }
+ }
+ }
+ }
+ // Tracing the path from final point
+ res.clear();
+ res.reserve(Max<size_t>(l1, l2));
+ for (int i = l1, j = l2; ma[i][j].second != EMT_SPECIAL;) { // walk back until the EMT_SPECIAL start cell ma[0][0]
+ res.push_back(ma[i][j].second);
+ switch (ma[i][j].second) {
+ case EMT_PRESERVE:
+ case EMT_REPLACE:
+ --i;
+ --j;
+ break;
+ case EMT_DELETE:
+ --i;
+ break;
+ case EMT_INSERT:
+ --j;
+ break;
+ default:
+ // TODO: throw exception
+ break; // NOTE(review): if this branch were ever taken, i and j would not change and the loop would not terminate
+ }
+ }
+ std::reverse(res.begin(), res.end()); // moves were collected end-to-start
+
+ if (weight != nullptr) {
+ *weight = ma[l1][l2].first;
+ }
+ }
+
+ template <class TStringType>
+ size_t Distance(const TStringType& str1, const TStringType& str2) { // number of replace/delete/insert moves in the edit chain built with the default weighers (each returns 1)
+ TEditChain editChain;
+ GetEditChain(str1, str2, editChain);
+ size_t result = 0;
+ for (auto edit : editChain) {
+ if (IsImportantEditMove(edit)) // preserve moves do not count
+ result++;
+ }
+ return result;
+ }
+
+ /// One substring replacement of the str1->str2 transformation, as produced by GetStringReplacements
+ struct TReplacement {
+ int CorrectOffset, CorrectLength, MisspelledOffset, MisspelledLength; // GetStringReplacements fills Correct* from str1 positions and Misspelled* from str2 positions
+ TReplacement() // all offsets and lengths start at zero
+ : CorrectOffset(0)
+ , CorrectLength(0)
+ , MisspelledOffset(0)
+ , MisspelledLength(0)
+ {
+ }
+ TReplacement(int correctOffset, int correctLength, int misspelledOffset, int misspelledLength) // member-wise initialization
+ : CorrectOffset(correctOffset)
+ , CorrectLength(correctLength)
+ , MisspelledOffset(misspelledOffset)
+ , MisspelledLength(misspelledLength)
+ {
+ }
+ };
+
+ template <class TStringType>
+ void GetStringReplacements(const TStringType& str1, const TStringType& str2, TVector<TReplacement>& res) { // fills res with one TReplacement per maximal run of consecutive non-preserving moves
+ TEditChain editChain;
+ GetEditChain(str1, str2, editChain);
+ editChain.push_back(EMT_SPECIAL); // sentinel: stops the inner do-while before it can step past the end of the chain
+ int c1 = 0, c2 = 0; // current positions in str1 and str2
+ res.clear();
+ for (TEditChain::const_iterator it = editChain.begin(); it != editChain.end(); it++) {
+ if (IsImportantEditMove(*it)) {
+ int sc1 = c1, sc2 = c2; // positions at which the current run starts
+ do {
+ MakeMove(*it, c1, c2);
+ ++it;
+ } while (IsImportantEditMove(*it));
+ res.push_back(TReplacement(sc1, c1 - sc1, sc2, c2 - sc2));
+ }
+ MakeMove(*it, c1, c2); // *it is a preserve move or the sentinel here; MakeMove does nothing for the sentinel
+ }
+ }
+}
diff --git a/library/cpp/string_utils/levenshtein_diff/levenshtein_diff_ut.cpp b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff_ut.cpp
new file mode 100644
index 0000000000..cf0f78637f
--- /dev/null
+++ b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff_ut.cpp
@@ -0,0 +1,190 @@
+#include "levenshtein_diff.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/string.h>
+
+namespace {
+
+ float unaryZeroWeigher(const char&) { // delete/insert weigher: every character costs nothing
+ return 0.0f;
+ }
+
+ float unaryMaxWeigher(const char&) { // delete/insert weigher: every character costs 1
+ return 1.0f;
+ }
+
+ float binaryZeroWeigher(const char&, const char&) { // replace weigher: every substitution costs nothing
+ return 0.0f;
+ }
+
+ float binaryMaxWeigher(const char&, const char&) { // replace weigher: every substitution costs 1
+ return 1.0f;
+ }
+
+}
+
+Y_UNIT_TEST_SUITE(Levenstein) {
+ Y_UNIT_TEST(Distance) {
+ UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::Distance(TStringBuf("hello"), TStringBuf("hulloah")), 3);
+ UNIT_ASSERT_VALUES_EQUAL(NLevenshtein::Distance(TStringBuf("yeoman"), TStringBuf("yo man")), 2);
+ }
+}
+
+Y_UNIT_TEST_SUITE(WeightedLevenstein) {
+ Y_UNIT_TEST(EqualStrings) {
+ NLevenshtein::TEditChain chain;
+ float distance = 0.0f;
+ NLevenshtein::GetEditChain(TString("12345"), TString("12345"), chain, &distance, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(distance, 0.0f);
+ UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5);
+ }
+
+ Y_UNIT_TEST(EmptyStrings) {
+ NLevenshtein::TEditChain chain;
+ float distance = 0.0f;
+ NLevenshtein::GetEditChain(TString(""), TString(""), chain, &distance, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(distance, 0.0f);
+ UNIT_ASSERT_VALUES_EQUAL(chain.size(), 0);
+ }
+
+ Y_UNIT_TEST(InsertsOnly) {
+ auto unaryWeigher = [](const char&) {
+ return 2.0f;
+ };
+ NLevenshtein::TEditChain chain;
+ float distance = 0.0f;
+ NLevenshtein::GetEditChain(TString(""), TString("12345"), chain, &distance, binaryZeroWeigher, unaryZeroWeigher, unaryWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(distance, 10.0f);
+ UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5);
+ }
+
+ Y_UNIT_TEST(DeletionsOnly) {
+ auto unaryWeigher = [](const char&) {
+ return 3.0f;
+ };
+ NLevenshtein::TEditChain chain;
+ float distance = 0.0f;
+ NLevenshtein::GetEditChain(TString("54321"), TString(""), chain, &distance, binaryZeroWeigher, unaryWeigher, unaryZeroWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(distance, 15.0f);
+ UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5);
+ }
+
+ Y_UNIT_TEST(SymmetryCheck) {
+ const TString str1 = "123x5";
+ const TString str2 = "x2345";
+ const float trgDistance = 2.0f;
+ const size_t trgChainLen = 5;
+
+ NLevenshtein::TEditChain chainLeftRight;
+ float distanceLeftRight = 0.0f;
+ NLevenshtein::GetEditChain(str1, str2, chainLeftRight, &distanceLeftRight, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(distanceLeftRight, trgDistance);
+ UNIT_ASSERT_VALUES_EQUAL(chainLeftRight.size(), trgChainLen);
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chainLeftRight[0]), static_cast<int>(NLevenshtein::EMT_REPLACE));
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chainLeftRight[1]), static_cast<int>(NLevenshtein::EMT_PRESERVE));
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chainLeftRight[2]), static_cast<int>(NLevenshtein::EMT_PRESERVE));
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chainLeftRight[3]), static_cast<int>(NLevenshtein::EMT_REPLACE));
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chainLeftRight[4]), static_cast<int>(NLevenshtein::EMT_PRESERVE));
+
+ NLevenshtein::TEditChain chainRightLeft;
+ float distanceRightLeft = 0.0f;
+ NLevenshtein::GetEditChain(str2, str1, chainRightLeft, &distanceRightLeft, binaryMaxWeigher, unaryMaxWeigher, unaryMaxWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(distanceRightLeft, trgDistance);
+ UNIT_ASSERT_VALUES_EQUAL(chainRightLeft.size(), trgChainLen);
+ UNIT_ASSERT(chainRightLeft == chainLeftRight);
+ }
+
+ Y_UNIT_TEST(PreferReplacements) {
+ auto binaryWeigher = [](const char&, const char&) {
+ return 0.0625f;
+ };
+ NLevenshtein::TEditChain chain;
+ float distance = 0.0f;
+ NLevenshtein::GetEditChain(TString("54321"), TString("43210"), chain, &distance, binaryWeigher, unaryMaxWeigher, unaryMaxWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(distance, 0.3125f);
+ UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5);
+ }
+
+ Y_UNIT_TEST(PreferInsertDeletions) {
+ auto unaryWeigher = [](const char&) {
+ return 0.0625f;
+ };
+ NLevenshtein::TEditChain chain;
+ float distance = 0.0f;
+ NLevenshtein::GetEditChain(TString("54321"), TString("98765"), chain, &distance, binaryMaxWeigher, unaryWeigher, unaryWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(distance, 0.5f);
+ UNIT_ASSERT_VALUES_EQUAL(chain.size(), 9);
+ }
+
+ Y_UNIT_TEST(NoXDeletions) {
+ auto unaryWeigher = [](const char& c) {
+ return c == 'x' ? 100.0f : 1.0f;
+ };
+ NLevenshtein::TEditChain chain;
+ float distance = 0.0f;
+ NLevenshtein::GetEditChain(TString("543x1"), TString("5431"), chain, &distance, binaryMaxWeigher, unaryWeigher, unaryMaxWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5);
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[3]), static_cast<int>(NLevenshtein::EMT_REPLACE));
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[4]), static_cast<int>(NLevenshtein::EMT_DELETE));
+ UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f);
+ }
+
+ Y_UNIT_TEST(NoXInsertions) {
+ auto unaryWeigher = [](const char& c) {
+ return c == 'x' ? 100.0f : 1.0f;
+ };
+ NLevenshtein::TEditChain chain;
+ float distance = 0.0f;
+ NLevenshtein::GetEditChain(TString("5431"), TString("543x1"), chain, &distance, binaryMaxWeigher, unaryMaxWeigher, unaryWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(chain.size(), 5);
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[3]), static_cast<int>(NLevenshtein::EMT_REPLACE));
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[4]), static_cast<int>(NLevenshtein::EMT_INSERT));
+ UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f);
+ }
+
+ Y_UNIT_TEST(NoReplacementsOfX) {
+ auto binaryWeigher = [](const char& l, const char&) {
+ return l == 'x' ? 100.0f : 1.0f;
+ };
+ NLevenshtein::TEditChain chain;
+ float distance = 0.0f;
+ NLevenshtein::GetEditChain(TString("5432x"), TString("5432y"), chain, &distance, binaryWeigher, unaryMaxWeigher, unaryMaxWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(chain.size(), 6);
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[4]), static_cast<int>(NLevenshtein::EMT_DELETE));
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[5]), static_cast<int>(NLevenshtein::EMT_INSERT));
+ UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f);
+ }
+
+ Y_UNIT_TEST(NoReplacementsForX) {
+ auto binaryWeigher = [](const char&, const char& r) {
+ return r == 'x' ? 100.0f : 1.0f;
+ };
+ NLevenshtein::TEditChain chain;
+ float distance = 0.0f;
+ NLevenshtein::GetEditChain(TString("y4321"), TString("x4321"), chain, &distance, binaryWeigher, unaryMaxWeigher, unaryMaxWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(chain.size(), 6);
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[0]), static_cast<int>(NLevenshtein::EMT_DELETE));
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[1]), static_cast<int>(NLevenshtein::EMT_INSERT));
+ UNIT_ASSERT_VALUES_EQUAL(distance, 2.0f);
+ }
+
+ Y_UNIT_TEST(SimilarOperationPriorities) {
+ auto replaceWeigher = [](const char&, const char&) {
+ return 0.5f;
+ };
+ auto deleteWeigher = [](const char&) {
+ return 0.2f;
+ };
+ auto insertWeigher = [](const char&) {
+ return 0.9f;
+ };
+ NLevenshtein::TEditChain chain;
+ float distance = 0.0f;
+ NLevenshtein::GetEditChain(TString("y0"), TString("0x"), chain, &distance, replaceWeigher, deleteWeigher, insertWeigher);
+ UNIT_ASSERT_VALUES_EQUAL(chain.size(), 2);
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[0]), static_cast<int>(NLevenshtein::EMT_REPLACE));
+ UNIT_ASSERT_VALUES_EQUAL(static_cast<int>(chain[1]), static_cast<int>(NLevenshtein::EMT_REPLACE));
+ UNIT_ASSERT_VALUES_EQUAL(distance, 1.0f);
+ }
+}
diff --git a/library/cpp/string_utils/levenshtein_diff/ut/ya.make b/library/cpp/string_utils/levenshtein_diff/ut/ya.make
new file mode 100644
index 0000000000..a3b9b8fea5
--- /dev/null
+++ b/library/cpp/string_utils/levenshtein_diff/ut/ya.make
@@ -0,0 +1,9 @@
+# Unit-test target for library/cpp/string_utils/levenshtein_diff; builds levenshtein_diff_ut.cpp.
+UNITTEST_FOR(library/cpp/string_utils/levenshtein_diff)
+
+OWNER(myltsev)
+
+SRCS(
+ levenshtein_diff_ut.cpp
+)
+
+END()
diff --git a/library/cpp/string_utils/levenshtein_diff/ya.make b/library/cpp/string_utils/levenshtein_diff/ya.make
new file mode 100644
index 0000000000..bafefe5365
--- /dev/null
+++ b/library/cpp/string_utils/levenshtein_diff/ya.make
@@ -0,0 +1,13 @@
+# Library target for levenshtein_diff: one source file, with a peer dependency on util/draft.
+LIBRARY()
+
+OWNER(g:mt)
+
+SRCS(
+ levenshtein_diff.cpp
+)
+
+PEERDIR(
+ util/draft
+)
+
+END()
diff --git a/library/cpp/string_utils/parse_size/parse_size.cpp b/library/cpp/string_utils/parse_size/parse_size.cpp
new file mode 100644
index 0000000000..39188d560b
--- /dev/null
+++ b/library/cpp/string_utils/parse_size/parse_size.cpp
@@ -0,0 +1,95 @@
+#include "parse_size.h"
+
+#include <util/generic/yexception.h>
+#include <util/generic/ylimits.h>
+#include <util/string/cast.h>
+#include <util/stream/output.h>
+
+namespace {
+    // Left-shift amounts for the size suffixes: a suffix multiplies the value by 2^shift.
+    enum ESuffixShifts {
+        ESS_KILO_BYTES = 10,
+        ESS_MEGA_BYTES = 20,
+        ESS_GIGA_BYTES = 30,
+        ESS_TERA_BYTES = 40,
+    };
+
+    // Shifts value left by shift bits in place.
+    // Returns false and leaves value untouched when the shift would drop set bits.
+    bool TryShiftValue(ui64& value, ui64 shift) {
+        const ui64 largestShiftable = Max<ui64>() >> shift;
+        if (value <= largestShiftable) {
+            value <<= shift;
+            return true;
+        }
+        return false;
+    }
+
+    // Throwing flavour of TryShiftValue: returns the shifted value or raises yexception on overflow.
+    ui64 ShiftValue(ui64 value, ui64 shift) {
+        if (TryShiftValue(value, shift)) {
+            return value;
+        }
+        ythrow yexception() << "value overflow '" << value << " << " << shift << "'";
+    }
+
+}
+
+namespace NSize {
+    // Parses a size given as a decimal number with an optional one-letter, case-insensitive
+    // suffix: k, m, g or t multiply the number by 2^10, 2^20, 2^30 or 2^40 respectively.
+    // Throws yexception for an empty string, an unknown suffix or a result that does not fit
+    // into ui64; a malformed numeric part is reported by FromString<ui64>.
+    ui64 ParseSize(TStringBuf str) {
+        if (! str.size())
+            ythrow yexception() << "Wrong size " << str;
+        // The <cctype> classifiers are undefined for negative arguments, so the (possibly
+        // signed) char has to go through unsigned char before it reaches isdigit/tolower.
+        const unsigned char last = static_cast<unsigned char>(str[str.size() - 1]);
+        if (isdigit(last))
+            return FromString<ui64>(str);
+        ui64 shift = 0;
+        switch (tolower(last)) {
+            case 'k':
+                shift = ESS_KILO_BYTES;
+                break;
+            case 'm':
+                shift = ESS_MEGA_BYTES;
+                break;
+            case 'g':
+                shift = ESS_GIGA_BYTES;
+                break;
+            case 't':
+                shift = ESS_TERA_BYTES;
+                break;
+            default:
+                ythrow yexception() << "Unknown suffix " << str;
+        }
+
+        ui64 value = FromString<ui64>(str.substr(0, str.size() - 1));
+
+        // TryShiftValue is used instead of ShiftValue so that the error quotes the original text.
+        if (!TryShiftValue(value, shift)) {
+            ythrow yexception() << "Value overflow " << str;
+        } else {
+            return value;
+        }
+    }
+
+    // The From*Bytes helpers convert a count of units into bytes and throw yexception on overflow.
+    TSize FromKiloBytes(ui64 value) {
+        return TSize(ShiftValue(value, ESS_KILO_BYTES));
+    }
+
+    TSize FromMegaBytes(ui64 value) {
+        return TSize(ShiftValue(value, ESS_MEGA_BYTES));
+    }
+
+    TSize FromGigaBytes(ui64 value) {
+        return TSize(ShiftValue(value, ESS_GIGA_BYTES));
+    }
+
+    TSize FromTeraBytes(ui64 value) {
+        return TSize(ShiftValue(value, ESS_TERA_BYTES));
+    }
+
+}
+
+// FromString<NSize::TSize> support: the text is parsed by NSize::ParseSize.
+template <>
+NSize::TSize FromStringImpl<NSize::TSize>(const char* data, size_t len) {
+    const TStringBuf text(data, len);
+    return NSize::TSize(NSize::ParseSize(text));
+}
+
+// Stream output support: a TSize is printed as its plain numeric value.
+template <>
+void Out<NSize::TSize>(IOutputStream& os, const NSize::TSize& size) {
+    const ui64 bytes = size.GetValue();
+    os << bytes;
+}
diff --git a/library/cpp/string_utils/parse_size/parse_size.h b/library/cpp/string_utils/parse_size/parse_size.h
new file mode 100644
index 0000000000..ad235ef02f
--- /dev/null
+++ b/library/cpp/string_utils/parse_size/parse_size.h
@@ -0,0 +1,33 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+
+namespace NSize {
+    // Parses a textual size (see parse_size.cpp for the accepted suffixes) into a plain number.
+    ui64 ParseSize(TStringBuf size);
+
+    // Convenient disk size representation with string parsing and integer comparison
+    class TSize {
+    public:
+        // Deliberately implicit in both directions, so a TSize mixes freely with plain integers.
+        TSize(ui64 value = 0)
+            : Value(value)
+        {
+        }
+
+        ui64 GetValue() const {
+            return Value;
+        }
+
+        operator ui64() const {
+            return GetValue();
+        }
+
+    private:
+        ui64 Value;
+    };
+
+    // Builders taking a count of KiB / MiB / GiB / TiB.
+    TSize FromKiloBytes(ui64 value);
+    TSize FromMegaBytes(ui64 value);
+    TSize FromGigaBytes(ui64 value);
+    TSize FromTeraBytes(ui64 value);
+
+}
diff --git a/library/cpp/string_utils/parse_size/parse_size_ut.cpp b/library/cpp/string_utils/parse_size/parse_size_ut.cpp
new file mode 100644
index 0000000000..8fff4f56b2
--- /dev/null
+++ b/library/cpp/string_utils/parse_size/parse_size_ut.cpp
@@ -0,0 +1,63 @@
+#include "parse_size.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+using namespace NSize;
+
+// Tests for NSize::ParseSize, the From*Bytes builders and TSize string conversion.
+// Large expected values use the ull suffix: unsigned long is only 32 bits wide on LLP64
+// platforms, where a product such as 1024ul * 1024ul * 1024ul * 10ul silently wraps.
+class TParseSizeTest: public TTestBase {
+    UNIT_TEST_SUITE(TParseSizeTest);
+
+    UNIT_TEST(TestPlain);
+    UNIT_TEST(TestKiloBytes);
+    UNIT_TEST(TestMegaBytes);
+    UNIT_TEST(TestGigaBytes);
+    UNIT_TEST(TestTeraBytes);
+    UNIT_TEST(TestOverflow);
+    UNIT_TEST(TestStaticCreators);
+    UNIT_TEST(TestToString);
+
+    UNIT_TEST_SUITE_END();
+
+private:
+    void TestPlain() {
+        UNIT_ASSERT(ParseSize("1024") == 1024);
+    }
+
+    void TestKiloBytes() {
+        UNIT_ASSERT(ParseSize("10K") == 1024 * 10);
+        UNIT_ASSERT(ParseSize("10k") == 1024 * 10);
+    }
+
+    void TestMegaBytes() {
+        UNIT_ASSERT(ParseSize("10M") == 1024 * 1024 * 10);
+        UNIT_ASSERT(ParseSize("10m") == 1024 * 1024 * 10);
+    }
+
+    void TestGigaBytes() {
+        UNIT_ASSERT(ParseSize("10G") == 1024ull * 1024ull * 1024ull * 10ull);
+        UNIT_ASSERT(ParseSize("10g") == 1024ull * 1024ull * 1024ull * 10ull);
+    }
+
+    void TestTeraBytes() {
+        UNIT_ASSERT(ParseSize("10T") == 1024ull * 1024ull * 1024ull * 1024ull * 10ull);
+        UNIT_ASSERT(ParseSize("10t") == 1024ull * 1024ull * 1024ull * 1024ull * 10ull);
+    }
+
+    void TestStaticCreators() {
+        UNIT_ASSERT_EQUAL(FromKiloBytes(10), 1024ull * 10ull);
+        UNIT_ASSERT_EQUAL(FromMegaBytes(10), 1024ull * 1024ull * 10ull);
+        UNIT_ASSERT_EQUAL(FromGigaBytes(10), 1024ull * 1024ull * 1024ull * 10ull);
+        UNIT_ASSERT_EQUAL(FromTeraBytes(10), 1024ull * 1024ull * 1024ull * 1024ull * 10ull);
+    }
+
+    void TestOverflow() {
+        // 2 * 10^10 GiB needs more than 64 bits.
+        UNIT_ASSERT_EXCEPTION(ParseSize("20000000000G"), yexception);
+        UNIT_ASSERT_EXCEPTION(FromGigaBytes(20000000000ull), yexception);
+    }
+
+    void TestToString() {
+        UNIT_ASSERT_VALUES_EQUAL(ToString(FromKiloBytes(1)), TString("1024"));
+    }
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TParseSizeTest);
diff --git a/library/cpp/string_utils/parse_size/ut/ya.make b/library/cpp/string_utils/parse_size/ut/ya.make
new file mode 100644
index 0000000000..da19cf025b
--- /dev/null
+++ b/library/cpp/string_utils/parse_size/ut/ya.make
@@ -0,0 +1,9 @@
+# Unit-test target for library/cpp/string_utils/parse_size; builds parse_size_ut.cpp.
+UNITTEST_FOR(library/cpp/string_utils/parse_size)
+
+OWNER(g:images-robot)
+
+SRCS(
+ parse_size_ut.cpp
+)
+
+END()
diff --git a/library/cpp/string_utils/parse_size/ya.make b/library/cpp/string_utils/parse_size/ya.make
new file mode 100644
index 0000000000..4a62abcac2
--- /dev/null
+++ b/library/cpp/string_utils/parse_size/ya.make
@@ -0,0 +1,10 @@
+# Library target for parse_size; lists both the implementation file and its header.
+LIBRARY()
+
+OWNER(g:images-robot)
+
+SRCS(
+ parse_size.cpp
+ parse_size.h
+)
+
+END()
diff --git a/library/cpp/string_utils/quote/quote.cpp b/library/cpp/string_utils/quote/quote.cpp
new file mode 100644
index 0000000000..e523350b80
--- /dev/null
+++ b/library/cpp/string_utils/quote/quote.cpp
@@ -0,0 +1,311 @@
+#include "quote.h"
+
+#include <util/memory/tempbuf.h>
+#include <util/string/ascii.h>
+#include <util/string/cstriter.h>
+
+#include <cctype>
+
+/* note: (x & 0xdf) makes x upper case */
+// GETXC folds the hex digit at x[0] into the accumulator: c = c * 16 + digit, then advances x.
+// It expands in place, so the expansion site must have locals named exactly `c` and `x`,
+// and x[0] must already be known to be a hex digit (the macro does not validate it).
+#define GETXC \
+ do { \
+ c *= 16; \
+ c += (x[0] >= 'A' ? ((x[0] & 0xdf) - 'A') + 10 : (x[0] - '0')); \
+ ++x; \
+ } while (0)
+
+// GETSBXC is the same step for an `x` that is advanced with x.Skip(1) instead of ++x.
+#define GETSBXC \
+ do { \
+ c *= 16; \
+ c += (x[0] >= 'A' ? ((x[0] & 0xdf) - 'A') + 10 : (x[0] - '0')); \
+ x.Skip(1); \
+ } while (0)
+
+
+namespace {
+ // Decoder for the two hex digits that follow a '%'.
+ // Neither overload checks how much input is left: callers either rely on a terminating
+ // '\0' (which is rejected as a non-hex character) or check the length themselves.
+ class TFromHexZeroTerm {
+ public:
+ // Decodes x[0..1] into one byte and advances x by two.
+ // If either character is not a hex digit, returns '%' and leaves x where it was,
+ // so the caller emits a literal '%' and continues with the following characters.
+ // x[1] is only examined when x[0] is a hex digit (short-circuit ||).
+ static inline char x2c(const char*& x) {
+ if (!IsAsciiHex((ui8)x[0]) || !IsAsciiHex((ui8)x[1]))
+ return '%';
+ ui8 c = 0;
+
+ // GETXC uses the locals named `c` and `x`.
+ GETXC;
+ GETXC;
+ return c;
+ }
+
+ // Same contract for a TStringBuf cursor; the caller must guarantee at least two characters.
+ static inline char x2c(TStringBuf& x) {
+ if (!IsAsciiHex((ui8)x[0]) || !IsAsciiHex((ui8)x[1]))
+ return '%';
+ ui8 c = 0;
+
+ // GETSBXC uses the locals named `c` and `x`.
+ GETSBXC;
+ GETSBXC;
+ return c;
+ }
+ };
+
+ // Decoder for input bounded by an end pointer instead of a terminating '\0'.
+ class TFromHexLenLimited {
+ public:
+ // end points one past the last readable input character.
+ TFromHexLenLimited(const char* end)
+ : End(end)
+ {
+ }
+
+ // Returns '%' without touching x when fewer than two characters remain before End;
+ // otherwise behaves like TFromHexZeroTerm::x2c.
+ inline char x2c(const char*& x) {
+ if (x + 2 > End)
+ return '%';
+ return TFromHexZeroTerm::x2c(x);
+ }
+
+ private:
+ const char* End;
+ };
+}
+
+// Maps a value in [0, 15] to its upper-case hexadecimal digit.
+static inline char d2x(unsigned x) {
+    if (x < 10) {
+        return (char)('0' + x);
+    }
+    return (char)('A' + x - 10);
+}
+
+// Substitutes an empty string for a null pointer so callers can always dereference the result.
+static inline const char* FixZero(const char* s) noexcept {
+    if (s) {
+        return s;
+    }
+    return "";
+}
+
+// Lookup table indexed by byte value: a non-zero entry means the byte is written as %XX.
+// we escape:
+// '\"', '|', '(', ')',
+// '%', '&', '+', ',',
+// '#', '<', '=', '>',
+// '[', '\\',']', '?',
+// ':', '{', '}',
+// '\'' (0x27) and '`' (0x60),
+// all below ' ' (0x20) and above '~' (0x7E).
+// ' ' itself is not marked here: Escape() converts it to '+'
+static const bool chars_to_url_escape[256] = {
+ // 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //0
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //1
+ 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, //2
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, //3
+
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //4
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, //5
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //6
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, //7
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //8
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //9
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //A
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //B
+
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //C
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //D
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //E
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //F
+};
+
+// Copies [from, end) to `to`, writing every byte flagged in escape_map as %XX (upper-case hex)
+// and every unflagged space as '+'. A terminating 0 is stored at the returned position,
+// so the output needs room for up to three bytes per input byte plus one.
+template <class It1, class It2, class It3>
+static inline It1 Escape(It1 to, It2 from, It3 end, const bool* escape_map = chars_to_url_escape) {
+    for (; from != end; ++from) {
+        const char ch = *from;
+        const unsigned char code = (unsigned char)ch;
+
+        if (!escape_map[code]) {
+            *to++ = (ch == ' ') ? '+' : ch;
+            continue;
+        }
+
+        *to++ = '%';
+        *to++ = d2x(code >> 4);
+        *to++ = d2x(code & 0xF);
+    }
+
+    *to = 0;
+
+    return to;
+}
+
+// Copies [from, end) to `to`, turning '+' into ' ' and handing whatever follows a '%' to
+// fromHex.x2c, which returns the byte to store and advances `from` as it sees fit.
+// A terminating 0 is stored at the returned position. Every input character is read before
+// the corresponding output is written, which is what lets callers decode in place.
+template <class It1, class It2, class It3, class FromHex>
+static inline It1 Unescape(It1 to, It2 from, It3 end, FromHex fromHex) {
+    (void)fromHex;
+
+    while (from != end) {
+        const char ch = *from;
+        ++from;
+
+        if (ch == '%') {
+            *to++ = fromHex.x2c(from);
+        } else if (ch == '+') {
+            *to++ = ' ';
+        } else {
+            *to++ = ch;
+        }
+    }
+    *to = 0;
+    return to;
+}
+
+// Every pointer-returning CGIEscape overload returns the end of the escaped text (where the
+// terminating 0 was written), so several calls in a row can fill one long buffer.
+char* CGIEscape(char* to, const char* from) {
+    const char* const source = FixZero(from);
+    return Escape(to, source, TCStringEndIterator());
+}
+
+char* CGIEscape(char* to, const char* from, size_t len) {
+    const char* const last = from + len;
+    return Escape(to, from, last);
+}
+
+// Escapes url through a temporary buffer and assigns the result back to it.
+void CGIEscape(TString& url) {
+    TTempBuf scratch(CgiEscapeBufLen(url.size()));
+    char* const begin = scratch.Data();
+    char* const finish = CGIEscape(begin, url.data(), url.size());
+
+    url.AssignNoAlias(begin, finish);
+}
+
+// Returns the CGI-escaped copy of url.
+TString CGIEscapeRet(const TStringBuf url) {
+    TString result;
+    // Reserve the worst case first, then shrink to what was actually written.
+    result.ReserveAndResize(CgiEscapeBufLen(url.size()));
+    char* const finish = CGIEscape(result.begin(), url.data(), url.size());
+    result.resize(finish - result.data());
+    return result;
+}
+
+// Appends the CGI-escaped form of value to `to` and returns `to`.
+TString& AppendCgiEscaped(const TStringBuf value, TString& to) {
+    const size_t prefixLen = to.length();
+    to.ReserveAndResize(prefixLen + CgiEscapeBufLen(value.size()));
+    char* const finish = CGIEscape(to.begin() + prefixLen, value.data(), value.size());
+    to.resize(finish - to.data());
+    return to;
+}
+
+// More general version of CGIEscape: on top of the CGI escape table it escapes the RFC 3986
+// reserved characters, except those listed in `safe` (the public overloads default it to "/").
+// Returns a pointer to the end of the result string, like CGIEscape.
+template <class It1, class It2, class It3>
+static inline It1 Quote(It1 to, It2 from, It3 end, const char* safe) {
+    bool escape_map[256];
+    memcpy(escape_map, chars_to_url_escape, sizeof(escape_map));
+
+    // RFC 3986 Uniform Resource Identifiers (URI): Generic Syntax
+    // lists following reserved characters:
+    static const char reserved[] = ":/?#[]@!$&\'()*+,;=";
+    for (size_t i = 0; reserved[i] != '\0'; ++i) {
+        escape_map[(unsigned char)reserved[i]] = true;
+    }
+
+    // `safe` is applied last, so it overrides both the base table and the reserved set.
+    for (size_t i = 0; safe[i] != '\0'; ++i) {
+        escape_map[(unsigned char)safe[i]] = false;
+    }
+
+    return Escape(to, from, end, escape_map);
+}
+
+// Zero-terminated input; a null `from` is treated as an empty string.
+char* Quote(char* to, const char* from, const char* safe) {
+    const char* const source = FixZero(from);
+    return Quote(to, source, TCStringEndIterator(), safe);
+}
+
+// Length-delimited input.
+char* Quote(char* to, const TStringBuf s, const char* safe) {
+    const char* const first = s.data();
+    return Quote(to, first, first + s.size(), safe);
+}
+
+// Quotes url through a temporary buffer and assigns the result back to it.
+void Quote(TString& url, const char* safe) {
+    TTempBuf scratch(CgiEscapeBufLen(url.size()));
+    char* const begin = scratch.Data();
+    char* const finish = Quote(begin, url, safe);
+
+    url.AssignNoAlias(begin, finish);
+}
+
+// Zero-terminated input; a null `from` is treated as an empty string.
+char* CGIUnescape(char* to, const char* from) {
+    const char* const source = FixZero(from);
+    return Unescape(to, source, TCStringEndIterator(), TFromHexZeroTerm());
+}
+
+// Length-delimited input: a '%' with fewer than two characters after it is kept literally.
+char* CGIUnescape(char* to, const char* from, size_t len) {
+    const char* const last = from + len;
+    return Unescape(to, from, last, TFromHexLenLimited(last));
+}
+
+void CGIUnescape(TString& url) {
+    if (url.empty()) {
+        return;
+    }
+    if (!url.IsDetached()) {
+        // The buffer is shared with other strings: decode into a fresh one instead.
+        url = CGIUnescapeRet(url);
+        return;
+    }
+    // in-place when refcount == 1
+    char* const begin = url.begin();
+    const char* const finish = CGIUnescape(begin, begin, url.size());
+    url.resize(finish - begin);
+}
+
+// Returns the CGI-unescaped copy of from.
+TString CGIUnescapeRet(const TStringBuf from) {
+    TString result;
+    // Decoding never grows the text, so input length + 1 (see CgiUnescapeBufLen) is enough.
+    result.ReserveAndResize(CgiUnescapeBufLen(from.size()));
+    char* const finish = CGIUnescape(result.begin(), from.data(), from.size());
+    result.resize(finish - result.data());
+    return result;
+}
+
+// Decodes %XX sequences and copies every other character unchanged ('+' stays '+').
+// A '%' that is not followed by two hex digits is copied literally.
+// Stores a terminating 0 and returns a pointer to it.
+char* UrlUnescape(char* to, TStringBuf from) {
+    for (; !from.empty(); ++to) {
+        char decoded = from[0];
+        from.Skip(1);
+        // x2c needs two readable characters; it returns '%' and consumes nothing on bad hex.
+        if (decoded == '%' && from.length() >= 2) {
+            decoded = TFromHexZeroTerm::x2c(from);
+        }
+        *to = decoded;
+    }
+
+    *to = 0;
+
+    return to;
+}
+
+void UrlUnescape(TString& url) {
+    if (url.empty()) {
+        return;
+    }
+    if (!url.IsDetached()) {
+        // The buffer is shared with other strings: decode into a fresh one instead.
+        url = UrlUnescapeRet(url);
+        return;
+    }
+    // in-place when refcount == 1
+    char* const begin = url.begin();
+    const char* const finish = UrlUnescape(begin, url);
+    url.resize(finish - begin);
+}
+
+// Returns the URL-unescaped copy of from.
+TString UrlUnescapeRet(const TStringBuf from) {
+    TString result;
+    result.ReserveAndResize(CgiUnescapeBufLen(from.size()));
+    char* const finish = UrlUnescape(result.begin(), from);
+    result.resize(finish - result.data());
+    return result;
+}
+
+// Escapes a zero-terminated string (null is treated as empty) for use inside a URL:
+// bytes <= ' ' or > '~' become %XX, and so does a '%' unless it already starts a valid %XX
+// triplet (with forceEscape every '%' is escaped). Stores a terminating 0 and returns a
+// pointer to it.
+char* UrlEscape(char* to, const char* from, bool forceEscape) {
+    for (const char* cur = FixZero(from); *cur; ++cur) {
+        const unsigned char code = (unsigned char)*cur;
+
+        bool mustEscape = code <= ' ' || code > '~';
+        if (!mustEscape && *cur == '%') {
+            // The look-ahead never passes the terminator: cur[2] is only read when cur[1] != 0.
+            mustEscape = forceEscape || !(cur[1] && IsAsciiHex(cur[1]) && cur[2] && IsAsciiHex(cur[2]));
+        }
+
+        if (!mustEscape) {
+            *to++ = *cur;
+            continue;
+        }
+        *to++ = '%';
+        *to++ = d2x(code >> 4);
+        *to++ = d2x(code & 0xF);
+    }
+
+    *to = 0;
+
+    return to;
+}
+
+// Escapes url through a temporary buffer and assigns the result back to it.
+// The input is read as a C string, so anything after an embedded '\0' is dropped.
+void UrlEscape(TString& url, bool forceEscape) {
+    TTempBuf scratch(CgiEscapeBufLen(url.size()));
+    char* const begin = scratch.Data();
+    char* const finish = UrlEscape(begin, url.data(), forceEscape);
+    url.AssignNoAlias(begin, finish);
+}
+
+// Returns the URL-escaped copy of from (see UrlEscape for the escaping rules).
+// As with the other UrlEscape flavours, input after an embedded '\0' is dropped.
+TString UrlEscapeRet(const TStringBuf from, bool forceEscape) {
+    // UrlEscape(char*, const char*, bool) scans until '\0', but a TStringBuf is not required
+    // to be zero-terminated. Reading it directly could run past the view and overflow `to`,
+    // which is sized from from.size(); copying into a TString guarantees the terminator.
+    const TString source(from);
+    TString to;
+    to.ReserveAndResize(CgiEscapeBufLen(source.size()));
+    to.resize(UrlEscape(to.begin(), source.c_str(), forceEscape) - to.data());
+    return to;
+}
diff --git a/library/cpp/string_utils/quote/quote.h b/library/cpp/string_utils/quote/quote.h
new file mode 100644
index 0000000000..3b7221154e
--- /dev/null
+++ b/library/cpp/string_utils/quote/quote.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+
+//CGIEscape*:
+// ' ' converted to '+',
+// Some punctuation and chars outside [32, 126] range are converted to %xx
+// Use function CgiEscapeBufLen to determine number of characters needed for 'char* to' parameter.
+// Returns pointer to the end of the result string
+char* CGIEscape(char* to, const char* from);
+char* CGIEscape(char* to, const char* from, size_t len);
+// TStringBuf convenience overload: forwards the view's data and size.
+inline char* CGIEscape(char* to, const TStringBuf from) {
+    const char* const first = from.data();
+    return CGIEscape(to, first, from.size());
+}
+void CGIEscape(TString& url);
+TString CGIEscapeRet(const TStringBuf url);
+TString& AppendCgiEscaped(const TStringBuf value, TString& to);
+
+// Escapes `from` into `to` and returns a view of the escaped text.
+inline TStringBuf CgiEscapeBuf(char* to, const TStringBuf from) {
+    char* const finish = CGIEscape(to, from.data(), from.size());
+    return TStringBuf(to, finish);
+}
+// Same, for a caller-provided untyped scratch buffer.
+inline TStringBuf CgiEscape(void* tmp, const TStringBuf s) {
+    char* const buffer = static_cast<char*>(tmp);
+    return CgiEscapeBuf(buffer, s);
+}
+
+//CgiUnescape*:
+// Decodes '%xx' to bytes, '+' to space.
+// Use function CgiUnescapeBufLen to determine number of characters needed for 'char* to' parameter.
+// If pointer returned, then this is pointer to the end of the result string.
+char* CGIUnescape(char* to, const char* from);
+char* CGIUnescape(char* to, const char* from, size_t len);
+void CGIUnescape(TString& url);
+TString CGIUnescapeRet(const TStringBuf from);
+
+// Unescapes `from` into `to` and returns a view of the decoded text.
+inline TStringBuf CgiUnescapeBuf(char* to, const TStringBuf from) {
+    char* const finish = CGIUnescape(to, from.data(), from.size());
+    return TStringBuf(to, finish);
+}
+// Same, for a caller-provided untyped scratch buffer.
+inline TStringBuf CgiUnescape(void* tmp, const TStringBuf s) {
+    char* const buffer = static_cast<char*>(tmp);
+    return CgiUnescapeBuf(buffer, s);
+}
+
+//Quote:
+// Is like CGIEscape, also skips encoding of user-supplied 'safe' characters.
+char* Quote(char* to, const char* from, const char* safe = "/");
+char* Quote(char* to, const TStringBuf s, const char* safe = "/");
+void Quote(TString& url, const char* safe = "/");
+
+//UrlEscape:
+// Can't be used for cgi parameters ('&' character is not escaped)!
+// escapes '%' only when it is not followed by two hex digits, or always if forceEscape is set to true,
+// and chars outside [32, 126] range.
+// Can't handle '\0'-chars in TString.
+char* UrlEscape(char* to, const char* from, bool forceEscape = false);
+void UrlEscape(TString& url, bool forceEscape = false);
+TString UrlEscapeRet(const TStringBuf from, bool forceEscape = false);
+
+//UrlUnescape:
+// '+' is NOT converted to space!
+// %xx converted to bytes, other characters are copied unchanged.
+char* UrlUnescape(char* to, TStringBuf from);
+void UrlUnescape(TString& url);
+TString UrlUnescapeRet(const TStringBuf from);
+
+//*BufLen: how many characters you should allocate for 'char* to' buffers.
+// Worst case for escaping: each character becomes a three-character %XX, plus the terminating 0.
+constexpr size_t CgiEscapeBufLen(const size_t len) noexcept {
+    return len * 3 + 1;
+}
+
+// Unescaping never grows the text: the input length plus the terminating 0.
+constexpr size_t CgiUnescapeBufLen(const size_t len) noexcept {
+    return 1 + len;
+}
diff --git a/library/cpp/string_utils/quote/quote_ut.cpp b/library/cpp/string_utils/quote/quote_ut.cpp
new file mode 100644
index 0000000000..6c552b279e
--- /dev/null
+++ b/library/cpp/string_utils/quote/quote_ut.cpp
@@ -0,0 +1,319 @@
+#include "quote.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+// Tests for the CGIEscape family: raw-buffer, TStringBuf, returning and appending flavours.
+Y_UNIT_TEST_SUITE(TCGIEscapeTest) {
+ // The returned pointer is the end of the output, and a terminating 0 is stored there.
+ Y_UNIT_TEST(ReturnsEndOfTo) {
+ char r[10];
+ const char* returned = CGIEscape(r, "123");
+ UNIT_ASSERT_VALUES_EQUAL(r + strlen("123"), returned);
+ UNIT_ASSERT_VALUES_EQUAL('\0', *returned);
+ }
+
+ // The length-taking overload must not rely on a terminating 0 in the input.
+ Y_UNIT_TEST(NotZeroTerminated) {
+ char r[] = {'1', '2', '3', '4'};
+ char buf[sizeof(r) * 3 + 2];
+
+ TString ret(buf, CGIEscape(buf, r, sizeof(r)));
+
+ UNIT_ASSERT_EQUAL(ret, "1234");
+ }
+
+ // Punctuation sample: '!', '@', '$', '^' and '*' pass through, a space becomes '+'.
+ Y_UNIT_TEST(StringBuf) {
+ char tmp[100];
+
+ UNIT_ASSERT_VALUES_EQUAL(CgiEscape(tmp, "!@#$%^&*(){}[]\" "), TStringBuf("!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+"));
+ }
+
+ Y_UNIT_TEST(StrokaRet) {
+ UNIT_ASSERT_VALUES_EQUAL(CGIEscapeRet("!@#$%^&*(){}[]\" "), TString("!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+"));
+ }
+
+ // AppendCgiEscaped must keep the existing content of the target and only add to it.
+ Y_UNIT_TEST(StrokaAppendRet) {
+ TString param;
+ AppendCgiEscaped("!@#$%^&*(){}[]\" ", param);
+ UNIT_ASSERT_VALUES_EQUAL(param, TString("!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+"));
+
+ TString param2 = "&param=";
+ AppendCgiEscaped("!@#$%^&*(){}[]\" ", param2);
+ UNIT_ASSERT_VALUES_EQUAL(param2,
+ TString("&param=!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+"));
+
+ param2.append("&param_param=");
+ AppendCgiEscaped("!@#$%^&*(){}[]\" ", param2);
+ UNIT_ASSERT_VALUES_EQUAL(param2,
+ TString("&param=!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+&param_param=!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+"));
+ }
+
+}
+
+// Tests for the CGIUnescape family: zero-terminated and length-limited input, valid and
+// malformed %XX sequences, and the TString overload in its in-place and copying modes.
+Y_UNIT_TEST_SUITE(TCGIUnescapeTest) {
+ Y_UNIT_TEST(StringBuf) {
+ char tmp[100];
+
+ UNIT_ASSERT_VALUES_EQUAL(CgiUnescape(tmp, "!@%23$%25^%26*%28%29"), TStringBuf("!@#$%^&*()"));
+ }
+
+ // Hex digits are accepted in either case.
+ Y_UNIT_TEST(TestValidZeroTerm) {
+ char r[10];
+
+ CGIUnescape(r, "1234");
+ UNIT_ASSERT_VALUES_EQUAL(r, "1234");
+
+ CGIUnescape(r, "%3d");
+ UNIT_ASSERT_VALUES_EQUAL(r, "=");
+
+ CGIUnescape(r, "12%3D34");
+ UNIT_ASSERT_VALUES_EQUAL(r, "12=34");
+ }
+
+ // A '%' without two hex digits after it is copied through unchanged.
+ Y_UNIT_TEST(TestInvalidZeroTerm) {
+ char r[10];
+
+ CGIUnescape(r, "%");
+ UNIT_ASSERT_VALUES_EQUAL(r, "%");
+
+ CGIUnescape(r, "%3");
+ UNIT_ASSERT_VALUES_EQUAL(r, "%3");
+
+ CGIUnescape(r, "%3g");
+ UNIT_ASSERT_VALUES_EQUAL(r, "%3g");
+
+ CGIUnescape(r, "12%3g34");
+ UNIT_ASSERT_VALUES_EQUAL(r, "12%3g34");
+
+ CGIUnescape(r, "%3u123");
+ UNIT_ASSERT_VALUES_EQUAL(r, "%3u123");
+ }
+
+ // Only the first `len` characters of the input may be consumed.
+ Y_UNIT_TEST(TestValidNotZeroTerm) {
+ char r[10];
+
+ CGIUnescape(r, "123456789", 4);
+ UNIT_ASSERT_VALUES_EQUAL(r, "1234");
+
+ CGIUnescape(r, "%3d1234", 3);
+ UNIT_ASSERT_VALUES_EQUAL(r, "=");
+
+ CGIUnescape(r, "12%3D345678", 7);
+ UNIT_ASSERT_VALUES_EQUAL(r, "12=34");
+ }
+
+ // A %XX sequence cut off by `len` must not be decoded using characters beyond the limit.
+ Y_UNIT_TEST(TestInvalidNotZeroTerm) {
+ char r[10];
+
+ CGIUnescape(r, "%3d", 1);
+ UNIT_ASSERT_VALUES_EQUAL(r, "%");
+
+ CGIUnescape(r, "%3d", 2);
+ UNIT_ASSERT_VALUES_EQUAL(r, "%3");
+
+ CGIUnescape(r, "%3g1234", 3);
+ UNIT_ASSERT_VALUES_EQUAL(r, "%3g");
+
+ CGIUnescape(r, "12%3g345678", 7);
+ UNIT_ASSERT_VALUES_EQUAL(r, "12%3g34");
+
+ CGIUnescape(r, "%3u1234", 2);
+ UNIT_ASSERT_VALUES_EQUAL(r, "%3");
+
+ CGIUnescape(r, "%3u1234", 3);
+ UNIT_ASSERT_VALUES_EQUAL(r, "%3u");
+
+ CGIUnescape(r, "%3u1234", 4);
+ UNIT_ASSERT_VALUES_EQUAL(r, "%3u1");
+ }
+
+ // s is the only owner of its buffer here, which is the case CGIUnescape(TString&) decodes in place.
+ Y_UNIT_TEST(StrokaOutParameterInplace) {
+ TString s;
+
+ s = "hello%3dworld";
+ CGIUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello=world");
+
+ s = "+%23+";
+ CGIUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, " # ");
+
+ s = "hello%3u";
+ CGIUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello%3u");
+
+ s = "0123456789012345";
+ CGIUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "0123456789012345");
+
+ s = "";
+ CGIUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "");
+ }
+
+ // sCopy is assigned from s before each call, presumably so the buffer is shared and the
+ // non-in-place branch runs - TODO confirm against TString's sharing semantics.
+ Y_UNIT_TEST(StrokaOutParameterNotInplace) {
+ TString s, sCopy;
+
+ s = "hello%3dworld";
+ sCopy = s;
+ CGIUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello=world");
+
+ s = "+%23+";
+ sCopy = s;
+ CGIUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, " # ");
+
+ s = "hello%3u";
+ sCopy = s;
+ CGIUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello%3u");
+
+ s = "0123456789012345";
+ sCopy = s;
+ CGIUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "0123456789012345");
+
+ s = "";
+ sCopy = s;
+ CGIUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "");
+ }
+}
+
+// Tests for UrlEscape / UrlEscapeRet, with and without forceEscape, and round trips through UrlUnescape.
+Y_UNIT_TEST_SUITE(TUrlEscapeTest) {
+ // Without forceEscape an existing %XX triplet is left alone.
+ Y_UNIT_TEST(EscapeEscaped) {
+ TString s;
+
+ s = "hello%3dworld";
+ UNIT_ASSERT_VALUES_EQUAL(UrlEscapeRet(s), "hello%3dworld");
+ UrlEscape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello%3dworld");
+ }
+
+ Y_UNIT_TEST(EscapeUnescape) {
+ TString s;
+
+ s = "hello%3dworld";
+ UrlEscape(s);
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello=world");
+ }
+
+ Y_UNIT_TEST(EscapeUnescapeRet) {
+ TString s;
+
+ s = "hello%3dworld";
+ UNIT_ASSERT_VALUES_EQUAL(UrlUnescapeRet(UrlEscapeRet(s)), "hello=world");
+ }
+
+ // With forceEscape the '%' itself is escaped to %25.
+ Y_UNIT_TEST(EscapeEscapedForce) {
+ TString s;
+
+ s = "hello%3dworld";
+ UNIT_ASSERT_VALUES_EQUAL(UrlEscapeRet(s, true), "hello%253dworld");
+ UrlEscape(s, true);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello%253dworld");
+ }
+
+ // A forced escape followed by one unescape restores the original text.
+ Y_UNIT_TEST(EscapeUnescapeForce) {
+ TString s;
+
+ s = "hello%3dworld";
+ UrlEscape(s, true);
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello%3dworld");
+ }
+
+ Y_UNIT_TEST(EscapeUnescapeForceRet) {
+ TString s;
+
+ s = "hello%3dworld";
+ UNIT_ASSERT_VALUES_EQUAL(UrlUnescapeRet(UrlEscapeRet(s, true)), "hello%3dworld");
+ }
+}
+
+// Tests for UrlUnescape(TString&). Unlike CGIUnescape, '+' must stay '+'.
+Y_UNIT_TEST_SUITE(TUrlUnescapeTest) {
+ // s is the only owner of its buffer here, which is the case UrlUnescape(TString&) decodes in place.
+ Y_UNIT_TEST(StrokaOutParameterInplace) {
+ TString s;
+
+ s = "hello%3dworld";
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello=world");
+
+ s = "+%23+";
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "+#+");
+
+ s = "hello%3u";
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello%3u");
+
+ s = "0123456789012345";
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "0123456789012345");
+
+ s = "";
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "");
+ }
+
+ // sCopy is assigned from s before each call, presumably so the buffer is shared and the
+ // non-in-place branch runs - TODO confirm against TString's sharing semantics.
+ Y_UNIT_TEST(StrokaOutParameterNotInplace) {
+ TString s, sCopy;
+
+ s = "hello%3dworld";
+ sCopy = s;
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello=world");
+
+ s = "+%23+";
+ sCopy = s;
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "+#+");
+
+ s = "hello%3u";
+ sCopy = s;
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "hello%3u");
+
+ s = "0123456789012345";
+ sCopy = s;
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "0123456789012345");
+
+ s = "";
+ sCopy = s;
+ UrlUnescape(s);
+ UNIT_ASSERT_VALUES_EQUAL(s, "");
+ }
+}
+
+Y_UNIT_TEST_SUITE(TQuoteTest) {
+ Y_UNIT_TEST(ReturnsEndOfTo) {
+     // Quote must return a pointer to the terminating 0 it stored after the output.
+     char out[10];
+     const char* finish = Quote(out, "123");
+     UNIT_ASSERT_VALUES_EQUAL(out + strlen("123"), finish);
+     UNIT_ASSERT_VALUES_EQUAL('\0', *finish);
+ }
+
+ // With the default safe set ("/") slashes survive, while ';' and ',' are escaped.
+ // Checked for both the raw-buffer and the TString overloads.
+ Y_UNIT_TEST(SlashIsSafeByDefault) {
+ char r[100];
+ Quote(r, "/path;tail/path,tail/");
+ UNIT_ASSERT_VALUES_EQUAL("/path%3Btail/path%2Ctail/", r);
+ TString s("/path;tail/path,tail/");
+ Quote(s);
+ UNIT_ASSERT_VALUES_EQUAL("/path%3Btail/path%2Ctail/", s.c_str());
+ }
+
+ // An explicit safe set replaces the default one: ';' and ',' survive, '/' is now escaped.
+ Y_UNIT_TEST(SafeColons) {
+ char r[100];
+ Quote(r, "/path;tail/path,tail/", ";,");
+ UNIT_ASSERT_VALUES_EQUAL("%2Fpath;tail%2Fpath,tail%2F", r);
+ TString s("/path;tail/path,tail/");
+ Quote(s, ";,");
+ UNIT_ASSERT_VALUES_EQUAL("%2Fpath;tail%2Fpath,tail%2F", s.c_str());
+ }
+
+ Y_UNIT_TEST(StringBuf) {
+     // A bare string literal binds to the const char* overload, which stops at the embedded
+     // '\0', so the "/path" part never reached Quote and both sides compared as "abc".
+     // An explicit length selects the TStringBuf overload and feeds all nine characters:
+     // '\0' is escaped as %00 and, with an empty safe set, '/' as %2F.
+     char r[100];
+     const TStringBuf source("abc\0/path", 9);
+     char* end = Quote(r, source, "");
+     UNIT_ASSERT_VALUES_EQUAL(TStringBuf("abc%00%2Fpath"), TStringBuf(r, end));
+ }
+}
diff --git a/library/cpp/string_utils/quote/ut/ya.make b/library/cpp/string_utils/quote/ut/ya.make
new file mode 100644
index 0000000000..eca955144f
--- /dev/null
+++ b/library/cpp/string_utils/quote/ut/ya.make
@@ -0,0 +1,9 @@
+# Unit-test target for library/cpp/string_utils/quote; builds quote_ut.cpp.
+UNITTEST_FOR(library/cpp/string_utils/quote)
+
+OWNER(vladon)
+
+SRCS(
+ quote_ut.cpp
+)
+
+END()
diff --git a/library/cpp/string_utils/quote/ya.make b/library/cpp/string_utils/quote/ya.make
new file mode 100644
index 0000000000..55bb3cf939
--- /dev/null
+++ b/library/cpp/string_utils/quote/ya.make
@@ -0,0 +1,10 @@
+# Library target for quote; lists both the implementation file and its header.
+LIBRARY()
+
+OWNER(g:util)
+
+SRCS(
+ quote.cpp
+ quote.h
+)
+
+END()
diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp
new file mode 100644
index 0000000000..ac624dca85
--- /dev/null
+++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp
@@ -0,0 +1 @@
+#include "relaxed_escaper.h"
diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h
new file mode 100644
index 0000000000..d7ea7c1259
--- /dev/null
+++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h
@@ -0,0 +1,208 @@
+#pragma once
+
+#include <util/stream/output.h>
+#include <util/string/escape.h>
+#include <util/memory/tempbuf.h>
+#include <util/generic/strbuf.h>
+
+namespace NEscJ {
+ // almost copypaste from util/string/escape.h
+ // todo: move there (note difference in IsPrintable and handling of string)
+
+ // Upper-case hexadecimal digit for a value in [0, 15].
+ inline char HexDigit(char value) {
+     return value < 10 ? '0' + value : 'A' + value - 10;
+ }
+
+ // Octal digit for a value in [0, 7].
+ inline char OctDigit(char value) {
+     const char zero = '0';
+     return zero + value;
+ }
+
+ // True for every byte value except 0xC0, 0xC1 and 0xF5..0xFF.
+ inline bool IsUTF8(ui8 c) {
+     return !(c >= 0xf5 || c == 0xC0 || c == 0xC1);
+ }
+
+ // True for bytes below 0x20 and for 0x7f.
+ inline bool IsControl(ui8 c) {
+     return c == 0x7f || c < 0x20;
+ }
+
+ // A byte is written as-is when it passes IsUTF8 and is not a control character.
+ inline bool IsPrintable(ui8 c) {
+     if (IsControl(c)) {
+         return false;
+     }
+     return IsUTF8(c);
+ }
+
+ inline bool IsHexDigit(ui8 c) {
+     const bool decimal = c >= '0' && c <= '9';
+     const bool upper = c >= 'A' && c <= 'F';
+     const bool lower = c >= 'a' && c <= 'f';
+     return decimal || upper || lower;
+ }
+
+ inline bool IsOctDigit(ui8 c) {
+     return '0' <= c && c <= '7';
+ }
+
+ struct TEscapeUtil {
+     // Longest possible escape: \u00XX.
+     static const size_t ESCAPE_C_BUFFER_SIZE = 6;
+
+     // Writes the escaped form of byte c into r and returns its length (1 means "unchanged").
+     //  - bytes listed in `safe` always go as-is;
+     //  - '"', '\\', \b, \f, \r, \n and \t get their two-character escapes;
+     //  - other printable bytes go as-is unless listed in `unsafe`;
+     //  - the rest becomes \u00XX (asunicode, when the byte passes IsUTF8), a one-digit octal
+     //    escape (c < 8), \xXX, or a three-digit octal escape. `next` is the following input
+     //    byte: the short octal and the hex forms are only used when `next` could not be
+     //    read as a continuation of the escape.
+     template <bool asunicode>
+     static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) {
+         if (safe.find(c) != TStringBuf::npos) {
+             r[0] = c;
+             return 1;
+         }
+
+         // All the two-character escapes. The control characters among them are never
+         // printable, so testing them ahead of IsPrintable does not change the outcome.
+         char shortEscape = 0;
+         switch (c) {
+             case '\"':
+                 shortEscape = '\"';
+                 break;
+             case '\\':
+                 shortEscape = '\\';
+                 break;
+             case '\b':
+                 shortEscape = 'b';
+                 break;
+             case '\f':
+                 shortEscape = 'f';
+                 break;
+             case '\r':
+                 shortEscape = 'r';
+                 break;
+             case '\n':
+                 shortEscape = 'n';
+                 break;
+             case '\t':
+                 shortEscape = 't';
+                 break;
+             default:
+                 break;
+         }
+         if (shortEscape != 0) {
+             r[0] = '\\';
+             r[1] = shortEscape;
+             return 2;
+         }
+
+         if (IsPrintable(c) && unsafe.find(c) == TStringBuf::npos) {
+             r[0] = c;
+             return 1;
+         }
+
+         r[0] = '\\';
+         if (asunicode && IsUTF8(c)) { // utf8 controls escape for json
+             r[1] = 'u';
+             r[2] = '0';
+             r[3] = '0';
+             r[4] = HexDigit((c & 0xF0) >> 4);
+             r[5] = HexDigit((c & 0x0F) >> 0);
+             return 6;
+         }
+         if (c < 8 && !IsOctDigit(next)) {
+             r[1] = OctDigit(c);
+             return 2;
+         }
+         if (!IsHexDigit(next)) {
+             r[1] = 'x';
+             r[2] = HexDigit((c & 0xF0) >> 4);
+             r[3] = HexDigit((c & 0x0F) >> 0);
+             return 4;
+         }
+         r[1] = OctDigit((c & 0700) >> 6);
+         r[2] = OctDigit((c & 0070) >> 3);
+         r[3] = OctDigit((c & 0007) >> 0);
+         return 4;
+     }
+
+     // Non-template shortcut for asunicode == false.
+     static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) {
+         return EscapeJ<false>(c, next, r, safe, unsafe);
+     }
+ };
+
+ // Output size that is enough for escaping len bytes: every byte may need the longest escape.
+ inline size_t SuggestBuffer(size_t len) {
+     const size_t perByte = TEscapeUtil::ESCAPE_C_BUFFER_SIZE;
+     return len * perByte;
+ }
+
+ // Escapes str[0, len) into out and returns the number of bytes written. No terminating 0
+ // is stored. `out` must have room for SuggestBuffer(len) bytes.
+ // `safe` lists bytes that are always copied as-is, `unsafe` lists printable bytes that
+ // must be escaped anyway (see TEscapeUtil::EscapeJ).
+ //
+ // Runs of bytes that need no escaping are copied with memcpy. strncpy is the wrong tool
+ // for that: it stops at a '\0' (reachable when `safe` contains it) and zero-fills the rest
+ // of the run, and it was handed a possibly null `str` for empty input.
+ template <bool tounicode>
+ inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+     char* const outBegin = out;
+     char buffer[TEscapeUtil::ESCAPE_C_BUFFER_SIZE];
+
+     // Start of the run of verbatim bytes that has not been copied yet.
+     size_t pending = 0;
+     for (size_t i = 0; i < len; ++i) {
+         const size_t rlen = TEscapeUtil::EscapeJ<tounicode>(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe);
+
+         if (rlen > 1) {
+             // Flush the verbatim run that precedes this byte, then the escape itself.
+             if (i > pending) {
+                 memcpy(out, str + pending, i - pending);
+                 out += i - pending;
+             }
+             pending = i + 1;
+
+             memcpy(out, buffer, rlen);
+             out += rlen;
+         }
+     }
+
+     // Trailing verbatim run; this is the whole input when nothing needed escaping.
+     if (len > pending) {
+         memcpy(out, str + pending, len - pending);
+         out += len - pending;
+     }
+
+     return out - outBegin;
+ }
+
+ // Escapes `in` and writes the result to the stream, wrapped in double quotes when `quote` is set.
+ template <bool quote, bool tounicode>
+ inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+     // Worst-case escaped size plus the two optional quotes.
+     TTempBuf scratch(SuggestBuffer(in.size()) + 2);
+
+     if (quote) {
+         scratch.Append("\"", 1);
+     }
+
+     const size_t produced = EscapeJ<tounicode>(in.data(), in.size(), scratch.Current(), safe, unsafe);
+     scratch.Proceed(produced);
+
+     if (quote) {
+         scratch.Append("\"", 1);
+     }
+
+     out.Write(scratch.Data(), scratch.Filled());
+ }
+
+ // Escapes `in` and appends the result to `out`, wrapped in double quotes when `quote` is set.
+ template <bool quote, bool tounicode>
+ inline void EscapeJ(TStringBuf in, TString& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+     // Worst-case escaped size plus the two optional quotes.
+     TTempBuf scratch(SuggestBuffer(in.size()) + 2);
+
+     if (quote) {
+         scratch.Append("\"", 1);
+     }
+
+     const size_t produced = EscapeJ<tounicode>(in.data(), in.size(), scratch.Current(), safe, unsafe);
+     scratch.Proceed(produced);
+
+     if (quote) {
+         scratch.Append("\"", 1);
+     }
+
+     out.append(scratch.Data(), scratch.Filled());
+ }
+
+ // Returns the escaped form of `in` as a new string.
+ template <bool quote, bool tounicode>
+ inline TString EscapeJ(TStringBuf in, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+     TString escaped;
+     EscapeJ<quote, tounicode>(in, escaped, safe, unsafe);
+     return escaped;
+ }
+
+ // Overloads for callers that omit the "tounicode" template parameter: it defaults to false.
+ inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+     const size_t written = EscapeJ<false>(str, len, out, safe, unsafe);
+     return written;
+ }
+
+ template <bool quote>
+ inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+     EscapeJ<quote, false>(in, out, safe, unsafe);
+ }
+
+ template <bool quote>
+ inline void EscapeJ(TStringBuf in, TString& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+     EscapeJ<quote, false>(in, out, safe, unsafe);
+ }
+
+ template <bool quote>
+ inline TString EscapeJ(TStringBuf in, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+     TString escaped = EscapeJ<quote, false>(in, safe, unsafe);
+     return escaped;
+ }
+}
diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp
new file mode 100644
index 0000000000..768555ea3a
--- /dev/null
+++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp
@@ -0,0 +1,66 @@
+#include "relaxed_escaper.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+// Wraps a string literal in a TStringBuf whose length comes from sizeof, so
+// embedded NUL bytes inside the literal are kept as part of the data.
+#define RESC_FIXED_STR(s) TStringBuf(s, sizeof(s) - 1)
+// Flat list of pairs: even index = escaped text, odd index = raw source text
+// (TestEscaper reads CommonTestData[i] as "expected" and CommonTestData[i + 1] as "source").
+static const TStringBuf CommonTestData[] = {
+ // Should be valid UTF-8.
+ RESC_FIXED_STR("http://ya.ru/"), RESC_FIXED_STR("http://ya.ru/"),
+ RESC_FIXED_STR("http://ya.ru/\\x17\\n"), RESC_FIXED_STR("http://ya.ru/\x17\n"),
+
+ RESC_FIXED_STR("http://ya.ru/\\0"), RESC_FIXED_STR("http://ya.ru/\0"),
+ RESC_FIXED_STR("http://ya.ru/\\0\\0"), RESC_FIXED_STR("http://ya.ru/\0\0"),
+ RESC_FIXED_STR("http://ya.ru/\\0\\0000"), RESC_FIXED_STR("http://ya.ru/\0\0"
+ "0"),
+ RESC_FIXED_STR("http://ya.ru/\\0\\0001"), RESC_FIXED_STR("http://ya.ru/\0\x00"
+ "1"),
+
+ RESC_FIXED_STR("\\2\\4\\00678"), RESC_FIXED_STR("\2\4\6"
+ "78"),
+ RESC_FIXED_STR("\\2\\4\\689"), RESC_FIXED_STR("\2\4\689"),
+
+ RESC_FIXED_STR("\\\"Hello\\\", Alice said."), RESC_FIXED_STR("\"Hello\", Alice said."),
+ RESC_FIXED_STR("Slash\\\\dash!"), RESC_FIXED_STR("Slash\\dash!"),
+ RESC_FIXED_STR("There\\nare\\r\\nnewlines."), RESC_FIXED_STR("There\nare\r\nnewlines."),
+ RESC_FIXED_STR("There\\tare\\ttabs."), RESC_FIXED_STR("There\tare\ttabs.")};
+#undef RESC_FIXED_STR
+
+// Round-trip and spot checks for NEscJ::EscapeJ and UnescapeC.
+Y_UNIT_TEST_SUITE(TRelaxedEscaperTest) {
+ Y_UNIT_TEST(TestEscaper) {
+ using namespace NEscJ;
+ // Every table pair is checked in both directions: source -> escaped and escaped -> source.
+ for (size_t i = 0; i < Y_ARRAY_SIZE(CommonTestData); i += 2) {
+ TString expected(CommonTestData[i].data(), CommonTestData[i].size());
+ TString source(CommonTestData[i + 1].data(), CommonTestData[i + 1].size());
+ TString actual(EscapeJ<false>(source));
+ TString actual2(UnescapeC(expected));
+
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ UNIT_ASSERT_VALUES_EQUAL(source, actual2);
+ }
+
+ // Spot checks: tounicode output form, the `safe` and `unsafe` character sets, and quoting.
+ UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\\x17\\n\xAB", EscapeJ<false>("http://ya.ru/\x17\n\xab"));
+ TString s = EscapeJ<false, true>("http://ya.ru/\x17\n\xab\xff");
+ UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\\u0017\\n\xAB\\xFF", s);
+ UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\\x17\n\xAB", EscapeJ<false>("http://ya.ru/\x17\n\xab", "\n"));
+ UNIT_ASSERT_VALUES_EQUAL("http:\\x2F\\x2Fya.ru\\x2F\\x17\n\xAB'", EscapeJ<false>("http://ya.ru/\x17\n\xab'", "\n'", "/"));
+ UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\x17\n\xab", UnescapeC("http:\\x2F\\x2Fya.ru\\x2F\\x17\n\xAB"));
+ UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\x17\n\xab", UnescapeC("http://ya.ru/\\x17\\n\xAB"));
+ UNIT_ASSERT_VALUES_EQUAL("h", EscapeJ<false>("h"));
+ UNIT_ASSERT_VALUES_EQUAL("\"h\"", EscapeJ<true>("h"));
+ UNIT_ASSERT_VALUES_EQUAL("h", UnescapeC("h"));
+ UNIT_ASSERT_VALUES_EQUAL("\\xFF", EscapeJ<false>("\xFF"));
+ UNIT_ASSERT_VALUES_EQUAL("\"\\xFF\"", EscapeJ<true>("\xFF"));
+ UNIT_ASSERT_VALUES_EQUAL("\xFF", UnescapeC("\\xFF"));
+
+ // 0xFF followed by a hex digit ('f') is expected in octal form, while a
+ // following non-hex character ('g') keeps the \xFF form.
+ UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeJ<false>("\xff"
+ "f"));
+ UNIT_ASSERT_VALUES_EQUAL("\xff"
+ "f",
+ UnescapeC("\\377f"));
+ UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeJ<false>("\xff"
+ "g"));
+ UNIT_ASSERT_VALUES_EQUAL("\xff"
+ "g",
+ UnescapeC("\\xFFg"));
+ }
+}
diff --git a/library/cpp/string_utils/relaxed_escaper/ut/ya.make b/library/cpp/string_utils/relaxed_escaper/ut/ya.make
new file mode 100644
index 0000000000..7ebd393c48
--- /dev/null
+++ b/library/cpp/string_utils/relaxed_escaper/ut/ya.make
@@ -0,0 +1,9 @@
+UNITTEST_FOR(library/cpp/string_utils/relaxed_escaper)
+
+OWNER(velavokr)
+
+SRCS(
+ relaxed_escaper_ut.cpp
+)
+
+END()
diff --git a/library/cpp/string_utils/relaxed_escaper/ya.make b/library/cpp/string_utils/relaxed_escaper/ya.make
new file mode 100644
index 0000000000..3f0fa5bc07
--- /dev/null
+++ b/library/cpp/string_utils/relaxed_escaper/ya.make
@@ -0,0 +1,9 @@
+LIBRARY()
+
+OWNER(velavokr)
+
+SRCS(
+ relaxed_escaper.cpp
+)
+
+END()
diff --git a/library/cpp/string_utils/scan/scan.cpp b/library/cpp/string_utils/scan/scan.cpp
new file mode 100644
index 0000000000..fbc1fdf08f
--- /dev/null
+++ b/library/cpp/string_utils/scan/scan.cpp
@@ -0,0 +1 @@
+#include "scan.h"
diff --git a/library/cpp/string_utils/scan/scan.h b/library/cpp/string_utils/scan/scan.h
new file mode 100644
index 0000000000..703db54321
--- /dev/null
+++ b/library/cpp/string_utils/scan/scan.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+
+// Splits `s` into tokens on `sep`, splits every non-empty token into key and value
+// on `sepKeyVal`, and calls f(key, value) for each pair.
+// Empty tokens (two separators in a row) are skipped. When `addAll` is false, a pair is
+// reported only if the value is still "inited" after the key was taken from it
+// (presumably: only if the token contained `sepKeyVal` - confirm against TStringBuf::NextTok).
+template <bool addAll, char sep, char sepKeyVal, class F>
+static inline void ScanKeyValue(TStringBuf s, F&& f) {
+    while (!s.empty()) {
+        TStringBuf value = s.NextTok(sep);
+
+        if (!value.empty()) { // an empty token is the "&&" case
+            TStringBuf key = value.NextTok(sepKeyVal);
+
+            if (addAll || value.IsInited()) {
+                f(key, value); // includes empty keys
+            }
+        }
+    }
+}
diff --git a/library/cpp/string_utils/scan/ya.make b/library/cpp/string_utils/scan/ya.make
new file mode 100644
index 0000000000..2faae86b09
--- /dev/null
+++ b/library/cpp/string_utils/scan/ya.make
@@ -0,0 +1,11 @@
+OWNER(
+ g:util
+)
+
+LIBRARY()
+
+SRCS(
+ scan.cpp
+)
+
+END()
diff --git a/library/cpp/string_utils/url/url.cpp b/library/cpp/string_utils/url/url.cpp
new file mode 100644
index 0000000000..85f4ac5d69
--- /dev/null
+++ b/library/cpp/string_utils/url/url.cpp
@@ -0,0 +1,421 @@
+#include "url.h"
+
+#include <util/string/cast.h>
+#include <util/string/util.h>
+#include <util/string/cstriter.h>
+#include <util/string/ascii.h>
+#include <util/string/strip.h>
+
+#include <util/charset/unidata.h> // for ToLower
+#include <util/system/defaults.h>
+#include <util/generic/algorithm.h>
+#include <util/generic/hash_set.h>
+#include <util/generic/yexception.h>
+#include <util/generic/singleton.h>
+
+#include <cstdlib>
+
+namespace {
+    // Bounds policy for input whose length is not known: every size request is accepted.
+    struct TUncheckedSize {
+        static bool Has(size_t) {
+            return true;
+        }
+    };
+
+    // Bounds policy for input of known length: accepts requests up to that length.
+    struct TKnownSize {
+        size_t MySize;
+
+        explicit TKnownSize(size_t sz)
+            : MySize(sz)
+        {
+        }
+
+        bool Has(size_t sz) const {
+            return MySize >= sz;
+        }
+    };
+
+    // Compares the first n characters of s1 (lower-cased via ToLower) with s2 (taken as is).
+    // Returns 0 when equal, otherwise -1 / 1 by the first differing position.
+    template <typename TChar1, typename TChar2>
+    int Compare1Case2(const TChar1* s1, const TChar2* s2, size_t n) {
+        for (size_t idx = 0; idx != n; ++idx) {
+            const TChar1 lowered = (TChar1)ToLower(s1[idx]);
+            if (lowered == s2[idx]) {
+                continue;
+            }
+            return lowered < s2[idx] ? -1 : 1;
+        }
+        return 0;
+    }
+
+    // Returns 7 for a leading "http://", 8 for a leading "https://" (unless ignorehttps),
+    // and 0 otherwise. `urlSize` decides whether that many characters may be inspected.
+    template <typename TChar, typename TBounds>
+    inline size_t GetHttpPrefixSizeImpl(const TChar* url, const TBounds& urlSize, bool ignorehttps) {
+        const TChar httpPrefix[] = {'h', 't', 't', 'p', ':', '/', '/', 0};
+        const TChar httpsPrefix[] = {'h', 't', 't', 'p', 's', ':', '/', '/', 0};
+        const size_t httpLen = 7;
+        const size_t httpsLen = 8;
+
+        const bool isHttp = urlSize.Has(httpLen) && Compare1Case2(url, httpPrefix, httpLen) == 0;
+        if (isHttp) {
+            return httpLen;
+        }
+        if (ignorehttps) {
+            return 0;
+        }
+        const bool isHttps = urlSize.Has(httpsLen) && Compare1Case2(url, httpsPrefix, httpsLen) == 0;
+        return isHttps ? httpsLen : 0;
+    }
+
+    // Returns `url` without its http(s) prefix, or `url` itself when there is none.
+    template <typename T>
+    inline T CutHttpPrefixImpl(const T& url, bool ignorehttps) {
+        const size_t skip = GetHttpPrefixSizeImpl<typename T::char_type>(url.data(), TKnownSize(url.size()), ignorehttps);
+        if (!skip) {
+            return url;
+        }
+        return url.substr(skip);
+    }
+}
+
+namespace NUrl {
+
+    // Splits `url` into the leading scheme+host+port part (nothing trimmed) and
+    // the remainder that follows it. Both views point into `url`.
+    TSplitUrlToHostAndPathResult SplitUrlToHostAndPath(const TStringBuf url) {
+        TSplitUrlToHostAndPathResult result;
+        result.host = GetSchemeHostAndPort(url, /*trimHttp=*/false, /*trimDefaultPort=*/false);
+        result.path = url;
+        result.path.SkipPrefix(result.host);
+        return result;
+    }
+
+} // namespace NUrl
+
+// Returns the length of a leading "http://" (7) or, unless `ignorehttps` is set,
+// "https://" (8); returns 0 when neither prefix is present. The URL side of the
+// comparison is lower-cased before comparing.
+// The pointer overloads pass TUncheckedSize, which accepts any length; the comparison
+// stops at the first mismatching character - presumably the input must be NUL-terminated.
+size_t GetHttpPrefixSize(const char* url, bool ignorehttps) noexcept {
+ return GetHttpPrefixSizeImpl<char>(url, TUncheckedSize(), ignorehttps);
+}
+
+// Wide-character variant of the pointer overload above.
+size_t GetHttpPrefixSize(const wchar16* url, bool ignorehttps) noexcept {
+ return GetHttpPrefixSizeImpl<wchar16>(url, TUncheckedSize(), ignorehttps);
+}
+
+// Length-aware overload: a prefix is only tested when the buffer is at least that long.
+size_t GetHttpPrefixSize(const TStringBuf url, bool ignorehttps) noexcept {
+ return GetHttpPrefixSizeImpl<char>(url.data(), TKnownSize(url.size()), ignorehttps);
+}
+
+// Wide-character variant of the length-aware overload.
+size_t GetHttpPrefixSize(const TWtringBuf url, bool ignorehttps) noexcept {
+ return GetHttpPrefixSizeImpl<wchar16>(url.data(), TKnownSize(url.size()), ignorehttps);
+}
+
+// Returns `url` without a leading "http://" or (unless `ignorehttps`) "https://".
+// The URL is returned unchanged when no such prefix is found.
+TStringBuf CutHttpPrefix(const TStringBuf url, bool ignorehttps) noexcept {
+ return CutHttpPrefixImpl(url, ignorehttps);
+}
+
+// Wide-character variant of CutHttpPrefix.
+TWtringBuf CutHttpPrefix(const TWtringBuf url, bool ignorehttps) noexcept {
+ return CutHttpPrefixImpl(url, ignorehttps);
+}
+
+// Returns the length of a leading "<scheme>://" prefix, or 0 if `url` has none.
+// The scheme ends at the first character from the delimiter set below; that character
+// and the two following ones must spell "://".
+size_t GetSchemePrefixSize(const TStringBuf url) noexcept {
+    struct TDelim: public str_spn {
+        inline TDelim()
+            : str_spn("!-/:-@[-`{|}", true)
+        {
+        }
+    };
+
+    const auto& delim = *Singleton<TDelim>();
+    const char* n = delim.brk(url.data(), url.end());
+
+    // Compare distances rather than computing `n + 2`: when no delimiter is found
+    // n == url.end(), and forming a pointer beyond one-past-the-end is undefined behaviour.
+    if (url.end() - n < 3 || n[0] != ':' || n[1] != '/' || n[2] != '/') {
+        return 0;
+    }
+
+    return n + 3 - url.begin();
+}
+
+// Returns the leading "<scheme>://" part of `url`, or an empty buffer when
+// GetSchemePrefixSize() reports no scheme.
+TStringBuf GetSchemePrefix(const TStringBuf url) noexcept {
+ return url.Head(GetSchemePrefixSize(url));
+}
+
+// Returns `url` with its leading "<scheme>://" removed; unchanged when
+// GetSchemePrefixSize() reports no scheme.
+TStringBuf CutSchemePrefix(const TStringBuf url) noexcept {
+ return url.Tail(GetSchemePrefixSize(url));
+}
+
+// Returns the host part of `url` after removing a leading http(s) prefix.
+// The host ends at the first of "/;?#"; when KeepPort is false ':' ends it as well,
+// so the port is dropped.
+template <bool KeepPort>
+static inline TStringBuf GetHostAndPortImpl(const TStringBuf url) {
+    struct TDelim: public str_spn {
+        inline TDelim()
+            : str_spn(KeepPort ? "/;?#" : "/:;?#")
+        {
+        }
+    };
+
+    TStringBuf rest = url;
+    rest.Skip(GetHttpPrefixSize(url));
+
+    const auto& hostTerminators = *Singleton<TDelim>();
+    const char* const stop = hostTerminators.brk(rest.begin(), rest.end());
+
+    if (stop == rest.end()) {
+        // No terminator: everything after the prefix is the host.
+        return rest;
+    }
+
+    return rest.substr(0, stop - rest.data());
+}
+
+// Host without http(s) prefix and without port: the host ends at the first of "/:;?#".
+TStringBuf GetHost(const TStringBuf url) noexcept {
+ return GetHostAndPortImpl<false>(url);
+}
+
+// Host without http(s) prefix, port kept: the host ends at the first of "/;?#".
+TStringBuf GetHostAndPort(const TStringBuf url) noexcept {
+ return GetHostAndPortImpl<true>(url);
+}
+
+// Returns the "<scheme>://host[:port]" part of `url` as a view into `url`.
+//
+// trimHttp        - drop the scheme when it is "http://" or absent.
+// trimDefaultPort - drop ":80" for http (or no scheme) and ":443" for https.
+//
+// The port separator is searched from the right so that the colons inside a bracketed
+// IPv6 literal ("[::1]:80") are not mistaken for it; this matches the right-to-left
+// split used by TryGetSchemeHostAndPort().
+TStringBuf GetSchemeHostAndPort(const TStringBuf url, bool trimHttp, bool trimDefaultPort) noexcept {
+    const size_t schemeSize = GetSchemePrefixSize(url);
+    const TStringBuf scheme = url.Head(schemeSize);
+
+    const bool isHttp = (schemeSize == 0 || scheme == TStringBuf("http://"));
+
+    TStringBuf hostAndPort = GetHostAndPort(url.Tail(schemeSize));
+
+    if (trimDefaultPort) {
+        const size_t pos = hostAndPort.rfind(':');
+        if (pos != TStringBuf::npos) {
+            const bool isHttps = (scheme == TStringBuf("https://"));
+
+            const TStringBuf port = hostAndPort.Tail(pos + 1);
+            if ((isHttp && port == TStringBuf("80")) || (isHttps && port == TStringBuf("443"))) {
+                // trimming default port
+                hostAndPort = hostAndPort.Head(pos);
+            }
+        }
+    }
+
+    if (isHttp && trimHttp) {
+        return hostAndPort;
+    }
+
+    // scheme and hostAndPort are adjacent views into `url`, so one range covers both.
+    return TStringBuf(scheme.begin(), hostAndPort.end());
+}
+
+// Out-parameter wrapper over NUrl::SplitUrlToHostAndPath; `host` and `path` are views into `url`.
+void SplitUrlToHostAndPath(const TStringBuf url, TStringBuf& host, TStringBuf& path) {
+ auto [hostBuf, pathBuf] = NUrl::SplitUrlToHostAndPath(url);
+ host = hostBuf;
+ path = pathBuf;
+}
+
+// Same as above, but the results are assigned to owning TString objects.
+void SplitUrlToHostAndPath(const TStringBuf url, TString& host, TString& path) {
+ auto [hostBuf, pathBuf] = NUrl::SplitUrlToHostAndPath(url);
+ host = hostBuf;
+ path = pathBuf;
+}
+
+// Splits `url` at the first '#' into URL and fragment, then splits the URL part at the
+// first '?' into sanitizedUrl and query. A missing component is returned as "".
+void SeparateUrlFromQueryAndFragment(const TStringBuf url, TStringBuf& sanitizedUrl, TStringBuf& query, TStringBuf& fragment) {
+    TStringBuf beforeHash;
+    const bool hasFragment = url.TrySplit('#', beforeHash, fragment);
+    if (!hasFragment) {
+        fragment = "";
+        beforeHash = url;
+    }
+
+    const bool hasQuery = beforeHash.TrySplit('?', sanitizedUrl, query);
+    if (!hasQuery) {
+        query = "";
+        sanitizedUrl = beforeHash;
+    }
+}
+
+// Extracts scheme, host and port from `url`.
+// `scheme` is written only when the URL has one. With an explicit port the function
+// returns whether it parsed into ui16. Without one, `port` gets 443 / 80 for the
+// "https://" / "http://" schemes and is otherwise left untouched.
+bool TryGetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port) {
+    const size_t schemeLen = GetSchemePrefixSize(url);
+    if (schemeLen != 0) {
+        scheme = url.Head(schemeLen);
+    }
+
+    TStringBuf portText;
+    TStringBuf authority = GetHostAndPort(url.Tail(schemeLen));
+
+    // A trailing ']' means a bracketed host with no port after it, so no split is attempted.
+    const bool hasExplicitPort = authority && authority.back() != ']' && authority.TryRSplit(':', host, portText);
+    if (hasExplicitPort) {
+        // URL has port
+        return TryFromString(portText, port);
+    }
+
+    host = authority;
+    if (scheme == TStringBuf("https://")) {
+        port = 443;
+    } else if (scheme == TStringBuf("http://")) {
+        port = 80;
+    }
+    return true;
+}
+
+// Throwing wrapper over TryGetSchemeHostAndPort(): Y_ENSURE fails with a message
+// containing `url` when the port number cannot be parsed.
+void GetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port) {
+ bool isOk = TryGetSchemeHostAndPort(url, scheme, host, port);
+ Y_ENSURE(isOk, "cannot parse port number from URL: " << url);
+}
+
+// Removes any "<scheme>://" prefix and then returns the host without port.
+TStringBuf GetOnlyHost(const TStringBuf url) noexcept {
+ return GetHost(CutSchemePrefix(url));
+}
+
+// Returns everything from the first '/' found after the http(s) prefix, or "/" when
+// there is no such slash. With `trimFragment` the result is cut before the first '#'.
+TStringBuf GetPathAndQuery(const TStringBuf url, bool trimFragment) noexcept {
+    const size_t slashPos = url.find('/', GetHttpPrefixSize(url));
+
+    TStringBuf ignoredHost;
+    TStringBuf pathAndRest;
+    if (!url.TrySplitAt(slashPos, ignoredHost, pathAndRest)) {
+        return "/";
+    }
+
+    if (trimFragment) {
+        return pathAndRest.Before('#');
+    }
+    return pathAndRest;
+}
+
+// this strange creature returns 2nd level domain, possibly with port
+// Walks backwards from the last character counting '.'; at the second dot the result
+// starts right after it. If the walk reaches the first character first, the whole
+// host is returned.
+// NOTE(review): the loop ends before host[0] is examined, so a leading '.' is never counted.
+TStringBuf GetDomain(const TStringBuf host) noexcept {
+ // Empty host: start at data() so the loop body never runs.
+ const char* c = !host ? host.data() : host.end() - 1;
+ for (bool wasPoint = false; c != host.data(); --c) {
+ if (*c == '.') {
+ if (wasPoint) {
+ // Second dot from the right: the result begins just after it.
+ ++c;
+ break;
+ }
+ wasPoint = true;
+ }
+ }
+ return TStringBuf(c, host.end());
+}
+
+// Returns the last `level` dot-separated labels of `host`, e.g. ("www.ya.ru", 2) -> "ya.ru".
+// If `host` has fewer than `level` dots, `host` is returned unchanged.
+// NOTE(review): relies on rfind('.', pos) searching strictly before `pos`
+// (otherwise the same dot would be found again) - confirm against TStringBuf::rfind.
+TStringBuf GetParentDomain(const TStringBuf host, size_t level) noexcept {
+ size_t pos = host.size();
+ for (size_t i = 0; i < level; ++i) {
+ pos = host.rfind('.', pos);
+ if (pos == TString::npos)
+ return host;
+ }
+ // With level == 0, pos is still host.size(); the unit tests expect "" in that case.
+ return host.SubStr(pos + 1);
+}
+
+// Returns the last dot-separated label of `host` (GetParentDomain with level 1).
+TStringBuf GetZone(const TStringBuf host) noexcept {
+ return GetParentDomain(host, 1);
+}
+
+// Removes a leading "www." (case-insensitive); returns `url` unchanged otherwise.
+TStringBuf CutWWWPrefix(const TStringBuf url) noexcept {
+ // The size check comes first so url[3] is only read when it exists.
+ if (url.size() >= 4 && url[3] == '.' && !strnicmp(url.data(), "www", 3))
+ return url.substr(4);
+ return url;
+}
+
+// Removes a prefix made of one or more 'w'/'W', then optional ASCII digits, then '.'
+// (e.g. "www2.ya.ru" -> "ya.ru"). Returns `url` unchanged when the pattern does not match.
+// Presumably StripRangeBegin advances `it` while the predicate holds - confirm in util/string/strip.h.
+TStringBuf CutWWWNumberedPrefix(const TStringBuf url) noexcept {
+ auto it = url.begin();
+
+ StripRangeBegin(it, url.end(), [](auto& it){ return *it == 'w' || *it == 'W'; });
+ if (it == url.begin()) {
+ // No leading 'w' at all.
+ return url;
+ }
+
+ StripRangeBegin(it, url.end(), [](auto& it){ return IsAsciiDigit(*it); });
+ if (it == url.end()) {
+ // Nothing left after the w's and digits, so there is no '.' to cut at.
+ return url;
+ }
+
+ if (*it++ == '.') {
+ return url.Tail(it - url.begin());
+ }
+
+ return url;
+}
+
+// Removes a leading "m." or "M."; returns `url` unchanged otherwise.
+TStringBuf CutMPrefix(const TStringBuf url) noexcept {
+    const bool hasMobilePrefix = url.size() >= 2 && (url[0] == 'm' || url[0] == 'M') && url[1] == '.';
+    if (!hasMobilePrefix) {
+        return url;
+    }
+    return url.substr(2);
+}
+
+// True for characters accepted inside a scheme name: ASCII letters and digits only.
+// NOTE(review): RFC 3986 also allows '+', '-' and '.' in schemes; they are rejected here.
+static inline bool IsSchemeChar(char c) noexcept {
+ return IsAsciiAlnum(c); //what about '+' ?..
+}
+
+// True when `url` contains "://" and everything before its first occurrence consists
+// of scheme characters only (an empty scheme also passes).
+static bool HasPrefix(const TStringBuf url) noexcept {
+    TStringBuf candidate;
+    TStringBuf rest;
+    const bool hasSeparator = url.TrySplit(TStringBuf("://"), candidate, rest);
+    return hasSeparator && AllOf(candidate, IsSchemeChar);
+}
+
+// Same as AddSchemePrefix(url, "http").
+TString AddSchemePrefix(const TString& url) {
+ return AddSchemePrefix(url, TStringBuf("http"));
+}
+
+// Returns `url` unchanged if HasPrefix() finds a scheme, otherwise "<scheme>://<url>".
+TString AddSchemePrefix(const TString& url, TStringBuf scheme) {
+ if (HasPrefix(url)) {
+ return url;
+ }
+
+ return TString::Join(scheme, TStringBuf("://"), url);
+}
+
+// Converts the two hex digits at x[0], x[1] into their byte value.
+// Returns -1 when either character is not an ASCII hex digit.
+static inline int x2c(unsigned char* x) {
+    if (!IsAsciiHex(x[0]) || !IsAsciiHex(x[1]))
+        return -1;
+
+    // Value of one hex digit; `& 0xdf` folds lower-case letters onto upper-case.
+    const auto nibble = [](unsigned char c) -> int {
+        return c >= 'A' ? ((c & 0xdf) - 'A') + 10 : (c - '0');
+    };
+
+    return nibble(x[0]) * 16 + nibble(x[1]);
+}
+
+// Decodes %XX escapes in the NUL-terminated string `str` in place.
+// An escape that is malformed or decodes to 0 is replaced by the character '0'.
+// Returns the number of characters removed, i.e. how much shorter the string became.
+//
+// A '%' in the last or second-to-last position has fewer than two characters after it.
+// Only the characters that actually exist are consumed, so the scan never steps over
+// the terminating NUL.
+static inline int Unescape(char* str) {
+    int dlen = 0;
+    if ((str = strchr(str, '%')) == nullptr)
+        return dlen;
+
+    char* to = str;
+    for (char* from = str; *from; ++from, ++to) {
+        if ((*to = *from) != '%')
+            continue;
+
+        // x2c stops at the first non-hex character, so it never reads past the NUL.
+        const int c = x2c((unsigned char*)from + 1);
+        *to = char((c > 0) ? c : '0');
+
+        // Consume up to two escape characters, stopping at the terminator.
+        for (int skipped = 0; skipped < 2 && from[1] != 0; ++skipped) {
+            ++from;
+            ++dlen;
+        }
+    }
+    *to = 0; /* terminate it at the new length */
+    return dlen;
+}
+
+// Writes a normalized copy of `source` into `dest` (capacity `dest_size`, including the
+// terminating NUL): at most dest_size - 1 characters are copied, %XX escapes are decoded
+// and the result is lower-cased. An empty source or one starting with '?' yields "/".
+// Returns the length of the result (for the "/" case, the value returned by strlcpy).
+// With dest_size == 0 nothing is written and 0 is returned for a non-empty source.
+size_t NormalizeUrlName(char* dest, const TStringBuf source, size_t dest_size) {
+    if (source.empty() || source[0] == '?')
+        return strlcpy(dest, "/", dest_size);
+    // Without this guard `dest_size - 1` wraps around and memcpy overruns `dest`.
+    if (dest_size == 0)
+        return 0;
+    size_t len = Min(dest_size - 1, source.length());
+    memcpy(dest, source.data(), len);
+    dest[len] = 0;
+    len -= Unescape(dest);
+    strlwr(dest);
+    return len;
+}
+
+// Writes a normalized copy of `source` into `dest` (capacity `dest_size`, including the
+// terminating NUL): at most dest_size - 1 characters are copied, a trailing
+// ":<defport>" is removed and the result is lower-cased. Returns the resulting length.
+// With dest_size == 0 nothing is written and 0 is returned.
+size_t NormalizeHostName(char* dest, const TStringBuf source, size_t dest_size, ui16 defport) {
+    // Without this guard `dest_size - 1` wraps around and memcpy overruns `dest`.
+    if (dest_size == 0)
+        return 0;
+    size_t len = Min(dest_size - 1, source.length());
+    memcpy(dest, source.data(), len);
+    dest[len] = 0;
+
+    // Build ":<defport>"; a ui16 has at most 5 digits, which fits in buf.
+    char buf[8] = ":";
+    size_t buflen = 1 + ToString(defport, buf + 1, sizeof(buf) - 2);
+    buf[buflen] = '\0';
+
+    // Test the suffix itself: a first-occurrence search misses the trailing port when
+    // the same text appears earlier in the host (e.g. "[::80]:80").
+    const size_t textLen = strlen(dest);
+    if (textLen >= buflen && memcmp(dest + textLen - buflen, buf, buflen) == 0) {
+        len -= buflen;
+        dest[textLen - buflen] = 0;
+    }
+    strlwr(dest);
+    return len;
+}
+
+// Returns `str` without its last character when that character is '/'.
+TStringBuf RemoveFinalSlash(TStringBuf str) noexcept {
+ if (str.EndsWith('/')) {
+ str.Chop(1);
+ }
+ return str;
+}
+
+// Removes the "<scheme>://" prefix first and then a leading "www.".
+TStringBuf CutUrlPrefixes(TStringBuf url) noexcept {
+    return CutWWWPrefix(CutSchemePrefix(url));
+}
+
+// Tells whether the part of `url` after the first '/' (scheme removed beforehand) starts
+// with `token` as a whole path component: the token must be followed by '/', by '?',
+// or by the end of the URL.
+bool DoesUrlPathStartWithToken(TStringBuf url, const TStringBuf& token) noexcept {
+    url = CutSchemePrefix(url);
+    const TStringBuf noHostSuffix = url.After('/');
+    if (noHostSuffix == url) {
+        // no slash => no suffix with token info
+        return false;
+    }
+    if (!noHostSuffix.StartsWith(token)) {
+        return false;
+    }
+    if (noHostSuffix.length() <= token.length()) {
+        // nothing follows the token
+        return true;
+    }
+    // Only the character right after the token matters, so look at it directly
+    // instead of searching the rest of the string.
+    const char next = noHostSuffix[token.length()];
+    return next == '/' || next == '?';
+}
+
diff --git a/library/cpp/string_utils/url/url.h b/library/cpp/string_utils/url/url.h
new file mode 100644
index 0000000000..84137ccc57
--- /dev/null
+++ b/library/cpp/string_utils/url/url.h
@@ -0,0 +1,170 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+#include <util/generic/strbuf.h>
+
+namespace NUrl {
+
+ /**
+ * Result of SplitUrlToHostAndPath(); both members are views into the original URL.
+ * Example:
+ * auto [host, path] = SplitUrlToHostAndPath(url);
+ */
+ struct TSplitUrlToHostAndPathResult {
+ // Scheme, host and port part of the URL (nothing trimmed).
+ TStringBuf host;
+ // Remainder of the URL after `host`.
+ TStringBuf path;
+ };
+
+ /**
+ * Splits URL to host and path
+ *
+ * @param[in] url any URL
+ * @return parsed host and path
+ */
+ Y_PURE_FUNCTION
+ TSplitUrlToHostAndPathResult SplitUrlToHostAndPath(const TStringBuf url);
+
+} // namespace NUrl
+
+//! returns the length of a leading 'http://' (7) or 'https://' (8) prefix, 0 if there is none
+//! @note the prefix is matched case-insensitively
+//! @note the pointer overloads take no length; presumably the string must be NUL-terminated
+//! @param url URL to inspect
+//! @param ignorehttps if true, 'https://' is not recognized and 0 is returned for it
+Y_PURE_FUNCTION
+size_t GetHttpPrefixSize(const char* url, bool ignorehttps = false) noexcept;
+Y_PURE_FUNCTION
+size_t GetHttpPrefixSize(const wchar16* url, bool ignorehttps = false) noexcept;
+
+Y_PURE_FUNCTION
+size_t GetHttpPrefixSize(const TStringBuf url, bool ignorehttps = false) noexcept;
+
+Y_PURE_FUNCTION
+size_t GetHttpPrefixSize(const TWtringBuf url, bool ignorehttps = false) noexcept;
+
+/** BEWARE of TStringBuf! You can not use operator ~ or c_str() like in TString
+ !!!!!!!!!!!! */
+Y_PURE_FUNCTION
+size_t GetSchemePrefixSize(const TStringBuf url) noexcept;
+
+Y_PURE_FUNCTION
+TStringBuf GetSchemePrefix(const TStringBuf url) noexcept;
+
+//! removes protocol prefixes 'http://' and 'https://' from given URL
+//! @note if URL has no prefix or some other prefix the function does nothing
+//! @param url URL from which the prefix should be removed
+//! @param ignorehttps if true, leaves https://
+//! @return a new URL without protocol prefix
+Y_PURE_FUNCTION
+TStringBuf CutHttpPrefix(const TStringBuf url, bool ignorehttps = false) noexcept;
+
+Y_PURE_FUNCTION
+TWtringBuf CutHttpPrefix(const TWtringBuf url, bool ignorehttps = false) noexcept;
+
+Y_PURE_FUNCTION
+TStringBuf CutSchemePrefix(const TStringBuf url) noexcept;
+
+//! adds specified scheme prefix if URL has no scheme
+//! @note if URL has scheme prefix already the function returns unchanged URL
+TString AddSchemePrefix(const TString& url, const TStringBuf scheme);
+
+//! Same as `AddSchemePrefix(url, "http")`.
+TString AddSchemePrefix(const TString& url);
+
+//! returns the host of URL without 'http://' / 'https://' prefix and without port
+Y_PURE_FUNCTION
+TStringBuf GetHost(const TStringBuf url) noexcept;
+
+//! returns the host of URL without 'http://' / 'https://' prefix, port (if any) is kept
+Y_PURE_FUNCTION
+TStringBuf GetHostAndPort(const TStringBuf url) noexcept;
+
+//! returns scheme, host and port of URL
+//! @param trimHttp if true, the scheme is dropped when it is 'http://' or absent
+//! @param trimDefaultPort if true, ':80' (http or no scheme) and ':443' (https) are dropped
+Y_PURE_FUNCTION
+TStringBuf GetSchemeHostAndPort(const TStringBuf url, bool trimHttp = true, bool trimDefaultPort = true) noexcept;
+
+/**
+ * Splits URL to host and path
+ *
+ * @param[in] url any URL
+ * @param[out] host parsed host
+ * @param[out] path parsed path
+ */
+void SplitUrlToHostAndPath(const TStringBuf url, TStringBuf& host, TStringBuf& path);
+void SplitUrlToHostAndPath(const TStringBuf url, TString& host, TString& path);
+
+/**
+ * Separates URL into url prefix, query (aka cgi params list), and fragment (aka part after #)
+ *
+ * @param[in] url any URL
+ * @param[out] sanitizedUrl parsed URL without query and fragment parts
+ * @param[out] query parsed query
+ * @param[out] fragment parsed fragment
+ */
+void SeparateUrlFromQueryAndFragment(const TStringBuf url, TStringBuf& sanitizedUrl, TStringBuf& query, TStringBuf& fragment);
+
+/**
+ * Extracts scheme, host and port from URL.
+ *
+ * Port will be parsed from URL with checks against ui16 overflow. If URL doesn't
+ * contain port it will be determined by one of the known schemes (currently
+ * https:// and http:// only).
+ * Given parameters will not be modified if URL has no appropriate components.
+ *
+ * @param[in] url any URL
+ * @param[out] scheme URL scheme
+ * @param[out] host host name
+ * @param[out] port parsed port number
+ * @return false if present port number cannot be parsed into ui16
+ * true otherwise.
+ */
+bool TryGetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port);
+
+/**
+ * Extracts scheme, host and port from URL.
+ *
+ * This function performs the same actions as TryGetSchemeHostAndPort(), but
+ * throws yexception if the port number cannot be parsed.
+ *
+ * @param[in] url any URL
+ * @param[out] scheme URL scheme
+ * @param[out] host host name
+ * @param[out] port parsed port number
+ * @throws yexception if present port number cannot be parsed into ui16.
+ */
+void GetSchemeHostAndPort(const TStringBuf url, TStringBuf& scheme, TStringBuf& host, ui16& port);
+
+Y_PURE_FUNCTION
+TStringBuf GetPathAndQuery(const TStringBuf url, bool trimFragment = true) noexcept;
+/**
+ * Extracts host from url and cuts http(https) protocol prefix and port if any.
+ * @param[in] url any URL
+ * @return host without port and http(https) prefix.
+ */
+Y_PURE_FUNCTION
+TStringBuf GetOnlyHost(const TStringBuf url) noexcept;
+
+Y_PURE_FUNCTION
+TStringBuf GetParentDomain(const TStringBuf host, size_t level) noexcept; // ("www.ya.ru", 2) -> "ya.ru"
+
+Y_PURE_FUNCTION
+TStringBuf GetZone(const TStringBuf host) noexcept;
+
+Y_PURE_FUNCTION
+TStringBuf CutWWWPrefix(const TStringBuf url) noexcept;
+
+Y_PURE_FUNCTION
+TStringBuf CutWWWNumberedPrefix(const TStringBuf url) noexcept;
+
+/**
+ * Cuts 'm.' prefix from url if and only if the url starts with it
+ * Example: 'm.some-domain.com' -> 'some-domain.com'.
+ * 'http://m.some-domain.com' is not changed
+ *
+ * @param[in] url any URL
+ * @return url without 'm.' or 'M.' prefix.
+ */
+Y_PURE_FUNCTION
+TStringBuf CutMPrefix(const TStringBuf url) noexcept;
+
+Y_PURE_FUNCTION
+TStringBuf GetDomain(const TStringBuf host) noexcept; // should not be used
+
+//! copies at most dest_size - 1 characters of source into dest, decodes %XX escapes and
+//! lower-cases the result; an empty source or one starting with '?' yields "/"
+//! @return length of the resulting string
+size_t NormalizeUrlName(char* dest, const TStringBuf source, size_t dest_size);
+//! copies at most dest_size - 1 characters of source into dest, removes a trailing
+//! ":<defport>" and lower-cases the result
+//! @return length of the resulting string
+size_t NormalizeHostName(char* dest, const TStringBuf source, size_t dest_size, ui16 defport = 80);
+
+Y_PURE_FUNCTION
+TStringBuf RemoveFinalSlash(TStringBuf str) noexcept;
+
+TStringBuf CutUrlPrefixes(TStringBuf url) noexcept;
+bool DoesUrlPathStartWithToken(TStringBuf url, const TStringBuf& token) noexcept;
+
diff --git a/library/cpp/string_utils/url/url_ut.cpp b/library/cpp/string_utils/url/url_ut.cpp
new file mode 100644
index 0000000000..1588013893
--- /dev/null
+++ b/library/cpp/string_utils/url/url_ut.cpp
@@ -0,0 +1,281 @@
+#include "url.h"
+
+#include <util/string/cast.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(TUtilUrlTest) {
+ // GetHost drops the port, GetHostAndPort keeps it; both stop at the path or query.
+ Y_UNIT_TEST(TestGetHostAndGetHostAndPort) {
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru/bebe"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru/bebe"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru:8080"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru:8080", GetHostAndPort("ya.ru:8080"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru/bebe:8080"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHostAndPort("ya.ru/bebe:8080"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("ya.ru:8080/bebe"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetHost("https://ya.ru:8080/bebe"));
+ UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetHost("www.ya.ru:8080/bebe"));
+ UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetHost("https://www.ya.ru:8080/bebe"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru:8080", GetHostAndPort("ya.ru:8080/bebe"));
+ // irl RFC3986 sometimes gets ignored
+ UNIT_ASSERT_VALUES_EQUAL("pravda-kmv.ru", GetHost("pravda-kmv.ru?page=news&id=6973"));
+ UNIT_ASSERT_VALUES_EQUAL("pravda-kmv.ru", GetHostAndPort("pravda-kmv.ru?page=news&id=6973"));
+ // check simple string
+ UNIT_ASSERT_VALUES_EQUAL("some_blender_url", GetHost("some_blender_url"));
+ UNIT_ASSERT_VALUES_EQUAL("", GetHost(""));
+ }
+
+ // A missing path yields "/"; the fragment is dropped unless trimFragment is false.
+ Y_UNIT_TEST(TestGetPathAndQuery) {
+ UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org"));
+ UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org/"));
+ UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org:8080"));
+ UNIT_ASSERT_VALUES_EQUAL("/index.php?123/", GetPathAndQuery("ru.wikipedia.org/index.php?123/"));
+ UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("http://ru.wikipedia.org:8080"));
+ UNIT_ASSERT_VALUES_EQUAL("/index.php?123/", GetPathAndQuery("https://ru.wikipedia.org/index.php?123/"));
+ UNIT_ASSERT_VALUES_EQUAL("/", GetPathAndQuery("ru.wikipedia.org/#comment"));
+ UNIT_ASSERT_VALUES_EQUAL("/?1", GetPathAndQuery("ru.wikipedia.org/?1#comment"));
+ UNIT_ASSERT_VALUES_EQUAL("/?1#comment", GetPathAndQuery("ru.wikipedia.org/?1#comment", false));
+ }
+
+ // GetDomain keeps the last two labels; shorter hosts come back unchanged.
+ Y_UNIT_TEST(TestGetDomain) {
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("www.ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("a.b.ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetDomain("ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya", GetDomain("ya"));
+ UNIT_ASSERT_VALUES_EQUAL("", GetDomain(""));
+ }
+
+ // Level N keeps the last N labels; level 0 gives "", and a level above the label count gives the whole host.
+ Y_UNIT_TEST(TestGetParentDomain) {
+ UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("www.ya.ru", 0));
+ UNIT_ASSERT_VALUES_EQUAL("ru", GetParentDomain("www.ya.ru", 1));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", GetParentDomain("www.ya.ru", 2));
+ UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetParentDomain("www.ya.ru", 3));
+ UNIT_ASSERT_VALUES_EQUAL("www.ya.ru", GetParentDomain("www.ya.ru", 4));
+ UNIT_ASSERT_VALUES_EQUAL("com", GetParentDomain("ya.com", 1));
+ UNIT_ASSERT_VALUES_EQUAL("ya.com", GetParentDomain("ya.com", 2));
+ UNIT_ASSERT_VALUES_EQUAL("RU", GetParentDomain("RU", 1));
+ UNIT_ASSERT_VALUES_EQUAL("RU", GetParentDomain("RU", 2));
+ UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("", 0));
+ UNIT_ASSERT_VALUES_EQUAL("", GetParentDomain("", 1));
+ }
+
+ // GetZone returns the last label with its case preserved.
+ Y_UNIT_TEST(TestGetZone) {
+ UNIT_ASSERT_VALUES_EQUAL("ru", GetZone("www.ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("com", GetZone("ya.com"));
+ UNIT_ASSERT_VALUES_EQUAL("RU", GetZone("RU"));
+ UNIT_ASSERT_VALUES_EQUAL("FHFBN", GetZone("ya.FHFBN"));
+ UNIT_ASSERT_VALUES_EQUAL("", GetZone(""));
+ }
+
+ // A scheme is added only when the URL has none; the default scheme is http.
+ Y_UNIT_TEST(TestAddSchemePrefix) {
+ UNIT_ASSERT_VALUES_EQUAL("http://yandex.ru", AddSchemePrefix("yandex.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("http://yandex.ru", AddSchemePrefix("http://yandex.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("https://yandex.ru", AddSchemePrefix("https://yandex.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("file://yandex.ru", AddSchemePrefix("file://yandex.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("ftp://ya.ru", AddSchemePrefix("ya.ru", "ftp"));
+ }
+
+ // GetSchemePrefix only recognizes "://" when it directly follows the leading scheme name.
+ Y_UNIT_TEST(TestSchemeGet) {
+ UNIT_ASSERT_VALUES_EQUAL("http://", GetSchemePrefix("http://ya.ru/bebe"));
+ UNIT_ASSERT_VALUES_EQUAL("", GetSchemePrefix("yaru"));
+ UNIT_ASSERT_VALUES_EQUAL("yaru://", GetSchemePrefix("yaru://ya.ru://zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("", GetSchemePrefix("ya.ru://zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("ftp://", GetSchemePrefix("ftp://ya.ru://zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("https://", GetSchemePrefix("https://")); // is that right?
+ }
+
+ // CutSchemePrefix removes any scheme; CutHttpPrefix removes only http(s), honouring ignorehttps.
+ Y_UNIT_TEST(TestSchemeCut) {
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru/bebe", CutSchemePrefix("http://ya.ru/bebe"));
+ UNIT_ASSERT_VALUES_EQUAL("yaru", CutSchemePrefix("yaru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutSchemePrefix("yaru://ya.ru://zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutSchemePrefix("ya.ru://zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutSchemePrefix("ftp://ya.ru://zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("", CutSchemePrefix("https://")); // is that right?
+
+ UNIT_ASSERT_VALUES_EQUAL("ftp://ya.ru", CutHttpPrefix("ftp://ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru/zzz", CutHttpPrefix("http://ya.ru/zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru/zzz", CutHttpPrefix("http://ya.ru/zzz", true));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru/zzz", CutHttpPrefix("https://ya.ru/zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("https://ya.ru/zzz", CutHttpPrefix("https://ya.ru/zzz", true));
+ UNIT_ASSERT_VALUES_EQUAL("", CutHttpPrefix("https://")); // is that right?
+ UNIT_ASSERT_VALUES_EQUAL("https://", CutHttpPrefix("https://", true)); // is that right?
+ }
+
+ // Prefix cutters: "www.", numbered "wwwNN." and mobile "m." prefixes.
+ Y_UNIT_TEST(TestMisc) {
+ UNIT_ASSERT_VALUES_EQUAL("", CutWWWPrefix("www."));
+ UNIT_ASSERT_VALUES_EQUAL("", CutWWWPrefix("WwW."));
+ UNIT_ASSERT_VALUES_EQUAL("www", CutWWWPrefix("www"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWPrefix("www.ya.ru"));
+
+ UNIT_ASSERT_VALUES_EQUAL("", CutWWWNumberedPrefix("www."));
+ UNIT_ASSERT_VALUES_EQUAL("www", CutWWWNumberedPrefix("www"));
+ UNIT_ASSERT_VALUES_EQUAL("www27", CutWWWNumberedPrefix("www27"));
+ UNIT_ASSERT_VALUES_EQUAL("", CutWWWNumberedPrefix("www27."));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("www.ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("www2.ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("www12.ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutWWWNumberedPrefix("ww2.ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("w1w2w3.ya.ru", CutWWWNumberedPrefix("w1w2w3.ya.ru"));
+ UNIT_ASSERT_VALUES_EQUAL("123.ya.ru", CutWWWNumberedPrefix("123.ya.ru"));
+
+ UNIT_ASSERT_VALUES_EQUAL("", CutMPrefix("m."));
+ UNIT_ASSERT_VALUES_EQUAL("", CutMPrefix("M."));
+ UNIT_ASSERT_VALUES_EQUAL("m", CutMPrefix("m"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru", CutMPrefix("m.ya.ru"));
+ }
+
+ // The host part keeps its scheme; the path is whatever follows and may be empty.
+ Y_UNIT_TEST(TestSplitUrlToHostAndPath) {
+ TStringBuf host, path;
+
+ SplitUrlToHostAndPath("https://yandex.ru/yandsearch", host, path);
+ UNIT_ASSERT_STRINGS_EQUAL(host, "https://yandex.ru");
+ UNIT_ASSERT_STRINGS_EQUAL(path, "/yandsearch");
+
+ SplitUrlToHostAndPath("yandex.ru/yandsearch", host, path);
+ UNIT_ASSERT_STRINGS_EQUAL(host, "yandex.ru");
+ UNIT_ASSERT_STRINGS_EQUAL(path, "/yandsearch");
+
+ SplitUrlToHostAndPath("https://yandex.ru", host, path);
+ UNIT_ASSERT_STRINGS_EQUAL(host, "https://yandex.ru");
+ UNIT_ASSERT_STRINGS_EQUAL(path, "");
+
+ SplitUrlToHostAndPath("invalid url /", host, path);
+ UNIT_ASSERT_STRINGS_EQUAL(host, "invalid url ");
+ UNIT_ASSERT_STRINGS_EQUAL(path, "/");
+
+ SplitUrlToHostAndPath("some_blender_url", host, path);
+ UNIT_ASSERT_STRINGS_EQUAL(host, "some_blender_url");
+ UNIT_ASSERT_STRINGS_EQUAL(path, "");
+ }
+
+ // Covers the four combinations of query and fragment being present or absent.
+ Y_UNIT_TEST(TestSeparateUrlFromQueryAndFragment) {
+ TStringBuf sanitizedUrl, query, fragment;
+
+ SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch", sanitizedUrl, query, fragment);
+ UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch");
+ UNIT_ASSERT_STRINGS_EQUAL(query, "");
+ UNIT_ASSERT_STRINGS_EQUAL(fragment, "");
+
+ SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch?param1=val1&param2=val2", sanitizedUrl, query, fragment);
+ UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch");
+ UNIT_ASSERT_STRINGS_EQUAL(query, "param1=val1&param2=val2");
+ UNIT_ASSERT_STRINGS_EQUAL(fragment, "");
+
+ SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch#fragment", sanitizedUrl, query, fragment);
+ UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch");
+ UNIT_ASSERT_STRINGS_EQUAL(query, "");
+ UNIT_ASSERT_STRINGS_EQUAL(fragment, "fragment");
+
+ SeparateUrlFromQueryAndFragment("https://yandex.ru/yandsearch?param1=val1&param2=val2#fragment", sanitizedUrl, query, fragment);
+ UNIT_ASSERT_STRINGS_EQUAL(sanitizedUrl, "https://yandex.ru/yandsearch");
+ UNIT_ASSERT_STRINGS_EQUAL(query, "param1=val1&param2=val2");
+ UNIT_ASSERT_STRINGS_EQUAL(fragment, "fragment");
+ }
+
+ Y_UNIT_TEST(TestGetSchemeHostAndPort) { // outputs are pre-set to "unknown"/0 so that components left untouched by the call can be detected
+ { // all components are present
+ TStringBuf scheme("unknown"), host("unknown");
+ ui16 port = 0;
+ GetSchemeHostAndPort("https://ya.ru:8080/bebe", scheme, host, port);
+ UNIT_ASSERT_VALUES_EQUAL(scheme, "https://");
+ UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru");
+ UNIT_ASSERT_VALUES_EQUAL(port, 8080);
+ }
+ { // scheme is absent: the scheme output keeps its initial value
+ TStringBuf scheme("unknown"), host("unknown");
+ ui16 port = 0;
+ GetSchemeHostAndPort("ya.ru:8080/bebe", scheme, host, port);
+ UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown");
+ UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru");
+ UNIT_ASSERT_VALUES_EQUAL(port, 8080);
+ }
+ { // scheme and port are absent
+ TStringBuf scheme("unknown"), host("unknown");
+ ui16 port = 0;
+ GetSchemeHostAndPort("ya.ru/bebe", scheme, host, port);
+ UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown");
+ UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru");
+ UNIT_ASSERT_VALUES_EQUAL(port, 0);
+ }
+ { // port is absent, but returned its default value for HTTP
+ TStringBuf scheme("unknown"), host("unknown");
+ ui16 port = 0;
+ GetSchemeHostAndPort("http://ya.ru/bebe", scheme, host, port);
+ UNIT_ASSERT_VALUES_EQUAL(scheme, "http://");
+ UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru");
+ UNIT_ASSERT_VALUES_EQUAL(port, 80);
+ }
+ { // port is absent, but returned its default value for HTTPS
+ TStringBuf scheme("unknown"), host("unknown");
+ ui16 port = 0;
+ GetSchemeHostAndPort("https://ya.ru/bebe", scheme, host, port);
+ UNIT_ASSERT_VALUES_EQUAL(scheme, "https://");
+ UNIT_ASSERT_VALUES_EQUAL(host, "ya.ru");
+ UNIT_ASSERT_VALUES_EQUAL(port, 443);
+ }
+ { // ipv6 with scheme and explicit port: the host is returned together with its square brackets
+ TStringBuf scheme("unknown"), host("unknown");
+ ui16 port = 0;
+ GetSchemeHostAndPort("https://[1080:0:0:0:8:800:200C:417A]:443/bebe", scheme, host, port);
+ UNIT_ASSERT_VALUES_EQUAL(scheme, "https://");
+ UNIT_ASSERT_VALUES_EQUAL(host, "[1080:0:0:0:8:800:200C:417A]");
+ UNIT_ASSERT_VALUES_EQUAL(port, 443);
+ }
+ { // ipv6 without scheme and port: colons inside the brackets are not taken for a port separator
+ TStringBuf scheme("unknown"), host("unknown");
+ ui16 port = 0;
+ GetSchemeHostAndPort("[::1]/bebe", scheme, host, port);
+ UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown");
+ UNIT_ASSERT_VALUES_EQUAL(host, "[::1]");
+ UNIT_ASSERT_VALUES_EQUAL(port, 0);
+ }
+ { // unrecognized scheme with an empty host: no default port is substituted
+ TStringBuf scheme("unknown"), host("unknown");
+ ui16 port = 0;
+ GetSchemeHostAndPort("unknown:///bebe", scheme, host, port);
+ UNIT_ASSERT_VALUES_EQUAL(scheme, "unknown://");
+ UNIT_ASSERT_VALUES_EQUAL(host, "");
+ UNIT_ASSERT_VALUES_EQUAL(port, 0);
+ }
+ // port overflow: 65536 does not fit into ui16, so the call is expected to throw yexception
+ auto testCase = []() {
+ TStringBuf scheme("unknown"), host("unknown");
+ ui16 port = 0;
+ GetSchemeHostAndPort("https://ya.ru:65536/bebe", scheme, host, port);
+ };
+ UNIT_ASSERT_EXCEPTION(testCase(), yexception);
+ }
+
+ Y_UNIT_TEST(TestCutUrlPrefixes) { // expectations: a leading "<scheme>://" is removed, and so is a leading "www."
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru/bebe", CutUrlPrefixes("http://ya.ru/bebe"));
+ UNIT_ASSERT_VALUES_EQUAL("yaru", CutUrlPrefixes("yaru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("yaru://ya.ru://zzz")); // only the first scheme prefix is cut
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("ya.ru://zzz")); // NOTE(review): left intact, presumably because "ya.ru" is not accepted as a scheme name - confirm in url.cpp
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("ftp://ya.ru://zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("", CutUrlPrefixes("https://"));
+ // Variants with a "www." prefix in front of the host: it is expected to be removed as well.
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru/bebe", CutUrlPrefixes("https://www.ya.ru/bebe"));
+ UNIT_ASSERT_VALUES_EQUAL("yaru", CutUrlPrefixes("www.yaru"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("yaru://www.ya.ru://zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("www.ya.ru://zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("ya.ru://zzz", CutUrlPrefixes("ftp://www.ya.ru://zzz"));
+ UNIT_ASSERT_VALUES_EQUAL("", CutUrlPrefixes("http://www."));
+ }
+
+ Y_UNIT_TEST(TestUrlPathStartWithToken) { // the token must be the whole first path segment, ended by '/', '?' or the end of the URL
+ UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe/zzz", "bebe"));
+ UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe?zzz", "bebe"));
+ UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe/", "bebe"));
+ UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("http://ya.ru/bebe?", "bebe"));
+ UNIT_ASSERT_VALUES_EQUAL(true, DoesUrlPathStartWithToken("https://ya.ru/bebe", "bebe"));
+ UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru/bebezzz", "bebe")); // token is only a prefix of the first segment
+ UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru/bebe.zzz", "bebe")); // '.' does not end a segment
+ UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru/", "bebe")); // empty path
+ UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://ya.ru", "bebe")); // no path at all
+ UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("http://bebe", "bebe")); // token matches the host, not the path
+ UNIT_ASSERT_VALUES_EQUAL(false, DoesUrlPathStartWithToken("https://bebe/", "bebe")); // token matches the host, path is empty
+ }
+}
diff --git a/library/cpp/string_utils/url/ut/ya.make b/library/cpp/string_utils/url/ut/ya.make
new file mode 100644
index 0000000000..0efa30e4d2
--- /dev/null
+++ b/library/cpp/string_utils/url/ut/ya.make
@@ -0,0 +1,9 @@
+UNITTEST_FOR(library/cpp/string_utils/url)
+# Unit-test module for the library named in UNITTEST_FOR above.
+OWNER(g:util)
+# Test sources compiled into this module.
+SRCS(
+ url_ut.cpp
+)
+
+END()
diff --git a/library/cpp/string_utils/url/ya.make b/library/cpp/string_utils/url/ya.make
new file mode 100644
index 0000000000..b08d69ec83
--- /dev/null
+++ b/library/cpp/string_utils/url/ya.make
@@ -0,0 +1,10 @@
+LIBRARY()
+# Library module; its sources are listed in SRCS below.
+OWNER(g:util)
+# Implementation file and its header.
+SRCS(
+ url.cpp
+ url.h
+)
+
+END()
diff --git a/library/cpp/string_utils/ya.make b/library/cpp/string_utils/ya.make
new file mode 100644
index 0000000000..cd731bda95
--- /dev/null
+++ b/library/cpp/string_utils/ya.make
@@ -0,0 +1,37 @@
+RECURSE( # child directories of string_utils: libraries plus their ut/bench/fuzz companions
+ ascii_encode
+ ascii_encode/ut
+ base64
+ base64/bench
+ base64/bench/metrics
+ base64/ut
+ base64/fuzz
+ csv
+ csv/bench
+ csv/ut
+ col_diff
+ col_diff/ut
+ indent_text
+ levenshtein_diff
+ levenshtein_diff/ut
+ old_url_normalize
+ old_url_normalize/ut
+ parse_size
+ parse_size/ut
+ parse_vector
+ parse_vector/ut
+ secret_string
+ quote
+ quote/ut
+ relaxed_escaper
+ relaxed_escaper/ut
+ scan
+ subst_buf
+ subst_buf/ut
+ tskv_format
+ tskv_format/ut
+ tskv_format/fuzz
+ url
+ url/ut
+ ztstrbuf
+)
diff --git a/library/cpp/string_utils/ztstrbuf/ya.make b/library/cpp/string_utils/ztstrbuf/ya.make
new file mode 100644
index 0000000000..28b3f32f58
--- /dev/null
+++ b/library/cpp/string_utils/ztstrbuf/ya.make
@@ -0,0 +1,9 @@
+LIBRARY()
+# Library module; its sources are listed in SRCS below.
+OWNER(myltsev)
+# Only the .cpp file is listed here.
+SRCS(
+ ztstrbuf.cpp
+)
+
+END()
diff --git a/library/cpp/string_utils/ztstrbuf/ztstrbuf.cpp b/library/cpp/string_utils/ztstrbuf/ztstrbuf.cpp
new file mode 100644
index 0000000000..4a7269ff4a
--- /dev/null
+++ b/library/cpp/string_utils/ztstrbuf/ztstrbuf.cpp
@@ -0,0 +1,8 @@
+#include "ztstrbuf.h"
+
+#include <util/stream/output.h>
+
+template <>
+void Out<TZtStringBuf>(IOutputStream& os, const TZtStringBuf& sb) { // stream-output specialization for TZtStringBuf
+ os << static_cast<const TStringBuf&>(sb); // written via the TStringBuf base; NOTE(review): the cast presumably keeps operator<< from resolving back to this specialization - confirm
+}
diff --git a/library/cpp/string_utils/ztstrbuf/ztstrbuf.h b/library/cpp/string_utils/ztstrbuf/ztstrbuf.h
new file mode 100644
index 0000000000..5fab768d8c
--- /dev/null
+++ b/library/cpp/string_utils/ztstrbuf/ztstrbuf.h
@@ -0,0 +1,36 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+
+/*
+ * Zero-terminated string view.
+ *
+ * Has a c_str() for use with system/cstdlib calls (like TString)
+ * but can be constructed from a string literal or command-line arg
+ * without memory allocation (like TStringBuf).
+ *
+ * Use it to reference filenames, thread names, string formats etc.
+ */
+
+class TZtStringBuf: public TStringBuf { // non-owning view; every constructor takes a zero-terminated source
+public:
+    TZtStringBuf(const char* s) // s: zero-terminated C string; it is not copied and must outlive this view
+        : TStringBuf(s)
+    {
+    }
+
+    TZtStringBuf(const TString& s) // s is not copied and must outlive this view
+        : TStringBuf(s)
+    {
+    }
+
+    TZtStringBuf() // empty view
+        : TStringBuf("") // literal has static storage; a temporary TString{} would be destroyed before c_str() is ever called
+    {
+    }
+
+    const char* c_str() const { // relies on the constructors above: the viewed data is followed by '\0'
+        return data();
+    }
+};