aboutsummaryrefslogtreecommitdiffstats
path: root/util/string
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /util/string
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'util/string')
-rw-r--r--util/string/ascii.cpp59
-rw-r--r--util/string/ascii.h247
-rw-r--r--util/string/ascii_ut.cpp98
-rw-r--r--util/string/benchmark/ascii/main.cpp123
-rw-r--r--util/string/benchmark/ascii/ya.make9
-rw-r--r--util/string/benchmark/cast/main.cpp66
-rw-r--r--util/string/benchmark/cast/ya.make9
-rw-r--r--util/string/benchmark/float_to_string/main.cpp253
-rw-r--r--util/string/benchmark/float_to_string/metrics/main.py5
-rw-r--r--util/string/benchmark/float_to_string/metrics/ya.make21
-rw-r--r--util/string/benchmark/float_to_string/ya.make12
-rw-r--r--util/string/benchmark/join/main.cpp95
-rw-r--r--util/string/benchmark/join/metrics/main.py5
-rw-r--r--util/string/benchmark/join/metrics/ya.make21
-rw-r--r--util/string/benchmark/join/ya.make13
-rw-r--r--util/string/benchmark/subst_global/main.cpp203
-rw-r--r--util/string/benchmark/subst_global/metrics/main.py5
-rw-r--r--util/string/benchmark/subst_global/metrics/ya.make21
-rw-r--r--util/string/benchmark/subst_global/ya.make12
-rw-r--r--util/string/benchmark/ya.make16
-rw-r--r--util/string/builder.cpp8
-rw-r--r--util/string/builder.h39
-rw-r--r--util/string/builder_ut.cpp63
-rw-r--r--util/string/cast.cpp844
-rw-r--r--util/string/cast.h357
-rw-r--r--util/string/cast.pxd10
-rw-r--r--util/string/cast.py27
-rw-r--r--util/string/cast_ut.cpp602
-rw-r--r--util/string/cast_ut.pyx13
-rw-r--r--util/string/cstriter.cpp1
-rw-r--r--util/string/cstriter.h14
-rw-r--r--util/string/escape.cpp433
-rw-r--r--util/string/escape.h70
-rw-r--r--util/string/escape_ut.cpp148
-rw-r--r--util/string/fuzzing/collapse/main.cpp12
-rw-r--r--util/string/fuzzing/collapse/ya.make13
-rw-r--r--util/string/fuzzing/escape_c/main.cpp11
-rw-r--r--util/string/fuzzing/escape_c/ya.make13
-rw-r--r--util/string/fuzzing/strtod/main.cpp9
-rw-r--r--util/string/fuzzing/strtod/ya.make13
-rw-r--r--util/string/fuzzing/ya.make11
-rw-r--r--util/string/hex.cpp63
-rw-r--r--util/string/hex.h59
-rw-r--r--util/string/hex_ut.cpp19
-rw-r--r--util/string/join.cpp1
-rw-r--r--util/string/join.h265
-rw-r--r--util/string/join_ut.cpp163
-rw-r--r--util/string/printf.cpp38
-rw-r--r--util/string/printf.h13
-rw-r--r--util/string/printf_ut.cpp30
-rw-r--r--util/string/reverse.cpp33
-rw-r--r--util/string/reverse.h16
-rw-r--r--util/string/split.cpp24
-rw-r--r--util/string/split.h1085
-rw-r--r--util/string/split_ut.cpp831
-rw-r--r--util/string/strip.cpp23
-rw-r--r--util/string/strip.h257
-rw-r--r--util/string/strip_ut.cpp138
-rw-r--r--util/string/strspn.cpp1
-rw-r--r--util/string/strspn.h65
-rw-r--r--util/string/subst.cpp201
-rw-r--r--util/string/subst.h56
-rw-r--r--util/string/subst_ut.cpp253
-rw-r--r--util/string/type.cpp86
-rw-r--r--util/string/type.h42
-rw-r--r--util/string/type_ut.cpp76
-rw-r--r--util/string/ut/ya.make24
-rw-r--r--util/string/util.cpp72
-rw-r--r--util/string/util.h195
-rw-r--r--util/string/util_ut.cpp46
-rw-r--r--util/string/vector.cpp91
-rw-r--r--util/string/vector.h132
-rw-r--r--util/string/vector_ut.cpp38
-rw-r--r--util/string/ya.make6
74 files changed, 8446 insertions, 0 deletions
diff --git a/util/string/ascii.cpp b/util/string/ascii.cpp
new file mode 100644
index 0000000000..95edb95cc8
--- /dev/null
+++ b/util/string/ascii.cpp
@@ -0,0 +1,59 @@
+#include "ascii.h"
+
+#include <util/system/yassert.h>
+#include <util/system/compat.h>
+
+// clang-format off
+extern const unsigned char NPrivate::ASCII_CLASS[256] = { // one bitmask of NPrivate::ECharClass flags per byte value
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, // 0x00-0x0F: only 0x09-0x0D carry CC_SPACE
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x10-0x1F: no class bits
+ 0x01, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // 0x20-0x2F: ' ' is CC_SPACE, the rest CC_PUNCT
+ 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, // 0x30-0x3F: '0'-'9' = CC_DIGIT | CC_ALNUM | CC_ISHEX, then CC_PUNCT
+ 0x80, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, // 0x40-0x4F: '@' CC_PUNCT; 'A'-'F' add CC_ISHEX to CC_UPPER | CC_ALPHA | CC_ALNUM
+ 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x80, 0x80, 0x80, 0x80, 0x80, // 0x50-0x5F: 'P'-'Z' upper-case letters, then CC_PUNCT
+ 0x80, 0x74, 0x74, 0x74, 0x74, 0x74, 0x74, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, // 0x60-0x6F: '`' CC_PUNCT; 'a'-'f' add CC_ISHEX to CC_LOWER | CC_ALPHA | CC_ALNUM
+ 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x80, 0x80, 0x80, 0x80, 0x00, // 0x70-0x7F: 'p'-'z' lower-case letters, CC_PUNCT, 0x7F has no class bits
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x80-0xFF (this row and the seven below): no class bits
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+extern const unsigned char NPrivate::ASCII_LOWER[256] = { // identity map, except that 65-90 ('A'-'Z') map to 97-122 ('a'-'z')
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
+ 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, // '@' unchanged, then 'A'-'O' -> 'a'-'o'
+ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95, // 'P'-'Z' -> 'p'-'z', then '[' .. '_' unchanged
+ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
+ 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
+ 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, // bytes above 127 are left untouched
+ 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
+ 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
+ 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
+ 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
+ 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
+ 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
+ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
+};
+// clang-format on
+
+int AsciiCompareIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept { // three-way case-insensitive comparison; if the common-length part compares equal, the shorter string orders first
+ if (s1.size() <= s2.size()) {
+ if (int cmp = strnicmp(s1.data(), s2.data(), s1.size())) { // s1 is not longer: compare at most s1.size() characters
+ return cmp;
+ }
+ return (s1.size() < s2.size()) ? -1 : 0; // compared part is equal: shorter s1 is "less", same length is "equal"
+ }
+
+ Y_ASSERT(s1.size() > s2.size()); // the only case left after the branch above
+ if (int cmp = strnicmp(s1.data(), s2.data(), s2.size())) { // s2 is shorter here: compare at most s2.size() characters
+ return cmp;
+ }
+ return 1; // compared part is equal and s1 is longer, so s1 is "greater"
+}
diff --git a/util/string/ascii.h b/util/string/ascii.h
new file mode 100644
index 0000000000..10344384d3
--- /dev/null
+++ b/util/string/ascii.h
@@ -0,0 +1,247 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/system/compat.h>
+#include <util/generic/string.h>
+
+// ctype.h-like functions, locale-independent:
+// IsAscii{Upper,Lower,Digit,Alpha,Alnum,Space} and
+// AsciiTo{Upper,Lower}
+//
+// standard functions from <ctype.h> are locale dependent,
+// and cause undefined behavior when called on chars outside [0..127] range
+
+namespace NPrivate { // implementation details of the IsAscii*/AsciiTo* helpers
+ enum ECharClass { // bit flags; ASCII_CLASS stores an OR of these per byte
+ CC_SPACE = 1,
+ CC_UPPER = 2,
+ CC_LOWER = 4,
+ CC_DIGIT = 8,
+ CC_ALPHA = 16,
+ CC_ALNUM = 32,
+ CC_ISHEX = 64,
+ CC_PUNCT = 128,
+ };
+
+ extern const unsigned char ASCII_CLASS[256]; // ECharClass bitmask for every byte value
+ extern const unsigned char ASCII_LOWER[256]; // lower-case mapping for every byte value
+
+ template <class T>
+ struct TDereference { // primary template: a character type maps to itself
+ using type = T;
+ };
+
+#ifndef TSTRING_IS_STD_STRING
+ template <class String>
+ struct TDereference<TBasicCharRef<String>> { // a TBasicCharRef proxy unwraps to the string's value_type
+ using type = typename String::value_type;
+ };
+#endif
+
+ template <class T>
+ using TDereferenced = typename TDereference<T>::type; // shorthand used as the return type of AsciiToLower/AsciiToUpper
+
+ template <class T>
+ bool RangeOk(T c) noexcept { // true when c may safely be cast to unsigned char for a table lookup
+ static_assert(std::is_integral<T>::value, "Integral type character expected");
+
+ if (sizeof(T) == 1) { // every one-byte value indexes the 256-entry tables after the cast
+ return true;
+ }
+
+ return c >= static_cast<T>(0) && c <= static_cast<T>(127); // wider types must hold a value in the ASCII range
+ }
+
+#ifndef TSTRING_IS_STD_STRING
+ template <class String>
+ bool RangeOk(const TBasicCharRef<String>& c) { // proxy overload: converts to the underlying character, then re-checks
+ return RangeOk(static_cast<typename String::value_type>(c));
+ }
+#endif
+}
+
+constexpr bool IsAscii(const int c) noexcept { // true iff c is in [0, 127]
+ return !(c & ~0x7f); // any bit above the low seven (sign bit included) makes the value non-ASCII
+}
+
+inline bool IsAsciiSpace(unsigned char c) { // each predicate in this family tests one ECharClass bit of ASCII_CLASS[c]
+ return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_SPACE; // set for 0x09-0x0D and 0x20
+}
+
+inline bool IsAsciiUpper(unsigned char c) {
+ return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_UPPER; // set for 'A'-'Z'
+}
+
+inline bool IsAsciiLower(unsigned char c) {
+ return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_LOWER; // set for 'a'-'z'
+}
+
+inline bool IsAsciiDigit(unsigned char c) {
+ return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_DIGIT; // set for '0'-'9'
+}
+
+inline bool IsAsciiAlpha(unsigned char c) {
+ return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALPHA; // set for 'A'-'Z' and 'a'-'z'
+}
+
+inline bool IsAsciiAlnum(unsigned char c) {
+ return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALNUM; // set for letters and digits
+}
+
+inline bool IsAsciiHex(unsigned char c) {
+ return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ISHEX; // set for '0'-'9', 'A'-'F' and 'a'-'f'
+}
+
+inline bool IsAsciiPunct(unsigned char c) {
+ return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_PUNCT; // set for the printable non-alphanumeric characters 0x21-0x7E
+}
+
+// overloads for other character types: range-check first, then defer to the unsigned char versions
+
+template <class T>
+inline bool IsAsciiSpace(T c) {
+ return ::NPrivate::RangeOk(c) && IsAsciiSpace(static_cast<unsigned char>(c)); // RangeOk short-circuits, so a wide value outside 0..127 is never truncated into the table
+}
+
+template <class T>
+inline bool IsAsciiUpper(T c) {
+ return ::NPrivate::RangeOk(c) && IsAsciiUpper(static_cast<unsigned char>(c));
+}
+
+template <class T>
+inline bool IsAsciiLower(T c) {
+ return ::NPrivate::RangeOk(c) && IsAsciiLower(static_cast<unsigned char>(c));
+}
+
+template <class T>
+inline bool IsAsciiDigit(T c) {
+ return ::NPrivate::RangeOk(c) && IsAsciiDigit(static_cast<unsigned char>(c));
+}
+
+template <class T>
+inline bool IsAsciiAlpha(T c) {
+ return ::NPrivate::RangeOk(c) && IsAsciiAlpha(static_cast<unsigned char>(c));
+}
+
+template <class T>
+inline bool IsAsciiAlnum(T c) {
+ return ::NPrivate::RangeOk(c) && IsAsciiAlnum(static_cast<unsigned char>(c));
+}
+
+template <class T>
+inline bool IsAsciiHex(T c) {
+ return ::NPrivate::RangeOk(c) && IsAsciiHex(static_cast<unsigned char>(c));
+}
+
+template <class T>
+inline bool IsAsciiPunct(T c) {
+ return ::NPrivate::RangeOk(c) && IsAsciiPunct(static_cast<unsigned char>(c));
+}
+
+// case-conversion helpers
+inline ui8 AsciiToLower(ui8 c) noexcept { // table lookup: 'A'-'Z' become 'a'-'z', every other byte is returned as is
+ return ::NPrivate::ASCII_LOWER[c];
+}
+
+inline char AsciiToLower(char c) noexcept { // char overload: goes through ui8 so the value can index the table
+ return (char)AsciiToLower((ui8)c);
+}
+
+template <class T>
+inline ::NPrivate::TDereferenced<T> AsciiToLower(T c) noexcept { // generic overload for other character types
+ return (c >= 0 && c <= 127) ? (::NPrivate::TDereferenced<T>)AsciiToLower((ui8)c) : c; // values outside 0..127 are returned unchanged
+}
+
+template <class T>
+inline ::NPrivate::TDereferenced<T> AsciiToUpper(T c) noexcept { // maps 'a'-'z' to 'A'-'Z'; any other value is returned unchanged
+ return IsAsciiLower(c) ? (c + ('A' - 'a')) : c; // 'A' - 'a' is the negative offset between the two cases
+}
+
+/**
+ * ASCII case-insensitive equality check for 0-terminated strings (for proper
+ * UTF8 case-insensitive comparison consider using @c library/cpp/charset).
+ *
+ * BUGS: Currently will NOT work properly with strings that contain
+ * 0-terminator character inside. See IGNIETFERRO-1641 for details.
+ *
+ * @return true iff @c s1 and @c s2 are case-insensitively equal.
+ */
+static inline bool AsciiEqualsIgnoreCase(const char* s1, const char* s2) noexcept {
+ return stricmp(s1, s2) == 0; // stricmp returns zero for equal strings
+}
+
+/**
+ * ASCII case-insensitive equality check for string views (for proper UTF8
+ * case-insensitive comparison consider using @c library/cpp/charset).
+ *
+ * BUGS: Currently will NOT work properly with strings that contain
+ * 0-terminator character inside. See IGNIETFERRO-1641 for details.
+ *
+ * @return true iff @c s1 and @c s2 are case-insensitively equal.
+ */
+static inline bool AsciiEqualsIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
+ return (s1.size() == s2.size()) && strnicmp(s1.data(), s2.data(), s1.size()) == 0; // sizes must match before the contents are compared
+}
+
+/**
+ * ASCII case-insensitive three-way comparison of 0-terminated strings (for proper
+ * UTF8 case-insensitive comparison consider using @c library/cpp/charset).
+ *
+ * BUGS: Currently will NOT work properly with strings that contain
+ * 0-terminator character inside. See IGNIETFERRO-1641 for details.
+ *
+ * @return 0 if strings are equal, negative if @c s1 < @c s2
+ * and positive otherwise.
+ * (same value as @c stricmp does).
+ */
+static inline int AsciiCompareIgnoreCase(const char* s1, const char* s2) noexcept {
+ return stricmp(s1, s2); // thin wrapper: the result of stricmp is forwarded unchanged
+}
+
+/**
+ * ASCII case-insensitive three-way comparison of string views (for proper UTF8
+ * case-insensitive comparison consider using @c library/cpp/charset).
+ *
+ * BUGS: Currently will NOT work properly with strings that contain
+ * 0-terminator character inside. See IGNIETFERRO-1641 for details.
+ *
+ * @return
+ * - zero if strings are equal
+ * - negative if @c s1 < @c s2
+ * - positive otherwise,
+ * similar to stricmp; when the common-length part is equal, the shorter string is the lesser one.
+ */
+Y_PURE_FUNCTION int AsciiCompareIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept; // defined in ascii.cpp
+
+/**
+ * ASCII case-sensitive prefix check (for proper UTF8 strings
+ * case-sensitive comparison consider using @c library/cpp/charset).
+ *
+ * NOTE(review): the 0-terminator limitation tracked in IGNIETFERRO-1641 looks
+ * inapplicable here, since the bytes are compared with memcmp -- confirm.
+ *
+ * @return true iff @c s2 is a case-sensitive prefix of @c s1.
+ */
+static inline bool AsciiHasPrefix(const TStringBuf s1, const TStringBuf s2) noexcept {
+ return (s1.size() >= s2.size()) && memcmp(s1.data(), s2.data(), s2.size()) == 0; // a prefix cannot be longer than the string itself
+}
+
+/**
+ * ASCII case-insensitive prefix check (for proper UTF8 strings
+ * case-insensitive comparison consider using @c library/cpp/charset).
+ *
+ * @return true iff @c s2 is a case-insensitive prefix of @c s1.
+ */
+static inline bool AsciiHasPrefixIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
+ return (s1.size() >= s2.size()) && strnicmp(s1.data(), s2.data(), s2.size()) == 0; // NOTE(review): strnicmp-based, so the embedded 0-terminator caveat documented for AsciiEqualsIgnoreCase presumably applies -- confirm
+}
+
+/**
+ * ASCII case-insensitive suffix check (for proper UTF8 strings
+ * case-insensitive comparison consider using @c library/cpp/charset).
+ *
+ * @return true iff @c s2 is a case-insensitive suffix of @c s1.
+ */
+static inline bool AsciiHasSuffixIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept {
+ return (s1.size() >= s2.size()) && strnicmp((s1.data() + (s1.size() - s2.size())), s2.data(), s2.size()) == 0; // compares the last s2.size() characters of s1; NOTE(review): strnicmp-based, embedded 0-terminator caveat presumably applies -- confirm
+}
diff --git a/util/string/ascii_ut.cpp b/util/string/ascii_ut.cpp
new file mode 100644
index 0000000000..89069fee50
--- /dev/null
+++ b/util/string/ascii_ut.cpp
@@ -0,0 +1,98 @@
+#include "ascii.h"
+#include <ctype.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(TAsciiTest) { // unit tests for the helpers declared in ascii.h
+ Y_UNIT_TEST(TestAscii) { // spot checks, then agreement with <ctype.h> on 0..127
+ UNIT_ASSERT(IsAsciiDigit('3'));
+ UNIT_ASSERT(!IsAsciiDigit('x'));
+
+ UNIT_ASSERT(IsAsciiAlpha('r'));
+ UNIT_ASSERT(IsAsciiAlpha('R'));
+ UNIT_ASSERT(!IsAsciiAlpha('3'));
+
+ UNIT_ASSERT_EQUAL(AsciiToLower('3'), '3');
+ UNIT_ASSERT_EQUAL(AsciiToLower('A'), 'a');
+ UNIT_ASSERT_EQUAL(AsciiToLower('a'), 'a');
+
+ UNIT_ASSERT_EQUAL(AsciiToUpper('3'), '3');
+ UNIT_ASSERT_EQUAL(AsciiToUpper('A'), 'A');
+ UNIT_ASSERT_EQUAL(AsciiToUpper('a'), 'A');
+
+ UNIT_ASSERT(IsAscii('a'));
+ UNIT_ASSERT(!IsAscii(-100));
+ UNIT_ASSERT(!IsAscii(+200));
+ UNIT_ASSERT(!IsAscii(int('a') + 256)); // same low byte as 'a', but outside [0, 127]
+
+ for (int i = 0; i < 128; ++i) { // compares against <ctype.h>; presumably relies on the default "C" locale -- TODO confirm
+ UNIT_ASSERT_VALUES_EQUAL((bool)isxdigit(i), IsAsciiHex(i));
+ UNIT_ASSERT_VALUES_EQUAL((bool)isspace(i), IsAsciiSpace((char)i));
+ UNIT_ASSERT_VALUES_EQUAL((bool)isspace(i), IsAsciiSpace((char)i)); // NOTE(review): exact duplicate of the previous assertion
+ UNIT_ASSERT_VALUES_EQUAL((bool)isalnum(i), IsAsciiAlnum((char)i));
+ UNIT_ASSERT_VALUES_EQUAL((bool)isalpha(i), IsAsciiAlpha((char)i));
+ UNIT_ASSERT_VALUES_EQUAL((bool)isupper(i), IsAsciiUpper((char)i));
+ UNIT_ASSERT_VALUES_EQUAL((bool)islower(i), IsAsciiLower((char)i));
+ UNIT_ASSERT_VALUES_EQUAL((bool)isdigit(i), IsAsciiDigit((char)i));
+ UNIT_ASSERT_VALUES_EQUAL((bool)ispunct(i), IsAsciiPunct((char)i));
+ }
+ }
+
+ Y_UNIT_TEST(Test1) { // int values outside [0, 127] must fail every predicate
+ for (int i = 128; i < 1000; ++i) { // above the ASCII range
+ UNIT_ASSERT(!IsAsciiHex(i));
+ UNIT_ASSERT(!IsAsciiSpace(i));
+ UNIT_ASSERT(!IsAsciiAlnum(i));
+ UNIT_ASSERT(!IsAsciiAlpha(i));
+ UNIT_ASSERT(!IsAsciiUpper(i));
+ UNIT_ASSERT(!IsAsciiLower(i));
+ UNIT_ASSERT(!IsAsciiDigit(i));
+ UNIT_ASSERT(!IsAsciiPunct(i));
+ }
+
+ for (int i = -1000; i < 0; ++i) { // negative values
+ UNIT_ASSERT(!IsAsciiHex(i));
+ UNIT_ASSERT(!IsAsciiSpace(i));
+ UNIT_ASSERT(!IsAsciiAlnum(i));
+ UNIT_ASSERT(!IsAsciiAlpha(i));
+ UNIT_ASSERT(!IsAsciiUpper(i));
+ UNIT_ASSERT(!IsAsciiLower(i));
+ UNIT_ASSERT(!IsAsciiDigit(i));
+ UNIT_ASSERT(!IsAsciiPunct(i));
+ }
+ }
+
+ Y_UNIT_TEST(CompareTest) { // equality, three-way comparison and prefix/suffix helpers
+ UNIT_ASSERT(AsciiEqualsIgnoreCase("qqq", "qQq"));
+ UNIT_ASSERT(AsciiEqualsIgnoreCase("qqq", TStringBuf("qQq")));
+ TString qq = "qq";
+ TString qQ = "qQ";
+ UNIT_ASSERT(AsciiEqualsIgnoreCase(qq, qQ));
+
+ TString x = "qqqA";
+ TString y = "qQqB";
+ TString z = "qQnB";
+ TString zz = "qQqq";
+ TString zzz = "qQqqq";
+ TStringBuf xs = TStringBuf(x.data(), 3); // three-character views: the differing fourth character must be ignored
+ TStringBuf ys = TStringBuf(y.data(), 3);
+ TStringBuf zs = TStringBuf(z.data(), 3);
+ UNIT_ASSERT(AsciiCompareIgnoreCase(xs, ys) == 0);
+ UNIT_ASSERT(AsciiCompareIgnoreCase(xs, zs) > 0); // 'q' > 'n' at the third character
+ UNIT_ASSERT(AsciiCompareIgnoreCase(xs, zz) < 0); // xs matches the first three characters of the longer zz
+ UNIT_ASSERT(AsciiCompareIgnoreCase(zzz, zz) > 0); // the longer string is greater when the common part is equal
+
+ UNIT_ASSERT(AsciiCompareIgnoreCase("qqQ", "qq") > 0);
+ UNIT_ASSERT(AsciiCompareIgnoreCase("qq", "qq") == 0);
+
+ UNIT_ASSERT_EQUAL(AsciiHasPrefix("qweasd", "qwe"), true);
+ UNIT_ASSERT_EQUAL(AsciiHasPrefix("qweasd", "qWe"), false); // case-sensitive variant
+ UNIT_ASSERT_EQUAL(AsciiHasPrefix("qweasd", "eWq"), false);
+
+ UNIT_ASSERT_EQUAL(AsciiHasPrefixIgnoreCase("qweasd", "qWe"), true);
+ UNIT_ASSERT_EQUAL(AsciiHasPrefixIgnoreCase("qweasd", "eWq"), false);
+
+ UNIT_ASSERT_EQUAL(AsciiHasSuffixIgnoreCase("qweasd", "asD"), true);
+ UNIT_ASSERT_EQUAL(AsciiHasSuffixIgnoreCase("qweasd", "ast"), false);
+ }
+}
diff --git a/util/string/benchmark/ascii/main.cpp b/util/string/benchmark/ascii/main.cpp
new file mode 100644
index 0000000000..673047025d
--- /dev/null
+++ b/util/string/benchmark/ascii/main.cpp
@@ -0,0 +1,123 @@
+#include <library/cpp/testing/benchmark/bench.h>
+
+#include <util/generic/xrange.h>
+#include <util/string/ascii.h>
+#include <util/generic/bitmap.h>
+#include <util/generic/singleton.h>
+
+namespace { // alternative to-lower implementations that the benchmarks below compare
+ struct TUpperMap: public TBitMap<256> { // bitmap with one bit per byte value, set for 'A'-'Z'
+ inline TUpperMap() noexcept {
+ for (unsigned i = 'A'; i <= 'Z'; ++i) {
+ Set((ui8)i);
+ }
+ }
+
+ inline char ToLower(char x) const noexcept {
+ return Get((ui8)x) ? x + ('a' - 'A') : x; // shift only the bytes marked as upper-case
+ }
+ };
+
+ struct TToLowerLookup { // 256-entry table filled once from AsciiToLower
+ char Table[256];
+
+ TToLowerLookup() {
+ for (size_t i : xrange(256)) {
+ Table[i] = AsciiToLower(i); // size_t argument, so the templated AsciiToLower overload is used
+ }
+ }
+
+ char ToLower(char x) const noexcept {
+ return Table[(ui8)x]; // cast keeps the index in 0..255 even where char is signed
+ }
+ };
+}
+
+static inline char FastAsciiToLower(char c) { // branching variant: range check plus add
+ return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c;
+}
+
+static inline char FastAsciiToLower2(char c) { // variant without a ternary: the comparison result (0 or 1) scales the offset
+ return c + ('a' - 'A') * (int)(c >= 'A' && c <= 'Z');
+}
+
+Y_CPU_BENCHMARK(AsciiToLower, iface) { // AsciiToLower over all 256 byte values, passed as int
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+
+ for (int j = 0; j < 256; ++j) {
+ Y_DO_NOT_OPTIMIZE_AWAY(AsciiToLower(j)); // int argument selects the templated overload
+ }
+ }
+}
+
+Y_CPU_BENCHMARK(AsciiToLowerChar, iface) { // same loop, but through the char overload
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+
+ for (int j = 0; j < 256; ++j) {
+ Y_DO_NOT_OPTIMIZE_AWAY(AsciiToLower((char)j)); // explicit cast picks AsciiToLower(char)
+ }
+ }
+}
+
+Y_CPU_BENCHMARK(FastAsciiToLower, iface) { // measures the branching FastAsciiToLower helper
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+
+ for (int j = 0; j < 256; ++j) {
+ Y_DO_NOT_OPTIMIZE_AWAY(FastAsciiToLower(j)); // j is implicitly converted to char
+ }
+ }
+}
+
+Y_CPU_BENCHMARK(FastAsciiToLower2, iface) { // measures the multiply-by-condition FastAsciiToLower2 helper
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+
+ for (int j = 0; j < 256; ++j) {
+ Y_DO_NOT_OPTIMIZE_AWAY(FastAsciiToLower2(j)); // j is implicitly converted to char
+ }
+ }
+}
+
+Y_CPU_BENCHMARK(BitMapAsciiToLower, iface) { // measures TUpperMap::ToLower
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+
+ for (int j = 0; j < 256; ++j) {
+ Y_DO_NOT_OPTIMIZE_AWAY(Singleton<TUpperMap>()->ToLower(j)); // the Singleton<>() access is part of the measured work
+ }
+ }
+}
+
+Y_CPU_BENCHMARK(LookupAsciiToLower, iface) { // measures TToLowerLookup::ToLower reached via Singleton
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+
+ for (int j = 0; j < 256; ++j) {
+ Y_DO_NOT_OPTIMIZE_AWAY(Singleton<TToLowerLookup>()->ToLower(j)); // the Singleton<>() access is part of the measured work
+ }
+ }
+}
+
+Y_CPU_BENCHMARK(LookupAsciiToLowerNoSingleton, iface) { // same table lookup, without the Singleton access
+ TToLowerLookup lookup; // built once, before the measured loop
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+
+ for (int j = 0; j < 256; ++j) {
+ Y_DO_NOT_OPTIMIZE_AWAY(lookup.ToLower(j));
+ }
+ }
+}
+
+Y_CPU_BENCHMARK(tolower, iface) { // baseline; presumably the C library tolower -- no <ctype.h> include is visible in this file
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+
+ for (int j = 0; j < 256; ++j) {
+ Y_DO_NOT_OPTIMIZE_AWAY(tolower(j));
+ }
+ }
+}
diff --git a/util/string/benchmark/ascii/ya.make b/util/string/benchmark/ascii/ya.make
new file mode 100644
index 0000000000..f95b9e0fa8
--- /dev/null
+++ b/util/string/benchmark/ascii/ya.make
@@ -0,0 +1,9 @@
+Y_BENCHMARK()
+
+OWNER(pg)
+
+SRCS(
+ main.cpp
+)
+
+END()
diff --git a/util/string/benchmark/cast/main.cpp b/util/string/benchmark/cast/main.cpp
new file mode 100644
index 0000000000..f604712ab6
--- /dev/null
+++ b/util/string/benchmark/cast/main.cpp
@@ -0,0 +1,66 @@
+#include <library/cpp/testing/benchmark/bench.h>
+
+#include <util/string/cast.h>
+#include <util/generic/xrange.h>
+
+char str1[] = "1"; // benchmark inputs of 1, 2, 4 and 8 decimal digits
+char str12[] = "12";
+char str1234[] = "1234";
+char str12345678[] = "12345678";
+
+Y_CPU_BENCHMARK(Parse_1, iface) { // FromString<ui32> on the 1-digit input
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+ Y_DO_NOT_OPTIMIZE_AWAY(FromString<ui32>(str1, 1)); // second argument matches the digit count of str1
+ }
+}
+
+Y_CPU_BENCHMARK(Parse_12, iface) { // FromString<ui32> on the 2-digit input
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+ Y_DO_NOT_OPTIMIZE_AWAY(FromString<ui32>(str12, 2)); // second argument matches the digit count of str12
+ }
+}
+
+Y_CPU_BENCHMARK(Parse_1234, iface) { // FromString<ui32> on the 4-digit input
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+ Y_DO_NOT_OPTIMIZE_AWAY(FromString<ui32>(str1234, 4)); // second argument matches the digit count of str1234
+ }
+}
+
+Y_CPU_BENCHMARK(Parse_12345678, iface) { // FromString<ui32> on the 8-digit input
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+ Y_DO_NOT_OPTIMIZE_AWAY(FromString<ui32>(str12345678, 8)); // second argument matches the digit count of str12345678
+ }
+}
+
+//atoi
+Y_CPU_BENCHMARK(Atoi_1, iface) { // atoi baseline for Parse_1 (same input buffer)
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+ Y_DO_NOT_OPTIMIZE_AWAY(atoi(str1));
+ }
+}
+
+Y_CPU_BENCHMARK(Atoi_12, iface) { // atoi baseline for Parse_12 (same input buffer)
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+ Y_DO_NOT_OPTIMIZE_AWAY(atoi(str12));
+ }
+}
+
+Y_CPU_BENCHMARK(Atoi_1234, iface) { // atoi baseline for Parse_1234 (same input buffer)
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+ Y_DO_NOT_OPTIMIZE_AWAY(atoi(str1234));
+ }
+}
+
+Y_CPU_BENCHMARK(Atoi_12345678, iface) { // atoi baseline for Parse_12345678 (same input buffer)
+ for (const auto i : xrange(iface.Iterations())) {
+ Y_UNUSED(i);
+ Y_DO_NOT_OPTIMIZE_AWAY(atoi(str12345678));
+ }
+}
diff --git a/util/string/benchmark/cast/ya.make b/util/string/benchmark/cast/ya.make
new file mode 100644
index 0000000000..f95b9e0fa8
--- /dev/null
+++ b/util/string/benchmark/cast/ya.make
@@ -0,0 +1,9 @@
+Y_BENCHMARK()
+
+OWNER(pg)
+
+SRCS(
+ main.cpp
+)
+
+END()
diff --git a/util/string/benchmark/float_to_string/main.cpp b/util/string/benchmark/float_to_string/main.cpp
new file mode 100644
index 0000000000..1c7c0684a3
--- /dev/null
+++ b/util/string/benchmark/float_to_string/main.cpp
@@ -0,0 +1,253 @@
+#include <library/cpp/testing/benchmark/bench.h>
+
+#include <util/generic/singleton.h>
+#include <util/generic/vector.h>
+#include <util/generic/xrange.h>
+#include <util/generic/ymath.h>
+#include <util/random/fast.h>
+#include <util/string/cast.h>
+#include <util/string/printf.h>
+
+#include <limits>
+
+#include <cmath>
+
+/* Please be careful before making any decisions based on this benchmark.
+ *
+ * Only `Sprintf("%.<decimals>f", x)` and `FloatToString(x, PREC_POINT_DIGITS, decimals)` produce
+ * equal results in general case. However, results for cases when x \in [0, 1) must be equal for
+ * both `Sprintf` and `FloatToString`.
+ *
+ * Read more about formatting in STL [1, 2] and Yandex Util formatting [3]
+ *
+ * [1] http://www.cplusplus.com/reference/cstdio/printf/
+ * [2] http://en.cppreference.com/w/c/io/fprintf
+ * [3] https://a.yandex-team.ru/arc/trunk/arcadia/util/string/cast.h?rev=2432660#L29
+ */
+
+namespace { // benchmark inputs: pseudo-random values paired with a digit count
+ template <typename T>
+ struct TExample {
+ T Value{}; // number to format
+ int DigitsCount{}; // precision argument; also the index into the FORMAT_* tables
+ };
+
+ template <typename T, size_t N>
+ struct TExamplesHolder { // N examples: random fraction plus a random integer part
+ TVector<TExample<T>> Examples;
+
+ TExamplesHolder()
+ : Examples(N)
+ {
+ TFastRng<ui64> prng{N * sizeof(T) * 42}; // seed is a compile-time constant per <T, N>; presumably makes the examples reproducible -- confirm
+ for (auto& x : Examples) {
+ x.Value = prng.GenRandReal4() + prng.Uniform(Max<ui16>());
+ x.DigitsCount = prng.Uniform(std::numeric_limits<T>::max_digits10 + 1); // presumably in [0, max_digits10] -- confirm Uniform's upper bound is exclusive
+ }
+ }
+ };
+
+ template <typename T, size_t N>
+ struct TNearZeroExamplesHolder { // same as TExamplesHolder, but without the integer part
+ TVector<TExample<T>> Examples;
+
+ TNearZeroExamplesHolder()
+ : Examples(N)
+ {
+ TFastRng<ui64> prng{N * sizeof(T) * 42}; // same seed expression as TExamplesHolder
+ for (auto& x : Examples) {
+ x.Value = prng.GenRandReal4(); // only the random fraction is used here
+ x.DigitsCount = prng.Uniform(std::numeric_limits<T>::max_digits10 + 1);
+ }
+ }
+ };
+}
+
+static const char* FORMAT_FIXED[] = { // FORMAT_FIXED[n] is the printf format "%.<n>f", n = 0..17
+ "%.0f",
+ "%.1f",
+ "%.2f",
+ "%.3f",
+ "%.4f",
+ "%.5f",
+ "%.6f",
+ "%.7f",
+ "%.8f",
+ "%.9f",
+ "%.10f",
+ "%.11f",
+ "%.12f",
+ "%.13f",
+ "%.14f",
+ "%.15f",
+ "%.16f",
+ "%.17f", // last entry: indices above 17 would be out of bounds
+};
+
+static const char* FORMAT_SIGNIFICANT[] = { // FORMAT_SIGNIFICANT[n] is the printf format "%.<n>g", n = 0..17
+ "%.0g",
+ "%.1g",
+ "%.2g",
+ "%.3g",
+ "%.4g",
+ "%.5g",
+ "%.6g",
+ "%.7g",
+ "%.8g",
+ "%.9g",
+ "%.10g",
+ "%.11g",
+ "%.12g",
+ "%.13g",
+ "%.14g",
+ "%.15g",
+ "%.16g",
+ "%.17g", // last entry: indices above 17 would be out of bounds
+};
+
+#define DEFINE_BENCHMARK(type, count) /* Sprintf vs FloatToString benchmark pairs over `count` examples of `type` */ \
+ Y_CPU_BENCHMARK(SprintfAuto_##type##_##count, iface) { \
+ const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ /* this is in fact equal to Sprintf("%.6f", e.Value) and that is why it is faster */ \
+ /* than FloatToString(e.Value) */ \
+ Y_DO_NOT_OPTIMIZE_AWAY(Sprintf("%f", e.Value)); \
+ } \
+ } \
+ } \
+ \
+ Y_CPU_BENCHMARK(FloatToStringAuto_##type##_##count, iface) { \
+ const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ Y_DO_NOT_OPTIMIZE_AWAY(FloatToString(e.Value)); \
+ } \
+ } \
+ } \
+ \
+ Y_CPU_BENCHMARK(SprintfFixed_##type##_##count, iface) { \
+ const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ Y_DO_NOT_OPTIMIZE_AWAY(Sprintf(FORMAT_FIXED[e.DigitsCount], e.Value)); \
+ } \
+ } \
+ } \
+ \
+ Y_CPU_BENCHMARK(FloatToStringFixed_##type##_##count, iface) { \
+ const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ Y_DO_NOT_OPTIMIZE_AWAY(FloatToString(e.Value, PREC_POINT_DIGITS, e.DigitsCount)); /* "%.<n>f" counterpart, see the note at the top of the file */ \
+ } \
+ } \
+ } \
+ \
+ Y_CPU_BENCHMARK(SprintfSignificant_##type##_##count, iface) { \
+ const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ Y_DO_NOT_OPTIMIZE_AWAY(Sprintf(FORMAT_SIGNIFICANT[e.DigitsCount], e.Value)); \
+ } \
+ } \
+ } \
+ \
+ Y_CPU_BENCHMARK(FloatToStringSignificant_##type##_##count, iface) { \
+ const auto& examples = Default<TExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ Y_DO_NOT_OPTIMIZE_AWAY(FloatToString(e.Value, PREC_NDIGITS, e.DigitsCount)); /* presumably the significant-digits ("%.<n>g"-like) mode -- confirm against cast.h */ \
+ } \
+ } \
+ } \
+ \
+ Y_CPU_BENCHMARK(NearZeroSprintfAuto_##type##_##count, iface) { \
+ const auto& examples = Default<TNearZeroExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ /* this is in fact equal to Sprintf("%.6f", e.Value) and that is why it is faster */ \
+ /* than FloatToString(e.Value) */ \
+ Y_DO_NOT_OPTIMIZE_AWAY(Sprintf("%f", e.Value)); \
+ } \
+ } \
+ } \
+ \
+ Y_CPU_BENCHMARK(NearZeroFloatToStringAuto_##type##_##count, iface) { \
+ const auto& examples = Default<TNearZeroExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ Y_DO_NOT_OPTIMIZE_AWAY(FloatToString(e.Value)); \
+ } \
+ } \
+ } \
+ \
+ Y_CPU_BENCHMARK(NearZeroSprintfFixed_##type##_##count, iface) { \
+ const auto& examples = Default<TNearZeroExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ Y_DO_NOT_OPTIMIZE_AWAY(Sprintf(FORMAT_FIXED[e.DigitsCount], e.Value)); \
+ } \
+ } \
+ } \
+ \
+ Y_CPU_BENCHMARK(NearZeroFloatToStringFixed_##type##_##count, iface) { \
+ const auto& examples = Default<TNearZeroExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ Y_DO_NOT_OPTIMIZE_AWAY(FloatToString(e.Value, PREC_POINT_DIGITS, e.DigitsCount)); /* "%.<n>f" counterpart, see the note at the top of the file */ \
+ } \
+ } \
+ } \
+ \
+ Y_CPU_BENCHMARK(NearZeroSprintfSignificant_##type##_##count, iface) { \
+ const auto& examples = Default<TNearZeroExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ Y_DO_NOT_OPTIMIZE_AWAY(Sprintf(FORMAT_SIGNIFICANT[e.DigitsCount], e.Value)); \
+ } \
+ } \
+ } \
+ \
+ Y_CPU_BENCHMARK(NearZeroFloatToStringSignificant_##type##_##count, iface) { \
+ const auto& examples = Default<TNearZeroExamplesHolder<type, count>>().Examples; \
+ for (const auto i : xrange(iface.Iterations())) { \
+ Y_UNUSED(i); \
+ for (const auto e : examples) { \
+ Y_DO_NOT_OPTIMIZE_AWAY(FloatToString(e.Value, PREC_NDIGITS, e.DigitsCount)); /* presumably the significant-digits ("%.<n>g"-like) mode -- confirm against cast.h */ \
+ } \
+ } \
+ }
+
+DEFINE_BENCHMARK(float, 1); // float benchmarks for 1..256 examples, doubling each time
+DEFINE_BENCHMARK(float, 2);
+DEFINE_BENCHMARK(float, 4);
+DEFINE_BENCHMARK(float, 8);
+DEFINE_BENCHMARK(float, 16);
+DEFINE_BENCHMARK(float, 32);
+DEFINE_BENCHMARK(float, 64);
+DEFINE_BENCHMARK(float, 128);
+DEFINE_BENCHMARK(float, 256);
+
+DEFINE_BENCHMARK(double, 1); // the same sizes for double
+DEFINE_BENCHMARK(double, 2);
+DEFINE_BENCHMARK(double, 4);
+DEFINE_BENCHMARK(double, 8);
+DEFINE_BENCHMARK(double, 16);
+DEFINE_BENCHMARK(double, 32);
+DEFINE_BENCHMARK(double, 64);
+DEFINE_BENCHMARK(double, 128);
+DEFINE_BENCHMARK(double, 256);
+
+#undef DEFINE_BENCHMARK
diff --git a/util/string/benchmark/float_to_string/metrics/main.py b/util/string/benchmark/float_to_string/metrics/main.py
new file mode 100644
index 0000000000..e9d4b7ac1d
--- /dev/null
+++ b/util/string/benchmark/float_to_string/metrics/main.py
@@ -0,0 +1,5 @@
+import yatest.common as yc
+
+
+def test_export_metrics(metrics):  # presumably `metrics` is a fixture supplied by the test framework -- confirm
+ metrics.set_benchmark(yc.execute_benchmark('util/string/benchmark/float_to_string/float_to_string', threads=8))  # runs the float_to_string benchmark target and hands its result to the metrics object
diff --git a/util/string/benchmark/float_to_string/metrics/ya.make b/util/string/benchmark/float_to_string/metrics/ya.make
new file mode 100644
index 0000000000..4b8c4cc07d
--- /dev/null
+++ b/util/string/benchmark/float_to_string/metrics/ya.make
@@ -0,0 +1,21 @@
+OWNER(
+ yazevnul
+ g:util
+)
+SUBSCRIBER(g:util-subscribers)
+
+PY2TEST()
+
+SIZE(LARGE)
+
+TAG(
+ ya:force_sandbox
+ sb:intel_e5_2660v1
+ ya:fat
+)
+
+TEST_SRCS(main.py)
+
+DEPENDS(util/string/benchmark/float_to_string)
+
+END()
diff --git a/util/string/benchmark/float_to_string/ya.make b/util/string/benchmark/float_to_string/ya.make
new file mode 100644
index 0000000000..8136ad34f0
--- /dev/null
+++ b/util/string/benchmark/float_to_string/ya.make
@@ -0,0 +1,12 @@
+OWNER(yazevnul)
+
+Y_BENCHMARK()
+
+# to minimize allocations overhead
+ALLOCATOR(B)
+
+SRCS(
+ main.cpp
+)
+
+END()
diff --git a/util/string/benchmark/join/main.cpp b/util/string/benchmark/join/main.cpp
new file mode 100644
index 0000000000..1a8633d3a8
--- /dev/null
+++ b/util/string/benchmark/join/main.cpp
@@ -0,0 +1,95 @@
+#include <library/cpp/testing/benchmark/bench.h>
+
+#include <util/generic/function.h>
+#include <util/generic/singleton.h>
+#include <util/generic/vector.h>
+#include <util/generic/xrange.h>
+#include <util/random/fast.h>
+#include <util/string/cast.h>
+#include <util/string/join.h>
+
+namespace {
+    // Assigns pseudo-random values to variables of the supported types
+    // (ui16, ui32, double, TString), one at a time or as a variadic list.
+    // It can be used to randomize a tuple via Apply() (arcadia version of std::apply).
+    // All values come from a single generator seeded in the constructor.
+    class TRandomizer {
+    public:
+        // explicit: a bare integer should never silently turn into a randomizer.
+        explicit TRandomizer(ui64 seed)
+            : Prng(seed)
+        {
+        }
+
+        void Randomize(ui16& i) {
+            i = static_cast<ui16>(Prng.GenRand());
+        }
+
+        void Randomize(ui32& i) {
+            i = static_cast<ui32>(Prng.GenRand());
+        }
+
+        void Randomize(double& d) {
+            // The two draws are separate statements on purpose: operands of `+` may be
+            // evaluated in either order, which would make the generated examples depend
+            // on the compiler.
+            const double fraction = Prng.GenRandReal4();
+            const auto whole = Prng.Uniform(Max<ui16>());
+            d = fraction + whole;
+        }
+
+        void Randomize(TString& s) {
+            s = ::ToString(Prng.GenRand());
+        }
+
+        // Randomizes the first argument, then recurses into the remaining ones.
+        template <typename T, typename... TArgs>
+        void Randomize(T& t, TArgs&... args) {
+            Randomize(t);
+            Randomize(args...);
+        }
+
+    private:
+        TFastRng<ui64> Prng;
+    };
+
+    // Owns N tuples of type std::tuple<T...>; every tuple field is filled with a
+    // pseudo-random value when the holder is constructed.
+    template <size_t N, typename... T>
+    struct TExamplesHolder {
+        using TExamples = TVector<std::tuple<T...>>;
+        TExamples Examples;
+
+        TExamplesHolder()
+            : Examples(N)
+        {
+            // The seed is derived from the example count and the tuple size.
+            TRandomizer randomizer{N * sizeof(typename TExamples::value_type) * 42};
+            const auto fillFields = [&randomizer](T&... fields) {
+                randomizer.Randomize(fields...);
+            };
+            for (auto& example : Examples) {
+                Apply(fillFields, example);
+            }
+        }
+    };
+
+    // Joins all fields of the tuple into one TString, using "-" as the delimiter.
+    template <typename... TArgs>
+    TString JoinTuple(std::tuple<TArgs...> t) {
+        const auto joinWithDash = [](TArgs... x) -> TString {
+            return Join("-", x...);
+        };
+        return Apply(joinWithDash, t);
+    }
+}
+
+// Defines the CPU benchmark Join_<count>_<types>. It takes the examples of the
+// TExamplesHolder<count, __VA_ARGS__> obtained through Default<>() and joins
+// every example tuple once per benchmark iteration.
+#define DEFINE_BENCHMARK(count, types, ...) \
+    Y_CPU_BENCHMARK(Join_##count##_##types, iface) { \
+        const auto& examples = Default<TExamplesHolder<count, __VA_ARGS__>>().Examples; \
+        for (const auto i : xrange(iface.Iterations())) { \
+            Y_UNUSED(i); \
+            /* iterate by reference so the loop itself does not copy each tuple */ \
+            for (const auto& e : examples) { \
+                Y_DO_NOT_OPTIMIZE_AWAY(JoinTuple(e)); \
+            } \
+        } \
+    }
+
+DEFINE_BENCHMARK(100, SS, TString, TString);
+DEFINE_BENCHMARK(100, SSS, TString, TString, TString);
+DEFINE_BENCHMARK(100, SSSSS, TString, TString, TString, TString, TString);
+
+DEFINE_BENCHMARK(100, ss, ui16, ui16);
+DEFINE_BENCHMARK(100, SsS, TString, ui16, TString);
+DEFINE_BENCHMARK(100, SsSsS, TString, ui16, TString, ui16, TString);
+
+DEFINE_BENCHMARK(100, ii, ui32, ui32);
+DEFINE_BENCHMARK(100, SiS, TString, ui32, TString);
+DEFINE_BENCHMARK(100, SiSiS, TString, ui32, TString, ui32, TString);
+
+DEFINE_BENCHMARK(100, dd, double, double);
+DEFINE_BENCHMARK(100, SdS, TString, double, TString);
+DEFINE_BENCHMARK(100, SdSdS, TString, double, TString, double, TString);
+
+#undef DEFINE_BENCHMARK
diff --git a/util/string/benchmark/join/metrics/main.py b/util/string/benchmark/join/metrics/main.py
new file mode 100644
index 0000000000..1ed5014808
--- /dev/null
+++ b/util/string/benchmark/join/metrics/main.py
@@ -0,0 +1,5 @@
+import yatest.common as yc
+
+
+def test_export_metrics(metrics):
+ metrics.set_benchmark(yc.execute_benchmark('util/string/benchmark/join/join', threads=8))
diff --git a/util/string/benchmark/join/metrics/ya.make b/util/string/benchmark/join/metrics/ya.make
new file mode 100644
index 0000000000..08ff3a149f
--- /dev/null
+++ b/util/string/benchmark/join/metrics/ya.make
@@ -0,0 +1,21 @@
+OWNER(
+ salmin
+ g:util
+)
+SUBSCRIBER(g:util-subscribers)
+
+PY2TEST()
+
+SIZE(LARGE)
+
+TAG(
+ ya:force_sandbox
+ sb:intel_e5_2660v1
+ ya:fat
+)
+
+TEST_SRCS(main.py)
+
+DEPENDS(util/string/benchmark/join)
+
+END()
diff --git a/util/string/benchmark/join/ya.make b/util/string/benchmark/join/ya.make
new file mode 100644
index 0000000000..dfcc1d264e
--- /dev/null
+++ b/util/string/benchmark/join/ya.make
@@ -0,0 +1,13 @@
+Y_BENCHMARK()
+
+OWNER(
+ salmin
+ g:util
+)
+SUBSCRIBER(g:util-subscribers)
+
+SRCS(
+ main.cpp
+)
+
+END()
diff --git a/util/string/benchmark/subst_global/main.cpp b/util/string/benchmark/subst_global/main.cpp
new file mode 100644
index 0000000000..e0decfa042
--- /dev/null
+++ b/util/string/benchmark/subst_global/main.cpp
@@ -0,0 +1,203 @@
+#include <library/cpp/testing/benchmark/bench.h>
+
+#include <util/generic/cast.h>
+#include <util/generic/singleton.h>
+#include <util/generic/string.h>
+#include <util/generic/xrange.h>
+#include <util/random/fast.h>
+#include <util/string/cast.h>
+#include <util/string/subst.h>
+
+namespace {
+    // Fixture: N appended characters, each being WHAT + 1, so WHAT never occurs.
+    template <size_t N, char What, char With>
+    struct TNoMatches {
+        enum : char {
+            WHAT = What,
+            WITH = With
+        };
+        TString Str;
+
+        TNoMatches() {
+            for (size_t appended = 0; appended != N; ++appended) {
+                Str += WHAT + 1;
+            }
+        }
+    };
+
+    // Fixture: WHAT first, followed by N - 1 characters WHAT + 1 (empty when N is 0).
+    template <size_t N, char What, char With>
+    struct TOneMatchInTheBeginning {
+        enum : char {
+            WHAT = What,
+            WITH = With
+        };
+        TString Str;
+
+        TOneMatchInTheBeginning() {
+            if (N == 0) {
+                return;
+            }
+
+            Str += WHAT;
+            // positions 1 .. N-1 hold the non-matching filler
+            for (size_t pos = 1; pos < N; ++pos) {
+                Str += WHAT + 1;
+            }
+        }
+    };
+
+    // Fixture: N - 1 characters WHAT + 1 followed by a single WHAT (empty when N is 0).
+    template <size_t N, char What, char With>
+    struct TOneMatchInTheEnd {
+        enum : char {
+            WHAT = What,
+            WITH = With
+        };
+        TString Str;
+
+        TOneMatchInTheEnd() {
+            if (N == 0) {
+                return;
+            }
+
+            // positions 0 .. N-2 hold the non-matching filler
+            for (size_t pos = 1; pos < N; ++pos) {
+                Str += WHAT + 1;
+            }
+            Str += WHAT;
+        }
+    };
+
+    // Fixture: N / 2 characters WHAT + 1, then one WHAT, then WHAT + 1 until the
+    // string is N characters long (empty when N is 0).
+    template <size_t N, char What, char With>
+    struct TOneMatchInTheMiddle {
+        enum : char {
+            WHAT = What,
+            WITH = With
+        };
+        TString Str;
+
+        TOneMatchInTheMiddle() {
+            if (N == 0) {
+                return;
+            }
+
+            for (const auto dummy : xrange(N / 2)) {
+                Y_UNUSED(dummy);
+                Str += WHAT + 1;
+            }
+            Str += WHAT;
+            while (Str.size() < N) {
+                Str += WHAT + 1;
+            }
+        }
+    };
+
+    // Fixture: the first N / 2 characters are WHAT, the rest (up to length N) are WHAT + 1.
+    template <size_t N, char What, char With>
+    struct TFirstHalfMatches {
+        enum : char {
+            WHAT = What,
+            WITH = With
+        };
+        TString Str;
+
+        TFirstHalfMatches() {
+            for (const auto dummy : xrange(N / 2)) {
+                Y_UNUSED(dummy);
+                Str += WHAT;
+            }
+            while (Str.size() != N) {
+                Str += WHAT + 1;
+            }
+        }
+    };
+
+    // Fixture: the first N / 2 characters are WHAT + 1, the rest (up to length N) are WHAT.
+    template <size_t N, char What, char With>
+    struct TSecondHalfMatches {
+        enum : char {
+            WHAT = What,
+            WITH = With
+        };
+        TString Str;
+
+        TSecondHalfMatches() {
+            for (const auto dummy : xrange(N / 2)) {
+                Y_UNUSED(dummy);
+                Str += WHAT + 1;
+            }
+            while (Str.size() != N) {
+                Str += WHAT;
+            }
+        }
+    };
+
+    // Fixture: N characters; a position holds WHAT when the generator output is a
+    // multiple of K and WHAT + 1 otherwise. The generator is seeded with N * K * 101.
+    template <size_t N, size_t K, char What, char With>
+    struct TEveryKth {
+        enum : char {
+            WHAT = What,
+            WITH = With
+        };
+        TString Str;
+
+        TEveryKth() {
+            TFastRng<ui64> prng{N * K * 101};
+            for (const auto dummy : xrange(N)) {
+                Y_UNUSED(dummy);
+                const auto remainder = prng() % K;
+                Str += remainder ? (WHAT + 1) : WHAT;
+            }
+        }
+    };
+}
+
+// Common body of every SubstGlobal benchmark in this file.
+//   name - identifier of the generated benchmark;
+//   ...  - the fixture type (variadic only because the type spelling contains commas).
+// Each iteration copies the fixture string, hides the copy from the optimizer with
+// NBench::Escape and replaces D::WHAT with D::WITH in it.
+#define DEFINE_SUBST_GLOBAL_BENCHMARK(name, ...) \
+    Y_CPU_BENCHMARK(name, i) { \
+        using D = __VA_ARGS__; \
+        const auto& str = Default<D>().Str; \
+        for (const auto dummy : xrange(i.Iterations())) { \
+            Y_UNUSED(dummy); \
+            auto s = str; \
+            NBench::Escape(s.data()); \
+            Y_DO_NOT_OPTIMIZE_AWAY(SubstGlobal(s, ToUnderlying(D::WHAT), ToUnderlying(D::WITH))); \
+            NBench::Clobber(); \
+        } \
+    }
+
+// Benchmark <type>_<N> over the fixture T<type><N, 'a', 'z'>.
+#define DEFINE_BENCHMARK(type, N) \
+    DEFINE_SUBST_GLOBAL_BENCHMARK(type##_##N, T##type<N, 'a', 'z'>)
+
+// Benchmark Random_<N>_<K> over the fixture TEveryKth<N, K, 'a', 'z'>.
+#define DEFINE_RNG_BENCHMARK(N, K) \
+    DEFINE_SUBST_GLOBAL_BENCHMARK(Random_##N##_##K, TEveryKth<N, K, 'a', 'z'>)
+
+DEFINE_BENCHMARK(NoMatches, 0)
+DEFINE_BENCHMARK(NoMatches, 1)
+DEFINE_BENCHMARK(NoMatches, 128)
+DEFINE_BENCHMARK(NoMatches, 4096)
+DEFINE_BENCHMARK(OneMatchInTheBeginning, 1)
+DEFINE_BENCHMARK(OneMatchInTheBeginning, 16)
+DEFINE_BENCHMARK(OneMatchInTheBeginning, 128)
+DEFINE_BENCHMARK(OneMatchInTheBeginning, 4096)
+DEFINE_BENCHMARK(OneMatchInTheEnd, 16)
+DEFINE_BENCHMARK(OneMatchInTheEnd, 128)
+DEFINE_BENCHMARK(OneMatchInTheEnd, 4096)
+DEFINE_BENCHMARK(OneMatchInTheMiddle, 16)
+DEFINE_BENCHMARK(OneMatchInTheMiddle, 128)
+DEFINE_BENCHMARK(OneMatchInTheMiddle, 4096)
+DEFINE_BENCHMARK(FirstHalfMatches, 16)
+DEFINE_BENCHMARK(FirstHalfMatches, 128)
+DEFINE_BENCHMARK(FirstHalfMatches, 4096)
+DEFINE_BENCHMARK(SecondHalfMatches, 16)
+DEFINE_BENCHMARK(SecondHalfMatches, 128)
+DEFINE_BENCHMARK(SecondHalfMatches, 4096)
+
+DEFINE_RNG_BENCHMARK(4096, 1)
+DEFINE_RNG_BENCHMARK(4096, 2)
+DEFINE_RNG_BENCHMARK(4096, 3)
+DEFINE_RNG_BENCHMARK(4096, 4)
+DEFINE_RNG_BENCHMARK(4096, 10)
+DEFINE_RNG_BENCHMARK(4096, 32)
+DEFINE_RNG_BENCHMARK(4096, 100)
+
+// Keep the helper macros local to this file, as the other string benchmarks do.
+#undef DEFINE_RNG_BENCHMARK
+#undef DEFINE_BENCHMARK
+#undef DEFINE_SUBST_GLOBAL_BENCHMARK
diff --git a/util/string/benchmark/subst_global/metrics/main.py b/util/string/benchmark/subst_global/metrics/main.py
new file mode 100644
index 0000000000..62f2f3d76d
--- /dev/null
+++ b/util/string/benchmark/subst_global/metrics/main.py
@@ -0,0 +1,5 @@
+import yatest.common as yc
+
+
+def test_export_metrics(metrics):
+ metrics.set_benchmark(yc.execute_benchmark('util/string/benchmark/subst_global/subst_global', threads=8))
diff --git a/util/string/benchmark/subst_global/metrics/ya.make b/util/string/benchmark/subst_global/metrics/ya.make
new file mode 100644
index 0000000000..d8c30ad460
--- /dev/null
+++ b/util/string/benchmark/subst_global/metrics/ya.make
@@ -0,0 +1,21 @@
+OWNER(
+ yazevnul
+ g:util
+)
+SUBSCRIBER(g:util-subscribers)
+
+PY2TEST()
+
+SIZE(LARGE)
+
+TAG(
+ ya:force_sandbox
+ sb:intel_e5_2660v1
+ ya:fat
+)
+
+TEST_SRCS(main.py)
+
+DEPENDS(util/string/benchmark/subst_global)
+
+END()
diff --git a/util/string/benchmark/subst_global/ya.make b/util/string/benchmark/subst_global/ya.make
new file mode 100644
index 0000000000..8136ad34f0
--- /dev/null
+++ b/util/string/benchmark/subst_global/ya.make
@@ -0,0 +1,12 @@
+OWNER(yazevnul)
+
+Y_BENCHMARK()
+
+# to minimize allocations overhead
+ALLOCATOR(B)
+
+SRCS(
+ main.cpp
+)
+
+END()
diff --git a/util/string/benchmark/ya.make b/util/string/benchmark/ya.make
new file mode 100644
index 0000000000..266b53c7b3
--- /dev/null
+++ b/util/string/benchmark/ya.make
@@ -0,0 +1,16 @@
+OWNER(
+ g:util
+ yazevnul
+)
+SUBSCRIBER(g:util-subscribers)
+
+RECURSE(
+ ascii
+ cast
+ float_to_string
+ float_to_string/metrics
+ join
+ join/metrics
+ subst_global
+ subst_global/metrics
+)
diff --git a/util/string/builder.cpp b/util/string/builder.cpp
new file mode 100644
index 0000000000..a3821d3399
--- /dev/null
+++ b/util/string/builder.cpp
@@ -0,0 +1,8 @@
+#include "builder.h"
+
+#include <util/stream/output.h>
+
+// Stream output for TStringBuilder: writes the builder through its TString base.
+template <>
+void Out<TStringBuilder>(IOutputStream& os, const TStringBuilder& sb) {
+    const TString& asString = sb;
+    os << asString;
+}
diff --git a/util/string/builder.h b/util/string/builder.h
new file mode 100644
index 0000000000..7b54821151
--- /dev/null
+++ b/util/string/builder.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <util/stream/str.h>
+#include <utility>
+#include <util/generic/string.h>
+
+namespace NPrivateStringBuilder {
+    // A TString that carries a TStringOutput bound to itself, so values can be
+    // appended with operator<< and the result passed wherever a TString is expected.
+    class TStringBuilder: public TString {
+    public:
+        TStringBuilder()
+            : Out(*this)
+        {
+        }
+
+        // Moves the string contents out of rhs; Out is bound to the new object
+        // instead of being taken from rhs.
+        TStringBuilder(TStringBuilder&& rhs)
+            : TString(std::move(rhs))
+            , Out(*this)
+        {
+        }
+
+        // Output stream constructed over this string.
+        TStringOutput Out;
+    };
+
+    // Streams t into an lvalue builder and returns that builder for chaining.
+    template <class T>
+    static inline TStringBuilder& operator<<(TStringBuilder& builder, const T& t) {
+        builder.Out << t;
+        return builder;
+    }
+
+    // Streams t into a temporary builder and returns it as an rvalue again.
+    template <class T>
+    static inline TStringBuilder&& operator<<(TStringBuilder&& builder, const T& t) {
+        builder.Out << t;
+        return std::move(builder);
+    }
+}
+
+using TStringBuilder = NPrivateStringBuilder::TStringBuilder;
diff --git a/util/string/builder_ut.cpp b/util/string/builder_ut.cpp
new file mode 100644
index 0000000000..22def683ec
--- /dev/null
+++ b/util/string/builder_ut.cpp
@@ -0,0 +1,63 @@
+#include "builder.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+static void TestEquals(const TString& expected, const TString& actual) { // both sides are taken as TString, so TStringBuilder expressions are compared through their TString base
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+}
+
+struct TClassWithStreamOperator { // test-only type whose sole way into a stream is the operator<< defined right after it
+ ui32 Id;
+ TString Name;
+
+ TClassWithStreamOperator(ui32 id, const TString& name)
+ : Id(id)
+ , Name(name)
+ {
+ }
+};
+
+// Prints the value as "<Id> <Name>".
+IOutputStream& operator<<(IOutputStream& out, const TClassWithStreamOperator& value) {
+    out << value.Id << " " << value.Name;
+    return out;
+}
+
+Y_UNIT_TEST_SUITE(TStringBuilderTest) {
+ Y_UNIT_TEST(TestStringBuilder) { // chaining on a temporary builder: empty, string literal, integer, user type with operator<<
+ TestEquals("", TStringBuilder());
+ TestEquals("a", TStringBuilder() << "a");
+ TestEquals("a1", TStringBuilder() << "a" << 1);
+ TestEquals("value: 123 name", TStringBuilder() << "value: " << TClassWithStreamOperator(123, "name"));
+ }
+
+ Y_UNIT_TEST(TestStringBuilderOut) { // a builder can itself be written to another output stream
+ TString s;
+ TStringOutput out(s);
+ TStringBuilder sb;
+ sb << "a";
+ out << sb;
+ TestEquals("a", s);
+ }
+
+ Y_UNIT_TEST(TestStringBuilderRValue) { // the value category of the builder must survive a chain of <<
+ struct TRValueAcceptTester { // overload pair that reports whether its argument bound as an rvalue
+ static bool IsRValue(const TString&) {
+ return false;
+ }
+
+ static bool IsRValue(TString&&) {
+ return true;
+ }
+ };
+
+ UNIT_ASSERT(TRValueAcceptTester::IsRValue(TStringBuilder() << "a" << 1));
+
+ TStringBuilder b;
+ UNIT_ASSERT(!TRValueAcceptTester::IsRValue(b << "a" << 1));
+ TStringBuilder b2;
+ UNIT_ASSERT(!TRValueAcceptTester::IsRValue(b2 << "a" << 1 << TStringBuilder() << "a")); // streaming an empty temporary builder into b2 keeps the chain an lvalue
+ UNIT_ASSERT_VALUES_EQUAL("a1a", b2);
+
+ UNIT_ASSERT(TRValueAcceptTester::IsRValue(TStringBuilder() << b2)); // an lvalue builder on the right does not change the category of the temporary on the left
+ UNIT_ASSERT_VALUES_EQUAL("a1a", TStringBuilder() << b2);
+ }
+}
diff --git a/util/string/cast.cpp b/util/string/cast.cpp
new file mode 100644
index 0000000000..aa1e65a8e9
--- /dev/null
+++ b/util/string/cast.cpp
@@ -0,0 +1,844 @@
+#include <util/system/defaults.h>
+
+#if defined(_freebsd_) && !defined(__LONG_LONG_SUPPORTED)
+ #define __LONG_LONG_SUPPORTED
+#endif
+
+#include <cstdio>
+#include <string>
+#include <cmath>
+
+#include <util/string/type.h>
+#include <util/string/cast.h>
+#include <util/string/escape.h>
+
+#include <contrib/libs/double-conversion/double-conversion.h>
+
+#include <util/generic/string.h>
+#include <util/system/yassert.h>
+#include <util/generic/yexception.h>
+#include <util/generic/typetraits.h>
+#include <util/generic/ylimits.h>
+#include <util/generic/singleton.h>
+#include <util/generic/utility.h>
+
+using double_conversion::DoubleToStringConverter;
+using double_conversion::StringBuilder;
+using double_conversion::StringToDoubleConverter;
+
+/*
+ * ------------------------------ formatters ------------------------------
+ */
+
+namespace {
+ constexpr char IntToChar[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; // digit value -> character, upper-case letters for 10..15
+
+ static_assert(Y_ARRAY_SIZE(IntToChar) == 16, "expect Y_ARRAY_SIZE(IntToChar) == 16");
+
+ // clang-format off
+ constexpr int LetterToIntMap[] = { // indexed by character code: '0'-'9' -> 0..9, 'A'-'F' and 'a'-'f' -> 10..15, everything else -> 20 (not below any supported base)
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 0, 1,
+ 2, 3, 4, 5, 6, 7, 8, 9, 20, 20,
+ 20, 20, 20, 20, 20, 10, 11, 12, 13, 14,
+ 15, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
+ 20, 20, 20, 20, 20, 20, 20, 10, 11, 12,
+ 13, 14, 15,
+ };
+ // clang-format on
+
+ template <class T>
+ std::enable_if_t<std::is_signed<T>::value, std::make_unsigned_t<T>> NegateNegativeSigned(T value) noexcept { // magnitude of a negative value, returned in the unsigned counterpart of T
+ return std::make_unsigned_t<T>(-(value + 1)) + std::make_unsigned_t<T>(1); // negating value + 1 instead of value keeps the signed step in range; the final + 1 is unsigned arithmetic
+ }
+
+ template <class T>
+ std::enable_if_t<std::is_unsigned<T>::value, std::make_unsigned_t<T>> NegateNegativeSigned(T) noexcept { // overload for unsigned T so that TIntFormatter::Format compiles for such types
+ Y_UNREACHABLE(); // the only call in TIntFormatter is guarded by std::is_signed<T>::value && value < 0
+ }
+
+ template <class T>
+ std::make_signed_t<T> NegatePositiveSigned(T value) noexcept { // returns -value in the signed counterpart of T (0 stays 0)
+ return value > 0 ? (-std::make_signed_t<T>(value - 1) - 1) : 0; // value - 1 is converted and negated first, then 1 is subtracted, instead of converting value itself
+ }
+
+    // Formats an unsigned integer in the given base (2..16) into buf[0, len).
+    // Returns the number of characters written; no terminator is appended.
+    // Throws via Y_ENSURE when len is zero or the digits do not fit.
+    template <class T, unsigned base, class TChar>
+    struct TBasicIntFormatter {
+        static_assert(1 < base && base < 17, "expect 1 < base && base < 17");
+        static_assert(std::is_unsigned<T>::value, "TBasicIntFormatter can only handle unsigned integers.");
+
+        static inline size_t Format(T value, TChar* buf, size_t len) {
+            Y_ENSURE(len, TStringBuf("zero length"));
+
+            TChar* cursor = buf;
+
+            // Digits are produced from the least significant one, i.e. in reverse order.
+            do {
+                // one division per digit; the remainder is a mask or a multiply-subtract
+                const T quotient = static_cast<T>(value / base);
+                *cursor++ = IntToChar[base == 2 || base == 4 || base == 8 || base == 16 ? value & (base - 1) : value - base * quotient];
+                value = quotient;
+            } while (value && --len);
+
+            Y_ENSURE(!value, TStringBuf("not enough room in buffer"));
+
+            const size_t written = cursor - buf;
+
+            // Reverse the digits in place to get the most significant digit first.
+            for (TChar *head = buf, *tail = cursor - 1; head < tail; ++head, --tail) {
+                const TChar saved = *head;
+
+                *head = *tail;
+                *tail = saved;
+            }
+
+            return written;
+        }
+    };
+
+    // Formats any integral T in the given base: negative values get a leading '-'
+    // followed by their magnitude, everything else goes straight to the unsigned formatter.
+    template <class T, unsigned base, class TChar>
+    struct TIntFormatter {
+        static_assert(1 < base && base < 17, "expect 1 < base && base < 17");
+        static_assert(std::is_integral<T>::value, "T must be an integral type.");
+
+        static inline size_t Format(T value, TChar* buf, size_t len) {
+            using TUnsignedFormatter = TBasicIntFormatter<std::make_unsigned_t<T>, base, TChar>;
+
+            const bool isNegative = std::is_signed<T>::value && value < 0;
+
+            if (!isNegative) {
+                return TUnsignedFormatter::Format(value, buf, len);
+            }
+
+            // room for the sign and at least one digit
+            Y_ENSURE(len >= 2, TStringBuf("not enough room in buffer"));
+
+            buf[0] = '-';
+
+            return 1 + TUnsignedFormatter::Format(NegateNegativeSigned(value), buf + 1, len - 1);
+        }
+    };
+
+ template <class T>
+ struct TFltModifiers;
+
+ template <class T, int base, class TChar>
+ Y_NO_INLINE size_t FormatInt(T value, TChar* buf, size_t len) { // out-of-line entry point that forwards to TIntFormatter<T, base, TChar>::Format
+ return TIntFormatter<T, base, TChar>::Format(value, buf, len);
+ }
+
+    // Formats t into buf[0, len) with snprintf and the TFltModifiers<T>::ModifierWrite
+    // format. Returns the number of characters produced, not counting the terminating
+    // NUL that snprintf also stores. Throws via Y_ENSURE when snprintf fails or the
+    // text together with its NUL does not fit into len bytes.
+    template <class T>
+    inline size_t FormatFlt(T t, char* buf, size_t len) {
+        const int ret = snprintf(buf, len, TFltModifiers<T>::ModifierWrite, t);
+
+        // snprintf stores at most len - 1 characters plus a NUL and returns the length
+        // the complete text would have had, so ret == len already means that the last
+        // character was cut off; only ret < len is a complete result.
+        Y_ENSURE(ret >= 0 && static_cast<size_t>(ret) < len, TStringBuf("cannot format float"));
+
+        return static_cast<size_t>(ret);
+    }
+
+ enum EParseStatus { // outcome of the integer parsers below
+ PS_OK = 0,
+ PS_EMPTY_STRING, // the input has no characters
+ PS_PLUS_STRING, // the input ends right after a leading '+'
+ PS_MINUS_STRING, // the input ends right after a leading '-'
+ PS_BAD_SYMBOL, // a character that is not a digit of the requested base
+ PS_OVERFLOW, // the value does not fit under the allowed maximum
+ };
+
+ constexpr ui8 SAFE_LENS[4][17] = { // SAFE_LENS[log2(sizeof(T))][base]: input lengths up to this value are parsed on the fast path without per-digit overflow checks
+ {0, 0, 7, 5, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1},
+ {0, 0, 15, 10, 7, 6, 6, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3},
+ {0, 0, 31, 20, 15, 13, 12, 11, 10, 10, 9, 9, 8, 8, 8, 8, 7},
+ {0, 0, 63, 40, 31, 27, 24, 22, 21, 20, 19, 18, 17, 17, 16, 16, 15},
+ };
+
+ inline constexpr ui8 ConstLog2(ui8 x) noexcept { // number of halvings needed to bring x down to 1; the recursion has no stop for x == 0
+ return x == 1 ? 0 : 1 + ConstLog2(x / 2);
+ }
+
+ template <unsigned BASE, class TChar, class T>
+ inline std::enable_if_t<(BASE > 10), bool> CharToDigit(TChar c, T* digit) noexcept { // table-driven variant for bases that need letters
+ unsigned uc = c;
+
+ if (uc >= Y_ARRAY_SIZE(LetterToIntMap)) { // character code outside the lookup table cannot be a digit
+ return false;
+ }
+
+ *digit = LetterToIntMap[uc]; // note: *digit is written even when the function then returns false
+
+ return *digit < BASE;
+ }
+
+ template <unsigned BASE, class TChar, class T>
+ inline std::enable_if_t<(BASE <= 10), bool> CharToDigit(TChar c, T* digit) noexcept { // arithmetic variant for bases whose digits are all among '0'..'9'
+ return (c >= '0') && ((*digit = (c - '0')) < BASE); // *digit is assigned as a side effect whenever c >= '0', even if the result is false
+ }
+
+ template <class T, unsigned base, class TChar>
+ struct TBasicIntParser { // parses an unsigned number (no sign handling) from [*ppos, end) in the given base
+ static_assert(1 < base && base < 17, "Expect 1 < base && base < 17.");
+ static_assert(std::is_unsigned<T>::value, "TBasicIntParser can only handle unsigned integers.");
+
+ enum : unsigned {
+ BASE_POW_2 = base * base, // multiplier used when two digits are consumed at once
+ };
+
+ static inline EParseStatus Parse(const TChar** ppos, const TChar* end, T max, T* target) noexcept { // entry point: tries the fast path first and falls back to the checked one
+ Y_ASSERT(*ppos != end); /* This check should be somewhere up the stack. */
+ const size_t maxSafeLen = SAFE_LENS[ConstLog2(sizeof(T))][base];
+
+ // can parse without overflow
+ if (size_t(end - *ppos) <= maxSafeLen) {
+ T result;
+
+ if (ParseFast(*ppos, end, &result) && result <= max) {
+ *target = result;
+
+ return PS_OK;
+ }
+ }
+
+ return ParseSlow(ppos, end, max, target); // also reached after a failed fast attempt, so that the exact status and position get reported
+ }
+
+ static inline bool ParseFast(const TChar* pos, const TChar* end, T* target) noexcept { // no overflow checks here; returns true only if every character was a digit
+ T result = T();
+ T d1;
+ T d2;
+
+ // we have end > pos
+ auto beforeEnd = end - 1;
+
+ while (pos < beforeEnd && CharToDigit<base>(*pos, &d1) && CharToDigit<base>(*(pos + 1), &d2)) { // two digits per iteration
+ result = result * BASE_POW_2 + d1 * base + d2;
+ pos += 2;
+ }
+
+ while (pos != end && CharToDigit<base>(*pos, &d1)) { // whatever is left, one digit at a time
+ result = result * base + d1;
+ ++pos;
+ }
+
+ *target = result;
+
+ return pos == end;
+ }
+
+ static inline EParseStatus ParseSlow(const TChar** ppos, const TChar* end, T max, T* target) noexcept { // digit-by-digit parse with overflow checks against max
+ T result = T();
+ T preMulMax = max / base; // largest value that can still be multiplied by base without exceeding max
+ const TChar* pos = *ppos;
+
+ while (pos != end) {
+ T digit;
+
+ if (!CharToDigit<base>(*pos, &digit)) {
+ *ppos = pos; // report where the offending character is
+
+ return PS_BAD_SYMBOL;
+ }
+
+ if (result > preMulMax) {
+ return PS_OVERFLOW;
+ }
+
+ result *= base;
+
+ if (result > max - digit) { // written as a subtraction on the right-hand side instead of computing result + digit first
+ return PS_OVERFLOW;
+ }
+
+ result += digit;
+ pos++;
+ }
+
+ *target = result;
+
+ return PS_OK;
+ }
+ };
+
+ template <class T>
+ struct TBounds { // largest magnitudes the parser accepts, selected by the sign of the input
+ T PositiveMax; // limit for inputs without a sign or with '+'
+ T NegativeMax; // limit for the digits that follow a '-'
+ };
+
+ template <class T, unsigned base, class TChar>
+ struct TIntParser { // handles an optional sign, then delegates the digits to TBasicIntParser
+ static_assert(1 < base && base < 17, "Expect 1 < base && base < 17.");
+ static_assert(std::is_integral<T>::value, "T must be an integral type.");
+
+ enum {
+ IsSigned = std::is_signed<T>::value
+ };
+
+ using TUnsigned = std::make_unsigned_t<T>;
+
+ static inline EParseStatus Parse(const TChar** ppos, const TChar* end, const TBounds<TUnsigned>& bounds, T* target) { // on failure *ppos is moved to the position the digit parser reported; *target is written only on PS_OK
+ const TChar* pos = *ppos;
+ if (pos == end) {
+ return PS_EMPTY_STRING;
+ }
+
+ bool negative = false;
+ TUnsigned max;
+ if (*pos == '+') {
+ pos++;
+ max = bounds.PositiveMax;
+
+ if (pos == end) {
+ return PS_PLUS_STRING;
+ }
+ } else if (IsSigned && *pos == '-') { // for unsigned T a leading '-' is left in place and reaches the digit parser
+ pos++;
+ max = bounds.NegativeMax;
+ negative = true;
+
+ if (pos == end) {
+ return PS_MINUS_STRING;
+ }
+ } else {
+ max = bounds.PositiveMax;
+ }
+
+ TUnsigned result;
+ EParseStatus error = TBasicIntParser<TUnsigned, base, TChar>::Parse(&pos, end, max, &result);
+ if (error != PS_OK) {
+ *ppos = pos;
+ return error;
+ }
+
+ if (IsSigned) {
+ *target = negative ? NegatePositiveSigned(result) : static_cast<T>(result);
+ } else {
+ *target = result;
+ }
+ return PS_OK;
+ }
+ };
+
+ template <class TChar>
+ [[noreturn]] static Y_NO_INLINE void ThrowParseError(EParseStatus status, const TChar* data, size_t len, const TChar* pos) { // turns a failed parse status into an exception; pos is dereferenced only for PS_BAD_SYMBOL
+ Y_ASSERT(status != PS_OK);
+
+ typedef TBasicString<TChar> TStringType;
+
+ switch (status) {
+ case PS_EMPTY_STRING:
+ ythrow TFromStringException() << TStringBuf("Cannot parse empty string as number. ");
+ case PS_PLUS_STRING:
+ ythrow TFromStringException() << TStringBuf("Cannot parse string \"+\" as number. ");
+ case PS_MINUS_STRING:
+ ythrow TFromStringException() << TStringBuf("Cannot parse string \"-\" as number. ");
+ case PS_BAD_SYMBOL:
+ ythrow TFromStringException() << TStringBuf("Unexpected symbol \"") << EscapeC(*pos) << TStringBuf("\" at pos ") << (pos - data) << TStringBuf(" in string ") << TStringType(data, len).Quote() << TStringBuf(". ");
+ case PS_OVERFLOW:
+ ythrow TFromStringException() << TStringBuf("Integer overflow in string ") << TStringType(data, len).Quote() << TStringBuf(". ");
+ default: // any other status is reported as a plain yexception rather than TFromStringException
+ ythrow yexception() << TStringBuf("Unknown error code in string converter. ");
+ }
+ }
+
+    // Throwing integer parser: returns the value parsed from data[0, len) or raises
+    // the exception that ThrowParseError builds for the failure status.
+    template <typename T, typename TUnsigned, int base, typename TChar>
+    Y_NO_INLINE T ParseInt(const TChar* data, size_t len, const TBounds<TUnsigned>& bounds) {
+        T parsed;
+        const TChar* cursor = data;
+        const EParseStatus status = TIntParser<T, base, TChar>::Parse(&cursor, data + len, bounds, &parsed);
+
+        if (status != PS_OK) {
+            // cursor was advanced by the parser to the place of the failure
+            ThrowParseError(status, data, len, cursor);
+        }
+
+        return parsed;
+    }
+
+ template <typename T, typename TUnsigned, int base, typename TChar>
+ Y_NO_INLINE bool TryParseInt(const TChar* data, size_t len, const TBounds<TUnsigned>& bounds, T* result) { // status is reported through the return value; no exception is thrown here
+ return TIntParser<T, base, TChar>::Parse(&data, data + len, bounds, result) == PS_OK;
+ }
+
+ template <class T>
+ inline T ParseFlt(const char* data, size_t len) { // sscanf-based float parser; throws TFromStringException on failure
+ /*
+ * TODO
+ */
+
+ if (len > 256) { // NOTE(review): longer input is silently cut to its first 256 characters before parsing — confirm this is intended
+ len = 256;
+ }
+
+ char* c = (char*)alloca(len + 1); // NUL-terminated copy, because sscanf needs a C string
+ memcpy(c, data, len);
+ c[len] = 0;
+
+ T ret;
+ char ec;
+
+ // try to read a value and an extra character in order to catch cases when
+ // the string start with a valid float but is followed by unexpected characters
+ if (sscanf(c, TFltModifiers<T>::ModifierReadAndChar, &ret, &ec) == 1) { // exactly one conversion: the number was read and nothing followed it
+ return ret;
+ }
+
+ ythrow TFromStringException() << TStringBuf("cannot parse float(") << TStringBuf(data, len) << TStringBuf(")"); // len here is the value after the clamp above
+ }
+
+#define DEF_FLT_MOD(type, modifierWrite, modifierRead) \
+ template <> \
+ struct TFltModifiers<type> { \
+ static const char* const ModifierWrite; \
+ static const char* const ModifierReadAndChar; \
+ }; \
+ \
+ const char* const TFltModifiers<type>::ModifierWrite = modifierWrite; \
+ const char* const TFltModifiers<type>::ModifierReadAndChar = modifierRead "%c";
+
+ DEF_FLT_MOD(long double, "%.10Lg", "%Lg")
+
+#undef DEF_FLT_MOD
+
+ /* The following constants are initialized in terms of <climits> constants to make
+ * sure they go into binary as actual values and there is no associated
+ * initialization code.
+ * */
+ constexpr TBounds<ui64> bSBounds = {static_cast<ui64>(SCHAR_MAX), static_cast<ui64>(UCHAR_MAX - SCHAR_MAX)};
+ constexpr TBounds<ui64> bUBounds = {static_cast<ui64>(UCHAR_MAX), 0};
+ constexpr TBounds<ui64> sSBounds = {static_cast<ui64>(SHRT_MAX), static_cast<ui64>(USHRT_MAX - SHRT_MAX)};
+ constexpr TBounds<ui64> sUBounds = {static_cast<ui64>(USHRT_MAX), 0};
+ constexpr TBounds<ui64> iSBounds = {static_cast<ui64>(INT_MAX), static_cast<ui64>(UINT_MAX - INT_MAX)};
+ constexpr TBounds<ui64> iUBounds = {static_cast<ui64>(UINT_MAX), 0};
+ constexpr TBounds<ui64> lSBounds = {static_cast<ui64>(LONG_MAX), static_cast<ui64>(ULONG_MAX - LONG_MAX)};
+ constexpr TBounds<ui64> lUBounds = {static_cast<ui64>(ULONG_MAX), 0};
+ constexpr TBounds<ui64> llSBounds = {static_cast<ui64>(LLONG_MAX), static_cast<ui64>(ULLONG_MAX - LLONG_MAX)};
+ constexpr TBounds<ui64> llUBounds = {static_cast<ui64>(ULLONG_MAX), 0};
+}
+
+#define DEF_INT_SPEC_II(TYPE, ITYPE, BASE) \
+ template <> \
+ size_t IntToString<BASE, TYPE>(TYPE value, char* buf, size_t len) { \
+ return FormatInt<ITYPE, BASE, char>(value, buf, len); \
+ }
+
+#define DEF_INT_SPEC_I(TYPE, ITYPE) \
+ template <> \
+ size_t ToStringImpl<TYPE>(TYPE value, char* buf, size_t len) { \
+ return FormatInt<ITYPE, 10, char>(value, buf, len); \
+ } \
+ DEF_INT_SPEC_II(TYPE, ITYPE, 2) \
+ DEF_INT_SPEC_II(TYPE, ITYPE, 8) \
+ DEF_INT_SPEC_II(TYPE, ITYPE, 10) \
+ DEF_INT_SPEC_II(TYPE, ITYPE, 16)
+
+#define DEF_INT_SPEC(TYPE) \
+ DEF_INT_SPEC_I(signed TYPE, i64) \
+ DEF_INT_SPEC_I(unsigned TYPE, ui64)
+
+DEF_INT_SPEC(char)
+DEF_INT_SPEC(short)
+DEF_INT_SPEC(int)
+DEF_INT_SPEC(long)
+DEF_INT_SPEC(long long)
+
+#ifdef __cpp_char8_t
+template <>
+size_t ToStringImpl<char8_t>(char8_t value, char* buf, size_t len) {
+ return FormatInt<ui64, 10, char>(value, buf, len);
+}
+#endif
+
+using TCharIType = std::conditional_t<std::is_signed<char>::value, i64, ui64>;
+using TWCharIType = std::conditional_t<std::is_signed<wchar_t>::value, i64, ui64>;
+
+DEF_INT_SPEC_I(char, TCharIType)
+DEF_INT_SPEC_I(wchar_t, TWCharIType)
+DEF_INT_SPEC_I(wchar16, ui64) // wchar16 is always unsigned
+DEF_INT_SPEC_I(wchar32, ui64) // wchar32 is always unsigned
+
+#undef DEF_INT_SPEC
+#undef DEF_INT_SPEC_I
+#undef DEF_INT_SPEC_II
+
+#define DEF_FLT_SPEC(type) \
+ template <> \
+ size_t ToStringImpl<type>(type t, char* buf, size_t len) { \
+ return FormatFlt<type>(t, buf, len); \
+ }
+
+DEF_FLT_SPEC(long double)
+
+#undef DEF_FLT_SPEC
+
+template <>
+size_t ToStringImpl<bool>(bool t, char* buf, size_t len) { // writes a single character, '1' or '0', and returns 1
+ Y_ENSURE(len, TStringBuf("zero length"));
+ *buf = t ? '1' : '0';
+ return 1;
+}
+
+/*
+ * ------------------------------ parsers ------------------------------
+ */
+
+// Parses a bool: "0" and "1" are handled directly, any other input is accepted
+// only if IsTrue() or IsFalse() recognizes it. result is left untouched on failure.
+template <>
+bool TryFromStringImpl<bool>(const char* data, size_t len, bool& result) {
+    if (len == 1 && (data[0] == '0' || data[0] == '1')) {
+        result = data[0] == '1';
+        return true;
+    }
+
+    const TStringBuf text(data, len);
+    const bool recognizedAsTrue = IsTrue(text);
+
+    if (recognizedAsTrue || IsFalse(text)) {
+        result = recognizedAsTrue;
+        return true;
+    }
+
+    return false;
+}
+
+template <>
+bool FromStringImpl<bool>(const char* data, size_t len) { // throwing wrapper around TryFromStringImpl<bool>
+ bool result;
+
+ if (!TryFromStringImpl<bool>(data, len, result)) {
+ ythrow TFromStringException() << TStringBuf("Cannot parse bool(") << TStringBuf(data, len) << TStringBuf("). ");
+ }
+
+ return result;
+}
+
+template <>
+TString FromStringImpl<TString>(const char* data, size_t len) { // string-to-string "conversion": builds a TString from the len characters at data
+ return TString(data, len);
+}
+
+template <>
+TStringBuf FromStringImpl<TStringBuf>(const char* data, size_t len) { // returns a TStringBuf over [data, data + len)
+ return TStringBuf(data, len);
+}
+
+template <>
+std::string FromStringImpl<std::string>(const char* data, size_t len) { // builds a std::string from the len characters at data
+ return std::string(data, len);
+}
+
+template <>
+TUtf16String FromStringImpl<TUtf16String>(const wchar16* data, size_t len) { // wide-character counterpart: builds a TUtf16String from len wchar16 units
+ return TUtf16String(data, len);
+}
+
+template <>
+TWtringBuf FromStringImpl<TWtringBuf>(const wchar16* data, size_t len) { // returns a TWtringBuf over [data, data + len)
+ return TWtringBuf(data, len);
+}
+
+// Try-versions: string targets cannot fail, so each of them just assigns and returns true
+template <>
+bool TryFromStringImpl<TStringBuf>(const char* data, size_t len, TStringBuf& result) {
+ result = {data, len};
+ return true;
+}
+
+template <>
+bool TryFromStringImpl<TString>(const char* data, size_t len, TString& result) { // always succeeds: result becomes a TString built from the input
+ result = TString(data, len);
+ return true;
+}
+
+template <>
+bool TryFromStringImpl<std::string>(const char* data, size_t len, std::string& result) { // always succeeds: the input is assigned into the existing std::string
+ result.assign(data, len);
+ return true;
+}
+
+template <>
+bool TryFromStringImpl<TWtringBuf>(const wchar16* data, size_t len, TWtringBuf& result) { // always succeeds: result becomes a TWtringBuf over [data, data + len)
+ result = {data, len};
+ return true;
+}
+
+template <>
+bool TryFromStringImpl<TUtf16String>(const wchar16* data, size_t len, TUtf16String& result) { // always succeeds: result becomes a TUtf16String built from the input
+ result = TUtf16String(data, len);
+ return true;
+}
+
+#define DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, BASE) \
+ template <> \
+ TYPE IntFromString<TYPE, BASE>(const CHAR* data, size_t len) { \
+ return ParseInt<ITYPE, ui64, BASE>(data, len, BOUNDS); \
+ } \
+ template <> \
+ bool TryIntFromString<BASE>(const CHAR* data, size_t len, TYPE& result) { \
+ ITYPE tmp; \
+ bool status = TryParseInt<ITYPE, ui64, BASE>(data, len, BOUNDS, &tmp); \
+ if (status) { \
+ result = tmp; \
+ } \
+ return status; \
+ }
+
+#define DEF_INT_SPEC_II(CHAR, TYPE, ITYPE, BOUNDS) \
+ template <> \
+ TYPE FromStringImpl<TYPE>(const CHAR* data, size_t len) { \
+ return ParseInt<ITYPE, ui64, 10>(data, len, BOUNDS); \
+ } \
+ template <> \
+ bool TryFromStringImpl<TYPE>(const CHAR* data, size_t len, TYPE& result) { \
+ ITYPE tmp; \
+ bool status = TryParseInt<ITYPE, ui64, 10>(data, len, BOUNDS, &tmp); \
+ if (status) { \
+ result = tmp; \
+ } \
+ return status; \
+ } \
+ DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 2) \
+ DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 8) \
+ DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 10) \
+ DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 16)
+
+#define DEF_INT_SPEC_I(TYPE, ITYPE, BOUNDS) \
+ DEF_INT_SPEC_II(char, TYPE, ITYPE, BOUNDS) \
+ DEF_INT_SPEC_II(wchar16, TYPE, ITYPE, BOUNDS)
+
+#define DEF_INT_SPEC(TYPE, ID) \
+ DEF_INT_SPEC_I(signed TYPE, i64, ID##SBounds) \
+ DEF_INT_SPEC_I(unsigned TYPE, ui64, ID##UBounds)
+
+#define DEF_INT_SPEC_FIXED_WIDTH(TYPE, ID) \
+ DEF_INT_SPEC_I(TYPE, i64, ID##SBounds) \
+ DEF_INT_SPEC_I(u##TYPE, ui64, ID##UBounds)
+
+DEF_INT_SPEC_FIXED_WIDTH(i8, b)
+DEF_INT_SPEC(short, s)
+DEF_INT_SPEC(int, i)
+DEF_INT_SPEC(long, l)
+DEF_INT_SPEC(long long, ll)
+
+#undef DEF_INT_SPEC_FIXED_WIDTH
+#undef DEF_INT_SPEC
+#undef DEF_INT_SPEC_I
+#undef DEF_INT_SPEC_II
+#undef DEF_INT_SPEC_III
+
+#define DEF_FLT_SPEC(type) \
+ template <> \
+ type FromStringImpl<type>(const char* data, size_t len) { \
+ return ParseFlt<type>(data, len); \
+ }
+
+DEF_FLT_SPEC(long double)
+
+#undef DEF_FLT_SPEC
+
+// float and double go through StrToD because it is faster than sscanf.
+// Non-throwing parser for double: succeeds only when the input is non-empty and
+// StrToD stops exactly at its end; result is left untouched on failure.
+template <>
+bool TryFromStringImpl<double>(const char* data, size_t len, double& result) {
+    if (len == 0) {
+        return false;
+    }
+
+    const char* const inputEnd = data + len;
+    char* parsedEnd = nullptr;
+    const double value = StrToD(data, inputEnd, &parsedEnd);
+
+    if (parsedEnd == inputEnd) {
+        result = value;
+        return true;
+    }
+
+    return false;
+}
+
+// Non-throwing parser for float: parses as double and narrows the value on success.
+template <>
+bool TryFromStringImpl<float>(const char* data, size_t len, float& result) {
+    double wide;
+
+    if (!TryFromStringImpl<double>(data, len, wide)) {
+        return false;
+    }
+
+    result = static_cast<float>(wide);
+    return true;
+}
+
+template <>
+bool TryFromStringImpl<long double>(const char* data, size_t len, long double& result) {
+    // The value is parsed as a double and widened afterwards, so it carries
+    // double precision only; result is left untouched on failure.
+    double parsed;
+    const bool parsedOk = TryFromStringImpl<double>(data, len, parsed);
+    if (parsedOk) {
+        result = static_cast<long double>(parsed);
+    }
+    return parsedOk;
+}
+
+// Exception-throwing, specialized for float types
+template <>
+double FromStringImpl<double>(const char* data, size_t len) {
+    // Throwing wrapper around the exception-free parser: success returns the value,
+    // anything else raises TFromStringException quoting the offending input.
+    double value = 0.0;
+    if (TryFromStringImpl(data, len, value)) {
+        return value;
+    }
+    ythrow TFromStringException() << TStringBuf("cannot parse float(") << TStringBuf(data, len) << TStringBuf(")");
+}
+
+template <>
+float FromStringImpl<float>(const char* data, size_t len) {
+    // Reuses the double parser (including its exception on bad input) and narrows the result.
+    const double wide = FromStringImpl<double>(data, len);
+    return static_cast<float>(wide);
+}
+
+double StrToD(const char* b, const char* e, char** se) {
+    // Converter for the range [b, e): leading spaces, hex literals and trailing junk are allowed.
+    // 0.0 and NAN are passed as the converter's empty-string and junk-string values
+    // (see the double-conversion StringToDoubleConverter constructor).
+    struct TRangeConverter: public StringToDoubleConverter {
+        inline TRangeConverter()
+            : StringToDoubleConverter(ALLOW_TRAILING_JUNK | ALLOW_HEX | ALLOW_LEADING_SPACES, 0.0, NAN, nullptr, nullptr)
+        {
+        }
+    };
+
+    int consumed = 0;
+    const double value = SingletonWithPriority<TRangeConverter, 0>()->StringToDouble(b, e - b, &consumed);
+
+    // Optionally report where parsing stopped, strtod-style.
+    if (se != nullptr) {
+        *se = const_cast<char*>(b) + consumed;
+    }
+
+    return value;
+}
+
+double StrToD(const char* b, char** se) {
+    // Null-terminated convenience form: measure the string and defer to the range overload.
+    const char* const end = b + strlen(b);
+    return StrToD(b, end, se);
+}
+
+namespace {
+    // Process-wide converter used by every double/float -> text routine in this file.
+    // It emits an explicit '+' on positive exponents, spells the special values "inf"/"nan"
+    // and uses 'e' as the exponent character; the numeric thresholds are documented with the
+    // double-conversion DoubleToStringConverter constructor.
+    static inline DoubleToStringConverter& ToStringConverterNoPad() noexcept {
+        struct TNoPadConverter: public DoubleToStringConverter {
+            inline TNoPadConverter() noexcept
+                : DoubleToStringConverter(EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan", 'e', -10, 21, 4, 0)
+            {
+            }
+        };
+
+        DoubleToStringConverter* const shared = SingletonWithPriority<TNoPadConverter, 0>();
+        return *shared;
+    }
+
+    // Places a StringBuilder over the caller's buffer inside inline storage via placement new.
+    // The StringBuilder is never destroyed explicitly.
+    // NOTE(review): presumably this is deliberate, to skip StringBuilder's destructor - confirm.
+    struct TBuilder {
+        alignas(StringBuilder) char Store[sizeof(StringBuilder)];
+        StringBuilder* SB;
+
+        inline TBuilder(char* buf, size_t len) noexcept
+            : SB(new (Store) StringBuilder(buf, len))
+        {
+        }
+    };
+
+    // Removes trailing zeros of the fractional part in place (and the '.' itself when nothing
+    // is left behind it), keeping an exponent suffix that starts with 'e' intact.
+    // Text without a '.' is returned unchanged. Returns the new length.
+    static inline size_t FixZeros(char* buf, size_t len) noexcept {
+        char* const bufEnd = buf + len;
+        char* const dot = static_cast<char*>(memchr(buf, '.', len));
+
+        if (dot == nullptr) {
+            return len;
+        }
+
+        // The mantissa ends where the exponent starts, or at the end of the text.
+        char* expStart = static_cast<char*>(memchr(dot, 'e', bufEnd - dot));
+
+        if (expStart == nullptr) {
+            expStart = bufEnd;
+        }
+
+        // Walk back from the last mantissa character over the zeros, never past the '.'.
+        char* lastKept = expStart - 1;
+
+        while (dot < lastKept && *lastKept == '0') {
+            --lastKept;
+        }
+
+        if (*lastKept == '.') {
+            --lastKept;
+        }
+
+        // Slide the exponent suffix (possibly empty) down behind the kept mantissa.
+        const size_t suffixLen = bufEnd - expStart;
+
+        memmove(lastKept + 1, expStart, suffixLen);
+
+        return (lastKept + 1 - buf) + suffixLen;
+    }
+
+    // Pads a one-digit exponent with a leading zero ("1e-5" becomes "1e-05") and writes a
+    // terminating zero at buf[len]. Returns the length without the terminator.
+    // NOTE(review): there is no capacity check - the buffer must have room for up to two
+    // bytes beyond the incoming len.
+    static inline size_t FixEnd(char* buf, size_t len) noexcept {
+        const bool oneDigitExponent = len > 2 && (buf[len - 2] == '-' || buf[len - 2] == '+');
+
+        if (oneDigitExponent) {
+            const char digit = buf[len - 1];
+
+            buf[len - 1] = '0';
+            buf[len] = digit;
+            len += 1;
+        }
+
+        buf[len] = 0;
+
+        return len;
+    }
+
+    // Formats d with prec significant digits into buf, then strips trailing zeros and
+    // normalizes the exponent. Aborts through Y_VERIFY if the converter reports failure.
+    static inline size_t DoDtoa(double d, char* buf, size_t len, int prec) noexcept {
+        TBuilder sb(buf, len);
+
+        Y_VERIFY(ToStringConverterNoPad().ToPrecision(d, prec, sb.SB), "conversion failed");
+
+        const size_t stripped = FixZeros(buf, sb.SB->position());
+
+        return FixEnd(buf, stripped);
+    }
+}
+
+template <>
+size_t ToStringImpl<double>(double d, char* buf, size_t len) {
+    // Doubles are printed with 10 significant digits.
+    const int significantDigits = 10;
+    return DoDtoa(d, buf, len, significantDigits);
+}
+
+template <>
+size_t ToStringImpl<float>(float f, char* buf, size_t len) {
+    // Floats are printed with 6 significant digits; the value is widened to double by DoDtoa's parameter.
+    const int significantDigits = 6;
+    return DoDtoa(f, buf, len, significantDigits);
+}
+
+size_t FloatToString(float t, char* buf, size_t len, EFloatToStringMode mode, int ndigits) {
+    // Every mode except PREC_AUTO is handled by the double overload after widening.
+    if (mode != PREC_AUTO) {
+        return FloatToString(static_cast<double>(t), buf, len, mode, ndigits);
+    }
+
+    // PREC_AUTO needs the single-precision "shortest" algorithm, so it is handled here.
+    TBuilder sb(buf, len);
+
+    Y_VERIFY(ToStringConverterNoPad().ToShortestSingle(t, sb.SB), "conversion failed");
+
+    return FixEnd(buf, sb.SB->position());
+}
+
+size_t FloatToString(double t, char* buf, size_t len, EFloatToStringMode mode, int ndigits) {
+    // "%g"-like mode: clamp the requested precision to the converter's limits and let DoDtoa do the rest.
+    if (mode == PREC_NDIGITS) {
+        // The limits are copied into locals before being handed to ClampVal.
+        const auto lowestPrecision = DoubleToStringConverter::kMinPrecisionDigits;
+        const auto highestPrecision = DoubleToStringConverter::kMaxPrecisionDigits;
+        const int precision = ClampVal(ndigits, lowestPrecision, highestPrecision);
+
+        return DoDtoa(t, buf, len, precision);
+    }
+
+    TBuilder sb(buf, len);
+
+    // Shortest representation; ndigits is ignored.
+    if (mode == PREC_AUTO) {
+        Y_VERIFY(ToStringConverterNoPad().ToShortest(t, sb.SB), "conversion failed");
+
+        return FixEnd(buf, sb.SB->position());
+    }
+
+    // Remaining modes are fixed-point; when the converter refuses, fall back to PREC_AUTO.
+    const bool fixedOk = ToStringConverterNoPad().ToFixed(t, ndigits, sb.SB);
+
+    if (!fixedOk) {
+        return FloatToString(t, buf, len, PREC_AUTO);
+    }
+
+    const size_t written = sb.SB->position();
+
+    return mode == PREC_POINT_DIGITS_STRIP_ZEROES ? FixZeros(buf, written) : written;
+}
diff --git a/util/string/cast.h b/util/string/cast.h
new file mode 100644
index 0000000000..90e925c194
--- /dev/null
+++ b/util/string/cast.h
@@ -0,0 +1,357 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/stream/str.h>
+#include <util/generic/string.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/typetraits.h>
+#include <util/generic/yexception.h>
+
+/*
+ * specialized for all arithmetic types
+ */
+
+template <class T>
+size_t ToStringImpl(T t, char* buf, size_t len);
+
+/**
+ * Converts @c t to string writing not more than @c len bytes to output buffer @c buf.
+ * No NULL terminator appended! Throws exception on buffer overflow.
+ * @return number of bytes written
+ */
+template <class T>
+inline size_t ToString(const T& t, char* buf, size_t len) {
+    // ToStringImpl is specialized on the parameter-passing form of T (TTypeTraits<T>::TFuncParam),
+    // so that type is spelled out explicitly instead of being deduced.
+    return ToStringImpl<typename TTypeTraits<T>::TFuncParam>(t, buf, len);
+}
+
+/**
+ * Floating point to string conversion mode, values are enforced by `dtoa_impl.cpp`.
+ */
+enum EFloatToStringMode {
+ /** 0.1f -> "0.1", 0.12345678f -> "0.12345678", ignores ndigits.
+ * Implemented with the converter's ToShortest / ToShortestSingle routines in cast.cpp. */
+ PREC_AUTO = 0,
+
+ /** "%g" mode, writes up to the given number of significant digits:
+ * 0.1f -> "0.1", 0.12345678f -> "0.123457" for ndigits=6, 1.2e-06f -> "1.2e-06".
+ * ndigits is clamped to the converter's [kMinPrecisionDigits, kMaxPrecisionDigits] range. */
+ PREC_NDIGITS = 2,
+
+ /** "%f" mode, writes the given number of digits after decimal point:
+ * 0.1f -> "0.100000", 1.2e-06f -> "0.000001" for ndigits=6.
+ * If the fixed-point conversion fails, the PREC_AUTO formatting is used instead. */
+ PREC_POINT_DIGITS = 3,
+
+ /** same as PREC_POINT_DIGITS, but stripping trailing zeroes:
+ * 0.1f for ndigits=6 -> "0.1" */
+ PREC_POINT_DIGITS_STRIP_ZEROES = 4
+};
+
+size_t FloatToString(float t, char* buf, size_t len, EFloatToStringMode mode = PREC_AUTO, int ndigits = 0);
+size_t FloatToString(double t, char* buf, size_t len, EFloatToStringMode mode = PREC_AUTO, int ndigits = 0);
+
+template <typename T>
+inline TString FloatToString(const T& t, EFloatToStringMode mode = PREC_AUTO, int ndigits = 0) {
+    // Scratch space for the buffer-based overloads: Max<double>() with mode = PREC_POINT_DIGITS
+    // has 309 digits before the decimal point.
+    char buf[512];
+    const size_t written = FloatToString(t, buf, sizeof(buf), mode, ndigits);
+
+    return TString(buf, written);
+}
+
+namespace NPrivate {
+    // Primary template: formats into a stack buffer through the buffer-based ToString.
+    // ToString(const T&) selects it for arithmetic types.
+    template <class T, bool isSimple>
+    struct TToString {
+        static inline TString Cvt(const T& t) {
+            char buf[512];
+            const size_t written = ToString<T>(t, buf, sizeof(buf));
+
+            return TString(buf, written);
+        }
+    };
+
+    // Everything else is streamed into a TString via operator<<.
+    template <class T>
+    struct TToString<T, false> {
+        static inline TString Cvt(const T& t) {
+            TString out;
+            TStringOutput stream(out);
+            stream << t;
+            return out;
+        }
+    };
+}
+
+/*
+ * some clever implementations...
+ */
+template <class T>
+inline TString ToString(const T& t) {
+    // cv-qualifiers are stripped first so that e.g. a volatile int picks the arithmetic path.
+    using TPlain = std::remove_cv_t<T>;
+    constexpr bool isArithmetic = std::is_arithmetic<TPlain>::value;
+
+    return ::NPrivate::TToString<TPlain, isArithmetic>::Cvt((const TPlain&)t);
+}
+
+inline const TString& ToString(const TString& s) noexcept { // a TString is already a string: returned by reference, no copy
+ return s;
+}
+// Same pass-through for a non-const TString lvalue.
+inline const TString& ToString(TString& s) noexcept {
+ return s;
+}
+// C strings are converted by constructing the TString return value from the pointer.
+inline TString ToString(const char* s) {
+ return s;
+}
+// Non-const char* variant of the C-string overload.
+inline TString ToString(char* s) {
+ return s;
+}
+
+/*
+ * Wrapper for wide strings.
+ */
+template <class T> // generic case: format with ToString(t), then widen the result with TUtf16String::FromAscii
+inline TUtf16String ToWtring(const T& t) {
+ return TUtf16String::FromAscii(ToString(t));
+}
+// A TUtf16String is already wide: returned by reference, no conversion.
+inline const TUtf16String& ToWtring(const TUtf16String& w) {
+ return w;
+}
+// Same pass-through for a non-const TUtf16String lvalue.
+inline const TUtf16String& ToWtring(TUtf16String& w) {
+ return w;
+}
+
+struct TFromStringException: public TBadCastException { // thrown on parse failure, e.g. by FromStringImpl<double> in cast.cpp
+};
+
+/*
+ * specialized for:
+ * bool
+ * short
+ * unsigned short
+ * int
+ * unsigned int
+ * long
+ * unsigned long
+ * long long
+ * unsigned long long
+ * float
+ * double
+ * long double
+ */
+template <typename T, typename TChar> // declaration only; the per-type specializations live in cast.cpp
+T FromStringImpl(const TChar* data, size_t len);
+/**
+ * Parses @c len characters starting at @c data as a value of type @c T by forwarding to FromStringImpl<T>.
+ * The double specialization in cast.cpp throws TFromStringException on malformed input.
+ */
+template <typename T, typename TChar>
+inline T FromString(const TChar* data, size_t len) {
+ return ::FromStringImpl<T>(data, len);
+}
+// Null-terminated input: the length is measured with std::char_traits<TChar>::length.
+template <typename T, typename TChar>
+inline T FromString(const TChar* data) {
+ return ::FromString<T>(data, std::char_traits<TChar>::length(data));
+}
+// TStringBuf convenience overload.
+template <class T>
+inline T FromString(const TStringBuf& s) {
+ return ::FromString<T>(s.data(), s.size());
+}
+// TString convenience overload.
+template <class T>
+inline T FromString(const TString& s) {
+ return ::FromString<T>(s.data(), s.size());
+}
+// std::string convenience overload.
+template <class T>
+inline T FromString(const std::string& s) {
+ return ::FromString<T>(s.data(), s.size());
+}
+// TString -> TString needs no parsing: the argument is returned as a copy.
+template <>
+inline TString FromString<TString>(const TString& s) {
+ return s;
+}
+// Wide input (TWtringBuf); the character type is passed explicitly.
+template <class T>
+inline T FromString(const TWtringBuf& s) {
+ return ::FromString<T, typename TWtringBuf::char_type>(s.data(), s.size());
+}
+// Wide input (TUtf16String), parsed as wchar16 characters.
+template <class T>
+inline T FromString(const TUtf16String& s) {
+ return ::FromString<T, wchar16>(s.data(), s.size());
+}
+
+namespace NPrivate { // helper behind the FromString overloads that take no explicit target type
+ template <typename TChar>
+ class TFromString {
+ const TChar* const Data;
+ const size_t Len;
+ // Only the pointer and length are stored; the referenced characters are not copied.
+ public:
+ inline TFromString(const TChar* data, size_t len)
+ : Data(data)
+ , Len(len)
+ {
+ }
+ // The target type is chosen by the conversion the caller requests; parsing runs at that moment via FromString<T, TChar>.
+ template <typename T>
+ inline operator T() const {
+ return FromString<T, TChar>(Data, Len);
+ }
+ };
+}
+
+template <typename TChar> // type-deducing form: returns a proxy that parses when it is converted to the wanted type
+inline ::NPrivate::TFromString<TChar> FromString(const TChar* data, size_t len) {
+ return ::NPrivate::TFromString<TChar>(data, len);
+}
+// Null-terminated variant of the proxy-returning form.
+template <typename TChar>
+inline ::NPrivate::TFromString<TChar> FromString(const TChar* data) {
+ return ::NPrivate::TFromString<TChar>(data, std::char_traits<TChar>::length(data));
+}
+// String-object variant: T must expose a TChar typedef plus data() and size().
+template <typename T>
+inline ::NPrivate::TFromString<typename T::TChar> FromString(const T& s) {
+ return ::NPrivate::TFromString<typename T::TChar>(s.data(), s.size());
+}
+
+// Conversion exception free versions
+template <typename T, typename TChar> // declaration only; specializations (e.g. float, double, long double) are in cast.cpp
+bool TryFromStringImpl(const TChar* data, size_t len, T& result);
+
+/**
+ * @param data Source string buffer pointer
+ * @param len Source string length, in characters
+ * @param result Place to store conversion result value.
+ * If conversion error occurs, no value stored in @c result
+ * @return @c true in case of successful conversion, @c false otherwise
+ **/
+template <typename T, typename TChar>
+inline bool TryFromString(const TChar* data, size_t len, T& result) {
+ return TryFromStringImpl<T>(data, len, result);
+}
+// Null-terminated input: the length is measured with std::char_traits<TChar>::length.
+template <typename T, typename TChar>
+inline bool TryFromString(const TChar* data, T& result) {
+ return TryFromString<T>(data, std::char_traits<TChar>::length(data), result);
+}
+// Variant with a fallback: on failure @c result is overwritten with @c def and false is returned.
+template <class T, class TChar>
+inline bool TryFromString(const TChar* data, const size_t len, T& result, const T& def) {
+ if (TryFromString<T>(data, len, result)) {
+ return true;
+ }
+ result = def;
+ return false;
+}
+// TStringBuf convenience overload.
+template <class T>
+inline bool TryFromString(const TStringBuf& s, T& result) {
+ return TryFromString<T>(s.data(), s.size(), result);
+}
+// TString convenience overload.
+template <class T>
+inline bool TryFromString(const TString& s, T& result) {
+ return TryFromString<T>(s.data(), s.size(), result);
+}
+// std::string convenience overload.
+template <class T>
+inline bool TryFromString(const std::string& s, T& result) {
+ return TryFromString<T>(s.data(), s.size(), result);
+}
+// Wide input (TWtringBuf).
+template <class T>
+inline bool TryFromString(const TWtringBuf& s, T& result) {
+ return TryFromString<T>(s.data(), s.size(), result);
+}
+// Wide input (TUtf16String).
+template <class T>
+inline bool TryFromString(const TUtf16String& s, T& result) {
+ return TryFromString<T>(s.data(), s.size(), result);
+}
+
+template <class T, class TStringType> // parses s into result; on failure stores def in result and returns false
+inline bool TryFromStringWithDefault(const TStringType& s, T& result, const T& def) {
+ return TryFromString<T>(s.data(), s.size(), result, def);
+}
+// C-string input is wrapped into a TStringBuf first.
+template <class T>
+inline bool TryFromStringWithDefault(const char* s, T& result, const T& def) {
+ return TryFromStringWithDefault<T>(TStringBuf(s), result, def);
+}
+// Without an explicit default the fallback value is a value-initialized T().
+template <class T, class TStringType>
+inline bool TryFromStringWithDefault(const TStringType& s, T& result) {
+ return TryFromStringWithDefault<T>(s, result, T());
+}
+
+// FromString methods with default value if data is invalid
+template <class T, class TChar> // returns the parsed value, or def when the input cannot be parsed
+inline T FromString(const TChar* data, const size_t len, const T& def) {
+ T result; // always assigned below: either the parsed value or def
+ TryFromString<T>(data, len, result, def);
+ return result;
+}
+// String-object form; TStringType must provide data() and size().
+template <class T, class TStringType>
+inline T FromStringWithDefault(const TStringType& s, const T& def) {
+ return FromString<T>(s.data(), s.size(), def);
+}
+// C-string input is wrapped into a TStringBuf first.
+template <class T>
+inline T FromStringWithDefault(const char* s, const T& def) {
+ return FromStringWithDefault<T>(TStringBuf(s), def);
+}
+// Without an explicit default the fallback value is a value-initialized T().
+template <class T, class TStringType>
+inline T FromStringWithDefault(const TStringType& s) {
+ return FromStringWithDefault<T>(s, T());
+}
+
+double StrToD(const char* b, char** se); // null-terminated input; if se is non-null it receives the position where parsing stopped
+double StrToD(const char* b, const char* e, char** se); // same for the character range [b, e)
+// Buffer-based integer formatting in radix `base` (declaration only); the return value is used below as the length of the produced text.
+template <int base, class T>
+size_t IntToString(T t, char* buf, size_t len);
+// Convenience form returning a TString; restricted to arithmetic types by the static_assert.
+template <int base, class T>
+inline TString IntToString(T t) {
+ static_assert(std::is_arithmetic<std::remove_cv_t<T>>::value, "expect std::is_arithmetic<std::remove_cv_t<T>>::value");
+
+ char buf[256];
+
+ return TString(buf, IntToString<base>(t, buf, sizeof(buf)));
+}
+// Non-throwing integer parsing in radix `base` (declaration only).
+template <int base, class TInt, class TChar>
+bool TryIntFromString(const TChar* data, size_t len, TInt& result);
+// String-object form; TStringType must provide data() and size().
+template <int base, class TInt, class TStringType>
+inline bool TryIntFromString(const TStringType& s, TInt& result) {
+ return TryIntFromString<base>(s.data(), s.size(), result);
+}
+// Integer parsing in radix `base` that returns the value directly (declaration only); the unit tests expect a yexception on bad input.
+template <class TInt, int base, class TChar>
+TInt IntFromString(const TChar* str, size_t len);
+// Null-terminated input: the length is measured with std::char_traits<TChar>::length.
+template <class TInt, int base, class TChar>
+inline TInt IntFromString(const TChar* str) {
+ return IntFromString<TInt, base>(str, std::char_traits<TChar>::length(str));
+}
+// String-object form; TStringType must provide data() and size().
+template <class TInt, int base, class TStringType>
+inline TInt IntFromString(const TStringType& str) {
+ return IntFromString<TInt, base>(str.data(), str.size());
+}
+
+static inline TString ToString(const TStringBuf str) { // copies the viewed characters into an owning TString
+ return TString(str);
+}
+// Wide counterpart: copies a TWtringBuf view into an owning TUtf16String.
+static inline TUtf16String ToWtring(const TWtringBuf wtr) {
+ return TUtf16String(wtr);
+}
+// UTF-32 counterpart: copies a TUtf32StringBuf view into an owning TUtf32String.
+static inline TUtf32String ToUtf32String(const TUtf32StringBuf wtr) {
+ return TUtf32String(wtr);
+}
diff --git a/util/string/cast.pxd b/util/string/cast.pxd
new file mode 100644
index 0000000000..dc23619e1e
--- /dev/null
+++ b/util/string/cast.pxd
@@ -0,0 +1,10 @@
+from util.generic.string cimport TString
+
+from libcpp cimport bool as bool_t
+# Cython view of a subset of util/string/cast.h; every function is declared `except +` so C++ exceptions surface as Python exceptions.
+cdef extern from "<util/string/cast.h>" nogil:
+ T FromString[T](const TString&) except +
+ bool_t TryFromString[T](const TString&, T&) except +
+ TString ToString[T](const T&) except +
+
+ cdef double StrToD(const char* b, char** se) except +
diff --git a/util/string/cast.py b/util/string/cast.py
new file mode 100644
index 0000000000..4787f6ef44
--- /dev/null
+++ b/util/string/cast.py
@@ -0,0 +1,27 @@
+print 'static const ui8 SAFE_LENS[4][15] = {'
+
+
+def nb(n, b):
+ if n == 0:
+ return [0]
+
+ digits = []
+
+ while n:
+ digits.append(int(n % b))
+ n /= b
+
+ return digits[::-1]
+
+
+for p in (1, 2, 4, 8):
+
+ def it1():
+ for base in range(2, 17):
+ m = 2 ** (8 * p) - 1
+
+ yield len(nb(m, base)) - 1
+
+ print ' {0, 0, ' + ', '.join(str(x) for x in it1()) + '},'
+
+print '};'
diff --git a/util/string/cast_ut.cpp b/util/string/cast_ut.cpp
new file mode 100644
index 0000000000..033450c38c
--- /dev/null
+++ b/util/string/cast_ut.cpp
@@ -0,0 +1,602 @@
+#include "cast.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/charset/wide.h>
+#include <util/system/defaults.h>
+
+#include <limits>
+
+// positive test (return true or no exception)
+#define test1(t, v) \
+ F<t>().CheckTryOK(v); \
+ F<t>().CheckOK(v)
+
+// negative test (return false or exception)
+#define test2(t, v) \
+ F<t>().CheckTryFail(v); \
+ F<t>().CheckExc(v)
+// Absolute tolerance for the floating-point comparisons in this file (10E-7 is 1e-6).
+#define EPS 10E-7
+// Expands mac(type, val, base) once for each tested radix: 2, 8, 10 and 16.
+#define HEX_MACROS_MAP(mac, type, val) mac(type, val, 2) mac(type, val, 8) mac(type, val, 10) mac(type, val, 16)
+// Round trips through IntToString<base>. OK_HEX_CHECK calls IntFromStringForCheck, so it is only usable inside TRet<A> members.
+#define OK_HEX_CHECK(type, val, base) UNIT_ASSERT_EQUAL((IntFromStringForCheck<base>(IntToString<base>(val))), val);
+#define EXC_HEX_CHECK(type, val, base) UNIT_ASSERT_EXCEPTION((IntFromString<type, base>(IntToString<base>(val))), yexception);
+// Same fan-out over the radixes for the non-throwing TryIntFromString checks.
+#define TRY_HEX_MACROS_MAP(mac, type, val, result, def) \
+ mac(type, val, result, def, 2) \
+ mac(type, val, result, def, 8) \
+ mac(type, val, result, def, 10) \
+ mac(type, val, result, def, 16)
+// Resets result to def, then expects a successful parse that yields val.
+#define TRY_OK_HEX_CHECK(type, val, result, def, base) \
+ result = def; \
+ UNIT_ASSERT_EQUAL(TryIntFromStringForCheck<base>(IntToString<base>(val), result), true); \
+ UNIT_ASSERT_EQUAL(result, val);
+// Resets result to def, then expects the parse to fail and leave result equal to def.
+#define TRY_FAIL_HEX_CHECK(type, val, result, def, base) \
+ result = def; \
+ UNIT_ASSERT_VALUES_EQUAL(TryIntFromStringForCheck<base>(IntToString<base>(val), result), false); \
+ UNIT_ASSERT_VALUES_EQUAL(result, def);
+
+template <class A> // test driver: checks conversions of arbitrary values v to and from the target type A
+struct TRet {
+ template <int base>
+ inline A IntFromStringForCheck(const TString& str) {
+ return IntFromString<A, base>(str);
+ }
+ // Fixes the result type to A so the macros above can call TryIntFromString without naming it.
+ template <int base>
+ inline bool TryIntFromStringForCheck(const TString& str, A& result) {
+ return TryIntFromString<base>(str, result);
+ }
+ // v must survive ToString/ToWtring -> FromString<A>, and IntToString -> IntFromString in every tested radix.
+ template <class B>
+ inline void CheckOK(B v) {
+ UNIT_ASSERT_VALUES_EQUAL(FromString<A>(ToString(v)), v); // char
+ UNIT_ASSERT_VALUES_EQUAL(FromString<A>(ToWtring(v)), v); // wide char
+ HEX_MACROS_MAP(OK_HEX_CHECK, A, v);
+ }
+ // v does not fit A: every throwing conversion must raise yexception.
+ template <class B>
+ inline void CheckExc(B v) {
+ UNIT_ASSERT_EXCEPTION(FromString<A>(ToString(v)), yexception); // char
+ UNIT_ASSERT_EXCEPTION(FromString<A>(ToWtring(v)), yexception); // wide char
+ HEX_MACROS_MAP(EXC_HEX_CHECK, A, v);
+ }
+ // Non-throwing counterpart of CheckOK: every Try* call must return true and produce v.
+ template <class B>
+ inline void CheckTryOK(B v) {
+ static const A defaultV = 42;
+ A convV;
+ UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToString(v), convV), true); // char
+ UNIT_ASSERT_VALUES_EQUAL(v, convV);
+ UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToWtring(v), convV), true); // wide char
+ UNIT_ASSERT_VALUES_EQUAL(v, convV);
+
+ TRY_HEX_MACROS_MAP(TRY_OK_HEX_CHECK, A, v, convV, defaultV);
+ }
+ // Non-throwing counterpart of CheckExc: every Try* call must return false and keep the previous value.
+ template <class B>
+ inline void CheckTryFail(B v) {
+ static const A defaultV = 42;
+ A convV = defaultV; // to check that original value is not trashed on bad cast
+ UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToString(v), convV), false); // char
+ UNIT_ASSERT_VALUES_EQUAL(defaultV, convV);
+ UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToWtring(v), convV), false); // wide char
+ UNIT_ASSERT_VALUES_EQUAL(defaultV, convV);
+
+ TRY_HEX_MACROS_MAP(TRY_FAIL_HEX_CHECK, A, v, convV, defaultV);
+ }
+};
+
+template <> // bool has no radix or wide-string checks: only the narrow ToString/FromString round trip is tested
+struct TRet<bool> {
+ template <class B>
+ inline void CheckOK(B v) {
+ UNIT_ASSERT_VALUES_EQUAL(FromString<bool>(ToString(v)), v);
+ }
+ // Non-throwing parse must succeed and reproduce v.
+ template <class B>
+ inline void CheckTryOK(B v) {
+ B convV;
+ UNIT_ASSERT_VALUES_EQUAL(TryFromString<bool>(ToString(v), convV), true);
+ UNIT_ASSERT_VALUES_EQUAL(v, convV);
+ }
+ // Throwing parse of an invalid spelling must raise yexception.
+ template <class B>
+ inline void CheckExc(B v) {
+ UNIT_ASSERT_EXCEPTION(FromString<bool>(ToString(v)), yexception);
+ }
+ // Non-throwing parse of an invalid spelling must fail and keep the previous value.
+ template <class B>
+ inline void CheckTryFail(B v) {
+ static const bool defaultV = false;
+ bool convV = defaultV;
+ UNIT_ASSERT_VALUES_EQUAL(TryFromString<bool>(ToString(v), convV), false);
+ UNIT_ASSERT_VALUES_EQUAL(defaultV, convV);
+ }
+};
+
+// Factory used by the test1/test2 macros: F<type>() yields the checker for that target type.
+template <class A>
+inline TRet<A> F() {
+    return TRet<A>{};
+}
+
+#if 0 // disabled helper: would check that ToString(value, buffer, size) throws on overflow and never writes past the produced text
+template <class T>
+inline void CheckConvertToBuffer(const T& value, const size_t size, const TString& canonValue) {
+ const size_t maxSize = 256;
+ char buffer[maxSize];
+ const char magic = 0x7F; // sentinel byte used to detect stray writes
+ memset(buffer, magic, maxSize);
+ size_t length = 0;
+ if (canonValue.size() > size) { // overflow will occur
+ UNIT_ASSERT_EXCEPTION(length = ToString(value, buffer, size), yexception);
+ // check that no bytes after size was trashed
+ for (size_t i = size; i < maxSize; ++i)
+ UNIT_ASSERT_VALUES_EQUAL(buffer[i], magic);
+ } else {
+ length = ToString(value, buffer, size);
+ UNIT_ASSERT(length < maxSize);
+ // check that no bytes after length was trashed
+ for (size_t i = length; i < maxSize; ++i)
+ UNIT_ASSERT_VALUES_EQUAL(buffer[i], magic);
+ TStringBuf result(buffer, length);
+ UNIT_ASSERT_VALUES_EQUAL(result, TStringBuf(canonValue));
+ }
+}
+#endif
+
+Y_UNIT_TEST_SUITE(TCastTest) {
+ // Suite-local factory used by the test1/test2 macros: F<type>() yields the checker for that target type.
+ template <class A>
+ inline TRet<A> F() {
+     return TRet<A>{};
+ }
+
+ template <class TFloat> // str must parse to canonValue (within eps) through both TryFromString and FromString
+ void GoodFloatTester(const char* str, const TFloat canonValue, const double eps) {
+ TFloat f = canonValue + 42.0; // shift value to make it far from proper
+ UNIT_ASSERT_VALUES_EQUAL(TryFromString<TFloat>(str, f), true);
+ UNIT_ASSERT_DOUBLES_EQUAL(f, canonValue, eps);
+ f = FromString<TFloat>(str);
+ UNIT_ASSERT_DOUBLES_EQUAL(f, canonValue, eps);
+ }
+
+ template <class TFloat> // str must be rejected: TryFromString returns false without touching f, FromString throws
+ void BadFloatTester(const char* str) {
+ const double eps = 10E-5;
+ TFloat f = 42.0; // make it far from proper
+ auto res = TryFromString<TFloat>(str, f);
+
+ UNIT_ASSERT_VALUES_EQUAL(res, false);
+ UNIT_ASSERT_DOUBLES_EQUAL(f, 42.0, eps); // check value was not trashed
+ UNIT_ASSERT_EXCEPTION(f = FromString<TFloat>(str), TFromStringException);
+ Y_UNUSED(f); // shut up compiler about 'assigned value that is not used'
+ }
+
+ Y_UNIT_TEST(TestToFrom) { // test1 = value must round-trip, test2 = value must be rejected; covers the limits of each integer type
+ test1(bool, true);
+ test1(bool, false);
+ test2(bool, "");
+ test2(bool, "a");
+ // 8-bit types: limits, neighbours of the limits and the first out-of-range values
+ test2(ui8, -1);
+ test1(i8, -1);
+ test1(i8, SCHAR_MAX);
+ test1(i8, SCHAR_MIN);
+ test1(i8, SCHAR_MAX - 1);
+ test1(i8, SCHAR_MIN + 1);
+ test2(i8, (int)SCHAR_MAX + 1);
+ test2(i8, (int)SCHAR_MIN - 1);
+ test1(ui8, UCHAR_MAX);
+ test1(ui8, UCHAR_MAX - 1);
+ test2(ui8, (int)UCHAR_MAX + 1);
+ test2(ui8, -1); // NOTE(review): repeats the first ui8 check of this block
+ test1(int, -1);
+ test2(unsigned int, -1);
+ test1(short int, -1);
+ test2(unsigned short int, -1);
+ test1(long int, -1);
+ test2(unsigned long int, -1);
+ test1(int, INT_MAX);
+ test1(int, INT_MIN);
+ test1(int, INT_MAX - 1);
+ test1(int, INT_MIN + 1);
+ test2(int, (long long int)INT_MAX + 1);
+ test2(int, (long long int)INT_MIN - 1);
+ test1(unsigned int, UINT_MAX);
+ test1(unsigned int, UINT_MAX - 1);
+ test2(unsigned int, (long long int)UINT_MAX + 1);
+ test1(short int, SHRT_MAX);
+ test1(short int, SHRT_MIN);
+ test1(short int, SHRT_MAX - 1);
+ test1(short int, SHRT_MIN + 1);
+ test2(short int, (long long int)SHRT_MAX + 1);
+ test2(short int, (long long int)SHRT_MIN - 1);
+ test1(unsigned short int, USHRT_MAX);
+ test1(unsigned short int, USHRT_MAX - 1);
+ test2(unsigned short int, (long long int)USHRT_MAX + 1);
+ test1(long int, LONG_MAX);
+ test1(long int, LONG_MIN);
+ test1(long int, LONG_MAX - 1);
+ test1(long int, LONG_MIN + 1);
+ // widest type: only in-range values are checked here
+ test1(long long int, LLONG_MAX);
+ test1(long long int, LLONG_MIN);
+ test1(long long int, LLONG_MAX - 1);
+ test1(long long int, LLONG_MIN + 1);
+ }
+
+ Y_UNIT_TEST(TestVolatile) { // ToString must accept a volatile-qualified arithmetic value
+ volatile int x = 1;
+ UNIT_ASSERT_VALUES_EQUAL(ToString(x), "1");
+ }
+
+ Y_UNIT_TEST(TestStrToD) { // plain and exponent notation, without and with an end pointer
+ UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.1", nullptr), 1.1, EPS);
+ UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.12345678", nullptr), 1.12345678, EPS);
+ UNIT_ASSERT_DOUBLES_EQUAL(StrToD("10E-5", nullptr), 10E-5, EPS);
+ UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.1E+5", nullptr), 1.1E+5, EPS);
+ // with trailing junk the number is still parsed and ret points at the first unused character
+ char* ret = nullptr;
+
+ UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.1y", &ret), 1.1, EPS);
+ UNIT_ASSERT_VALUES_EQUAL(*ret, 'y');
+ UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.12345678z", &ret), 1.12345678, EPS);
+ UNIT_ASSERT_VALUES_EQUAL(*ret, 'z');
+ UNIT_ASSERT_DOUBLES_EQUAL(StrToD("10E-5y", &ret), 10E-5, EPS);
+ UNIT_ASSERT_VALUES_EQUAL(*ret, 'y');
+ UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.1E+5z", &ret), 1.1E+5, EPS);
+ UNIT_ASSERT_VALUES_EQUAL(*ret, 'z');
+ }
+
+ Y_UNIT_TEST(TestFloats) { // canonical FloatToString output for every EFloatToStringMode, plus nan/inf spellings
+ // "%g" mode
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f, PREC_NDIGITS, 6), "0.1"); // drop trailing zeroes
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f, PREC_NDIGITS, 6), "0.123457");
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-20f, PREC_NDIGITS, 6), "1e-20");
+ // "%f" mode
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f, PREC_POINT_DIGITS, 6), "0.100000");
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f, PREC_POINT_DIGITS, 6), "0.123457");
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-20f, PREC_POINT_DIGITS, 6), "0.000000");
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(12.34f, PREC_POINT_DIGITS, 0), "12"); // rounding to integers drops '.'
+ // strip trailing zeroes
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f, PREC_POINT_DIGITS_STRIP_ZEROES, 6), "0.1");
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f, PREC_POINT_DIGITS_STRIP_ZEROES, 6), "0.123457");
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-20f, PREC_POINT_DIGITS_STRIP_ZEROES, 6), "0");
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(12.34f, PREC_POINT_DIGITS_STRIP_ZEROES, 0), "12"); // rounding to integers drops '.'
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(10000.0f, PREC_POINT_DIGITS_STRIP_ZEROES, 0), "10000");
+ // automatic selection of ndigits
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f), "0.1"); // drop trailing zeroes
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f), "0.12345678"); // 8 valid digits
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(1000.00006f), "1000.00006"); // 9 valid digits
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-45f), "1e-45"); // denormalized: 1 valid digit
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(-0.0f), "-0"); // sign must be preserved
+ // version for double
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(1.0 / 10000), "0.0001"); // trailing zeroes
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(1.2345678901234567), "1.2345678901234567"); // no truncation
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(5e-324), "5e-324"); // denormalized
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(-0.0), "-0"); // sign must be preserved
+ // special values, double
+ UNIT_ASSERT_STRINGS_EQUAL(FloatToString(std::numeric_limits<double>::quiet_NaN()), "nan");
+ UNIT_ASSERT_STRINGS_EQUAL(FloatToString(std::numeric_limits<double>::infinity()), "inf");
+ UNIT_ASSERT_STRINGS_EQUAL(FloatToString(-std::numeric_limits<double>::infinity()), "-inf");
+ // special values, float
+ UNIT_ASSERT_STRINGS_EQUAL(FloatToString(std::numeric_limits<float>::quiet_NaN()), "nan");
+ UNIT_ASSERT_STRINGS_EQUAL(FloatToString(std::numeric_limits<float>::infinity()), "inf");
+ UNIT_ASSERT_STRINGS_EQUAL(FloatToString(-std::numeric_limits<float>::infinity()), "-inf");
+ }
+
+ Y_UNIT_TEST(TestReadFloats) { // accepted and rejected spellings for float, double and long double parsing
+ GoodFloatTester<float>("0.0001", 0.0001f, EPS);
+ GoodFloatTester<double>("0.0001", 0.0001, EPS);
+ GoodFloatTester<long double>("0.0001", 0.0001, EPS);
+ GoodFloatTester<float>("10E-5", 10E-5f, EPS);
+ GoodFloatTester<double>("1.0001E5", 1.0001E5, EPS);
+ GoodFloatTester<long double>("1.0001e5", 1.0001e5, EPS);
+ GoodFloatTester<long double>(".0001e5", .0001e5, EPS);
+ BadFloatTester<float>("a10E-5"); // leading junk
+ BadFloatTester<float>("10 "); // trailing whitespace
+ BadFloatTester<float>("10\t");
+ //BadFloatTester<float>("10E");
+ //BadFloatTester<float>("10.E");
+ BadFloatTester<float>("..0");
+ BadFloatTester<float>(""); // IGNIETFERRO-300
+ BadFloatTester<double>("1.00.01");
+ BadFloatTester<double>("1.0001E5b");
+ BadFloatTester<double>("1.0001s");
+ BadFloatTester<double>("1..01");
+ BadFloatTester<double>(""); // IGNIETFERRO-300
+ BadFloatTester<long double>(".1.00");
+ BadFloatTester<long double>("1.00.");
+ BadFloatTester<long double>("1.0001e5-");
+ BadFloatTester<long double>("10e 2");
+ BadFloatTester<long double>(""); // IGNIETFERRO-300
+ }
+
+ Y_UNIT_TEST(TestLiteral) { // a string literal passed to ToString must come back as the same text
+ UNIT_ASSERT_VALUES_EQUAL(ToString("abc"), TString("abc"));
+ }
+
+ Y_UNIT_TEST(TestFromStringStringBuf) { // FromString<TStringBuf> must return a view onto the source string
+ TString a = "xyz";
+ TStringBuf b = FromString<TStringBuf>(a);
+ UNIT_ASSERT_VALUES_EQUAL(a, b);
+ UNIT_ASSERT_VALUES_EQUAL((void*)a.data(), (void*)b.data()); // same storage, i.e. no copy was made
+ }
+
+#if 0 // disabled together with CheckConvertToBuffer above; kept as a record of the intended buffer-overflow checks
+ Y_UNIT_TEST(TestBufferOverflow) {
+ CheckConvertToBuffer<float>(1.f, 5, "1");
+ CheckConvertToBuffer<float>(1.005f, 3, "1.005");
+ CheckConvertToBuffer<float>(1.00000000f, 3, "1");
+
+ CheckConvertToBuffer<double>(1.f, 5, "1");
+ CheckConvertToBuffer<double>(1.005f, 3, "1.005");
+ CheckConvertToBuffer<double>(1.00000000f, 3, "1");
+
+ CheckConvertToBuffer<int>(2, 5, "2");
+ CheckConvertToBuffer<int>(1005, 3, "1005");
+
+ CheckConvertToBuffer<size_t>(2, 5, "2");
+ CheckConvertToBuffer<ui64>(1005000000000000ull, 32, "1005000000000000");
+ CheckConvertToBuffer<ui64>(1005000000000000ull, 3, "1005000000000000");
+
+ // TString longNumber = TString("1.") + TString(1 << 20, '1');
+ // UNIT_ASSERT_EXCEPTION(FromString<double>(longNumber), yexception);
+ }
+#endif
+
+ // Integer parsing from UTF-16 input through each wide entry point:
+ // TUtf16String, TWtringBuf and a raw (pointer, length) pair.
+ Y_UNIT_TEST(TestWide) {
+     TUtf16String iw = u"-100500";
+     int iv = 0;
+     UNIT_ASSERT_VALUES_EQUAL(TryFromString(iw, iv), true);
+     UNIT_ASSERT_VALUES_EQUAL(iv, -100500);
+
+     ui64 uv = 0;
+     TUtf16String uw = u"21474836470";
+     UNIT_ASSERT_VALUES_EQUAL(TryFromString(uw, uv), true);
+     UNIT_ASSERT_VALUES_EQUAL(uv, 21474836470ull);
+
+     // The view itself has to be parsed here: the previous version built bw but passed uw again,
+     // so the TWtringBuf overload was never exercised.
+     TWtringBuf bw(uw.data(), uw.size());
+     uv = 0;
+     UNIT_ASSERT_VALUES_EQUAL(TryFromString(bw, uv), true);
+     UNIT_ASSERT_VALUES_EQUAL(uv, 21474836470ull);
+
+     const wchar16* beg = uw.data();
+     uv = 0;
+     UNIT_ASSERT_VALUES_EQUAL(TryFromString(beg, uw.size(), uv), true);
+     UNIT_ASSERT_VALUES_EQUAL(uv, 21474836470ull);
+ }
+
+ Y_UNIT_TEST(TestDefault) { // the *WithDefault helpers: parsed value on success, explicit default or T() on failure, never an exception
+ size_t res = 0;
+ const size_t def1 = 42;
+ // valid narrow input: the default must not be used
+ TString s1("100500");
+ UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s1, res, def1), true);
+ UNIT_ASSERT_VALUES_EQUAL(res, 100500);
+
+ UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s1, res), true);
+ UNIT_ASSERT_VALUES_EQUAL(res, 100500);
+
+ UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault("100500", res, def1), true);
+ UNIT_ASSERT_VALUES_EQUAL(res, 100500);
+
+ UNIT_CHECK_GENERATED_NO_EXCEPTION(FromStringWithDefault(s1, def1), yexception);
+ UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s1, def1), 100500);
+ UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s1), 100500);
+ UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault("100500", def1), 100500);
+ // invalid narrow input: result becomes the default (or size_t() when none is given)
+ TString s2("100q500");
+ UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s2, res), false);
+ UNIT_ASSERT_VALUES_EQUAL(res, size_t());
+
+ UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s2, res, def1), false);
+ UNIT_ASSERT_VALUES_EQUAL(res, def1);
+
+ UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault("100q500", res), false);
+ UNIT_ASSERT_VALUES_EQUAL(res, size_t());
+
+ UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault("100 500", res), false);
+ UNIT_ASSERT_VALUES_EQUAL(res, size_t());
+
+ UNIT_CHECK_GENERATED_NO_EXCEPTION(FromStringWithDefault(s2, def1), yexception);
+ UNIT_CHECK_GENERATED_NO_EXCEPTION(FromStringWithDefault("100q500", def1), yexception);
+ UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s2, def1), def1);
+ UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s2), size_t());
+ UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>("100q500"), size_t());
+ UNIT_CHECK_GENERATED_EXCEPTION(FromString<size_t>(s2), TFromStringException);
+ // the same scenarios for wide strings and a signed target type
+ int res2 = 0;
+ const int def2 = -6;
+
+ TUtf16String s3 = u"-100500";
+ UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s3, res2, def2), true);
+ UNIT_ASSERT_VALUES_EQUAL(res2, -100500);
+
+ UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s3, res2), true);
+ UNIT_ASSERT_VALUES_EQUAL(res2, -100500);
+
+ UNIT_CHECK_GENERATED_NO_EXCEPTION(FromStringWithDefault(s3, def1), yexception);
+ UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s3, def2), -100500);
+ UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s3), size_t()); // a negative number is not a valid size_t
+
+ TUtf16String s4 = u"-f100500";
+ UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s4, res2, def2), false);
+ UNIT_ASSERT_VALUES_EQUAL(res2, def2);
+
+ UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s4, res2), false);
+ UNIT_ASSERT_VALUES_EQUAL(res2, size_t()); // NOTE(review): res2 is an int, so int() would be the matching expected value
+
+ UNIT_CHECK_GENERATED_NO_EXCEPTION(FromStringWithDefault(s4, def2), yexception);
+ UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s4, def2), def2);
+ UNIT_CHECK_GENERATED_EXCEPTION(FromString<size_t>(s4), yexception);
+ UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s4), size_t());
+ }
+
+ Y_UNIT_TEST(TestBool) { // word and digit spellings of bool; anything else, including "", must throw
+ // True cases
+ UNIT_ASSERT_VALUES_EQUAL(FromString<bool>("yes"), true);
+ UNIT_ASSERT_VALUES_EQUAL(FromString<bool>("1"), true);
+ // False cases
+ UNIT_ASSERT_VALUES_EQUAL(FromString<bool>("no"), false);
+ UNIT_ASSERT_VALUES_EQUAL(FromString<bool>("0"), false);
+ // Strange cases
+ UNIT_ASSERT_EXCEPTION(FromString<bool>(""), yexception);
+ UNIT_ASSERT_EXCEPTION(FromString<bool>("something"), yexception);
+ }
+
+ // Exercises FromString() called without an explicit template argument: the
+ // target type is selected by the cast (or by the variable being initialized).
+ Y_UNIT_TEST(TestAutoDetectType) {
+ UNIT_ASSERT_DOUBLES_EQUAL((float)FromString("0.0001"), 0.0001, EPS);
+ // (pointer, length) form: only the first sizeof("0.0015") - 2 == 5 characters ("0.001") are parsed
+ UNIT_ASSERT_DOUBLES_EQUAL((double)FromString("0.0015", sizeof("0.0015") - 2), 0.001, EPS);
+ UNIT_ASSERT_DOUBLES_EQUAL((long double)FromString(TStringBuf("0.0001")), 0.0001, EPS);
+ UNIT_ASSERT_DOUBLES_EQUAL((float)FromString(TString("10E-5")), 10E-5, EPS);
+ UNIT_ASSERT_VALUES_EQUAL((bool)FromString("da"), true);
+ UNIT_ASSERT_VALUES_EQUAL((bool)FromString("no"), false);
+ UNIT_ASSERT_VALUES_EQUAL((short)FromString(u"9000"), 9000);
+ UNIT_ASSERT_VALUES_EQUAL((int)FromString(u"-100500"), -100500);
+ // the wide buffer has length 1, so only "4" is parsed
+ UNIT_ASSERT_VALUES_EQUAL((unsigned long long)FromString(TWtringBuf(u"42", 1)), 4);
+ // the same text parsed from narrow and wide literals must give equal values
+ int integer = FromString("125");
+ ui16 wideCharacterCode = FromString(u"125");
+ UNIT_ASSERT_VALUES_EQUAL(integer, wideCharacterCode);
+ }
+
+ // Test helper: passes silently when exc.what() contains phrase; otherwise
+ // prints the full message to Cerr and fails the current test.
+ static void CheckMessage(TFromStringException& exc, const TString& phrase) {
+     const TString text = exc.what();
+     if (text.Contains(phrase)) {
+         return;
+     }
+
+     Cerr << text << Endl;
+     UNIT_ASSERT(false);
+ }
+
+ // Pins the wording of TFromStringException messages for malformed integer input.
+ // Each case must throw (UNIT_ASSERT(false) is reached only if it does not) and
+ // the message must contain the expected phrase.
+ Y_UNIT_TEST(ErrorMessages) {
+ try {
+ FromString<ui32>("");
+ UNIT_ASSERT(false);
+ } catch (TFromStringException& e) {
+ CheckMessage(e, "empty string as number");
+ }
+
+ try {
+ FromString<ui32>("-");
+ UNIT_ASSERT(false);
+ } catch (TFromStringException& e) {
+ // Unsigned should have no sign at all, so - is not expected
+ CheckMessage(e, "Unexpected symbol \"-\" at pos 0 in string \"-\"");
+ }
+
+ // for a signed target a lone sign is reported as an unparsable number instead
+ try {
+ FromString<i32>("-");
+ UNIT_ASSERT(false);
+ } catch (TFromStringException& e) {
+ CheckMessage(e, "Cannot parse string \"-\" as number");
+ }
+
+ try {
+ FromString<i32>("+");
+ UNIT_ASSERT(false);
+ } catch (TFromStringException& e) {
+ CheckMessage(e, "Cannot parse string \"+\" as number");
+ }
+
+ // a fractional value parsed as an integer: the message points at the '.'
+ try {
+ FromString<ui32>("0.328413745072");
+ UNIT_ASSERT(false);
+ } catch (TFromStringException& e) {
+ CheckMessage(e, "Unexpected symbol \".\" at pos 1 in string \"0.328413745072\"");
+ }
+ }
+
+ // TryFromString with a string-buffer output must succeed and yield a value equal
+ // to the input, for narrow and wide inputs, including empty and
+ // default-constructed ones.
+ Y_UNIT_TEST(TryStringBuf) {
+ {
+ constexpr TStringBuf hello = "hello";
+ TStringBuf out;
+ UNIT_ASSERT(TryFromString(hello, out));
+ UNIT_ASSERT_VALUES_EQUAL(hello, out);
+ }
+ {
+ constexpr TStringBuf empty = "";
+ TStringBuf out;
+ UNIT_ASSERT(TryFromString(empty, out));
+ UNIT_ASSERT_VALUES_EQUAL(empty, out);
+ }
+ {
+ // default-constructed buffer
+ constexpr TStringBuf empty;
+ TStringBuf out;
+ UNIT_ASSERT(TryFromString(empty, out));
+ UNIT_ASSERT_VALUES_EQUAL(empty, out);
+ }
+ {
+ // wide input given as a raw UTF-16 literal
+ const auto hello = u"hello";
+ TWtringBuf out;
+ UNIT_ASSERT(TryFromString(hello, out));
+ UNIT_ASSERT_VALUES_EQUAL(hello, out);
+ }
+ {
+ const TUtf16String empty;
+ TWtringBuf out;
+ UNIT_ASSERT(TryFromString(empty, out));
+ UNIT_ASSERT_VALUES_EQUAL(empty, out);
+ }
+ {
+ constexpr TWtringBuf empty;
+ TWtringBuf out;
+ UNIT_ASSERT(TryFromString(empty, out));
+ UNIT_ASSERT_VALUES_EQUAL(empty, out);
+ }
+ }
+
+ // TryFromString into a double must reject the textual spellings of NaN
+ // in each of the tested letter cases.
+ Y_UNIT_TEST(Nan) {
+ double xx = 0;
+
+ UNIT_ASSERT(!TryFromString("NaN", xx));
+ UNIT_ASSERT(!TryFromString("NAN", xx));
+ UNIT_ASSERT(!TryFromString("nan", xx));
+ }
+
+ // TryFromString into a double must reject the textual spellings of infinity
+ // in each of the tested letter cases.
+ Y_UNIT_TEST(Infinity) {
+ double xx = 0;
+
+ UNIT_ASSERT(!TryFromString("Infinity", xx));
+ UNIT_ASSERT(!TryFromString("INFINITY", xx));
+ UNIT_ASSERT(!TryFromString("infinity", xx));
+ }
+
+ // Pins the textual form produced for special floating-point values.
+ Y_UNIT_TEST(TestBorderCases) {
+ // integral doubles are printed without a fractional part
+ UNIT_ASSERT_VALUES_EQUAL(ToString(0.0), "0");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(1.0), "1");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(10.0), "10");
+ // NaN is printed without a sign; infinities keep theirs
+ UNIT_ASSERT_VALUES_EQUAL(ToString(NAN), "nan");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(-NAN), "nan");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(INFINITY), "inf");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(-INFINITY), "-inf");
+ // large magnitudes use exponent notation
+ UNIT_ASSERT_VALUES_EQUAL(ToString(1.1e+100), "1.1e+100");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(1e+100), "1e+100");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(87423.2031250000001), "87423.20313");
+ UNIT_ASSERT_VALUES_EQUAL(FloatToString(1.0e60, PREC_POINT_DIGITS_STRIP_ZEROES, 0), "1e+60");
+ }
+
+ // ToString on character types (char, wchar16, wchar32) yields the decimal code
+ // of the character rather than the character itself.
+ Y_UNIT_TEST(TestChar) {
+ // Given a character ch, ToString(ch) returns
+ // the decimal representation of its integral value
+
+ // char
+ UNIT_ASSERT_VALUES_EQUAL(ToString('\0'), "0");
+ UNIT_ASSERT_VALUES_EQUAL(ToString('0'), "48");
+
+ // wchar16
+ UNIT_ASSERT_VALUES_EQUAL(ToString(u'\0'), "0");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(u'0'), "48");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(u'я'), "1103");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(u'\uFFFF'), "65535");
+
+ // wchar32
+ UNIT_ASSERT_VALUES_EQUAL(ToString(U'\0'), "0");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(U'0'), "48");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(U'я'), "1103");
+ UNIT_ASSERT_VALUES_EQUAL(ToString(U'\U0001F600'), "128512"); // 'GRINNING FACE' (U+1F600)
+ }
+};
diff --git a/util/string/cast_ut.pyx b/util/string/cast_ut.pyx
new file mode 100644
index 0000000000..88e86ef961
--- /dev/null
+++ b/util/string/cast_ut.pyx
@@ -0,0 +1,13 @@
+# cython: c_string_type=str, c_string_encoding=utf8
+
+from util.string.cast cimport FromString, ToString
+
+import unittest
+
+ # Checks the Cython binding of FromString.
+ class TestFromString(unittest.TestCase):
+     def test_from_int(self):
+         # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
+         self.assertEqual(FromString[int]("42"), 42)
+
+ # Checks the Cython binding of ToString.
+ class TestToString(unittest.TestCase):
+     def test_from_int(self):
+         # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
+         self.assertEqual(ToString(42), "42")
diff --git a/util/string/cstriter.cpp b/util/string/cstriter.cpp
new file mode 100644
index 0000000000..fd61359c3d
--- /dev/null
+++ b/util/string/cstriter.cpp
@@ -0,0 +1 @@
+#include "cstriter.h"
diff --git a/util/string/cstriter.h b/util/string/cstriter.h
new file mode 100644
index 0000000000..ca57728c39
--- /dev/null
+++ b/util/string/cstriter.h
@@ -0,0 +1,14 @@
+#pragma once
+
+ // Empty tag type used as the "end" position of a zero-terminated sequence:
+ // an iterator compares equal to it once the element it points at is zero.
+ struct TCStringEndIterator {
+ };
+
+ // True when b points at the terminating zero element.
+ template <class It>
+ static inline bool operator==(It b, TCStringEndIterator) {
+     const bool atTerminator = !*b;
+     return atTerminator;
+ }
+
+ // True while b still points at a non-zero element.
+ template <class It>
+ static inline bool operator!=(It b, TCStringEndIterator) {
+     const bool atTerminator = !*b;
+     return !atTerminator;
+ }
diff --git a/util/string/escape.cpp b/util/string/escape.cpp
new file mode 100644
index 0000000000..cd09a7dbd0
--- /dev/null
+++ b/util/string/escape.cpp
@@ -0,0 +1,433 @@
+#include "escape.h"
+#include "cast.h"
+
+#include <util/system/defaults.h>
+#include <util/charset/utf8.h>
+#include <util/charset/wide.h>
+
+/// @todo: escape trigraphs (eg "??/" is "\")
+
+ /* REFERENCES FOR ESCAPE SEQUENCE INTERPRETATION:
+ * C99 p. 6.4.3 Universal character names.
+ * C99 p. 6.4.4.4 Character constants.
+ *
+ * <simple-escape-sequence> ::= {
+ * \' , \" , \? , \\ ,
+ * \a , \b , \f , \n , \r , \t , \v
+ * }
+ *
+ * <octal-escape-sequence> ::= \ <octal-digit> {1, 3}
+ * <hexadecimal-escape-sequence> ::= \x <hexadecimal-digit> +
+ * <universal-character-name> ::= \u <hexadecimal-digit> {4}
+ * || \U <hexadecimal-digit> {8}
+ *
+ * NOTE (6.4.4.4.7):
+ * Each octal or hexadecimal escape sequence is the longest sequence of characters that can
+ * constitute the escape sequence.
+ *
+ * THEREFORE:
+ * - Octal escape sequence extends up to the first non-octal-digit character.
+ * - Octal escape sequence always terminates after three octal digits.
+ * - Hexadecimal escape sequence extends up to the first non-hexadecimal-digit character.
+ * - Universal character name consists of exactly 4 or 8 hexadecimal digits.
+ *
+ * by kerzum@
+ * It is also required to escape trigraphs that are enabled in compilers by default and
+ * are also processed inside string literals
+ * The nine trigraphs and their replacements are
+ *
+ * Trigraph: ??( ??) ??< ??> ??= ??/ ??' ??! ??-
+ * Replacement: [ ] { } # \ ^ | ~
+ *
+ */
+namespace {
+ // Maps a value in [0, 15] to its uppercase hexadecimal digit character.
+ template <typename TChar>
+ static inline char HexDigit(TChar value) {
+     Y_ASSERT(value < 16);
+     return value < 10 ? '0' + value : 'A' + value - 10;
+ }
+
+ // Maps a value in [0, 7] to its octal digit character.
+ template <typename TChar>
+ static inline char OctDigit(TChar value) {
+     Y_ASSERT(value < 8);
+     const auto digit = '0' + value;
+     return digit;
+ }
+
+ // True for character codes in the inclusive range 32..126.
+ template <typename TChar>
+ static inline bool IsPrintable(TChar c) {
+     return !(c < 32 || c > 126);
+ }
+
+ // True for '0'-'9', 'A'-'F' and 'a'-'f'.
+ template <typename TChar>
+ static inline bool IsHexDigit(TChar c) {
+     if (c >= '0' && c <= '9') {
+         return true;
+     }
+     if (c >= 'A' && c <= 'F') {
+         return true;
+     }
+     return c >= 'a' && c <= 'f';
+ }
+
+ // True for '0'-'7'.
+ template <typename TChar>
+ static inline bool IsOctDigit(TChar c) {
+     return !(c < '0' || c > '7');
+ }
+
+ // Per-character-type escaping policy. Only the specializations below are defined;
+ // each provides ESCAPE_C_BUFFER_SIZE and a static EscapeC() for one character.
+ template <typename TChar>
+ struct TEscapeUtil;
+
+ template <>
+ struct TEscapeUtil<char> {
+     // Longest output produced for a single input byte: "\xHH" or "\OOO".
+     static const size_t ESCAPE_C_BUFFER_SIZE = 4;
+
+     // Writes the escaped form of c into r and returns the number of characters written.
+     // next is the character that follows c in the input; it decides whether a one-digit
+     // octal or a hexadecimal escape can be emitted without the following character
+     // being read as part of that escape.
+     //
+     // (1) Printable characters go as-is, except backslash, double quote and a '?' followed by '?'.
+     // (2) \r, \n, \t and \0 ... \7 use their short escapes (the latter only if next is not an octal digit).
+     // (3) Otherwise a hexadecimal escape is used if next is not a hex digit, else a 3-digit octal one.
+     template <typename TNextChar, typename TBufferChar>
+     static inline size_t EscapeC(unsigned char c, TNextChar next, TBufferChar r[ESCAPE_C_BUFFER_SIZE]) {
+         const bool needsBackslash = (c == '\"') || (c == '\\');
+         const bool startsTrigraph = (c == '?') && (next == '?');
+         if (!needsBackslash && IsPrintable(c) && !startsTrigraph) {
+             r[0] = c;
+             return 1;
+         }
+
+         // every remaining form starts with a backslash
+         r[0] = '\\';
+
+         switch (c) {
+             case '\"':
+                 r[1] = '\"';
+                 return 2;
+             case '\\':
+                 r[1] = '\\';
+                 return 2;
+             case '\r':
+                 r[1] = 'r';
+                 return 2;
+             case '\n':
+                 r[1] = 'n';
+                 return 2;
+             case '\t':
+                 r[1] = 't';
+                 return 2;
+             default:
+                 break;
+         }
+
+         if (c < 8 && !IsOctDigit(next)) {
+             r[1] = OctDigit(c);
+             return 2;
+         }
+
+         if (!IsHexDigit(next)) {
+             r[1] = 'x';
+             r[2] = HexDigit((c & 0xF0) >> 4);
+             r[3] = HexDigit((c & 0x0F) >> 0);
+             return 4;
+         }
+
+         r[1] = OctDigit((c & 0700) >> 6);
+         r[2] = OctDigit((c & 0070) >> 3);
+         r[3] = OctDigit((c & 0007) >> 0);
+         return 4;
+     }
+ };
+
+ template <>
+ struct TEscapeUtil<wchar16> {
+     // Longest output produced for a single code unit: "\uXXXX".
+     static const size_t ESCAPE_C_BUFFER_SIZE = 6;
+
+     // Code units below 0x100 are escaped by the narrow implementation;
+     // everything else becomes \u followed by four uppercase hex digits.
+     template <typename TNextChar, typename TBufferChar>
+     static inline size_t EscapeC(wchar16 c, TNextChar next, TBufferChar r[ESCAPE_C_BUFFER_SIZE]) {
+         if (c < 0x100) {
+             return TEscapeUtil<char>::EscapeC(char(c), next, r);
+         }
+
+         r[0] = '\\';
+         r[1] = 'u';
+         // four nibbles, most significant first
+         for (int i = 0; i < 4; ++i) {
+             const int shift = 12 - 4 * i;
+             r[2 + i] = HexDigit((c >> shift) & 0xF);
+         }
+         return 6;
+     }
+ };
+}
+
+ // Appends the C-escaped form of [str, str + len) to r and returns r.
+ // Runs of characters that need no escaping are appended in bulk straight from the input.
+ template <class TChar>
+ TBasicString<TChar>& EscapeCImpl(const TChar* str, size_t len, TBasicString<TChar>& r) {
+     using TEscapeUtil = ::TEscapeUtil<TChar>;
+
+     TChar buffer[TEscapeUtil::ESCAPE_C_BUFFER_SIZE];
+
+     // start of the run of input characters not yet copied to r
+     size_t pending = 0;
+
+     for (size_t pos = 0; pos < len; ++pos) {
+         const auto next = (pos + 1 < len ? str[pos + 1] : 0);
+         const size_t written = TEscapeUtil::EscapeC(str[pos], next, buffer);
+
+         if (written <= 1) {
+             // unchanged character: leave it in the pending run
+             continue;
+         }
+
+         r.append(str + pending, pos - pending);
+         r.append(buffer, written);
+         pending = pos + 1;
+     }
+
+     // flush the tail (the whole input when nothing was escaped)
+     r.append(str + pending, len - pending);
+
+     return r;
+ }
+
+template TString& EscapeCImpl<TString::TChar>(const TString::TChar* str, size_t len, TString& r);
+template TUtf16String& EscapeCImpl<TUtf16String::TChar>(const TUtf16String::TChar* str, size_t len, TUtf16String& r);
+
+namespace {
+ // Appends code point v to s, encoded through WriteUTF8Char.
+ template <class TStr>
+ inline void AppendUnicode(TStr& s, wchar32 v) {
+     size_t written = 0;
+     char utf8[10];
+
+     WriteUTF8Char(v, written, reinterpret_cast<ui8*>(utf8));
+     s.AppendNoAlias(utf8, written);
+ }
+
+ // Wide-string overload: appends code point v to s through WriteSymbol.
+ inline void AppendUnicode(TUtf16String& s, wchar32 v) {
+ WriteSymbol(v, s);
+ }
+
+ // Returns the number of leading hexadecimal digits in [p, pe), counting at most sz of them.
+ template <ui32 sz, typename TChar>
+ inline size_t CountHex(const TChar* p, const TChar* pe) {
+     const TChar* const limit = Min(p + sz, pe);
+     const TChar* cur = p;
+
+     for (; cur < limit; ++cur) {
+         if (!IsHexDigit(*cur)) {
+             break;
+         }
+     }
+
+     return cur - p;
+ }
+
+ // Parses exactly sz hexadecimal characters starting at p into t.
+ // Returns false when fewer than sz characters remain before pe or when parsing fails.
+ template <size_t sz, typename TChar, typename T>
+ inline bool ParseHex(const TChar* p, const TChar* pe, T& t) noexcept {
+     if (p + sz > pe) {
+         return false;
+     }
+     return TryIntFromString<16>(p, sz, t);
+ }
+
+ // Returns the number of leading octal digits in [p, pe), counting at most sz of them.
+ // For sz == 3 with at least three characters available, a first character outside
+ // '0'-'3' lowers the limit to two digits.
+ template <ui32 sz, typename TChar>
+ inline size_t CountOct(const TChar* p, const TChar* pe) {
+     ui32 limit = Min<size_t>(sz, pe - p);
+
+     if (sz == 3 && limit == 3 && (*p < '0' || *p > '3')) {
+         limit = 2;
+     }
+
+     ui32 count = 0;
+     while (count < limit && IsOctDigit(p[count])) {
+         ++count;
+     }
+
+     return count;
+ }
+}
+
+ // Decodes C-style escape sequences found in [p, p + sz), appends the decoded text
+ // to res and returns res.
+ // TStr has to provide append(ch) and append(begin, end); the generic AppendUnicode()
+ // path additionally uses AppendNoAlias(ptr, len).
+ template <class TChar, class TStr>
+ static TStr& DoUnescapeC(const TChar* p, size_t sz, TStr& res) {
+ const TChar* pe = p + sz;
+
+ while (p != pe) {
+ if ('\\' == *p) {
+ ++p;
+
+ // input ends right after a backslash: the dangling backslash is dropped
+ if (p == pe) {
+ return res;
+ }
+
+ // Inside the switch p points at the character after the backslash. Each case
+ // leaves p on the LAST character of the sequence; the ++p after the switch steps past it.
+ switch (*p) {
+ default:
+ // unrecognized escape: keep the character itself, without the backslash
+ res.append(*p);
+ break;
+ case 'a':
+ res.append('\a');
+ break;
+ case 'b':
+ res.append('\b');
+ break;
+ case 'f':
+ res.append('\f');
+ break;
+ case 'n':
+ res.append('\n');
+ break;
+ case 'r':
+ res.append('\r');
+ break;
+ case 't':
+ res.append('\t');
+ break;
+ case 'v':
+ res.append('\v');
+ break;
+ case 'u': {
+ ui16 cp[2];
+
+ // p[1..4] must be four hex digits
+ if (ParseHex<4>(p + 1, pe, cp[0])) {
+ // A value in 0xD800..0xDBFF immediately followed by a second \uXXXX
+ // (p[5] == '\\', p[6] == 'u', digits at p[7..10]) is decoded as one pair.
+ // ParseHex on p + 7 is evaluated first, so p[5] and p[6] are within range when read.
+ if (Y_UNLIKELY(cp[0] >= 0xD800 && cp[0] <= 0xDBFF && ParseHex<4>(p + 7, pe, cp[1]) && p[5] == '\\' && p[6] == 'u')) {
+ const wchar16 wbuf[] = {wchar16(cp[0]), wchar16(cp[1])};
+ AppendUnicode(res, ReadSymbol(wbuf, wbuf + 2));
+ p += 10;
+ } else {
+ AppendUnicode(res, (wchar32)cp[0]);
+ p += 4;
+ }
+ } else {
+ // not followed by four parsable hex digits: emit 'u' literally
+ res.append(*p);
+ }
+
+ break;
+ }
+
+ case 'U':
+ // requires exactly eight hex digits; otherwise 'U' is emitted literally
+ if (CountHex<8>(p + 1, pe) != 8) {
+ res.append(*p);
+ } else {
+ AppendUnicode(res, IntFromString<ui32, 16>(p + 1, 8));
+ p += 8;
+ }
+ break;
+ case 'x':
+ // consumes one or two hex digits; with none, 'x' is emitted literally
+ if (ui32 v = CountHex<2>(p + 1, pe)) {
+ res.append((TChar)IntFromString<ui32, 16>(p + 1, v));
+ p += v;
+ } else {
+ res.append(*p);
+ }
+
+ break;
+ // octal escapes starting with 0-3 may span up to three digits
+ case '0':
+ case '1':
+ case '2':
+ case '3': {
+ ui32 v = CountOct<3>(p, pe); // v is always positive
+ res.append((TChar)IntFromString<ui32, 8>(p, v));
+ p += v - 1;
+ } break;
+ // octal escapes starting with 4-7 span at most two digits
+ case '4':
+ case '5':
+ case '6':
+ case '7': {
+ ui32 v = CountOct<2>(p, pe); // v is always positive
+ res.append((TChar)IntFromString<ui32, 8>(p, v));
+ p += v - 1;
+ } break;
+ }
+
+ ++p;
+ } else {
+ // copy everything up to the next backslash (or the end of input) verbatim
+ const auto r = std::basic_string_view<TChar>(p, pe - p).find('\\');
+ const auto n = r != std::string::npos ? p + r : pe;
+
+ res.append(p, n);
+ p = n;
+ }
+ }
+
+ return res;
+ }
+
+ // Appends the unescaped form of [p, p + sz) to res and returns res.
+ // Thin wrapper over DoUnescapeC for TBasicString outputs.
+ template <class TChar>
+ TBasicString<TChar>& UnescapeCImpl(const TChar* p, size_t sz, TBasicString<TChar>& res) {
+ return DoUnescapeC(p, sz, res);
+ }
+
+ // Unescapes [str, str + len) into the raw buffer buf and returns the position just
+ // past the last character written. No bounds checking is done on buf.
+ template <class TChar>
+ TChar* UnescapeC(const TChar* str, size_t len, TChar* buf) {
+     // Minimal string-like sink that writes through a moving pointer
+     struct TUnboundedString {
+         void append(TChar ch) noexcept {
+             *P = ch;
+             ++P;
+         }
+
+         void append(const TChar* b, const TChar* e) noexcept {
+             for (; b != e; ++b) {
+                 append(*b);
+             }
+         }
+
+         void AppendNoAlias(const TChar* s, size_t l) noexcept {
+             append(s, s + l);
+         }
+
+         TChar* P;
+     };
+
+     TUnboundedString sink = {buf};
+     DoUnescapeC(str, len, sink);
+
+     return sink.P;
+ }
+
+template TString& UnescapeCImpl<TString::TChar>(const TString::TChar* str, size_t len, TString& r);
+template TUtf16String& UnescapeCImpl<TUtf16String::TChar>(const TUtf16String::TChar* str, size_t len, TUtf16String& r);
+
+template char* UnescapeC<char>(const char* str, size_t len, char* buf);
+
+ // Returns how many input characters the first (possibly escaped) character of
+ // [begin, end) occupies: 0 for an empty range, 1 for an unescaped character or a
+ // trailing lone backslash, otherwise the length of the escape sequence including '\'.
+ template <class TChar>
+ size_t UnescapeCCharLen(const TChar* begin, const TChar* end) {
+     if (begin >= end) {
+         return 0;
+     }
+     if (*begin != '\\') {
+         return 1;
+     }
+
+     // character right after the backslash
+     const TChar* const esc = begin + 1;
+     if (esc == end) {
+         return 1;
+     }
+
+     switch (*esc) {
+         case 'u': {
+             const bool complete = CountHex<4>(esc + 1, end) == 4;
+             return complete ? 6 : 2;
+         }
+         case 'U': {
+             const bool complete = CountHex<8>(esc + 1, end) == 8;
+             return complete ? 10 : 2;
+         }
+         case 'x':
+             return 2 + CountHex<2>(esc + 1, end);
+         case '0':
+         case '1':
+         case '2':
+         case '3':
+             return 1 + CountOct<3>(esc, end); // >= 2
+         case '4':
+         case '5':
+         case '6':
+         case '7':
+             return 1 + CountOct<2>(esc, end); // >= 2
+         default:
+             return 2;
+     }
+ }
+
+template size_t UnescapeCCharLen<char>(const char* begin, const char* end);
+template size_t UnescapeCCharLen<TUtf16String::TChar>(const TUtf16String::TChar* begin, const TUtf16String::TChar* end);
+
+ // Non-template EscapeC overloads. Each one forwards to the (pointer, length)
+ // overloads using the data()/size() of its argument.
+
+ // Appends the escaped form of str to s and returns s.
+ TString& EscapeC(const TStringBuf str, TString& s) {
+ return EscapeC(str.data(), str.size(), s);
+ }
+
+ // Appends the escaped form of str to w and returns w.
+ TUtf16String& EscapeC(const TWtringBuf str, TUtf16String& w) {
+ return EscapeC(str.data(), str.size(), w);
+ }
+
+ // Returns the escaped form of str as a new string.
+ TString EscapeC(const TString& str) {
+ return EscapeC(str.data(), str.size());
+ }
+
+ // Returns the escaped form of str as a new wide string.
+ TUtf16String EscapeC(const TUtf16String& str) {
+ return EscapeC(str.data(), str.size());
+ }
+
+ // Non-template UnescapeC overloads. Each one forwards to the (pointer, length)
+ // overloads using the data()/size() of its argument.
+
+ // Appends the unescaped form of str to s and returns s.
+ TString& UnescapeC(const TStringBuf str, TString& s) {
+ return UnescapeC(str.data(), str.size(), s);
+ }
+
+ // Appends the unescaped form of str to w and returns w.
+ TUtf16String& UnescapeC(const TWtringBuf str, TUtf16String& w) {
+ return UnescapeC(str.data(), str.size(), w);
+ }
+
+ // Returns the unescaped form of str as a new string.
+ TString UnescapeC(const TStringBuf str) {
+ return UnescapeC(str.data(), str.size());
+ }
+
+ // Returns the unescaped form of str as a new wide string.
+ TUtf16String UnescapeC(const TWtringBuf str) {
+ return UnescapeC(str.data(), str.size());
+ }
diff --git a/util/string/escape.h b/util/string/escape.h
new file mode 100644
index 0000000000..b01be65b0e
--- /dev/null
+++ b/util/string/escape.h
@@ -0,0 +1,70 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/generic/strbuf.h>
+
+ /// Appends the C-escaped form of [str, str + len) to the given string and returns it.
+ /// Explicitly instantiated in escape.cpp for TString::TChar and TUtf16String::TChar.
+ template <class TChar>
+ TBasicString<TChar>& EscapeCImpl(const TChar* str, size_t len, TBasicString<TChar>&);
+
+ /// Appends the unescaped form of [str, str + len) to the given string and returns it.
+ /// Explicitly instantiated in escape.cpp for TString::TChar and TUtf16String::TChar.
+ template <class TChar>
+ TBasicString<TChar>& UnescapeCImpl(const TChar* str, size_t len, TBasicString<TChar>&);
+
+ /// Unescapes [str, str + len) into the raw buffer buf and returns the position just past
+ /// the last character written. buf is written without bounds checking.
+ /// Explicitly instantiated in escape.cpp for char.
+ template <class TChar>
+ TChar* UnescapeC(const TChar* str, size_t len, TChar* buf);
+
+ /// Appends the escaped form of [str, str + len) to s and returns s.
+ template <typename TChar>
+ static inline TBasicString<TChar>& EscapeC(const TChar* str, size_t len, TBasicString<TChar>& s) {
+ return EscapeCImpl(str, len, s);
+ }
+
+ /// Returns the escaped form of [str, str + len) as a new string.
+ template <typename TChar>
+ static inline TBasicString<TChar> EscapeC(const TChar* str, size_t len) {
+ TBasicString<TChar> s;
+ return EscapeC(str, len, s);
+ }
+
+ /// Returns the escaped form of the given string buffer as a new string.
+ template <typename TChar>
+ static inline TBasicString<TChar> EscapeC(const TBasicStringBuf<TChar>& str) {
+ return EscapeC(str.data(), str.size());
+ }
+
+ /// Appends the unescaped form of [str, str + len) to s and returns s.
+ template <typename TChar>
+ static inline TBasicString<TChar>& UnescapeC(const TChar* str, size_t len, TBasicString<TChar>& s) {
+ return UnescapeCImpl(str, len, s);
+ }
+
+ /// Returns the unescaped form of [str, str + len) as a new string.
+ template <typename TChar>
+ static inline TBasicString<TChar> UnescapeC(const TChar* str, size_t len) {
+ TBasicString<TChar> s;
+ return UnescapeCImpl(str, len, s);
+ }
+
+ /// Returns the escaped form of a single character.
+ template <typename TChar>
+ static inline TBasicString<TChar> EscapeC(TChar ch) {
+ return EscapeC(&ch, 1);
+ }
+
+ /// Returns the escaped form of a zero-terminated string; the length is taken
+ /// from std::char_traits<TChar>::length.
+ template <typename TChar>
+ static inline TBasicString<TChar> EscapeC(const TChar* str) {
+ return EscapeC(str, std::char_traits<TChar>::length(str));
+ }
+
+TString& EscapeC(const TStringBuf str, TString& res);
+TUtf16String& EscapeC(const TWtringBuf str, TUtf16String& res);
+
+// these two need to be methods, because of TBasicString::Quote implementation
+TString EscapeC(const TString& str);
+TUtf16String EscapeC(const TUtf16String& str);
+
+TString& UnescapeC(const TStringBuf str, TString& res);
+TUtf16String& UnescapeC(const TWtringBuf str, TUtf16String& res);
+
+TString UnescapeC(const TStringBuf str);
+TUtf16String UnescapeC(const TWtringBuf wtr);
+
+/// Returns number of chars in escape sequence.
+/// - 0, if begin >= end
+/// - 1, if [begin, end) starts with an unescaped char
+/// - at least 2 (including '\'), if [begin, end) starts with an escaped symbol
+template <class TChar>
+size_t UnescapeCCharLen(const TChar* begin, const TChar* end);
diff --git a/util/string/escape_ut.cpp b/util/string/escape_ut.cpp
new file mode 100644
index 0000000000..cd38ecffd3
--- /dev/null
+++ b/util/string/escape_ut.cpp
@@ -0,0 +1,148 @@
+#include "escape.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/string.h>
+#include <util/charset/wide.h>
+
+using namespace std::string_view_literals;
+
+namespace {
+ // One escape/unescape test vector: Source is the raw text and Expected is its escaped form.
+ // The tests check EscapeC(Source) == Expected and UnescapeC(Expected) == Source.
+ struct TExample {
+ TString Expected;
+ TString Source;
+
+ TExample(const TStringBuf expected, const TStringBuf source)
+ : Expected{expected}
+ , Source{source}
+ {
+ }
+ };
+}
+
+ // Test vectors shared by the narrow, wide and raw-buffer tests; each entry is {escaped, raw}.
+ // Raw strings with embedded zero characters use the sv literal suffix so that their
+ // length is kept. Adjacent literals are split where the next character would
+ // otherwise be read as part of the preceding escape sequence.
+ static const TExample CommonTestData[] = {
+ // Should be valid UTF-8.
+ {"http://ya.ru/", "http://ya.ru/"},
+ {"http://ya.ru/\\x17\\n", "http://ya.ru/\x17\n"},
+
+ {"http://ya.ru/\\0", "http://ya.ru/\0"sv},
+ {"http://ya.ru/\\0\\0", "http://ya.ru/\0\0"sv},
+ {"http://ya.ru/\\0\\0000", "http://ya.ru/\0\0"
+ "0"sv},
+ {"http://ya.ru/\\0\\0001", "http://ya.ru/\0\x00"
+ "1"sv},
+
+ {R"(\2\4\00678)", "\2\4\6"
+ "78"sv}, // \6 -> \006 because next char '7' is "octal"
+ {R"(\2\4\689)", "\2\4\6"
+ "89"sv}, // \6 -> \6 because next char '8' is not "octal"
+
+ {R"(\"Hello\", Alice said.)", "\"Hello\", Alice said."},
+ {"Slash\\\\dash!", "Slash\\dash!"},
+ {R"(There\nare\r\nnewlines.)", "There\nare\r\nnewlines."},
+ {"There\\tare\\ttabs.", "There\tare\ttabs."},
+
+ // a '?' followed by another '?' is escaped so that no trigraph can appear in the output
+ {"There are questions \\x3F\\x3F?", "There are questions ???"},
+ {"There are questions \\x3F?", "There are questions ??"},
+ };
+
+Y_UNIT_TEST_SUITE(TEscapeCTest) {
+ // Round-trips the shared vectors through the narrow-string EscapeC/UnescapeC and then
+ // checks bytes >= 0x80, the hex-vs-octal choice and \u decoding to UTF-8.
+ Y_UNIT_TEST(TestStrokaEscapeC) {
+ for (const auto& e : CommonTestData) {
+ // NOTE(review): 'expected' and 'source' are not used below; the assertions read e.* directly
+ TString expected(e.Expected);
+ TString source(e.Source);
+ TString actual(EscapeC(e.Source));
+ TString actual2(UnescapeC(e.Expected));
+
+ UNIT_ASSERT_VALUES_EQUAL(e.Expected, actual);
+ UNIT_ASSERT_VALUES_EQUAL(e.Source, actual2);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\\x17\\n\\xAB", EscapeC(TString("http://ya.ru/\x17\n\xab")));
+ UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\x17\n\xab", UnescapeC(TString("http://ya.ru/\\x17\\n\\xAB")));
+ UNIT_ASSERT_VALUES_EQUAL("h", EscapeC('h'));
+ UNIT_ASSERT_VALUES_EQUAL("h", UnescapeC(TString("h")));
+ UNIT_ASSERT_VALUES_EQUAL("\\xFF", EscapeC('\xFF'));
+ UNIT_ASSERT_VALUES_EQUAL("\xFF", UnescapeC(TString("\\xFF")));
+
+ // followed by the hex digit 'f' the byte is written in octal; followed by 'g' in hex
+ UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeC(TString("\xff"
+ "f")));
+ UNIT_ASSERT_VALUES_EQUAL("\xff"
+ "f",
+ UnescapeC(TString("\\377f")));
+ UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeC(TString("\xff"
+ "g")));
+ UNIT_ASSERT_VALUES_EQUAL("\xff"
+ "g",
+ UnescapeC(TString("\\xFFg")));
+ // \uXXXX sequences are decoded to UTF-8 in narrow strings
+ UNIT_ASSERT_VALUES_EQUAL("\xEA\x9A\x96", UnescapeC(TString("\\uA696")));
+ UNIT_ASSERT_VALUES_EQUAL("Странный компроматтест", UnescapeC(TString("\\u0421\\u0442\\u0440\\u0430\\u043d\\u043d\\u044b\\u0439 \\u043a\\u043e\\u043c\\u043f\\u0440\\u043e\\u043c\\u0430\\u0442тест")));
+ }
+
+ // Round-trips the shared vectors (converted to UTF-16) through the wide EscapeC/UnescapeC
+ // and checks that code units above 0xFF are written as \uXXXX.
+ Y_UNIT_TEST(TestWtrokaEscapeC) {
+ for (const auto& e : CommonTestData) {
+ TUtf16String expected(UTF8ToWide(e.Expected));
+ TUtf16String source(UTF8ToWide(e.Source));
+ TUtf16String actual(EscapeC(source));
+ TUtf16String actual2(UnescapeC(expected));
+
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ UNIT_ASSERT_VALUES_EQUAL(source, actual2);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(u"http://ya.ru/\\x17\\n\\u1234", EscapeC(u"http://ya.ru/\x17\n\u1234"));
+ UNIT_ASSERT_VALUES_EQUAL(u"h", EscapeC(u'h'));
+ UNIT_ASSERT_VALUES_EQUAL(u"\\xFF", EscapeC(wchar16(255)));
+ }
+
+ // Every '?' that is directly followed by another '?' is written as \x3F, so the
+ // output never contains two consecutive question marks; a lone '?' stays as is.
+ Y_UNIT_TEST(TestEscapeTrigraphs) {
+ UNIT_ASSERT_VALUES_EQUAL("?", EscapeC(TString("?")));
+ UNIT_ASSERT_VALUES_EQUAL("\\x3F?", EscapeC(TString("??")));
+ UNIT_ASSERT_VALUES_EQUAL("\\x3F\\x3F?", EscapeC(TString("???")));
+ // ok but may cause warning about trigraphs
+ // UNIT_ASSERT_VALUES_EQUAL("[x]?z", EscapeC(TString("??(x??)?z")));
+ UNIT_ASSERT_VALUES_EQUAL("\\x3F?x\\x3F\\x3F?z", EscapeC(TString("??x???z")));
+ }
+
+ // Checks the length UnescapeCCharLen reports for the first (possibly escaped)
+ // character of various inputs.
+ Y_UNIT_TEST(TestUnescapeCCharLen) {
+ auto test = [](const char* str, size_t len) {
+ UNIT_ASSERT_EQUAL(UnescapeCCharLen(str, str + strlen(str)), len);
+ };
+
+ test("", 0);
+ test("abc", 1);
+ test("\\", 1);
+ test("\\\\", 2);
+ test("\\#", 2);
+ test("\\n10", 2);
+ test("\\r\\n", 2);
+ test("\\x05abc", 4);
+ // \u and \U count as full sequences only with exactly 4 / 8 hex digits
+ test("\\u11117777", 6);
+ test("\\u123yyy", 2);
+ test("\\U11117777cccc", 10);
+ test("\\U111yyy", 2);
+ // octal: up to three digits after a leading 0-3, up to two after a leading 4-7
+ test("\\0\\1", 2);
+ test("\\01\\1", 3);
+ test("\\012\\1", 4);
+ test("\\0123\\1", 4);
+ test("\\4\\1", 2);
+ test("\\40\\1", 3);
+ test("\\400\\1", 3);
+ test("\\4xxx", 2);
+ }
+
+ // Exercises the raw-buffer UnescapeC overload: the returned pointer marks the end
+ // of the decoded text written into buf.
+ Y_UNIT_TEST(TestUnbounded) {
+ char buf[100000];
+
+ for (const auto& x : CommonTestData) {
+ char* end = UnescapeC(x.Expected.data(), x.Expected.size(), buf);
+
+ UNIT_ASSERT_VALUES_EQUAL(x.Source, TStringBuf(buf, end));
+ }
+ }
+
+ // \U followed by eight hex digits decodes to that code point; without eight hex
+ // digits only the backslash is dropped and 'U' is kept literally.
+ Y_UNIT_TEST(TestCapitalUEscapes) {
+ UNIT_ASSERT_VALUES_EQUAL(UnescapeC("\\U00000020"), " ");
+ UNIT_ASSERT_VALUES_EQUAL(UnescapeC("\\Uxxx"), "Uxxx");
+ }
+}
diff --git a/util/string/fuzzing/collapse/main.cpp b/util/string/fuzzing/collapse/main.cpp
new file mode 100644
index 0000000000..e7b09f0f55
--- /dev/null
+++ b/util/string/fuzzing/collapse/main.cpp
@@ -0,0 +1,12 @@
+#include <util/string/strip.h>
+#include <util/charset/wide.h>
+
+ // Fuzz entry point: feeds the raw input to Collapse() as UTF-16 code units and to
+ // CollapseInPlace() as a narrow string.
+ extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) {
+     // size / 2 code units; a trailing odd byte is not used
+     TUtf16String wide((const wchar16*)data, size / 2);
+     Collapse(wide);
+
+     TString narrow((const char*)data, size);
+     CollapseInPlace(narrow);
+
+     // Non-zero return values are reserved for future use.
+     return 0;
+ }
diff --git a/util/string/fuzzing/collapse/ya.make b/util/string/fuzzing/collapse/ya.make
new file mode 100644
index 0000000000..b8614f6411
--- /dev/null
+++ b/util/string/fuzzing/collapse/ya.make
@@ -0,0 +1,13 @@
+FUZZ()
+
+OWNER(
+ pg
+ g:util
+)
+SUBSCRIBER(g:util-subscribers)
+
+SRCS(
+ main.cpp
+)
+
+END()
diff --git a/util/string/fuzzing/escape_c/main.cpp b/util/string/fuzzing/escape_c/main.cpp
new file mode 100644
index 0000000000..742126416a
--- /dev/null
+++ b/util/string/fuzzing/escape_c/main.cpp
@@ -0,0 +1,11 @@
+#include <util/generic/string.h>
+#include <util/string/escape.h>
+
+ // Fuzz entry point: checks the round-trip property UnescapeC(EscapeC(src)) == src
+ // for arbitrary byte strings; Y_VERIFY reports a violation.
+ extern "C" int LLVMFuzzerTestOneInput(const ui8* const data, const size_t size) {
+ const TString src(reinterpret_cast<const char*>(data), size);
+ const auto escaped = EscapeC(src);
+ const auto dst = UnescapeC(escaped);
+
+ Y_VERIFY(src == dst);
+ return 0;
+ }
diff --git a/util/string/fuzzing/escape_c/ya.make b/util/string/fuzzing/escape_c/ya.make
new file mode 100644
index 0000000000..61e64ac9de
--- /dev/null
+++ b/util/string/fuzzing/escape_c/ya.make
@@ -0,0 +1,13 @@
+OWNER(
+ yazevnul
+ g:util
+)
+SUBSCRIBER(g:util-subscribers)
+
+FUZZ()
+
+SRCS(
+ main.cpp
+)
+
+END()
diff --git a/util/string/fuzzing/strtod/main.cpp b/util/string/fuzzing/strtod/main.cpp
new file mode 100644
index 0000000000..50ea2a6afc
--- /dev/null
+++ b/util/string/fuzzing/strtod/main.cpp
@@ -0,0 +1,9 @@
+#include <util/string/cast.h>
+
+ // Fuzz entry point: runs the double parser on arbitrary bytes. The parse result
+ // and the success flag are deliberately ignored.
+ extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) {
+     double parsed;
+     const char* const text = (const char*)data;
+
+     TryFromString<double>(text, size, parsed);
+
+     // Non-zero return values are reserved for future use.
+     return 0;
+ }
diff --git a/util/string/fuzzing/strtod/ya.make b/util/string/fuzzing/strtod/ya.make
new file mode 100644
index 0000000000..b8614f6411
--- /dev/null
+++ b/util/string/fuzzing/strtod/ya.make
@@ -0,0 +1,13 @@
+FUZZ()
+
+OWNER(
+ pg
+ g:util
+)
+SUBSCRIBER(g:util-subscribers)
+
+SRCS(
+ main.cpp
+)
+
+END()
diff --git a/util/string/fuzzing/ya.make b/util/string/fuzzing/ya.make
new file mode 100644
index 0000000000..617e0f2b1d
--- /dev/null
+++ b/util/string/fuzzing/ya.make
@@ -0,0 +1,11 @@
+OWNER(
+ g:util
+ pg
+)
+SUBSCRIBER(g:util-subscribers)
+
+RECURSE(
+ collapse
+ escape_c
+ strtod
+)
diff --git a/util/string/hex.cpp b/util/string/hex.cpp
new file mode 100644
index 0000000000..667397987f
--- /dev/null
+++ b/util/string/hex.cpp
@@ -0,0 +1,63 @@
+#include "hex.h"
+
+ // 256-entry lookup table indexed by the unsigned value of a character: hex digit
+ // characters map to their numeric value (0..15), every other character maps to 0xff.
+ // One row per 16 character codes.
+ const char* const Char2DigitTable = ("\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" // '0'-'9'
+ "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" // 'A'-'F'
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" // 'a'-'f'
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
+ "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff");
+
+ // Writes two uppercase hex digits per input byte to out (2 * len characters, no
+ // terminator) and returns the position just past the last character written.
+ char* HexEncode(const void* in, size_t len, char* out) {
+     const unsigned char* const bytes = (const unsigned char*)in;
+
+     for (size_t i = 0; i != len; ++i) {
+         const unsigned char byte = bytes[i];
+         *out++ = DigitToChar(byte / 16);
+         *out++ = DigitToChar(byte % 16);
+     }
+
+     return out;
+ }
+
+ // Decodes len hex characters from in into len / 2 bytes at ptr and returns the
+ // position just past the last byte written. Throws when len is odd; an invalid
+ // hex character makes String2Byte throw.
+ void* HexDecode(const void* in, size_t len, void* ptr) {
+     Y_ENSURE(!(len & 1), TStringBuf("Odd buffer length passed to HexDecode"));
+
+     const char* src = (const char*)in;
+     char* dst = (char*)ptr;
+
+     for (const char* const end = src + len; src != end; src += 2) {
+         *dst++ = (char)String2Byte(src);
+     }
+
+     return dst;
+ }
+
+ // Returns the uppercase hex encoding of len bytes at in (2 * len characters).
+ TString HexEncode(const void* in, size_t len) {
+     TString encoded;
+
+     // two output characters per input byte
+     encoded.ReserveAndResize(len * 2);
+     HexEncode(in, len, encoded.begin());
+
+     return encoded;
+ }
+
+ // Returns the bytes encoded by len hex characters at in (len / 2 bytes).
+ // Exceptions from the buffer-based HexDecode propagate to the caller.
+ TString HexDecode(const void* in, size_t len) {
+     TString decoded;
+
+     // one output byte per two input characters
+     decoded.ReserveAndResize(len / 2);
+     HexDecode(in, len, decoded.begin());
+
+     return decoded;
+ }
diff --git a/util/string/hex.h b/util/string/hex.h
new file mode 100644
index 0000000000..af3d2d528f
--- /dev/null
+++ b/util/string/hex.h
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/generic/yexception.h>
+#include <util/system/yassert.h>
+
+ //! Convert a value 0..15 to its uppercase hex digit character ('0'-'9', 'A'-'F').
+ inline static char DigitToChar(unsigned char digit) {
+     return digit < 10 ? (char)digit + '0' : (char)(digit - 10) + 'A';
+ }
+
+extern const char* const Char2DigitTable;
+
+ //! Convert a hex digit character (either case) to its numeric value 0..15.
+ /*! Looks the character up in Char2DigitTable; an entry of 0xff marks a character
+ * that is not a hex digit, in which case Y_ENSURE throws.
+ */
+ inline static int Char2Digit(char ch) {
+ char result = Char2DigitTable[(unsigned char)ch];
+ Y_ENSURE(result != '\xff', "invalid hex character " << (int)ch);
+ return result;
+ }
+
+ //! Convert the two hex characters s[0] and s[1] to the byte value they encode.
+ /*! Exceptions from Char2Digit propagate when either character is not a hex digit.
+  * @example String2Byte("10") => 16 */
+ inline static int String2Byte(const char* s) {
+     return Char2Digit(s[0]) * 16 + Char2Digit(s[1]);
+ }
+
+ //! Write the uppercase hex encoding of @c len bytes at @c in to @c out (2 * len chars, no terminator).
+ /*! @return pointer just past the last character written. */
+ char* HexEncode(const void* in, size_t len, char* out);
+
+ //! Return the uppercase hex encoding of @c len bytes at @c in.
+ TString HexEncode(const void* in, size_t len);
+
+ //! Return the uppercase hex encoding of the bytes of @c h.
+ inline TString HexEncode(const TStringBuf h) {
+ return HexEncode(h.data(), h.size());
+ }
+
+ //! Convert a hex string @c in of @c len chars (case-insensitive) to bytes stored at @c ptr.
+ /*! @note len must be even (len % 2 == 0), otherwise an exception will be thrown.
+ * @return pointer just past the last byte written (that is, @c ptr advanced by len/2), not @c ptr itself.
+ * Each written byte holds the numeric value of the corresponding 2 digits of the input.
+ * @warning You must ensure that @c ptr has (len/2) allocated bytes, otherwise SIGSEGV will happen.
+ *
+ * @example HexDecode("beef", 4, ptr) => {190, 239}
+ */
+ void* HexDecode(const void* in, size_t len, void* ptr);
+
+//! Convert a hex string @c in of @c len chars (case-insensitive) to array of ints and return this array.
+/*! @note len must be even (len % 2 == 0), otherwise an exception will be thrown.
+ * @return an array of chars, where each char holds the numeric value equal to the corresponding 2 digits
+ * of the input stream.
+ *
+ * @example HexDecode("beef", 4) => {190, 239}
+ */
+TString HexDecode(const void* in, size_t len);
+
+ //! Convert an ASCII hex-string (case-insensitive) to the binary form. Note that h.size() must be even (h.size() % 2 == 0).
+ inline TString HexDecode(const TStringBuf h) {
+ return HexDecode(h.data(), h.size());
+ }
diff --git a/util/string/hex_ut.cpp b/util/string/hex_ut.cpp
new file mode 100644
index 0000000000..39a83d5e62
--- /dev/null
+++ b/util/string/hex_ut.cpp
@@ -0,0 +1,19 @@
+#include "hex.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+ // Tests for HexEncode/HexDecode: a known vector in both directions, case-insensitive
+ // decoding, and rejection of invalid input.
+ Y_UNIT_TEST_SUITE(THexCodingTest) {
+ Y_UNIT_TEST(TestEncode) {
+ UNIT_ASSERT_EQUAL(HexEncode("i1634iqwbf,&msdb"), "693136333469717762662C266D736462");
+ }
+
+ Y_UNIT_TEST(TestDecode) {
+ UNIT_ASSERT_EQUAL(HexDecode("693136333469717762662C266D736462"), "i1634iqwbf,&msdb");
+ }
+
+ Y_UNIT_TEST(TestDecodeCase) {
+ // upper- and lower-case digits decode to the same bytes
+ UNIT_ASSERT_EQUAL(HexDecode("12ABCDEF"), HexDecode("12abcdef"));
+ UNIT_ASSERT_EXCEPTION(HexDecode("Hello"), yexception); //< incorrect chars
+ UNIT_ASSERT_EXCEPTION(HexDecode("123"), yexception); //< odd length
+ }
+ }
diff --git a/util/string/join.cpp b/util/string/join.cpp
new file mode 100644
index 0000000000..3f88e23128
--- /dev/null
+++ b/util/string/join.cpp
@@ -0,0 +1 @@
+#include "join.h"
diff --git a/util/string/join.h b/util/string/join.h
new file mode 100644
index 0000000000..b166fad1f3
--- /dev/null
+++ b/util/string/join.h
@@ -0,0 +1,265 @@
+#pragma once
+
+#include <util/generic/string.h>
+#include <util/generic/typetraits.h>
+#include <util/string/cast.h>
+#include "cast.h"
+
+/*
+ * Default implementation of AppendToString uses a temporary TString object which is inefficient. You can overload it
+ * for your type to speed up string joins. If you already have an Out() or operator<<() implementation you can simply
+ * do the following:
+ *
+ * inline void AppendToString(TString& dst, const TMyType& t) {
+ * TStringOutput o(dst);
+ * o << t;
+ * }
+ *
+ * Unfortunately we can't do this by default because for some types ToString() is defined while Out() is not.
+ * For standard types (strings of all kinds and arithmetic types) we don't use a temporary TString in AppendToString().
+ */
+
+// Generic fallback for non-arithmetic types: stringifies @c t with ToString() and appends the
+// resulting temporary to @c dst.
+template <typename TCharType, typename T>
+inline std::enable_if_t<!std::is_arithmetic<std::remove_cv_t<T>>::value, void>
+AppendToString(TBasicString<TCharType>& dst, const T& t) {
+ dst.AppendNoAlias(ToString(t));
+}
+
+// Arithmetic overload: formats @c t into a stack buffer and appends the produced characters,
+// so no temporary string object is created.
+template <typename TCharType, typename T>
+inline std::enable_if_t<std::is_arithmetic<std::remove_cv_t<T>>::value, void>
+AppendToString(TBasicString<TCharType>& dst, const T& t) {
+    using TValue = std::remove_cv_t<T>;
+
+    char formatted[512];
+    const auto written = ToString<TValue>(t, formatted, sizeof(formatted));
+    dst.append(formatted, written);
+}
+
+// Appends the null-terminated string @c t to @c dst.
+// A null pointer appends nothing; this matches NPrivate::GetLength(), which reports length 0
+// for a null C string, and avoids handing a null pointer to append().
+template <typename TCharType>
+inline void AppendToString(TBasicString<TCharType>& dst, const TCharType* t) {
+    if (t) {
+        dst.append(t);
+    }
+}
+
+// String-buffer overload: appends the characters viewed by @c t directly, without any conversion.
+template <typename TCharType>
+inline void AppendToString(TBasicString<TCharType>& dst, TBasicStringBuf<TCharType> t) {
+ dst.append(t);
+}
+
+namespace NPrivate {
+    // Length hint used to pre-reserve space before joining.
+    // The generic version gives no hint (0), so nothing is pre-allocated when joining and appending
+    // non-string types. It can be extended with per-type estimates (e.g. 10 for ui32).
+    template <typename T>
+    inline size_t GetLength(const T&) {
+        return 0;
+    }
+
+    template <>
+    inline size_t GetLength(const TString& s) {
+        return s.length();
+    }
+
+    template <>
+    inline size_t GetLength(const TStringBuf& s) {
+        return s.length();
+    }
+
+    // C strings: a null pointer counts as an empty string.
+    template <>
+    inline size_t GetLength(const char* const& s) {
+        if (!s) {
+            return 0;
+        }
+        return std::char_traits<char>::length(s);
+    }
+
+    // Recursion terminator: no values left, nothing to add.
+    inline size_t GetAppendLength(const TStringBuf /*delim*/) {
+        return 0;
+    }
+
+    // Sums, for every value, the delimiter length plus that value's length hint.
+    template <typename TFirst, typename... TRest>
+    size_t GetAppendLength(const TStringBuf delim, const TFirst& f, const TRest&... r) {
+        const size_t head = delim.length() + ::NPrivate::GetLength(f);
+        return head + ::NPrivate::GetAppendLength(delim, r...);
+    }
+}
+
+// Recursion terminator: no values left to append.
+template <typename TCharType>
+inline void AppendJoinNoReserve(TBasicString<TCharType>&, TBasicStringBuf<TCharType>) {
+}
+
+// Appends `delim` followed by `f` to `dst`, then recurses on the remaining values.
+// Note that the delimiter is written BEFORE every value, including the first one passed here.
+// No capacity is reserved; see AppendJoin() for the reserving variant.
+template <typename TCharType, typename TFirst, typename... TRest>
+inline void AppendJoinNoReserve(TBasicString<TCharType>& dst, TBasicStringBuf<TCharType> delim, const TFirst& f, const TRest&... r) {
+ AppendToString(dst, delim);
+ AppendToString(dst, f);
+ AppendJoinNoReserve(dst, delim, r...);
+}
+
+// Appends `delim` + value to @c dst for each of @c values (the delimiter also precedes the first value).
+// Space is reserved up front when the length hints of the values add up to something non-zero.
+template <typename... TValues>
+inline void AppendJoin(TString& dst, const TStringBuf delim, const TValues&... values) {
+    if (const size_t extra = ::NPrivate::GetAppendLength(delim, values...); extra > 0) {
+        dst.reserve(dst.length() + extra);
+    }
+    AppendJoinNoReserve(dst, delim, values...);
+}
+
+// Joins @c f and all of @c r into a new string with @c delim between consecutive values.
+// The first value is converted with ToString(); the remaining ones are appended via AppendJoin().
+template <typename TFirst, typename... TRest>
+inline TString Join(const TStringBuf delim, const TFirst& f, const TRest&... r) {
+    TString joined = ToString(f);
+    AppendJoin(joined, delim, r...);
+    return joined;
+}
+
+// Note that the char delimiter @cdelim is printed as a single-char string,
+// but any char value in @v is printed as its numeric code.
+// For example, Join('a', 'a', 'a') produces "97a97" (see unit-test).
+template <typename... TValues>
+inline TString Join(char cdelim, const TValues&... v) {
+    const TStringBuf delim(&cdelim, 1);
+    return Join(delim, v...);
+}
+
+namespace NPrivate {
+    // Joins the elements of [beg, end) into a new string, separated by @c delim.
+    // The range is traversed twice: once to sum up length hints for a single reserve(),
+    // and once to append the elements.
+    template <typename TCharType, typename TIter>
+    inline TBasicString<TCharType> JoinRange(TBasicStringBuf<TCharType> delim, const TIter beg, const TIter end) {
+        TBasicString<TCharType> out;
+        if (!(beg != end)) {
+            return out;
+        }
+
+        // First pass: estimate the final size.
+        size_t total = ::NPrivate::GetLength(*beg);
+        TIter sizePos = beg;
+        for (++sizePos; sizePos != end; ++sizePos) {
+            total += delim.length() + ::NPrivate::GetLength(*sizePos);
+        }
+        if (total > 0) {
+            out.reserve(total);
+        }
+
+        // Second pass: the first element goes in bare, every later one is preceded by the delimiter.
+        AppendToString(out, *beg);
+        TIter appendPos = beg;
+        for (++appendPos; appendPos != end; ++appendPos) {
+            AppendJoinNoReserve(out, delim, *appendPos);
+        }
+
+        return out;
+    }
+
+} // namespace NPrivate
+
+// Public JoinRange overloads. Each one forwards to ::NPrivate::JoinRange with the character type
+// fixed by the delimiter: char -> TString, wchar16 -> TUtf16String, wchar32 -> TUtf32String.
+// Single-character delimiters are wrapped into a one-element string buffer first.
+
+// Joins [beg, end) with a narrow string delimiter.
+template <typename TIter>
+TString JoinRange(std::string_view delim, const TIter beg, const TIter end) {
+ return ::NPrivate::JoinRange<char>(delim, beg, end);
+}
+
+// Joins [beg, end) with a single narrow character as the delimiter.
+template <typename TIter>
+TString JoinRange(char delim, const TIter beg, const TIter end) {
+ TStringBuf delimBuf(&delim, 1);
+ return ::NPrivate::JoinRange<char>(delimBuf, beg, end);
+}
+
+// Joins [beg, end) with a UTF-16 string delimiter.
+template <typename TIter>
+TUtf16String JoinRange(std::u16string_view delim, const TIter beg, const TIter end) {
+ return ::NPrivate::JoinRange<wchar16>(delim, beg, end);
+}
+
+// Joins [beg, end) with a single wchar16 as the delimiter.
+template <typename TIter>
+TUtf16String JoinRange(wchar16 delim, const TIter beg, const TIter end) {
+ TWtringBuf delimBuf(&delim, 1);
+ return ::NPrivate::JoinRange<wchar16>(delimBuf, beg, end);
+}
+
+// Joins [beg, end) with a UTF-32 string delimiter.
+template <typename TIter>
+TUtf32String JoinRange(std::u32string_view delim, const TIter beg, const TIter end) {
+ return ::NPrivate::JoinRange<wchar32>(delim, beg, end);
+}
+
+// Joins [beg, end) with a single wchar32 as the delimiter.
+template <typename TIter>
+TUtf32String JoinRange(wchar32 delim, const TIter beg, const TIter end) {
+ TUtf32StringBuf delimBuf(&delim, 1);
+ return ::NPrivate::JoinRange<wchar32>(delimBuf, beg, end);
+}
+
+// JoinSeq overloads: join all elements of a container. Every overload ends up in the
+// string-view overload below, which calls JoinRange on begin(data) / end(data).
+
+// Core overload. begin/end are found via `using std::begin/end` plus an unqualified call.
+template <typename TCharType, typename TContainer>
+inline TBasicString<TCharType> JoinSeq(std::basic_string_view<TCharType> delim, const TContainer& data) {
+ using std::begin;
+ using std::end;
+ return JoinRange(delim, begin(data), end(data));
+}
+
+// Null-terminated string delimiter.
+template <typename TCharType, typename TContainer>
+inline TBasicString<TCharType> JoinSeq(const TCharType* delim, const TContainer& data) {
+ TBasicStringBuf<TCharType> delimBuf = delim;
+ return JoinSeq(delimBuf, data);
+}
+
+// TBasicString delimiter.
+template <typename TCharType, typename TContainer>
+inline TBasicString<TCharType> JoinSeq(const TBasicString<TCharType>& delim, const TContainer& data) {
+ TBasicStringBuf<TCharType> delimBuf = delim;
+ return JoinSeq(delimBuf, data);
+}
+
+// Single-character delimiter. Enabled only for char, char16_t and char32_t so that other
+// first-argument types do not select this overload.
+template <typename TCharType, typename TContainer>
+inline std::enable_if_t<
+ std::is_same_v<TCharType, char> ||
+ std::is_same_v<TCharType, char16_t> ||
+ std::is_same_v<TCharType, char32_t>,
+ TBasicString<TCharType>>
+JoinSeq(TCharType delim, const TContainer& data) {
+ TBasicStringBuf<TCharType> delimBuf(&delim, 1);
+ return JoinSeq(delimBuf, data);
+}
+
+/** \brief Functor for streaming the elements in [TIterB b, TIterE e), separated with delim.
+ *  Unlike JoinSeq, JoinRange and Join, no TString object is built - everything depends on operator<< for the
+ *  element type and on the IOutputStream implementation.
+ */
+template <class TIterB, class TIterE>
+struct TRangeJoiner {
+    // Writes the first element, then `delim` + element for every following one. An empty range writes nothing.
+    friend constexpr IOutputStream& operator<<(IOutputStream& stream, const TRangeJoiner<TIterB, TIterE>& rangeJoiner) {
+        if (!(rangeJoiner.b != rangeJoiner.e)) {
+            return stream;
+        }
+
+        stream << *rangeJoiner.b;
+
+        auto it = std::next(rangeJoiner.b);
+        while (it != rangeJoiner.e) {
+            stream << rangeJoiner.delim << *it;
+            ++it;
+        }
+        return stream;
+    }
+
+    constexpr TRangeJoiner(TStringBuf delim, TIterB&& b, TIterE&& e)
+        : delim(delim)
+        , b(std::forward<TIterB>(b))
+        , e(std::forward<TIterE>(e))
+    {
+    }
+
+private:
+    const TStringBuf delim;
+    const TIterB b;
+    const TIterE e;
+};
+
+// Factory helpers that build a TRangeJoiner to be streamed with operator<<.
+
+// Iterator-pair form. TIterB / TIterE are deduced from forwarding references and passed on to
+// TRangeJoiner unchanged.
+// NOTE(review): for lvalue arguments the deduced types are reference types, so the joiner would
+// then hold references rather than copies - confirm that it is only used as a temporary.
+template <class TIterB, class TIterE = TIterB>
+constexpr auto MakeRangeJoiner(TStringBuf delim, TIterB&& b, TIterE&& e) {
+ return TRangeJoiner<TIterB, TIterE>(delim, std::forward<TIterB>(b), std::forward<TIterE>(e));
+}
+
+// Container form: joins std::cbegin(data) .. std::cend(data).
+template <class TContainer>
+constexpr auto MakeRangeJoiner(TStringBuf delim, const TContainer& data) {
+ return MakeRangeJoiner(delim, std::cbegin(data), std::cend(data));
+}
+
+// Braced-list form, so that MakeRangeJoiner(delim, {a, b, c}) can deduce the element type.
+template <class TVal>
+constexpr auto MakeRangeJoiner(TStringBuf delim, const std::initializer_list<TVal>& data) {
+ return MakeRangeJoiner(delim, std::cbegin(data), std::cend(data));
+}
+
+/* We force (std::initializer_list<TStringBuf>) input type for (TString) and (const char*) types because:
+ * # When (std::initializer_list<TString>) is used, TString objects are copied into the initializer_list object.
+ * Storing TStringBufs instead is faster, even with COW-enabled strings.
+ * # For (const char*) we calculate length only once and store it in TStringBuf. Otherwise strlen scan would be executed
+ * in both GetAppendLength and AppendToString. For string literals constant lengths get propagated in compile-time.
+ *
+ * This way JoinSeq(",", { s1, s2 }) always does the right thing whatever types s1 and s2 have.
+ *
+ * If someone needs to join std::initializer_list<TString> -- it still works because of the TContainer template above.
+*/
+
+// Braced-list overload for element types other than TString and const char*
+// (those two are excluded here so that they go through the TStringBuf overload below).
+template <typename T>
+inline std::enable_if_t<
+ !std::is_same<std::decay_t<T>, TString>::value && !std::is_same<std::decay_t<T>, const char*>::value,
+ TString>
+JoinSeq(const TStringBuf delim, const std::initializer_list<T>& data) {
+ return JoinRange(delim, data.begin(), data.end());
+}
+
+// Braced-list overload taking string buffers.
+inline TString JoinSeq(const TStringBuf delim, const std::initializer_list<TStringBuf>& data) {
+ return JoinRange(delim, data.begin(), data.end());
+}
diff --git a/util/string/join_ut.cpp b/util/string/join_ut.cpp
new file mode 100644
index 0000000000..3ed2b2459c
--- /dev/null
+++ b/util/string/join_ut.cpp
@@ -0,0 +1,163 @@
+#include "join.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+#include <util/generic/vector.h>
+
+#include <util/stream/output.h>
+
+// Test-only type with a user-provided ToString(), used by the CustomToString test.
+struct TCustomData {
+ TVector<int> Ints;
+};
+
+// Renders the integers joined with a double underscore, e.g. {1, 2} -> "1__2".
+TString ToString(const TCustomData& d) {
+ return JoinSeq("__", d.Ints);
+}
+
+// Unit tests for Join, JoinSeq, JoinRange and MakeRangeJoiner.
+Y_UNIT_TEST_SUITE(JoinStringTest) {
+ // Join() with a mix of arithmetic and string arguments, char and string delimiters.
+ Y_UNIT_TEST(ScalarItems) {
+ UNIT_ASSERT_EQUAL(Join(',', 10, 11.1, "foobar"), "10,11.1,foobar");
+ UNIT_ASSERT_EQUAL(Join(", ", 10, 11.1, "foobar"), "10, 11.1, foobar");
+ UNIT_ASSERT_EQUAL(Join(", ", 10, 11.1, TString("foobar")), "10, 11.1, foobar");
+
+ UNIT_ASSERT_EQUAL(Join('#', 0, "a", "foobar", -1.4, TStringBuf("aaa")), "0#a#foobar#-1.4#aaa");
+ UNIT_ASSERT_EQUAL(Join("", "", ""), "");
+ UNIT_ASSERT_EQUAL(Join("", "a", "b", "c"), "abc");
+ UNIT_ASSERT_EQUAL(Join("", "a", "b", "", "c"), "abc");
+ UNIT_ASSERT_EQUAL(Join(" ", "a", "b", "", "c"), "a b c");
+ }
+
+ // Joining integers from vectors, pointer ranges, braced lists and C arrays.
+ Y_UNIT_TEST(IntContainerItems) {
+ int v[] = {1, 2, 3};
+ TVector<int> vv(v, v + 3);
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", vv), "1 2 3");
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", vv), JoinRange(" ", vv.begin(), vv.end()));
+ UNIT_ASSERT_EQUAL(JoinRange(" ", v, v + 2), "1 2");
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", {}), "");
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", {42}), "42");
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", {1, 2, 3}), "1 2 3");
+ UNIT_ASSERT_VALUES_EQUAL(JoinSeq(" ", v), "1 2 3");
+ }
+
+ // The same three strings joined through every supported element type and call form.
+ Y_UNIT_TEST(StrContainerItems) {
+ // try various overloads and template type arguments
+ static const char* const result = "1 22 333";
+ static const char* const v[] = {"1", "22", "333"};
+ TVector<const char*> vchar(v, v + sizeof(v) / sizeof(v[0]));
+ TVector<TStringBuf> vbuf(v, v + sizeof(v) / sizeof(v[0]));
+ TVector<TString> vstring(v, v + sizeof(v) / sizeof(v[0]));
+
+ // ranges
+ UNIT_ASSERT_EQUAL(JoinRange(" ", v, v + 3), result);
+ UNIT_ASSERT_EQUAL(JoinRange(" ", vchar.begin(), vchar.end()), result);
+ UNIT_ASSERT_EQUAL(JoinRange(" ", vbuf.begin(), vbuf.end()), result);
+ UNIT_ASSERT_EQUAL(JoinRange(" ", vstring.begin(), vstring.end()), result);
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", v, v + 3);
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", vchar.begin(), vchar.end());
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", vbuf.begin(), vbuf.end());
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", vstring.begin(), vstring.end());
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+
+ // vectors
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", vchar), result);
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", vbuf), result);
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", vstring), result);
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", vchar);
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", vbuf);
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", vstring);
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+
+ // initializer lists with type deduction
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", {v[0], v[1], v[2]}), result);
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", {vchar[0], vchar[1], vchar[2]}), result);
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", {vbuf[0], vbuf[1], vbuf[2]}), result);
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", {vstring[0], vstring[1], vstring[2]}), result);
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", {v[0], v[1], v[2]});
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", {vchar[0], vchar[1], vchar[2]});
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", {vbuf[0], vbuf[1], vbuf[2]});
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", {vstring[0], vstring[1], vstring[2]});
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+
+ // initializer lists with explicit types
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", std::initializer_list<const char*>{v[0], v[1], v[2]}), result);
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", std::initializer_list<const char*>{vchar[0], vchar[1], vchar[2]}), result);
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", std::initializer_list<TStringBuf>{vbuf[0], vbuf[1], vbuf[2]}), result);
+ UNIT_ASSERT_EQUAL(JoinSeq(" ", std::initializer_list<TString>{vstring[0], vstring[1], vstring[2]}), result);
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", std::initializer_list<const char*>{v[0], v[1], v[2]});
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", std::initializer_list<const char*>{vchar[0], vchar[1], vchar[2]});
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", std::initializer_list<TStringBuf>{vbuf[0], vbuf[1], vbuf[2]});
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+ {
+ TStringStream stream;
+ stream << MakeRangeJoiner(" ", std::initializer_list<TString>{vstring[0], vstring[1], vstring[2]});
+ UNIT_ASSERT_EQUAL(stream.Str(), result);
+ }
+
+ // c-style array
+ UNIT_ASSERT_VALUES_EQUAL(JoinSeq(" ", v), result);
+ }
+
+ // Join() picks up the user-provided ToString(const TCustomData&) defined in this file.
+ Y_UNIT_TEST(CustomToString) {
+ TCustomData d1{{1, 2, 3, 4, 5}};
+ TCustomData d2{{0, -1, -2}};
+ UNIT_ASSERT_EQUAL(Join(" ", d1, d2), "1__2__3__4__5 0__-1__-2");
+ }
+
+ Y_UNIT_TEST(JoinChars) {
+ // Note that a char delimiter is printed as a single-char string,
+ // but joined char values are printed as their numeric codes! O_o
+ UNIT_ASSERT_EQUAL(Join('a', 'a', 'a'), "97a97");
+ UNIT_ASSERT_EQUAL(Join("a", "a", "a"), "aaa");
+ }
+}
diff --git a/util/string/printf.cpp b/util/string/printf.cpp
new file mode 100644
index 0000000000..5b7c34d4e1
--- /dev/null
+++ b/util/string/printf.cpp
@@ -0,0 +1,38 @@
+#include "printf.h"
+
+#include <util/stream/printf.h>
+#include <util/stream/str.h>
+
+// Formats `c` with the arguments in `params` into `s` and returns the value reported by Printf().
+// NOTE(review): s.remove() presumably empties `s` first, so the output replaces the old contents
+// rather than being appended - confirm against TString::remove().
+int vsprintf(TString& s, const char* c, va_list params) {
+ TStringOutput so(s.remove());
+
+ return Printf(so, c, params);
+}
+
+// Variadic front end for vsprintf(TString&, ...): formats into `s` and returns the printed length.
+int sprintf(TString& s, const char* c, ...) {
+ va_list params;
+ va_start(params, c);
+ const int k = vsprintf(s, c, params);
+ va_end(params);
+ return k;
+}
+
+// Formats into a fresh TString and returns it; the printed length is discarded.
+TString Sprintf(const char* c, ...) {
+ TString s;
+ va_list params;
+ va_start(params, c);
+ vsprintf(s, c, params);
+ va_end(params);
+ return s;
+}
+
+// Formatted append: writes the formatted text after the current contents of `s`
+// (the string is not cleared first) and returns the value reported by Printf() as an int.
+int fcat(TString& s, const char* c, ...) {
+    TStringOutput out(s);
+
+    va_list params;
+    va_start(params, c);
+    const size_t written = Printf(out, c, params);
+    va_end(params);
+
+    return static_cast<int>(written);
+}
diff --git a/util/string/printf.h b/util/string/printf.h
new file mode 100644
index 0000000000..925c6edaff
--- /dev/null
+++ b/util/string/printf.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+#include <util/system/compiler.h>
+
+#include <cstdarg>
+
+/// formatted print. return printed length:
+int Y_PRINTF_FORMAT(2, 0) vsprintf(TString& s, const char* c, va_list params);
+/// formatted print. return printed length:
+int Y_PRINTF_FORMAT(2, 3) sprintf(TString& s, const char* c, ...);
+/// formatted print into a newly created string, which is returned:
+TString Y_PRINTF_FORMAT(1, 2) Sprintf(const char* c, ...);
+/// formatted append to the existing contents of s. return length of the appended part:
+int Y_PRINTF_FORMAT(2, 3) fcat(TString& s, const char* c, ...);
diff --git a/util/string/printf_ut.cpp b/util/string/printf_ut.cpp
new file mode 100644
index 0000000000..2b2f980b70
--- /dev/null
+++ b/util/string/printf_ut.cpp
@@ -0,0 +1,30 @@
+#include "printf.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+// Unit tests for the TString-based sprintf / Sprintf / fcat helpers.
+Y_UNIT_TEST_SUITE(TStringPrintf) {
+ // sprintf fills the string and returns the printed length.
+ Y_UNIT_TEST(TestSprintf) {
+ TString s;
+ int len = sprintf(s, "Hello %s", "world");
+ UNIT_ASSERT_EQUAL(s, TString("Hello world"));
+ UNIT_ASSERT_EQUAL(len, 11);
+ }
+
+ // fcat appends to the existing contents and returns the length of the appended part only.
+ Y_UNIT_TEST(TestFcat) {
+ TString s;
+ int len = sprintf(s, "Hello %s", "world");
+ UNIT_ASSERT_EQUAL(s, TString("Hello world"));
+ UNIT_ASSERT_EQUAL(len, 11);
+ len = fcat(s, " qwqw%s", "as");
+ UNIT_ASSERT_EQUAL(s, TString("Hello world qwqwas"));
+ UNIT_ASSERT_EQUAL(len, 7);
+ }
+
+ // The PRIu32 format macro works with a ui32 argument.
+ Y_UNIT_TEST(TestSpecial) {
+ UNIT_ASSERT_EQUAL("4294967295", Sprintf("%" PRIu32, (ui32)(-1)));
+ }
+
+ // Positional arguments (%1$s, %2$s) are supported and may be repeated.
+ Y_UNIT_TEST(TestExplicitPositions) {
+ UNIT_ASSERT_EQUAL("abc xyz abc", Sprintf("%1$s %2$s %1$s", "abc", "xyz"));
+ }
+}
diff --git a/util/string/reverse.cpp b/util/string/reverse.cpp
new file mode 100644
index 0000000000..167cd11f49
--- /dev/null
+++ b/util/string/reverse.cpp
@@ -0,0 +1,33 @@
+#include "reverse.h"
+
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/charset/wide_specific.h>
+
+#include <algorithm>
+
+// Reverses the chars of `string` in place, one char at a time.
+void ReverseInPlace(TString& string) {
+    auto* first = string.begin();
+    auto* last = first + string.size();
+    std::reverse(first, last);
+}
+
+// Reverses `string` in place symbol by symbol: each group of code units reported by
+// W16SymbolSize() is moved as a whole, so the code units inside a group keep their order.
+void ReverseInPlace(TUtf16String& string) {
+    auto* begin = string.begin();
+    const auto len = string.size();
+    auto* end = begin + len;
+
+    // Build the reversed sequence in a scratch buffer, filling it from the tail towards the head.
+    TVector<wchar16> buffer(len);
+    wchar16* out = buffer.data() + len;
+    wchar16* in = begin;
+    while (in < end) {
+        const size_t symbolSize = W16SymbolSize(in, end);
+        out -= symbolSize;
+        std::copy(in, in + symbolSize, out);
+        in += symbolSize;
+    }
+
+    // Copy the result back over the original characters.
+    std::copy(buffer.begin(), buffer.end(), begin);
+}
+
+// Reverses the elements of `string` in place, one element at a time.
+void ReverseInPlace(TUtf32String& string) {
+    auto* first = string.begin();
+    auto* last = first + string.size();
+    std::reverse(first, last);
+}
diff --git a/util/string/reverse.h b/util/string/reverse.h
new file mode 100644
index 0000000000..80f8b00887
--- /dev/null
+++ b/util/string/reverse.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+/// Reverses the order of the chars of the string in place.
+void ReverseInPlace(TString& string);
+
+/** NB. UTF-16 is variable-length encoding because of the surrogate pairs.
+ * This function takes this into account and treats a surrogate pair as a single symbol.
+ * Ex. if [C D] is a surrogate pair,
+ * A B [C D] E
+ * will become
+ * E [C D] B A
+ */
+void ReverseInPlace(TUtf16String& string);
+
+/// Reverses the order of the elements of the string in place.
+void ReverseInPlace(TUtf32String& string);
diff --git a/util/string/split.cpp b/util/string/split.cpp
new file mode 100644
index 0000000000..7d26857cc7
--- /dev/null
+++ b/util/string/split.cpp
@@ -0,0 +1,24 @@
+#include "split.h"
+
+// Splits `ptr` on runs of characters from `delim` and stores the non-empty tokens in `values`
+// (previous contents are discarded). Returns the number of tokens; a null `ptr` yields none.
+template <class TValue>
+inline size_t Split(const char* ptr, const char* delim, TVector<TValue>& values) {
+    values.clear();
+    if (!ptr) {
+        return values.size();
+    }
+
+    for (;;) {
+        // Skip any delimiters in front of the next token.
+        ptr += strspn(ptr, delim);
+        if (!*ptr) {
+            break;
+        }
+
+        const size_t tokenLen = strcspn(ptr, delim);
+        assert(tokenLen);
+        values.push_back(TValue(ptr, tokenLen));
+        ptr += tokenLen;
+    }
+    return values.size();
+}
+
+// Non-template entry point: instantiates the helper above for TString tokens.
+size_t Split(const char* ptr, const char* delim, TVector<TString>& values) {
+ return Split<TString>(ptr, delim, values);
+}
+
+// TString convenience overload; forwards the underlying character data of both arguments.
+size_t Split(const TString& in, const TString& delim, TVector<TString>& res) {
+ return Split(in.data(), delim.data(), res);
+}
diff --git a/util/string/split.h b/util/string/split.h
new file mode 100644
index 0000000000..80f8c787dc
--- /dev/null
+++ b/util/string/split.h
@@ -0,0 +1,1085 @@
+#pragma once
+
+#include "strspn.h"
+#include "cast.h"
+
+#include <util/generic/algorithm.h>
+#include <util/generic/fwd.h>
+#include <util/generic/iterator.h>
+#include <util/generic/iterator_range.h>
+#include <util/generic/store_policy.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/generic/typetraits.h>
+#include <util/generic/vector.h>
+#include <util/generic/ylimits.h>
+#include <util/system/compat.h>
+#include <util/system/defaults.h>
+
+#include <utility>
+#include <stlfwd>
+
+// NOTE: Check StringSplitter below to get more convenient split string interface.
+
+namespace NStringSplitPrivate {
+
+    // Detects whether T has a Consume() member callable with three arguments of type I.
+    // Primary template: no such member.
+    template <class T, class I, class = void>
+    struct TIsConsumer: std::false_type {};
+
+    // Chosen when the expression `t.Consume(i, i, i)` is well-formed.
+    template <class T, class I>
+    struct TIsConsumer<
+        T, I,
+        TVoidT<decltype(std::declval<T>().Consume(
+            std::declval<I>(), std::declval<I>(), std::declval<I>()))>>
+        : std::true_type {};
+
+    template <class T, class I>
+    constexpr bool TIsConsumerV = TIsConsumer<T, I>::value;
+
+    // Returns a pointer to the first element of the zero-terminated sequence `str` that equals
+    // `ch`, or nullptr if there is none. The terminating zero itself is never matched.
+    template <class T>
+    T* Find(T* str, std::common_type_t<T> ch) {
+        T* cur = str;
+        while (*cur) {
+            if (*cur == ch) {
+                return cur;
+            }
+            ++cur;
+        }
+
+        return nullptr;
+    }
+
+}
+
+// Splits the range [b, e) using delimiter object `d` and feeds every token to consumer `c`.
+// Each iteration remembers the token start in `l`, lets d.Find(b, e) return the position of the
+// next delimiter (`i`) while advancing `b`, and calls c.Consume(token begin, token end, new b).
+// The loop stops when the consumer returns false or when Find leaves `b` equal to its return
+// value - which the delimiter classes in this file do once no further delimiter is found.
+// Only participates in overload resolution when TConsumer has a matching Consume() member.
+template <class I, class TDelim, class TConsumer>
+std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>>
+SplitString(I b, I e, const TDelim& d, TConsumer&& c) {
+ I l, i;
+
+ do {
+ l = b;
+ i = d.Find(b, e);
+ } while (c.Consume(l, i, b) && (b != i));
+}
+
+// Same as above for input without an explicit end: uses the single-argument d.Find(b).
+template <class I, class TDelim, class TConsumer>
+std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>>
+SplitString(I b, const TDelim& d, TConsumer&& c) {
+ I l, i;
+
+ do {
+ l = b;
+ i = d.Find(b);
+ } while (c.Consume(l, i, b) && (b != i));
+}
+
+// Returns a pointer to the first occurrence of `f` in the zero-terminated string `str`,
+// or a pointer to the terminator of `str` when `f` does not occur (never nullptr).
+template <class I1, class I2>
+static inline I1* FastStrChr(I1* str, I2 f) noexcept {
+    if (I1* const found = NStringSplitPrivate::Find(str, f)) {
+        return found;
+    }
+
+    return str + std::char_traits<I1>::length(str);
+}
+
+// Returns a pointer to the first occurrence of the `l`-element sequence `f` inside the
+// zero-terminated string `str`, or a pointer to the terminator of `str` if there is none.
+template <class I>
+static inline I* FastStrStr(I* str, I* f, size_t l) noexcept {
+    const std::basic_string_view<I> haystack(str);
+    I* const strEnd = str + haystack.size();
+
+    // Cheap pre-check: locate the first element of the pattern.
+    const auto first = haystack.find(*f);
+    if (first == std::string::npos) {
+        return strEnd;
+    }
+
+    // Full comparison, starting from where the first element was seen.
+    const std::basic_string_view<I> needle(f, l);
+    const auto pos = haystack.find(needle, first);
+    return pos != std::string::npos ? str + pos : strEnd;
+}
+
+// Delimiter object for SplitString(): splits on occurrences of a whole string.
+template <class Char>
+struct TStringDelimiter {
+    // Takes a zero-terminated delimiter; its length is measured here.
+    inline TStringDelimiter(Char* delim) noexcept
+        : Delim(delim)
+        , Len(std::char_traits<Char>::length(delim))
+    {
+    }
+
+    // Takes a delimiter of explicit length `len`.
+    inline TStringDelimiter(Char* delim, size_t len) noexcept
+        : Delim(delim)
+        , Len(len)
+    {
+    }
+
+    // Searches [b, e) for the delimiter. On a hit returns its position and moves `b` just past it;
+    // otherwise sets `b` to `e` and returns `e`.
+    inline Char* Find(Char*& b, Char* e) const noexcept {
+        const std::basic_string_view<Char> rest(b, e - b);
+        const auto offset = rest.find(Delim, 0, Len);
+
+        if (offset == std::string::npos) {
+            b = e;
+            return e;
+        }
+
+        Char* const hit = b + offset;
+        b = hit + Len;
+        return hit;
+    }
+
+    // Same for a zero-terminated input: the end of the string plays the role of `e`.
+    inline Char* Find(Char*& b) const noexcept {
+        Char* const hit = FastStrStr(b, Delim, Len);
+
+        if (*hit) {
+            b = hit + Len;
+        } else {
+            b = hit;
+        }
+
+        return hit;
+    }
+
+    Char* Delim;
+    const size_t Len;
+};
+
+// Delimiter object for SplitString(): splits on a single character.
+template <class Char>
+struct TCharDelimiter {
+    inline TCharDelimiter(Char ch) noexcept
+        : Ch(ch)
+    {
+    }
+
+    // Searches [b, e) for the character. On a hit returns its position and moves `b` to the
+    // element after it; otherwise sets `b` to `e` and returns `e`.
+    inline Char* Find(Char*& b, Char* e) const noexcept {
+        const std::basic_string_view<Char> rest(b, e - b);
+        const auto offset = rest.find(Ch);
+
+        if (offset == std::string::npos) {
+            b = e;
+            return e;
+        }
+
+        Char* const hit = b + offset;
+        b = hit + 1;
+        return hit;
+    }
+
+    // Same for a zero-terminated input: the end of the string plays the role of `e`.
+    inline Char* Find(Char*& b) const noexcept {
+        Char* const hit = FastStrChr(b, Ch);
+
+        b = *hit ? hit + 1 : hit;
+
+        return hit;
+    }
+
+    Char Ch;
+};
+
+// Delimiter object for SplitString(): an element is a delimiter when the predicate returns true for it.
+template <class Iterator, class Condition>
+struct TFuncDelimiter {
+public:
+    // Constructs the stored predicate from the given arguments.
+    template <class... Args>
+    TFuncDelimiter(Args&&... args)
+        : Fn(std::forward<Args>(args)...)
+    {
+    }
+
+    // Moves `b` to the first element of [b, e) accepted by the predicate. If there is one, returns
+    // its position and leaves `b` one past it; otherwise both `b` and the result equal `e`.
+    inline Iterator Find(Iterator& b, Iterator e) const noexcept {
+        b = std::find_if(b, e, Fn);
+        if (b != e) {
+            Iterator hit = b;
+            ++b;
+            return hit;
+        }
+
+        return b;
+    }
+
+private:
+    Condition Fn;
+};
+
+// Finds the first element that belongs to a zero-terminated set of characters.
+template <class Char>
+struct TFindFirstOf {
+    inline TFindFirstOf(Char* set)
+        : Set(set)
+    {
+    }
+
+    // Returns the first position in [b, e) whose element occurs in Set, or `e` if there is none.
+    inline Char* FindFirstOf(Char* b, Char* e) const noexcept {
+        Char* cur = b;
+        while (cur != e && !NStringSplitPrivate::Find(Set, *cur)) {
+            ++cur;
+        }
+        return cur;
+    }
+
+    // Zero-terminated variant: returns the first matching position, or the terminator of `b`.
+    inline Char* FindFirstOf(Char* b) const noexcept {
+        const std::basic_string_view<Char> text(b);
+        const auto pos = text.find_first_of(Set);
+        if (pos == std::string::npos) {
+            return b + text.size();
+        }
+        return b + pos;
+    }
+
+    Char* Set;
+};
+
+// Specialization for narrow characters: instead of the generic search it inherits everything
+// from TCompactStrSpn and only forwards its constructor arguments.
+template <>
+struct TFindFirstOf<const char>: public TCompactStrSpn {
+ // Character set given as the range [set, e).
+ inline TFindFirstOf(const char* set, const char* e)
+ : TCompactStrSpn(set, e)
+ {
+ }
+
+ // Character set given by a single pointer.
+ inline TFindFirstOf(const char* set)
+ : TCompactStrSpn(set)
+ {
+ }
+};
+
+// Delimiter object for SplitString(): any character from a set acts as a delimiter.
+// The search itself is inherited from TFindFirstOf<const Char>, including its constructors.
+template <class Char>
+struct TSetDelimiter: private TFindFirstOf<const Char> {
+    using TFindFirstOf<const Char>::TFindFirstOf;
+
+    // Searches [b, e). On a hit returns the delimiter position and moves `b` to the element after
+    // it; otherwise sets `b` to `e` and returns `e`.
+    inline Char* Find(Char*& b, Char* e) const noexcept {
+        Char* const hit = const_cast<Char*>(this->FindFirstOf(b, e));
+
+        if (hit == e) {
+            b = e;
+            return e;
+        }
+
+        b = hit + 1;
+        return hit;
+    }
+
+    // Zero-terminated variant: when nothing is found, `b` and the result point at the terminator.
+    inline Char* Find(Char*& b) const noexcept {
+        Char* const hit = const_cast<Char*>(this->FindFirstOf(b));
+
+        b = *hit ? hit + 1 : hit;
+        return hit;
+    }
+};
+
+// Trait used below to detect a push_back member.
+// NOTE(review): the TClassHasPushBack name is presumably generated by Y_HAS_MEMBER - confirm in
+// util/generic/typetraits.h.
+namespace NSplitTargetHasPushBack {
+ Y_HAS_MEMBER(push_back, PushBack);
+}
+
+// Inserts a value at the end of container T; the way to do so is picked at compile time.
+template <class T, class = void>
+struct TConsumerBackInserter;
+
+// Containers with push_back(): append with push_back().
+template <class T>
+struct TConsumerBackInserter<T, std::enable_if_t<NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> {
+ static void DoInsert(T* C, const typename T::value_type& i) {
+ C->push_back(i);
+ }
+};
+
+// Containers without push_back(): insert with end() as the position hint.
+template <class T>
+struct TConsumerBackInserter<T, std::enable_if_t<!NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> {
+ static void DoInsert(T* C, const typename T::value_type& i) {
+ C->insert(C->end(), i);
+ }
+};
+
+// SplitString() consumer that stores every token in a container.
+// The container is not owned; only the pointer is kept.
+template <class T>
+struct TContainerConsumer {
+ inline TContainerConsumer(T* c) noexcept
+ : C(c)
+ {
+ }
+
+ // Builds a T::value_type from the token [b, d) and inserts it. Always asks for more tokens.
+ template <class I>
+ inline bool Consume(I* b, I* d, I* /*e*/) {
+ TConsumerBackInserter<T>::DoInsert(C, typename T::value_type(b, d));
+
+ return true;
+ }
+
+ T* C;
+};
+
+// SplitString() consumer that converts every token with FromString<T::value_type>() and stores
+// the result in a container. The container is not owned; only the pointer is kept.
+template <class T>
+struct TContainerConvertingConsumer {
+ inline TContainerConvertingConsumer(T* c) noexcept
+ : C(c)
+ {
+ }
+
+ // Parses the token [b, d) and inserts the value. Always asks for more tokens.
+ template <class I>
+ inline bool Consume(I* b, I* d, I* /*e*/) {
+ TConsumerBackInserter<T>::DoInsert(C, FromString<typename T::value_type>(TStringBuf(b, d)));
+
+ return true;
+ }
+
+ T* C;
+};
+
+// SplitString() consumer adaptor that forwards only a limited number of tokens to `slave`.
+// With cnt == 0 the budget is Max<size_t>(); otherwise cnt - 1 tokens are forwarded, after which
+// the split is stopped and `Last` points at the start of the unconsumed remainder.
+template <class S, class I>
+struct TLimitingConsumer {
+    inline TLimitingConsumer(size_t cnt, S* slave) noexcept
+        : Cnt(cnt ? cnt - 1 : Max<size_t>())
+        , Slave(slave)
+        , Last(nullptr)
+    {
+    }
+
+    inline bool Consume(I* b, I* d, I* e) {
+        if (Cnt) {
+            --Cnt;
+            return Slave->Consume(b, d, e);
+        }
+
+        // Budget exhausted: remember where the rest of the input starts and stop the split.
+        Last = b;
+        return false;
+    }
+
+    size_t Cnt;
+    S* Slave;
+    I* Last;
+};
+
+// SplitString() consumer adaptor that drops empty tokens and forwards all others to `slave`.
+template <class S>
+struct TSkipEmptyTokens {
+    inline TSkipEmptyTokens(S* slave) noexcept
+        : Slave(slave)
+    {
+    }
+
+    template <class I>
+    inline bool Consume(I* b, I* d, I* e) {
+        // An empty token is skipped silently; the split goes on.
+        if (b == d) {
+            return true;
+        }
+
+        return Slave->Consume(b, d, e);
+    }
+
+    S* Slave;
+};
+
+// SplitString() consumer adaptor that reports delimiters as tokens of their own: for every
+// token [b, d) followed by the delimiter [d, e) the slave is called once for each part.
+template <class S>
+struct TKeepDelimiters {
+    inline TKeepDelimiters(S* slave) noexcept
+        : Slave(slave)
+    {
+    }
+
+    template <class I>
+    inline bool Consume(I* b, I* d, I* e) {
+        // The token itself comes first; stop right away if the slave asks to.
+        if (!Slave->Consume(b, d, d)) {
+            return false;
+        }
+
+        // No delimiter after the token, so there is nothing more to report.
+        if (d == e) {
+            return true;
+        }
+
+        return Slave->Consume(d, e, e);
+    }
+
+    S* Slave;
+};
+
+// SplitString() consumer for in-place splitting of a mutable char buffer.
+// It terminates each token by overwriting the element at its end with 0 and stores a pointer
+// to the token start in the container, so the input buffer is modified.
+template <class T>
+struct TSimplePusher {
+ inline bool Consume(char* b, char* d, char*) {
+ *d = 0; // cut the buffer at the end of the token
+ C->push_back(b);
+
+ return true;
+ }
+
+ T* C;
+};
+
+// Splits the zero-terminated mutable buffer `buf` on `ch` in place and stores pointers to the
+// tokens in `*res` (which is emptied first). The buffer is modified: TSimplePusher writes a 0
+// at the end of every token. An empty buffer produces an empty result.
+template <class T>
+static inline void Split(char* buf, char ch, T* res) {
+ res->resize(0);
+ if (*buf == 0)
+ return;
+
+ TCharDelimiter<char> delim(ch);
+ TSimplePusher<T> pusher = {res};
+
+ SplitString(buf, delim, pusher);
+}
+
+/// Split string into res vector. Res vector is cleared before split.
+/// Old good slow split function.
+/// Field delimiter is any number of symbols specified in delim (no empty strings in res vector)
+/// @return number of elements created
+size_t Split(const char* in, const char* delim, TVector<TString>& res);
+size_t Split(const TString& in, const TString& delim, TVector<TString>& res);
+
+/// Old split reimplemented for TStringBuf using the new code
+/// Note that delim can be constructed from char* automatically (it is not cheap though)
+/// Res is cleared first; empty tokens are skipped. The stored TStringBufs are built from
+/// pointers into s, so they refer to the memory of s.
+/// @return number of elements created
+inline size_t Split(const TStringBuf s, const TSetDelimiter<const char>& delim, TVector<TStringBuf>& res) {
+ res.clear();
+ TContainerConsumer<TVector<TStringBuf>> res1(&res);
+ TSkipEmptyTokens<TContainerConsumer<TVector<TStringBuf>>> consumer(&res1);
+ SplitString(s.data(), s.data() + s.size(), delim, consumer);
+ return res.size();
+}
+
+// Takes the next token off the front of `s` (via NextTok) and parses it into `param`.
+// A missing token is reported through Y_ENSURE with the message below.
+template <class P, class D>
+void GetNext(TStringBuf& s, D delim, P& param) {
+ TStringBuf next = s.NextTok(delim);
+ Y_ENSURE(next.IsInited(), TStringBuf("Split: number of fields less than number of Split output arguments"));
+ param = FromString<P>(next);
+}
+
+// Optional-field variant: takes the next token off the front of `s` and parses it into `param`;
+// when there is no token left, `param` is cleared instead of reporting an error.
+template <class P, class D>
+void GetNext(TStringBuf& s, D delim, TMaybe<P>& param) {
+    const TStringBuf next = s.NextTok(delim);
+    if (!next.IsInited()) {
+        param.Clear();
+        return;
+    }
+    param = FromString<P>(next);
+}
+
+// Splits `s` on `delim` into a fixed list of typed output arguments.
+// example:
+// Split(TStringBuf("Sherlock,2014,36.6"), ',', name, year, temperature);
+
+// Base case with two outputs: after both fields are read, anything left over in `s` is reported
+// through Y_ENSURE with the message below.
+template <class D, class P1, class P2>
+void Split(TStringBuf s, D delim, P1& p1, P2& p2) {
+ GetNext(s, delim, p1);
+ GetNext(s, delim, p2);
+ Y_ENSURE(!s.IsInited(), TStringBuf("Split: number of fields more than number of Split output arguments"));
+}
+
+// Recursive case: reads the first field, then handles the remaining outputs.
+template <class D, class P1, class P2, class... Other>
+void Split(TStringBuf s, D delim, P1& p1, P2& p2, Other&... other) {
+ GetNext(s, delim, p1);
+ Split(s, delim, p2, other...);
+}
+
+/**
+ * \fn auto StringSplitter(...)
+ *
+ * Creates a string splitter object. The only use for it is to call one of its
+ * `Split*` methods, and then do something with the resulting proxy range.
+ *
+ * Some examples:
+ * \code
+ * TVector<TStringBuf> values = StringSplitter("1\t2\t3").Split('\t');
+ *
+ * for(TStringBuf part: StringSplitter("1::2::::3").SplitByString("::").SkipEmpty()) {
+ * Cerr << part;
+ * }
+ *
+ * TVector<TString> firstTwoValues = StringSplitter("1\t2\t3").Split('\t').Take(2);
+ * \endcode
+ *
+ * Use `Collect` or `AddTo` to store split results into an existing container:
+ * \code
+ * TVector<TStringBuf> values = {"0"};
+ * StringSplitter("1\t2\t3").Split('\t').AddTo(&values);
+ * \endcode
+ * Note that `Collect` clears target container, while `AddTo` just inserts values.
+ * You can use these methods with any container that has `emplace` / `emplace_back`.
+ *
+ * Use `ParseInto` to also perform string conversions before inserting values
+ * into target container:
+ * \code
+ * TSet<int> values;
+ * StringSplitter("1\t2\t3").Split('\t').ParseInto(&values);
+ * \endcode
+ */
+
+namespace NStringSplitPrivate {
+ // Member-detection traits for push_back, insert and data.
+ // NOTE(review): presumably each line generates a THas<Name> trait (THasData is used further
+ // down in this namespace) - confirm against the Y_HAS_MEMBER definition.
+ Y_HAS_MEMBER(push_back, PushBack);
+ Y_HAS_MEMBER(insert, Insert);
+ Y_HAS_MEMBER(data, Data);
+
+ /**
+ * This one is needed here so that `std::string_view -> std::string_view`
+ * conversion works.
+ *
+ * General case: converts `src` with `::FromString<Dst>` and stores the result in `*dst`.
+ */
+ template <class Src, class Dst>
+ inline void DoFromString(const Src& src, Dst* dst) {
+ *dst = ::FromString<Dst>(src);
+ }
+
+ // Source and destination have the same type: plain assignment, no parsing.
+ template <class T>
+ inline void DoFromString(const T& src, T* dst) noexcept {
+ *dst = src;
+ }
+
+ // Destination is std::ignore: the value is assigned to it, i.e. discarded.
+ template <class T>
+ inline void DoFromString(const T& src, decltype(std::ignore)* dst) noexcept {
+ *dst = src;
+ }
+
+ // Non-throwing counterparts of DoFromString: the return value tells whether conversion succeeded.
+
+ // General case: delegates to `::TryFromString` and returns its result.
+ template <class Src, class Dst>
+ inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const Src& src, Dst* dst) noexcept {
+ return ::TryFromString(src, *dst);
+ }
+
+ // Source and destination have the same type: plain assignment, always succeeds.
+ template <class T>
+ inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, T* dst) noexcept {
+ *dst = src;
+ return true;
+ }
+
+ // Destination is std::ignore: the value is discarded, always succeeds.
+ template <class T>
+ inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, decltype(std::ignore)* dst) noexcept {
+ *dst = src;
+ return true;
+ }
+
+    /**
+     * Consumer that places provided elements into a container. Not using
+     * `emplace(iterator)` for efficiency.
+     *
+     * Each token is converted with `value_type(token)` and added through
+     * `emplace_back` when the container has it, otherwise through `emplace`.
+     */
+    template <class Container>
+    struct TContainerConsumer {
+        using value_type = typename Container::value_type;
+
+        TContainerConsumer(Container* c)
+            : C_(c)
+        {
+        }
+
+        // TODO: return bool (continue)
+        template <class StringBuf>
+        void operator()(StringBuf e) const {
+            // Dispatch to whichever private overload is valid for the container.
+            (*this)(C_, e);
+        }
+
+    private:
+        // Selected when `emplace_back()` is a valid expression for the container.
+        template <class TTarget, class TToken>
+        auto operator()(TTarget* target, TToken token) const -> decltype(target->emplace_back()) {
+            return target->emplace_back(value_type(token));
+        }
+
+        // Selected when `emplace()` is a valid expression for the container.
+        template <class TTarget, class TToken>
+        auto operator()(TTarget* target, TToken token) const -> decltype(target->emplace()) {
+            return target->emplace(value_type(token));
+        }
+
+        Container* C_;
+    };
+
+    /**
+     * Consumer that converts provided elements via `FromString` and places them
+     * into a container.
+     *
+     * The converted value is added through `emplace_back` when the container
+     * has it, otherwise through `emplace`.
+     */
+    template <class Container>
+    struct TContainerConvertingConsumer {
+        using value_type = typename Container::value_type;
+
+        TContainerConvertingConsumer(Container* c)
+            : C_(c)
+        {
+        }
+
+        template <class StringBuf>
+        void operator()(StringBuf e) const {
+            // Dispatch to whichever private overload is valid for the container.
+            (*this)(C_, e);
+        }
+
+    private:
+        // Default-constructs a value and fills it from the token via `DoFromString`.
+        template <class TToken>
+        static value_type Convert(TToken token) {
+            value_type parsed;
+            DoFromString(token, &parsed);
+            return parsed;
+        }
+
+        // Selected when `emplace_back()` is a valid expression for the container.
+        template <class TTarget, class TToken>
+        auto operator()(TTarget* target, TToken token) const -> decltype(target->emplace_back()) {
+            return target->emplace_back(Convert(token));
+        }
+
+        // Selected when `emplace()` is a valid expression for the container.
+        template <class TTarget, class TToken>
+        auto operator()(TTarget* target, TToken token) const -> decltype(target->emplace()) {
+            return target->emplace(Convert(token));
+        }
+
+        Container* C_;
+    };
+
+    /**
+     * Primary template: strings reported by `THasData` map to
+     * `TBasicStringBuf`, everything else to a range over its const iterators.
+     */
+    template <class String>
+    struct TStringBufOfImpl {
+        using type = typename std::conditional<
+            THasData<String>::value,
+            TBasicStringBuf<typename String::value_type>,
+            TIteratorRange<typename String::const_iterator>>::type;
+    };
+
+    // `std::basic_string` is viewed through the matching `std::basic_string_view`.
+    template <class TCharType, class TCharTraits, class TAlloc>
+    struct TStringBufOfImpl<std::basic_string<TCharType, TCharTraits, TAlloc>> {
+        using type = std::basic_string_view<TCharType, TCharTraits>;
+    };
+
+    // `std::basic_string_view` is its own buffer type.
+    template <class TCharType, class TCharTraits>
+    struct TStringBufOfImpl<std::basic_string_view<TCharType, TCharTraits>> {
+        using type = std::basic_string_view<TCharType, TCharTraits>;
+    };
+
+    /**
+     * Metafunction that returns a string buffer for the given type. This is to
+     * make sure that splitting `std::string` returns `std::string_view`.
+     */
+    template <class String>
+    using TStringBufOf = typename TStringBufOfImpl<String>::type;
+
+    // Generic case: the buffer type is constructible from an iterator pair.
+    template <class StringBuf, class Iterator>
+    StringBuf DoMakeStringBuf(Iterator b, Iterator e, StringBuf*) {
+        return StringBuf(b, e);
+    }
+
+    // `std::basic_string_view` is built from a start position and a length.
+    template <class Char, class Traits, class Iterator>
+    std::basic_string_view<Char, Traits> DoMakeStringBuf(Iterator b, Iterator e, std::basic_string_view<Char, Traits>*) {
+        using TView = std::basic_string_view<Char, Traits>;
+        return TView(b, e - b);
+    }
+
+    /**
+     * Builds a `StringBuf` covering `[b, e)`.
+     *
+     * The null pointer passed down is only a type tag that selects the
+     * matching `DoMakeStringBuf` overload; it is never dereferenced.
+     */
+    template <class StringBuf, class Iterator>
+    StringBuf MakeStringBuf(Iterator b, Iterator e) {
+        StringBuf* const typeTag = nullptr;
+        return DoMakeStringBuf(b, e, typeTag);
+    }
+
+    /**
+     * Iterator type used to walk a string: a raw pointer to const characters
+     * for strings reported by `THasData`, the string's `const_iterator` otherwise.
+     */
+    template <class String>
+    struct TIteratorOfImpl {
+        using type = typename std::conditional<
+            THasData<String>::value,
+            const typename String::value_type*,
+            typename String::const_iterator>::type;
+    };
+
+    // Shorthand for `TIteratorOfImpl<String>::type`.
+    template <class String>
+    using TIteratorOf = typename TIteratorOfImpl<String>::type;
+
+    /**
+     * State of one split iteration over a string.
+     *
+     * `B` and `E` are initialised to the beginning and end of the string.
+     * `TokS` / `TokD` delimit the current token, and `[TokD, B)` is reported as
+     * the delimiter that followed it. The code that advances the state lives
+     * in the split ranges, not here.
+     */
+    template <class String>
+    struct TIterState {
+        using TStringBufType = TStringBufOf<String>;
+        using TIterator = TIteratorOf<String>;
+
+        TIterState(const String& string) noexcept
+            : TokS()
+            , TokD()
+        {
+            // Strings exposing `data()` are walked with raw pointers, the rest
+            // with their own iterators.
+            if constexpr (THasData<String>::value) {
+                B = string.data();
+                E = string.data() + string.size();
+            } else {
+                B = string.begin();
+                E = string.end();
+            }
+        }
+
+        // The state converts implicitly to its current token.
+        operator TStringBufType() const noexcept {
+            return Token();
+        }
+
+        // Compares the current token with anything convertible to the buffer type.
+        template <
+            typename Other,
+            class = typename std::enable_if<
+                std::is_convertible<Other, TStringBufType>::value,
+                void>::type>
+        bool operator==(const Other& toCompare) const {
+            const TStringBufType lhs = Token();
+            const TStringBufType rhs(toCompare);
+            return lhs == rhs;
+        }
+
+        // True when the current token is non-empty.
+        explicit operator bool() const {
+            return !Empty();
+        }
+
+        TIterator TokenStart() const noexcept {
+            return TokS;
+        }
+
+        TIterator TokenDelim() const noexcept {
+            return TokD;
+        }
+
+        TIterator TokenEnd() const noexcept {
+            return B;
+        }
+
+        Y_PURE_FUNCTION bool Empty() const noexcept {
+            return TokS == TokD;
+        }
+
+        // Current token: `[TokS, TokD)`.
+        TStringBufType Token() const noexcept {
+            return MakeStringBuf<TStringBufType>(TokS, TokD);
+        }
+
+        // Delimiter after the current token: `[TokD, B)`.
+        TStringBufType Delim() const noexcept {
+            return MakeStringBuf<TStringBufType>(TokD, B);
+        }
+
+        TIterator B;
+        TIterator E;
+
+        TIterator TokS;
+        TIterator TokD;
+    };
+
+    /**
+     * Token range built on top of `Base::Next()`, extended with helpers that
+     * consume, collect or parse the tokens.
+     */
+    template <class Base>
+    class TSplitRange: public Base, public TInputRangeAdaptor<TSplitRange<Base>> {
+        using TStringBufType = decltype(std::declval<Base>().Next()->Token());
+
+    public:
+        template <typename... Args>
+        inline TSplitRange(Args&&... args)
+            : Base(std::forward<Args>(args)...)
+        {
+        }
+
+        /**
+         * Calls `f` with every token. Selected when `f` returns `void`.
+         */
+        template <class Consumer, std::enable_if_t<std::is_same<decltype(std::declval<Consumer>()(std::declval<TStringBufType>())), void>::value, int>* = nullptr>
+        inline void Consume(Consumer&& f) {
+            for (auto&& it : *this) {
+                f(it.Token());
+            }
+        }
+
+        /**
+         * Calls `f` with every token until it returns `false`. Selected when
+         * `f` returns `bool`.
+         *
+         * \returns `false` if `f` stopped the iteration, `true` otherwise.
+         */
+        template <class Consumer, std::enable_if_t<std::is_same<decltype(std::declval<Consumer>()(std::declval<TStringBufType>())), bool>::value, int>* = nullptr>
+        inline bool Consume(Consumer&& f) {
+            for (auto&& it : *this) {
+                if (!f(it.Token())) {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        /**
+         * Implicit conversion to a container: default-constructs it and fills
+         * it via `AddTo`. Enabled for types accepted by `THasInsert` or
+         * `THasPushBack`.
+         */
+        template <class Container, class = std::enable_if_t<THasInsert<Container>::value || THasPushBack<Container>::value>>
+        operator Container() {
+            Container result;
+            AddTo(&result);
+            return result;
+        }
+
+        /**
+         * Returns all tokens as a `TVector<S>`, each element built as `S(token)`.
+         */
+        template <class S>
+        inline TVector<S> ToList() {
+            TVector<S> result;
+            for (auto&& it : *this) {
+                result.push_back(S(it.Token()));
+            }
+            return result;
+        }
+
+        /**
+         * Clears `*c` and then stores all tokens into it.
+         */
+        template <class Container>
+        inline void Collect(Container* c) {
+            Y_ASSERT(c);
+            c->clear();
+            AddTo(c);
+        }
+
+        /**
+         * Stores all tokens into `*c` without clearing it first.
+         */
+        template <class Container>
+        inline void AddTo(Container* c) {
+            Y_ASSERT(c);
+            TContainerConsumer<Container> consumer(c);
+            Consume(consumer);
+        }
+
+        /**
+         * Converts every token with `DoFromString` and stores the results
+         * into `*c`.
+         */
+        template <class Container>
+        inline void ParseInto(Container* c) {
+            Y_ASSERT(c);
+            TContainerConvertingConsumer<Container> consumer(c);
+            Consume(consumer);
+        }
+
+        // TODO: this is actually TryParseInto
+        /**
+         * Same as `CollectInto`, just doesn't throw.
+         *
+         * Conversion stops at the first token that fails to convert: outputs
+         * before it have already been written, outputs after it are left
+         * untouched.
+         *
+         * \param[out] args Output arguments.
+         * \returns Whether parsing was successful, i.e. every argument was
+         *          filled and no tokens were left over.
+         */
+        template <typename... Args>
+        inline bool TryCollectInto(Args*... args) noexcept {
+            size_t successfullyFilled = 0;
+            bool failed = false;
+            auto it = this->begin();
+
+            // ApplyToMany itself cannot be interrupted, so after the first
+            // failure the remaining invocations are turned into no-ops.
+            ApplyToMany([&](auto&& arg) {
+                if (failed || it == this->end()) {
+                    return;
+                }
+                if (TryDoFromString(it->Token(), arg)) {
+                    ++successfullyFilled;
+                } else {
+                    failed = true;
+                }
+                ++it;
+            }, args...);
+
+            // A failure or a shortage of tokens leaves the counter below
+            // sizeof...(args); leftover tokens leave `it` short of the end.
+            return successfullyFilled == sizeof...(args) && it == this->end();
+        }
+
+        // TODO: this is actually ParseInto
+        /**
+         * Splits and parses everything that's in this splitter into `args`.
+         *
+         * Example usage:
+         * \code
+         * int l, r;
+         * StringSplitter("100*200").Split('*').CollectInto(&l, &r);
+         * \endcode
+         *
+         * \param[out] args Output arguments.
+         * \throws If not all items were parsed, or
+         *         if there were too many items in the split.
+         */
+        template <typename... Args>
+        inline void CollectInto(Args*... args) {
+            Y_ENSURE(TryCollectInto<Args...>(args...));
+        }
+
+        /**
+         * Returns the number of tokens produced by iterating this range.
+         */
+        inline size_t Count() const {
+            size_t cnt = 0;
+            for (auto&& it : *this) {
+                Y_UNUSED(it);
+                ++cnt;
+            }
+            return cnt;
+        }
+    };
+
+    /**
+     * Splitter over a string of type `String`, which is stored by value.
+     * The `Split*` methods return token ranges that can be refined further
+     * with `SkipEmpty`, `Take` and `Limit`.
+     */
+    template <class String>
+    class TStringSplitter {
+        using TStringType = String;
+        using TStringBufType = TStringBufOf<TStringType>;
+        using TChar = typename TStringType::value_type;
+        using TIterator = TIteratorOf<TStringType>;
+        using TIteratorState = TIterState<TStringType>;
+
+        /**
+         * Base class for all split ranges that actually does the splitting.
+         * Holds its own copy of the string, the iteration state and the
+         * delimiter storage.
+         *
+         * NOTE(review): `State_` stores iterators into `String_`; a memberwise
+         * copy of this struct keeps them pointing into the source object's
+         * string. Confirm this is safe for string types whose copies do not
+         * share storage.
+         */
+        template <class DelimStorage>
+        struct TSplitRangeBase {
+            template <class OtherString, class... Args>
+            inline TSplitRangeBase(OtherString&& s, Args&&... args)
+                : String_(std::forward<OtherString>(s))
+                , State_(String_)
+                , Delim_(std::forward<Args>(args)...)
+            {
+            }
+
+            /**
+             * Advances to the next token.
+             *
+             * \returns the updated state, or nullptr once `TokD` equals `B`.
+             */
+            inline TIteratorState* Next() {
+                const bool exhausted = (State_.TokD == State_.B);
+                if (exhausted) {
+                    return nullptr;
+                }
+
+                // The token start is recorded before `Find` is handed `B`;
+                // presumably `Find` moves `B` past the delimiter — confirm
+                // against the delimiter classes.
+                State_.TokS = State_.B;
+                State_.TokD = Delim_.Ptr()->Find(State_.B, State_.E);
+
+                return &State_;
+            }
+
+        private:
+            TStringType String_;
+            TIteratorState State_;
+            DelimStorage Delim_;
+        };
+
+        /**
+         * Range layer that drops the tokens rejected by `Filter`.
+         */
+        template <class Base, class Filter>
+        struct TFilterRange: public Base {
+            template <class... Args>
+            inline TFilterRange(const Base& base, Args&&... args)
+                : Base(base)
+                , Filter_(std::forward<Args>(args)...)
+            {
+            }
+
+            // Pulls tokens from the base until one is accepted or none are left.
+            inline TIteratorState* Next() {
+                for (;;) {
+                    TIteratorState* const candidate = Base::Next();
+                    if (!candidate || Filter_.Accept(candidate)) {
+                        return candidate;
+                    }
+                }
+            }
+
+            Filter Filter_;
+        };
+
+        // Filter that accepts only non-empty tokens.
+        struct TNonEmptyFilter {
+            template <class TToken>
+            inline bool Accept(const TToken* token) noexcept {
+                return !token->Empty();
+            }
+        };
+
+        template <class TIter>
+        struct TStopIteration;
+
+        /**
+         * Mixin that adds the filtering methods (`SkipEmpty`) to a range.
+         */
+        template <class Base>
+        struct TFilters: public Base {
+            template <class TFilter>
+            using TIt = TSplitRange<TStopIteration<TFilters<TFilterRange<Base, TFilter>>>>;
+
+            template <typename... Args>
+            inline TFilters(Args&&... args)
+                : Base(std::forward<Args>(args)...)
+            {
+            }
+
+            // Returns a copy of this range that skips empty tokens.
+            inline TIt<TNonEmptyFilter> SkipEmpty() const {
+                return {*this};
+            }
+        };
+
+        /**
+         * Range layer that ends the iteration as soon as `Stopper` says so.
+         */
+        template <class Base, class Stopper>
+        struct TStopRange: public Base {
+            template <typename... Args>
+            inline TStopRange(const Base& base, Args&&... args)
+                : Base(base)
+                , Stopper_(std::forward<Args>(args)...)
+            {
+            }
+
+            inline TIteratorState* Next() {
+                TIteratorState* const token = Base::Next();
+                // The stopper is consulted only when the base produced a token.
+                const bool finished = !token || Stopper_.Stop(token);
+                return finished ? nullptr : token;
+            }
+
+            Stopper Stopper_;
+        };
+
+        // Stopper that lets through at most `Count` tokens.
+        struct TTake {
+            TTake() = default;
+
+            TTake(size_t count)
+                : Count(count)
+            {
+            }
+
+            template <class TToken>
+            inline bool Stop(TToken*) noexcept {
+                if (Count == 0) {
+                    return true;
+                }
+                --Count;
+                return false;
+            }
+
+            size_t Count = 0;
+        };
+
+        // Stopper that yields at most `Count` tokens, the last one extending
+        // to the end of the string.
+        struct TLimit {
+            TLimit() = default;
+
+            TLimit(size_t count)
+                : Count(count)
+            {
+                Y_ASSERT(Count > 0);
+            }
+
+            template <class TToken>
+            inline bool Stop(TToken* token) noexcept {
+                if (Count == 0) {
+                    return true;
+                }
+                if (Count == 1) {
+                    // Stretch the current token to the end of the string. With
+                    // `TokD == B` the base range reports exhaustion next time.
+                    token->B = token->E;
+                    token->TokD = token->B;
+                } else {
+                    --Count;
+                }
+                return false;
+            }
+
+            size_t Count = 0;
+        };
+
+        /**
+         * Mixin that adds the stopping methods (`Take`, `Limit`) to a range.
+         */
+        template <class Base>
+        struct TStopIteration: public Base {
+            template <class TStopper>
+            using TIt = TSplitRange<TStopIteration<TFilters<TStopRange<Base, TStopper>>>>;
+
+            template <typename... Args>
+            inline TStopIteration(Args&&... args)
+                : Base(std::forward<Args>(args)...)
+            {
+            }
+
+            // Returns a copy of this range that yields at most `count` tokens.
+            inline TIt<TTake> Take(size_t count) {
+                return {*this, count};
+            }
+
+            // Returns a copy of this range that yields at most `count` tokens,
+            // the last of which holds the unsplit remainder.
+            inline TIt<TLimit> Limit(size_t count) {
+                return {*this, count};
+            }
+        };
+
+        // Full range type for a given delimiter storage policy.
+        template <class TPolicy>
+        using TIt = TSplitRange<TStopIteration<TFilters<TSplitRangeBase<TPolicy>>>>;
+
+    public:
+        template <class OtherString>
+        explicit TStringSplitter(OtherString&& s)
+            : String_(std::forward<OtherString>(s))
+        {
+        }
+
+        //does not own TDelim
+        template <class TDelim>
+        inline TIt<TPtrPolicy<const TDelim>> Split(const TDelim& d) const noexcept {
+            return {String_, &d};
+        }
+
+        // Splits on a single character.
+        inline TIt<TEmbedPolicy<TCharDelimiter<const TChar>>> Split(TChar ch) const noexcept {
+            return {String_, ch};
+        }
+
+        // Splits using a `TSetDelimiter` built from the characters in `set`.
+        inline TIt<TSimpleRefPolicy<TSetDelimiter<const TChar>>> SplitBySet(const TChar* set) const noexcept {
+            return {String_, set};
+        }
+
+        // Splits using a `TStringDelimiter` built from `str`.
+        inline TIt<TEmbedPolicy<TStringDelimiter<const TChar>>> SplitByString(const TStringBufType& str) const noexcept {
+            return {String_, str.data(), str.size()};
+        }
+
+        // Splits using a `TFuncDelimiter` built from `f`.
+        template <class TFunc>
+        inline TIt<TEmbedPolicy<TFuncDelimiter<TIterator, TFunc>>> SplitByFunc(TFunc f) const noexcept {
+            return {String_, f};
+        }
+
+    private:
+        TStringType String_;
+    };
+
+    /**
+     * Creates a `TStringSplitter` for the given string; the splitter's string
+     * type is `String` with any reference removed.
+     */
+    template <class String>
+    auto MakeStringSplitter(String&& s) {
+        using TStored = std::remove_reference_t<String>;
+        return TStringSplitter<TStored>(std::forward<String>(s));
+    }
+}
+
+// Splitter over an arbitrary iterator range `[begin, end)`.
+template <class Iterator>
+auto StringSplitter(Iterator begin, Iterator end) {
+    using TRange = TIteratorRange<Iterator>;
+    return ::NStringSplitPrivate::MakeStringSplitter(TRange(begin, end));
+}
+
+// Splitter over the character range `[begin, end)`.
+template <class Char>
+auto StringSplitter(const Char* begin, const Char* end) {
+    using TBuf = TBasicStringBuf<Char>;
+    return ::NStringSplitPrivate::MakeStringSplitter(TBuf(begin, end));
+}
+
+// Splitter over `len` characters starting at `begin`.
+template <class Char>
+auto StringSplitter(const Char* begin, size_t len) {
+    using TBuf = TBasicStringBuf<Char>;
+    return ::NStringSplitPrivate::MakeStringSplitter(TBuf(begin, len));
+}
+
+// Splitter over a string given as a character pointer.
+template <class Char>
+auto StringSplitter(const Char* str) {
+    using TBuf = TBasicStringBuf<Char>;
+    return ::NStringSplitPrivate::MakeStringSplitter(TBuf(str));
+}
+
+// Lvalue string: the splitter stores only a buffer over `s.data()`, not a copy.
+template <class String, std::enable_if_t<!std::is_pointer<std::remove_reference_t<String>>::value, int> = 0>
+auto StringSplitter(String& s) {
+    using TBuf = ::NStringSplitPrivate::TStringBufOf<String>;
+    return ::NStringSplitPrivate::MakeStringSplitter(TBuf(s.data(), s.size()));
+}
+
+// Rvalue string: the string is moved into the splitter.
+template <class String, std::enable_if_t<!std::is_pointer<std::remove_reference_t<String>>::value, int> = 0>
+auto StringSplitter(String&& s) {
+    return ::NStringSplitPrivate::MakeStringSplitter(std::move(s));
+}
diff --git a/util/string/split_ut.cpp b/util/string/split_ut.cpp
new file mode 100644
index 0000000000..43e59f2d75
--- /dev/null
+++ b/util/string/split_ut.cpp
@@ -0,0 +1,831 @@
+#include "split.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/stream/output.h>
+#include <util/charset/wide.h>
+#include <util/datetime/cputimer.h>
+#include <util/generic/maybe.h>
+
+#include <string>
+#include <string_view>
+
+// Reference tab splitter: empties *pRes, then splits pszBuf in place by
+// overwriting every '\t' with a terminator and recording where each piece starts.
+template <typename T>
+static inline void OldSplit(char* pszBuf, T* pRes) {
+    pRes->resize(0);
+    pRes->push_back(pszBuf);
+
+    char* cursor = pszBuf;
+    while (*cursor) {
+        if (*cursor == '\t') {
+            *cursor = 0;
+            pRes->push_back(cursor + 1);
+        }
+        ++cursor;
+    }
+}
+
+// Prints every element of `t` to Cerr, one per line.
+// Declared ahead of Cmp so that the call there resolves by ordinary lookup
+// instead of depending on argument-dependent lookup at instantiation time.
+template <class T>
+inline void Print(const T& t) {
+    for (typename T::const_iterator i = t.begin(); i != t.end(); ++i) {
+        Cerr << *i << Endl;
+    }
+}
+
+// Asserts that two containers have the same size and equal elements.
+// On a failed assertion it dumps the relevant data to Cerr and rethrows.
+template <class T1, class T2>
+inline void Cmp(const T1& t1, const T2& t2) {
+    try {
+        UNIT_ASSERT_EQUAL(t1.size(), t2.size());
+    } catch (...) {
+        // Size mismatch: show both containers in full before rethrowing.
+        Print(t1);
+        Cerr << "---------------" << Endl;
+        Print(t2);
+
+        throw;
+    }
+
+    auto i = t1.begin();
+    auto j = t2.begin();
+
+    for (; i != t1.end() && j != t2.end(); ++i, ++j) {
+        try {
+            UNIT_ASSERT_EQUAL(*i, *j);
+        } catch (...) {
+            // Element mismatch: show the offending pair before rethrowing.
+            Cerr << "(" << *i << ")->(" << *j << ")" << Endl;
+
+            throw;
+        }
+    }
+}
+
+// Splits the null-terminated `str` by `delim` into a fresh TResult through
+// TConsumer<TResult> and checks the outcome against `good`.
+template <template <typename> class TConsumer, typename TResult, typename I, typename TDelimiter>
+void TestDelimiterOnString(TResult& good, I* str, const TDelimiter& delim) {
+    TResult test;
+    TConsumer<TResult> sink(&test);
+
+    SplitString(str, delim, sink);
+
+    Cmp(good, test);
+    UNIT_ASSERT_EQUAL(good, test);
+}
+
+// Same as TestDelimiterOnString, but the input is the range [b, e).
+template <template <typename> class TConsumer, typename TResult, typename I, typename TDelimiter>
+void TestDelimiterOnRange(TResult& good, I* b, I* e, const TDelimiter& delim) {
+    TResult test;
+    TConsumer<TResult> sink(&test);
+
+    SplitString(b, e, delim, sink);
+
+    Cmp(good, test);
+    UNIT_ASSERT_EQUAL(good, test);
+}
+
+// Wraps a TContainerConsumer in the consumer under test, splits the
+// null-terminated `str` by the first character of `d`, and checks the
+// collected tokens against `good`.
+template <typename TConsumer, typename TResult, typename I>
+void TestConsumerOnString(TResult& good, I* str, I* d) {
+    TResult test;
+    TContainerConsumer<TResult> sink(&test);
+    TConsumer wrapped(&sink);
+    TCharDelimiter<const I> separator(*d);
+
+    SplitString(str, separator, wrapped);
+
+    Cmp(good, test);
+    UNIT_ASSERT_EQUAL(good, test);
+}
+
+// Same as TestConsumerOnString, but the input is the range [b, e).
+template <typename TConsumer, typename TResult, typename I>
+void TestConsumerOnRange(TResult& good, I* b, I* e, I* d) {
+    TResult test;
+    TContainerConsumer<TResult> sink(&test);
+    TConsumer wrapped(&sink);
+    TCharDelimiter<const I> separator(*d);
+
+    SplitString(b, e, separator, wrapped);
+
+    Cmp(good, test);
+    UNIT_ASSERT_EQUAL(good, test);
+}
+
+using TStrokaConsumer = TContainerConsumer<TVector<TString>>; // container consumer that collects tokens into a TVector<TString>
+
+// Splits the null-terminated `str` by the first character of `d` through a
+// TLimitingConsumer constructed with `n`, then checks both the tokens that
+// reached the wrapped consumer and the `Last` value left in the limiter.
+void TestLimitingConsumerOnString(TVector<TString>& good, const char* str, const char* d, size_t n, const char* last) {
+    TVector<TString> test;
+    TStrokaConsumer sink(&test);
+    TLimitingConsumer<TStrokaConsumer, const char> limits(n, &sink);
+    TCharDelimiter<const char> separator(*d);
+
+    SplitString(str, separator, limits);
+
+    Cmp(good, test);
+    UNIT_ASSERT_EQUAL(good, test);
+    UNIT_ASSERT_EQUAL(TString(limits.Last), TString(last)); // Quite unobvious behaviour. Why the last token is not added to slave consumer?
+}
+
+// Same as TestLimitingConsumerOnString, but the input is the range [b, e).
+void TestLimitingConsumerOnRange(TVector<TString>& good, const char* b, const char* e, const char* d, size_t n, const char* last) {
+    TVector<TString> test;
+    TStrokaConsumer sink(&test);
+    TLimitingConsumer<TStrokaConsumer, const char> limits(n, &sink);
+    TCharDelimiter<const char> separator(*d);
+
+    SplitString(b, e, separator, limits);
+
+    Cmp(good, test);
+    UNIT_ASSERT_EQUAL(good, test);
+    UNIT_ASSERT_EQUAL(TString(limits.Last), TString(last));
+}
+
+// Tests for the SplitString / Split free functions and their consumers.
+Y_UNIT_TEST_SUITE(SplitStringTest) {
+    // Single-character delimiter; two consecutive delimiters yield an empty token.
+    Y_UNIT_TEST(TestCharSingleDelimiter) {
+        // Two spaces between "ab" and "qwabcab" are required for the expected
+        // empty third token.
+        TString data("qw ab  qwabcab");
+        TString canonic[] = {"qw", "ab", "", "qwabcab"};
+        TVector<TString> good(canonic, canonic + 4);
+        TCharDelimiter<const char> delim(' ');
+
+        TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
+        TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim);
+    }
+
+    // Wide-character variant of TestCharSingleDelimiter.
+    Y_UNIT_TEST(TestWideSingleDelimiter) {
+        // Two spaces between "ab" and "qwabcab" are required for the expected
+        // empty third token.
+        TUtf16String data(u"qw ab  qwabcab");
+        TUtf16String canonic[] = {u"qw", u"ab", TUtf16String(), u"qwabcab"};
+        TVector<TUtf16String> good(canonic, canonic + 4);
+        TCharDelimiter<const wchar16> delim(' ');
+
+        TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
+        TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim);
+    }
+
+    // Tokens are converted to integers by the converting consumer.
+    Y_UNIT_TEST(TestConvertToIntCharSingleDelimiter) {
+        TString data("42 4242 -12345 0");
+        i32 canonic[] = {42, 4242, -12345, 0};
+        TVector<i32> good(canonic, canonic + 4);
+        TCharDelimiter<const char> delim(' ');
+
+        TestDelimiterOnString<TContainerConvertingConsumer>(good, data.data(), delim);
+        TestDelimiterOnRange<TContainerConvertingConsumer>(good, data.data(), data.end(), delim);
+    }
+
+    // TSkipEmptyTokens drops empty tokens, including the trailing one.
+    Y_UNIT_TEST(TestCharSkipEmpty) {
+        TString data("qw ab qwabcab ");
+        TString canonic[] = {"qw", "ab", "qwabcab"};
+        TVector<TString> good(canonic, canonic + 3);
+
+        TestConsumerOnString<TSkipEmptyTokens<TStrokaConsumer>>(good, data.data(), " ");
+        TestConsumerOnRange<TSkipEmptyTokens<TStrokaConsumer>>(good, data.data(), data.end(), " ");
+    }
+
+    // TKeepDelimiters reports every delimiter as a token of its own.
+    Y_UNIT_TEST(TestCharKeepDelimiters) {
+        // The expected sequence contains four delimiters and an empty token
+        // between the second and third one, hence the doubled space.
+        TString data("qw ab  qwabcab ");
+        TString canonic[] = {"qw", " ", "ab", " ", "", " ", "qwabcab", " ", ""};
+        TVector<TString> good(canonic, canonic + 9);
+
+        TestConsumerOnString<TKeepDelimiters<TStrokaConsumer>>(good, data.data(), " ");
+        TestConsumerOnRange<TKeepDelimiters<TStrokaConsumer>>(good, data.data(), data.end(), " ");
+    }
+
+    // TLimitingConsumer forwards the first tokens and keeps the rest in Last.
+    Y_UNIT_TEST(TestCharLimit) {
+        // The expected remainder " qwabcab " starts with a space, so the input
+        // needs two spaces after "ab".
+        TString data("qw ab  qwabcab ");
+        TString canonic[] = {"qw", "ab"};
+        TVector<TString> good(canonic, canonic + 2);
+
+        TestLimitingConsumerOnString(good, data.data(), " ", 3, " qwabcab ");
+        TestLimitingConsumerOnRange(good, data.data(), data.end(), " ", 3, " qwabcab ");
+    }
+
+    // Multi-character string delimiter.
+    Y_UNIT_TEST(TestCharStringDelimiter) {
+        TString data("qw ab qwababcab");
+        TString canonic[] = {"qw ", " qw", "", "c", ""};
+        TVector<TString> good(canonic, canonic + 5);
+        TStringDelimiter<const char> delim("ab");
+
+        TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
+        TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim);
+    }
+
+    // Wide-character variant of TestCharStringDelimiter.
+    Y_UNIT_TEST(TestWideStringDelimiter) {
+        TUtf16String data(u"qw ab qwababcab");
+        TUtf16String canonic[] = {u"qw ", u" qw", TUtf16String(), u"c", TUtf16String()};
+        TVector<TUtf16String> good(canonic, canonic + 5);
+        TUtf16String wideDelim(u"ab");
+        TStringDelimiter<const wchar16> delim(wideDelim.data());
+
+        TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
+        TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim);
+    }
+
+    // Any character from the set acts as a delimiter.
+    Y_UNIT_TEST(TestCharSetDelimiter) {
+        TString data("qw ab qwababccab");
+        TString canonic[] = {"q", " ab q", "abab", "", "ab"};
+        TVector<TString> good(canonic, canonic + 5);
+        TSetDelimiter<const char> delim("wc");
+
+        TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
+        TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim);
+    }
+
+    // Wide-character variant of TestCharSetDelimiter (string form only).
+    Y_UNIT_TEST(TestWideSetDelimiter) {
+        TUtf16String data(u"qw ab qwababccab");
+        TUtf16String canonic[] = {u"q", u" ab q", u"abab", TUtf16String(), u"ab"};
+        TVector<TUtf16String> good(canonic, canonic + 5);
+        TUtf16String wideDelim(u"wc");
+        TSetDelimiter<const wchar16> delim(wideDelim.data());
+
+        TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim);
+    }
+
+    // Set delimiter on explicit ranges: an empty range and a truncated one.
+    Y_UNIT_TEST(TestWideSetDelimiterRange) {
+        TUtf16String data(u"qw ab qwababccab");
+        TUtf16String canonic[] = {u"q", u" ab q", u"abab", TUtf16String(), u"ab"};
+        TVector<TUtf16String> good(1);
+        TUtf16String wideDelim(u"wc");
+        TSetDelimiter<const wchar16> delim(wideDelim.data());
+
+        TVector<TUtf16String> test;
+        TContainerConsumer<TVector<TUtf16String>> consumer(&test);
+        SplitString(data.data(), data.data(), delim, consumer); // Empty string is still inserted into consumer
+        Cmp(good, test);
+
+        // Cutting off the trailing "ab" leaves an empty token after the last delimiter.
+        good.assign(canonic, canonic + 4);
+        good.push_back(TUtf16String());
+        test.clear();
+        SplitString(data.data(), data.end() - 2, delim, consumer);
+        Cmp(good, test);
+    }
+
+    // Split() into a vector, with the delimiter given as TString and as const char*.
+    Y_UNIT_TEST(TestSplit) {
+        TString data("qw ab qwababcba");
+        TString canonic[] = {"qw ", " qw", "c"};
+        TVector<TString> good(canonic, canonic + 3);
+        TString delim = "ab";
+        TVector<TString> test;
+        Split(data, delim, test);
+        Cmp(good, test);
+
+        TVector<TStringBuf> test1;
+        Split(data, delim.data(), test1);
+        Cmp(good, test1);
+    }
+
+    // Variadic Split() converts each token to the type of its output argument.
+    Y_UNIT_TEST(ConvenientSplitTest) {
+        TString data("abc 22 33.5 xyz");
+        TString str;
+        int num1 = 0;
+        double num2 = 0;
+        TStringBuf strBuf;
+        Split(data, ' ', str, num1, num2, strBuf);
+        UNIT_ASSERT_VALUES_EQUAL(str, "abc");
+        UNIT_ASSERT_VALUES_EQUAL(num1, 22);
+        UNIT_ASSERT_VALUES_EQUAL(num2, 33.5);
+        UNIT_ASSERT_VALUES_EQUAL(strBuf, "xyz");
+    }
+
+    // A TMaybe output with no token left for it ends up empty.
+    Y_UNIT_TEST(ConvenientSplitTestWithMaybe) {
+        TString data("abc 42");
+        TString str;
+        TMaybe<double> num2 = 1;
+        TMaybe<double> maybe = 1;
+
+        Split(data, ' ', str, num2, maybe);
+
+        UNIT_ASSERT_VALUES_EQUAL(str, "abc");
+        UNIT_ASSERT_VALUES_EQUAL(*num2, 42);
+        UNIT_ASSERT(!maybe);
+    }
+
+    // The number of non-optional outputs must match the number of tokens.
+    Y_UNIT_TEST(ConvenientSplitTestExceptions) {
+        TString data("abc 22 33");
+        TString s1, s2, s3, s4;
+
+        UNIT_ASSERT_EXCEPTION(Split(data, ' ', s1, s2), yexception);
+        UNIT_ASSERT_NO_EXCEPTION(Split(data, ' ', s1, s2, s3));
+        UNIT_ASSERT_EXCEPTION(Split(data, ' ', s1, s2, s3, s4), yexception);
+    }
+
+    // Combinations of required and TMaybe outputs against three tokens.
+    Y_UNIT_TEST(ConvenientSplitTestMaybeExceptions) {
+        TString data("abc 22 33");
+        TString s1, s2;
+        TMaybe<TString> m1, m2;
+
+        UNIT_ASSERT_EXCEPTION(Split(data, ' ', s1, m1), yexception);
+        UNIT_ASSERT_EXCEPTION(Split(data, ' ', m1, m2), yexception);
+        UNIT_ASSERT_NO_EXCEPTION(Split(data, ' ', s1, s2, m1));
+
+        UNIT_ASSERT_NO_EXCEPTION(Split(data, ' ', s1, s2, m1, m2));
+        UNIT_ASSERT_EXCEPTION(Split(data, ' ', m1, m2, s1, s2), yexception);
+
+        UNIT_ASSERT_NO_EXCEPTION(Split(data, ' ', s1, s2, m1, m2, m1, m1, m1, m1));
+        UNIT_ASSERT_EXCEPTION(Split(data, ' ', s1, s2, m1, m2, m1, m1, m1, m1, s1), yexception);
+    }
+}
+
+// Checks that splitting `str` by `delim` produces exactly `good` tokens.
+template <typename I, typename C>
+void TestStringSplitterCount(I* str, C delim, size_t good) {
+    auto tokens = StringSplitter(str).Split(delim);
+    size_t res = tokens.Count();
+    UNIT_ASSERT_VALUES_EQUAL(res, good);
+}
+
+Y_UNIT_TEST_SUITE(StringSplitter) {
+ Y_UNIT_TEST(TestSplit) {
+ int sum = 0;
+
+ for (const auto& it : StringSplitter("1,2,3").Split(',')) {
+ sum += FromString<int>(it.Token());
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(sum, 6);
+ }
+
+ Y_UNIT_TEST(TestSplit1) {
+ int cnt = 0;
+
+ for (const auto& it : StringSplitter(" ").Split(' ')) {
+ (void)it;
+
+ ++cnt;
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(cnt, 2);
+ }
+
+ Y_UNIT_TEST(TestSplitLimited) {
+ TVector<TString> expected = {"1", "2", "3,4,5"};
+ TVector<TString> actual = StringSplitter("1,2,3,4,5").Split(',').Limit(3).ToList<TString>();
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestSplitLimitedWithEmptySkip) {
+ TVector<TString> expected = {"1", "2", "3,4,5"};
+ TVector<TString> actual = StringSplitter("1,,,2,,,,3,4,5").Split(',').SkipEmpty().Limit(3).ToList<TString>();
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+
+ expected = {"1", "2", ",,,3,4,5"};
+ actual = StringSplitter("1,2,,,,3,4,5").Split(',').Limit(3).SkipEmpty().ToList<TString>();
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestSplitBySet) {
+ int sum = 0;
+
+ for (const auto& it : StringSplitter("1,2:3").SplitBySet(",:")) {
+ sum += FromString<int>(it.Token());
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(sum, 6);
+ }
+
+ Y_UNIT_TEST(TestSplitBySetLimited) {
+ TVector<TString> expected = {"1", "2", "3,4:5"};
+ TVector<TString> actual = StringSplitter("1,2:3,4:5").SplitBySet(",:").Limit(3).ToList<TString>();
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestSplitBySetLimitedWithEmptySkip) {
+ TVector<TString> expected = {"1", "2", "3,4:5"};
+ TVector<TString> actual = StringSplitter("1,:,2::::,3,4:5").SplitBySet(",:").SkipEmpty().Limit(3).ToList<TString>();
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+
+ expected = {"1", ",2::::,3,4:5"};
+ actual = StringSplitter("1,:,2::::,3,4:5").SplitBySet(",:").Limit(3).SkipEmpty().ToList<TString>();
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestSplitByString) {
+ int sum = 0;
+
+ for (const auto& it : StringSplitter("1ab2ab3").SplitByString("ab")) {
+ sum += FromString<int>(it.Token());
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(sum, 6);
+ }
+
+ Y_UNIT_TEST(TestSplitByStringLimited) {
+ TVector<TString> expected = {"1", "2", "3ab4ab5"};
+ TVector<TString> actual = StringSplitter("1ab2ab3ab4ab5").SplitByString("ab").Limit(3).ToList<TString>();
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestSplitByStringLimitedWithEmptySkip) {
+ TVector<TString> expected = {"1", "2", "3ab4ab5"};
+ TVector<TString> actual = StringSplitter("1abab2ababababab3ab4ab5").SplitByString("ab").SkipEmpty().Limit(3).ToList<TString>();
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestSplitByFunc) {
+ TString s = "123 456 \t\n789\n10\t 20";
+ TVector<TString> pattern = {"123", "456", "789", "10", "20"};
+
+ TVector<TString> tokens;
+ auto f = [](char a) { return a == ' ' || a == '\t' || a == '\n'; };
+ for (auto v : StringSplitter(s).SplitByFunc(f)) {
+ if (v) {
+ tokens.emplace_back(v);
+ }
+ }
+
+ UNIT_ASSERT(tokens == pattern);
+ }
+
+ Y_UNIT_TEST(TestSplitByFuncLimited) {
+ TVector<TString> expected = {"1", "2", "3a4b5"};
+ auto f = [](char a) { return a == 'a' || a == 'b'; };
+ TVector<TString> actual = StringSplitter("1a2b3a4b5").SplitByFunc(f).Limit(3).ToList<TString>();
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestSplitByFuncLimitedWithEmptySkip) {
+ TVector<TString> expected = {"1", "2", "3a4b5"};
+ auto f = [](char a) { return a == 'a' || a == 'b'; };
+ TVector<TString> actual = StringSplitter("1aaba2bbababa3a4b5").SplitByFunc(f).SkipEmpty().Limit(3).Take(3).ToList<TString>();
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestSkipEmpty) {
+ int sum = 0;
+
+ for (const auto& it : StringSplitter(" 1 2 3 ").Split(' ').SkipEmpty()) {
+ sum += FromString<int>(it.Token());
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(sum, 6);
+
+ // double
+ sum = 0;
+ for (const auto& it : StringSplitter(" 1 2 3 ").Split(' ').SkipEmpty().SkipEmpty()) {
+ sum += FromString<int>(it.Token());
+ }
+ UNIT_ASSERT_VALUES_EQUAL(sum, 6);
+ }
+
+ Y_UNIT_TEST(TestTake) {
+ TVector<TString> expected = {"1", "2", "3"};
+ UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter("1 2 3 4 5 6 7 8 9 10").Split(' ').Take(3).ToList<TString>());
+
+ expected = {"1", "2"};
+ UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter(" 1 2 3 ").Split(' ').SkipEmpty().Take(2).ToList<TString>());
+
+ expected = {"1", "2", "3"};
+ UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter("1 2 3 4 5 6 7 8 9 10").Split(' ').Take(5).Take(3).ToList<TString>());
+ UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter("1 2 3 4 5 6 7 8 9 10").Split(' ').Take(3).Take(5).ToList<TString>());
+
+ expected = {"1", "2"};
+ UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter(" 1 2 3 ").Split(' ').Take(4).SkipEmpty().ToList<TString>());
+
+ expected = {"1"};
+ UNIT_ASSERT_VALUES_EQUAL(expected, StringSplitter(" 1 2 3 ").Split(' ').Take(4).SkipEmpty().Take(1).ToList<TString>());
+ }
+
+ Y_UNIT_TEST(TestCompile) {
+ (void)StringSplitter(TString());
+ (void)StringSplitter(TStringBuf());
+ (void)StringSplitter("", 0);
+ }
+
+ Y_UNIT_TEST(TestStringSplitterCountEmpty) {
+ TCharDelimiter<const char> delim(' ');
+ TestStringSplitterCount("", delim, 1);
+ }
+
+ Y_UNIT_TEST(TestStringSplitterCountOne) {
+ TCharDelimiter<const char> delim(' ');
+ TestStringSplitterCount("one", delim, 1);
+ }
+
+ Y_UNIT_TEST(TestStringSplitterCountWithOneDelimiter) {
+ TCharDelimiter<const char> delim(' ');
+ TestStringSplitterCount("one two", delim, 2);
+ }
+
+ Y_UNIT_TEST(TestStringSplitterCountWithTrailing) {
+ TCharDelimiter<const char> delim(' ');
+ TestStringSplitterCount(" one ", delim, 3);
+ }
+
+ Y_UNIT_TEST(TestStringSplitterConsume) {
+ TVector<TString> expected = {"1", "2", "3"};
+ TVector<TString> actual;
+ auto func = [&actual](const TBasicStringBuf<char>& token) {
+ actual.push_back(TString(token));
+ };
+ StringSplitter("1 2 3").Split(' ').Consume(func);
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestStringSplitterConsumeConditional) {
+ TVector<TString> expected = {"1", "2"};
+ TVector<TString> actual;
+ auto func = [&actual](const TBasicStringBuf<char>& token) {
+ if (token == "3") {
+ return false;
+ }
+ actual.push_back(TString(token));
+ return true;
+ };
+ bool completed = StringSplitter("1 2 3 4 5").Split(' ').Consume(func);
+ UNIT_ASSERT(!completed);
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestStringSplitterToList) {
+ TVector<TString> expected = {"1", "2", "3"};
+ TVector<TString> actual = StringSplitter("1 2 3").Split(' ').ToList<TString>();
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestStringSplitterCollectPushBack) {
+ TVector<TString> expected = {"1", "2", "3"};
+ TVector<TString> actual;
+ StringSplitter("1 2 3").Split(' ').Collect(&actual);
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestStringSplitterCollectInsert) {
+ TSet<TString> expected = {"1", "2", "3"};
+ TSet<TString> actual;
+ StringSplitter("1 2 3 1 2 3").Split(' ').Collect(&actual);
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestStringSplitterCollectClears) {
+ TVector<TString> v;
+ StringSplitter("1 2 3").Split(' ').Collect(&v);
+ UNIT_ASSERT_VALUES_EQUAL(v.size(), 3);
+ StringSplitter("4 5").Split(' ').Collect(&v);
+ UNIT_ASSERT_VALUES_EQUAL(v.size(), 2);
+ }
+
+ Y_UNIT_TEST(TestStringSplitterAddToDoesntClear) {
+ TVector<TString> v;
+ StringSplitter("1 2 3").Split(' ').AddTo(&v);
+ UNIT_ASSERT_VALUES_EQUAL(v.size(), 3);
+ StringSplitter("4 5").Split(' ').AddTo(&v);
+ UNIT_ASSERT_VALUES_EQUAL(v.size(), 5);
+ }
+
+ Y_UNIT_TEST(TestSplitStringInto) { // CollectInto() converts each token to the type of the matching output
+ int a = -1;
+ TStringBuf s;
+ double d = -1;
+ StringSplitter("2 substr 1.02").Split(' ').CollectInto(&a, &s, &d);
+ UNIT_ASSERT_VALUES_EQUAL(a, 2);
+ UNIT_ASSERT_VALUES_EQUAL(s, "substr");
+ UNIT_ASSERT_DOUBLES_EQUAL(d, 1.02, 0.0001);
+ UNIT_ASSERT_EXCEPTION(StringSplitter("1").Split(' ').CollectInto(&a, &a), yexception); // fewer tokens than outputs
+ UNIT_ASSERT_EXCEPTION(StringSplitter("1 2 3").Split(' ').CollectInto(&a, &a), yexception); // more tokens than outputs
+ }
+
+ Y_UNIT_TEST(TestSplitStringWithIgnore) { // &std::ignore marks a token position whose value is not stored
+ TStringBuf s;
+ StringSplitter("x y z").Split(' ').CollectInto(&std::ignore, &s, &std::ignore);
+ UNIT_ASSERT_VALUES_EQUAL(s, "y");
+
+ UNIT_ASSERT_EXCEPTION(StringSplitter("ignored != non-requred").Split(':').CollectInto(&s, &std::ignore), yexception); // no ':' in the input: one token for two outputs, an ignored output still needs a token
+ }
+
+ Y_UNIT_TEST(TestTryCollectInto) { // TryCollectInto() reports failure through its bool result
+ int a, b, c;
+ bool parsingSucceeded;
+ parsingSucceeded = StringSplitter("100,500,3").Split(',').TryCollectInto(&a, &b, &c);
+ UNIT_ASSERT(parsingSucceeded);
+ UNIT_ASSERT_VALUES_EQUAL(a, 100);
+ UNIT_ASSERT_VALUES_EQUAL(b, 500);
+ UNIT_ASSERT_VALUES_EQUAL(c, 3);
+
+ // not enough tokens: two tokens for three outputs
+ parsingSucceeded = StringSplitter("3,14").Split(',').TryCollectInto(&a, &b, &c);
+ UNIT_ASSERT(!parsingSucceeded);
+
+ // too many tokens: five tokens for three outputs
+ parsingSucceeded = StringSplitter("3,14,15,92,6").Split(',').TryCollectInto(&a, &b, &c);
+ UNIT_ASSERT(!parsingSucceeded);
+
+ // a token that cannot be converted to int makes the whole call fail
+ parsingSucceeded = StringSplitter("ot topota kopyt pyl po polu letit").Split(' ').TryCollectInto(&a, &b, &c);
+ UNIT_ASSERT(!parsingSucceeded);
+ }
+
+ Y_UNIT_TEST(TestOwningSplit1) { // iterates over the split of a temporary TString
+ int sum = 0;
+
+ for (const auto& it : StringSplitter(TString("1,2,3")).Split(',')) { // NOTE(review): presumably the splitter owns the rvalue string for the whole loop - confirm
+ sum += FromString<int>(it.Token());
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(sum, 6);
+ }
+
+ Y_UNIT_TEST(TestOwningSplit2) { // iterates over the split of a non-const lvalue TString
+ int sum = 0;
+
+ TString str("1,2,3");
+ for (const auto& it : StringSplitter(str).Split(',')) {
+ sum += FromString<int>(it.Token());
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(sum, 6); // 1 + 2 + 3
+ }
+
+ Y_UNIT_TEST(TestOwningSplit3) { // iterates over the split of a const lvalue TString
+ int sum = 0;
+
+ const TString str("1,2,3");
+ for (const auto& it : StringSplitter(str).Split(',')) {
+ sum += FromString<int>(it.Token());
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(sum, 6); // 1 + 2 + 3
+ }
+
+ Y_UNIT_TEST(TestAssigment) { // the split result converts to containers, implicitly or through a cast
+ TVector<TString> expected0 = {"1", "2", "3", "4"};
+ TVector<TString> actual0 = StringSplitter("1 2 3 4").Split(' '); // copy-initialization of a TVector
+ UNIT_ASSERT_VALUES_EQUAL(expected0, actual0);
+
+ TSet<TString> expected1 = {"11", "22", "33", "44"};
+ TSet<TString> actual1 = StringSplitter("11 22 33 44").Split(' '); // copy-initialization of a TSet
+ UNIT_ASSERT_VALUES_EQUAL(expected1, actual1);
+
+ TSet<TString> expected2 = {"11", "aa"};
+ auto actual2 = static_cast<TSet<TString>>(StringSplitter("11 aa 11 11 aa").Split(' ')); // explicit static_cast
+ UNIT_ASSERT_VALUES_EQUAL(expected2, actual2);
+
+ TVector<TString> expected3 = {"dd", "bb"};
+ auto actual3 = TVector<TString>(StringSplitter("dd\tbb").Split('\t')); // functional-style cast
+ UNIT_ASSERT_VALUES_EQUAL(expected3, actual3);
+ }
+
+ Y_UNIT_TEST(TestRangeBasedFor) { // tokens can be consumed with range-based for and through begin()/end()
+ TVector<TString> actual0 = {"11", "22", "33", "44"};
+ size_t num = 0;
+ for (TStringBuf elem : StringSplitter("11 22 33 44").Split(' ')) { // each element converts to TStringBuf
+ UNIT_ASSERT_VALUES_EQUAL(elem, actual0[num++]);
+ }
+
+ TVector<TString> actual1 = {"another", "one,", "and", "another", "one"};
+ num = 0;
+ for (TStringBuf elem : StringSplitter(TStringBuf("another one, and \n\n another one")).SplitBySet(" \n").SkipEmpty()) { // adjacent delimiters produce empty tokens, which SkipEmpty() removes
+ UNIT_ASSERT_VALUES_EQUAL(elem, actual1[num++]);
+ }
+
+ TVector<TUtf16String> actual2 = {u"привет,", u"как", u"дела"};
+ num = 0;
+ for (TWtringBuf elem : StringSplitter(u"привет, как дела").Split(wchar16(' '))) { // UTF-16 input with a wchar16 delimiter
+ UNIT_ASSERT_VALUES_EQUAL(elem, actual2[num++]);
+ }
+
+ TVector<TString> copy(4); // pre-sized: Copy() writes through copy.begin()
+ auto v = StringSplitter("11 22 33 44").Split(' ');
+ Copy(v.begin(), v.end(), copy.begin());
+ UNIT_ASSERT_VALUES_EQUAL(actual0, copy);
+ }
+
+ Y_UNIT_TEST(TestParseInto) { // ParseInto() converts every token to the container's element type
+ TVector<int> actual0 = {1, 2, 3, 4};
+ TVector<int> answer0;
+
+ StringSplitter("1 2 3 4").Split(' ').ParseInto(&answer0);
+ UNIT_ASSERT_VALUES_EQUAL(actual0, answer0);
+
+ TVector<int> actual1 = {42, 1, 2, 3, 4};
+ TVector<int> answer1 = {42};
+ StringSplitter("1 2 3 4").Split(' ').ParseInto(&answer1);
+ UNIT_ASSERT_VALUES_EQUAL(actual1, answer1); // the pre-existing 42 stays: parsed values are appended
+
+ answer1.clear();
+ UNIT_ASSERT_EXCEPTION(StringSplitter("1 2 3 4").Split(' ').ParseInto(&answer1), yexception); // NOTE(review): this literal looks whitespace-collapsed in this rendering; presumably it holds an empty token - confirm against the repository
+
+ answer1 = {42};
+ StringSplitter(" 1 2 3 4").Split(' ').SkipEmpty().ParseInto(&answer1); // SkipEmpty() keeps empty tokens away from the parser
+ UNIT_ASSERT_VALUES_EQUAL(actual1, answer1);
+
+ answer1.clear();
+ StringSplitter(" \n 1 2 \n\n\n 3 4\n ").SplitBySet(" \n").SkipEmpty().ParseInto(&answer1);
+ UNIT_ASSERT_VALUES_EQUAL(actual0, answer1);
+ }
+
+ Y_UNIT_TEST(TestStdString) { // std::string input, std::string_view tokens
+ std::vector<std::string_view> r0, r1, answer = {"lol", "zomg"};
+ std::string s = "lol zomg";
+ for (std::string_view ss : StringSplitter(s).Split(' ')) { // range-for path
+ r0.push_back(ss);
+ }
+ StringSplitter(s).Split(' ').Collect(&r1); // Collect() path into a std::vector
+
+ UNIT_ASSERT_VALUES_EQUAL(r0, answer);
+ UNIT_ASSERT_VALUES_EQUAL(r1, answer);
+ }
+
+ Y_UNIT_TEST(TestStdStringView) { // std::string_view input split by a multi-character delimiter
+ std::string_view s = "aaacccbbb";
+ std::vector<std::string_view> expected = {"aaa", "bbb"};
+ std::vector<std::string_view> actual = StringSplitter(s).SplitByString("ccc");
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ }
+
+ Y_UNIT_TEST(TestStdSplitAfterSplit) { // a token of one split can be fed into another StringSplitter
+ std::string_view input = "a*b+a*b";
+ for (std::string_view summand : StringSplitter(input).Split('+')) {
+ //FIXME: std::string is used to workaround MSVC ICE
+ UNIT_ASSERT_VALUES_EQUAL(std::string(summand), "a*b");
+ std::string_view multiplier1, multiplier2;
+ bool splitResult = StringSplitter(summand).Split('*').TryCollectInto(&multiplier1, &multiplier2); // nested split of the outer token
+ UNIT_ASSERT(splitResult);
+ UNIT_ASSERT_VALUES_EQUAL(std::string(multiplier1), "a");
+ UNIT_ASSERT_VALUES_EQUAL(std::string(multiplier2), "b");
+ }
+ }
+
+ Y_UNIT_TEST(TestStdSplitWithParsing) { // ParseInto() converts tokens of a std::string_view input to ui64
+ std::string_view input = "1,2,3,4";
+ TVector<ui64> numbers;
+ const TVector<ui64> expected{1, 2, 3, 4};
+ StringSplitter(input).Split(',').ParseInto(&numbers);
+ UNIT_ASSERT_VALUES_EQUAL(numbers, expected);
+ }
+
+ Y_UNIT_TEST(TestArcadiaStdInterop) { // std inputs combined with Arcadia containers as outputs
+ TVector<TString> expected0 = {"a", "b"};
+ TVector<TStringBuf> expected1 = {"a", "b"};
+ std::string src1("a b");
+ std::string_view src2("a b");
+ TVector<TString> actual0 = StringSplitter(src1).Split(' ').SkipEmpty(); // std::string -> owning TString tokens
+ TVector<TString> actual1 = StringSplitter(src2).Split(' ').SkipEmpty(); // std::string_view -> owning TString tokens
+ TVector<TStringBuf> actual2 = StringSplitter(src1).Split(' ').SkipEmpty(); // std::string -> TStringBuf tokens
+ TVector<TStringBuf> actual3 = StringSplitter(src2).Split(' ').SkipEmpty(); // std::string_view -> TStringBuf tokens
+ UNIT_ASSERT_VALUES_EQUAL(expected0, actual0);
+ UNIT_ASSERT_VALUES_EQUAL(expected0, actual1);
+ UNIT_ASSERT_VALUES_EQUAL(expected1, actual2);
+ UNIT_ASSERT_VALUES_EQUAL(expected1, actual3);
+ }
+
+ Y_UNIT_TEST(TestConstCString) { // splitting a [begin, end) pair of const char*
+ const char* b = "a;b";
+ const char* e = b + 3; // one past the last character of "a;b"
+
+ std::vector<TStringBuf> v;
+ StringSplitter(b, e).Split(';').AddTo(&v);
+
+ std::vector<TStringBuf> expected = {"a", "b"};
+ UNIT_ASSERT_VALUES_EQUAL(v, expected);
+ }
+
+ Y_UNIT_TEST(TestCStringRef) { // splitting a bare mutable char* passed without an explicit length
+ TString s = "lol";
+ char* str = s.Detach(); // yields a non-const pointer to the string's characters
+
+ std::vector<TStringBuf> v = StringSplitter(str).Split('o');
+ std::vector<TStringBuf> expected = {"l", "l"};
+ UNIT_ASSERT_VALUES_EQUAL(v, expected);
+ }
+
+ Y_UNIT_TEST(TestSplitVector) { // splitting a std::vector<char> buffer
+ std::vector<char> buffer = {'a', ';', 'b'}; // holds no terminating '\0' element
+
+ std::vector<TStringBuf> v = StringSplitter(buffer).Split(';');
+
+ std::vector<TStringBuf> expected = {"a", "b"};
+ UNIT_ASSERT_VALUES_EQUAL(v, expected);
+ }
+
+ class TDoubleIterator { // input iterator that reads a char sequence as consecutive two-digit decimal numbers
+ public:
+ using iterator_category = std::input_iterator_tag;
+ using value_type = int;
+ using pointer = void;
+ using reference = int;
+ using const_reference = int;
+ using difference_type = ptrdiff_t;
+
+ TDoubleIterator() = default;
+
+ TDoubleIterator(const char* ptr)
+ : Ptr_(ptr)
+ {
+ }
+
+ TDoubleIterator& operator++() { // pre-increment yields *this by reference, as iterator requirements demand
+ Ptr_ += 2; // one value spans two characters
+ return *this;
+ }
+
+ TDoubleIterator operator++(int) { // post-increment returns the position held before the step
+ TDoubleIterator tmp = *this;
+ ++*this;
+ return tmp;
+ }
+
+ friend bool operator==(TDoubleIterator l, TDoubleIterator r) {
+ return l.Ptr_ == r.Ptr_;
+ }
+
+ friend bool operator!=(TDoubleIterator l, TDoubleIterator r) {
+ return l.Ptr_ != r.Ptr_;
+ }
+
+ int operator*() const { // decodes the two digits at the current position into a value (no reference is handed out)
+ return (*Ptr_ - '0') * 10 + *(Ptr_ + 1) - '0';
+ }
+
+ private:
+ const char* Ptr_ = nullptr;
+ };
+
+ Y_UNIT_TEST(TestInputIterator) { // splitting works over a plain input iterator, not only over character ranges
+ const char* beg = "1213002233000011"; // decodes to 12 13 00 22 33 00 00 11
+ const char* end = beg + strlen(beg);
+
+ std::vector<std::vector<int>> expected = {{12, 13}, {22, 33}, {}, {11}}; // the two adjacent zeros enclose an empty part
+ int i = 0;
+
+ for (TIteratorRange<TDoubleIterator> part : StringSplitter(TDoubleIterator(beg), TDoubleIterator(end)).SplitByFunc([](int value) { return value == 0; })) { // the value 0 acts as the delimiter
+ UNIT_ASSERT(std::equal(part.begin(), part.end(), expected[i].begin(), expected[i].end()));
+ i++;
+ }
+ UNIT_ASSERT_VALUES_EQUAL(i, expected.size()); // every expected part was visited
+ }
+}
diff --git a/util/string/strip.cpp b/util/string/strip.cpp
new file mode 100644
index 0000000000..c921571cf0
--- /dev/null
+++ b/util/string/strip.cpp
@@ -0,0 +1,23 @@
+#include "strip.h"
+#include "ascii.h"
+
+#include <util/string/reverse.h>
+
+bool Collapse(const TString& from, TString& to, size_t maxLen) { // collapses runs matched by IsAsciiSpace; returns what CollapseImpl reports
+ return CollapseImpl<TString, bool (*)(unsigned char)>(from, to, maxLen, IsAsciiSpace); // the explicit function-pointer type presumably selects one IsAsciiSpace overload - confirm in ascii.h
+}
+
+void CollapseText(const TString& from, TString& to, size_t maxLen) { // collapse + strip; a result of maxLen or more characters is cut and ends with " ..."
+ Collapse(from, to, maxLen);
+ StripInPlace(to);
+ if (to.size() >= maxLen) {
+ to.remove(maxLen - 5); // " ..." - NOTE(review): maxLen - 5 wraps around for maxLen < 5; confirm callers never pass such values
+ ReverseInPlace(to); // reversed so that the search below effectively scans from the end of the text
+ size_t pos = to.find_first_of(" .,;");
+ if (pos != TString::npos && pos < 32) { // a separator within the last 32 characters: drop the trailing fragment together with it
+ to.remove(0, pos + 1);
+ }
+ ReverseInPlace(to); // back to the original order
+ to.append(" ...");
+ }
+}
diff --git a/util/string/strip.h b/util/string/strip.h
new file mode 100644
index 0000000000..d5ef6da96d
--- /dev/null
+++ b/util/string/strip.h
@@ -0,0 +1,257 @@
+#pragma once
+
+#include "ascii.h"
+
+#include <util/generic/string.h>
+#include <util/generic/strbuf.h>
+#include <utility>
+
+template <class It>
+struct TIsAsciiSpaceAdapter { // strip criterion: true when IsAsciiSpace() accepts the element the iterator points at
+ bool operator()(const It& it) const noexcept { // takes the iterator itself, not the element
+ return IsAsciiSpace(*it);
+ }
+};
+
+template <class It>
+TIsAsciiSpaceAdapter<It> IsAsciiSpaceAdapter(It) { // the unnamed argument only serves to deduce `It`
+ return {};
+}
+
+template <class TChar>
+struct TEqualsStripAdapter { // strip criterion: matches elements equal to one fixed character
+ TEqualsStripAdapter(TChar ch)
+ : Ch(ch)
+ {
+ }
+
+ template <class It>
+ bool operator()(const It& it) const noexcept { // takes the iterator itself, not the element
+ return *it == Ch;
+ }
+
+ const TChar Ch; // the character to strip
+};
+
+template <class TChar>
+TEqualsStripAdapter<TChar> EqualsStripAdapter(TChar ch) { // factory that deduces TChar from the argument
+ return {ch};
+}
+
+template <class It, class TStripCriterion>
+inline void StripRangeBegin(It& b, const It& e, TStripCriterion&& criterion) noexcept {
+ // advance `b` past every leading element the criterion accepts, never beyond `e`
+ for (; b < e && criterion(b); ++b) {
+ }
+}
+
+template <class It>
+inline void StripRangeBegin(It& b, const It& e) noexcept { // same as the criterion overload, using the IsAsciiSpace-based criterion
+ StripRangeBegin(b, e, IsAsciiSpaceAdapter(b));
+}
+
+template <class It, class TStripCriterion>
+inline void StripRangeEnd(const It& b, It& e, TStripCriterion&& criterion) noexcept {
+ // pull `e` back over every trailing element the criterion accepts, never before `b`
+ for (; b < e && criterion(e - 1); --e) {
+ }
+}
+
+template <class It>
+inline void StripRangeEnd(const It& b, It& e) noexcept { // same as the criterion overload, using the IsAsciiSpace-based criterion
+ StripRangeEnd(b, e, IsAsciiSpaceAdapter(b));
+}
+
+template <bool stripBeg, bool stripEnd>
+struct TStripImpl {
+ template <class It, class TStripCriterion>
+ static inline bool StripRange(It& b, It& e, TStripCriterion&& criterion) noexcept {
+ const size_t lenBefore = e - b;
+ // each side is trimmed only when the matching template flag is set
+ if (stripBeg) {
+ StripRangeBegin(b, e, criterion);
+ }
+ if (stripEnd) {
+ StripRangeEnd(b, e, criterion);
+ }
+
+ // tell the caller whether anything was cut off
+ const size_t lenAfter = e - b;
+ return lenBefore != lenAfter;
+ }
+
+ template <class T, class TStripCriterion>
+ static inline bool StripString(const T& from, T& to, TStripCriterion&& criterion) {
+ auto first = from.begin();
+ auto last = from.end();
+ const bool changed = StripRange(first, last, criterion);
+
+ if (changed) {
+ // a fresh T is built from the narrowed range first and only then assigned
+ to = T(first, last - first);
+ } else {
+ // nothing was stripped: `to` becomes a plain copy of `from`
+ to = from;
+ }
+ return changed;
+ }
+
+ template <class T, class TStripCriterion>
+ static inline T StripString(const T& from, TStripCriterion&& criterion) {
+ T stripped;
+ StripString(from, stripped, criterion);
+ return stripped;
+ }
+
+ template <class T>
+ static inline T StripString(const T& from) {
+ return StripString(from, IsAsciiSpaceAdapter(from.begin()));
+ }
+};
+
+template <class It, class TStripCriterion>
+inline bool StripRange(It& b, It& e, TStripCriterion&& criterion) noexcept { // narrows [b, e) from both sides; true if the range got shorter
+ return TStripImpl<true, true>::StripRange(b, e, criterion);
+}
+
+template <class It>
+inline bool StripRange(It& b, It& e) noexcept { // two-sided narrowing with the IsAsciiSpace-based criterion
+ return StripRange(b, e, IsAsciiSpaceAdapter(b));
+}
+
+template <class It, class TStripCriterion>
+inline bool Strip(It& b, size_t& len, TStripCriterion&& criterion) noexcept {
+ // view the buffer as the range [b, last) and trim it from both sides
+ It last = b + len;
+ const bool trimmed = StripRange(b, last, criterion);
+
+ // `len` is rewritten only when something was actually cut off
+ if (trimmed) {
+ len = last - b;
+ }
+ return trimmed;
+}
+
+template <class It>
+inline bool Strip(It& b, size_t& len) noexcept { // pointer + length form, using the IsAsciiSpace-based criterion
+ return Strip(b, len, IsAsciiSpaceAdapter(b));
+}
+
+template <class T, class TStripCriterion>
+static inline bool StripString(const T& from, T& to, TStripCriterion&& criterion) { // strips both ends into `to`; true if anything was removed
+ return TStripImpl<true, true>::StripString(from, to, criterion);
+}
+
+template <class T>
+static inline bool StripString(const T& from, T& to) { // two-sided strip into `to` with the IsAsciiSpace-based criterion
+ return StripString(from, to, IsAsciiSpaceAdapter(from.begin()));
+}
+
+template <class T, class TStripCriterion>
+static inline T StripString(const T& from, TStripCriterion&& criterion) { // returning form: strips both ends with a custom criterion
+ return TStripImpl<true, true>::StripString(from, criterion);
+}
+
+template <class T>
+static inline T StripString(const T& from) { // returning form: strips both ends with the IsAsciiSpace-based criterion
+ return TStripImpl<true, true>::StripString(from);
+}
+
+template <class T>
+static inline T StripStringLeft(const T& from) { // strips only the beginning (stripBeg = true, stripEnd = false)
+ return TStripImpl<true, false>::StripString(from);
+}
+
+template <class T>
+static inline T StripStringRight(const T& from) { // strips only the end (stripBeg = false, stripEnd = true)
+ return TStripImpl<false, true>::StripString(from);
+}
+
+template <class T, class TStripCriterion>
+static inline T StripStringLeft(const T& from, TStripCriterion&& criterion) { // strips only the beginning, with a custom criterion
+ return TStripImpl<true, false>::StripString(from, criterion);
+}
+
+template <class T, class TStripCriterion>
+static inline T StripStringRight(const T& from, TStripCriterion&& criterion) { // strips only the end, with a custom criterion
+ return TStripImpl<false, true>::StripString(from, criterion);
+}
+
+/// Copies the given string removing leading and trailing spaces; returns true if anything was removed.
+static inline bool Strip(const TString& from, TString& to) {
+ return StripString(from, to);
+}
+
+/// Removes leading and trailing spaces from the string and returns a reference to it.
+inline TString& StripInPlace(TString& s) {
+ Strip(s, s); // source and destination are the same object here
+ return s;
+}
+
+/// Returns a copy of the given string with removed leading and trailing spaces.
+inline TString Strip(const TString& s) Y_WARN_UNUSED_RESULT; // separate declaration that carries the warn-unused attribute
+inline TString Strip(const TString& s) {
+ TString ret = s;
+ Strip(ret, ret); // strip the copy in place
+ return ret;
+}
+
+template <class TChar, class TWhitespaceFunc>
+size_t CollapseImpl(TChar* s, size_t n, const TWhitespaceFunc& isWhitespace) {
+ size_t out = 0; // write cursor; it never gets ahead of the read cursor `in`
+ for (size_t in = 0; in < n; ++out) {
+ size_t runEnd = in; // end of the whitespace run that starts at `in` (the run may be empty)
+ while (runEnd < n && isWhitespace(s[runEnd])) {
+ ++runEnd;
+ }
+ const size_t runLen = runEnd - in;
+ if (runLen == 0 || (runLen == 1 && s[in] == ' ')) {
+ s[out] = s[in++]; // ordinary character or a lone ' ': copied unchanged
+ } else {
+ s[out] = ' '; // longer run, or a single whitespace other than ' ': replaced by one ' '
+ in = runEnd;
+ }
+ }
+ return out; // new length of the rewritten prefix
+}
+
+template <class TStringType, class TWhitespaceFunc>
+bool CollapseImpl(const TStringType& from, TStringType& to, size_t maxLen, const TWhitespaceFunc& isWhitespace) {
+ to = from; // work on a copy; the return value tells whether a spot needing collapsing was found
+ maxLen = maxLen ? Min(maxLen, to.size()) : to.size(); // maxLen == 0 means "process the whole string"
+ for (size_t i = 0; i < maxLen; ++i) {
+ if (isWhitespace(to[i]) && (to[i] != ' ' || isWhitespace(to[i + 1]))) { // first spot to collapse; note that to[i + 1] can be index size()
+ size_t tailSize = maxLen - i;
+ size_t newTailSize = CollapseImpl(to.begin() + i, tailSize, isWhitespace); // rewrite the rest of the window in place
+ to.remove(i + newTailSize, tailSize - newTailSize); // cut out the characters freed by collapsing
+ return true;
+ }
+ }
+ return false; // no such spot within the first maxLen characters; `to` is a plain copy
+}
+
+bool Collapse(const TString& from, TString& to, size_t maxLen = 0); // out-of-line; maxLen == 0 processes the whole string
+
+/// Replaces several consecutive space symbols with one (processing is limited to maxLen bytes)
+inline TString& CollapseInPlace(TString& s, size_t maxLen = 0) {
+ Collapse(s, s, maxLen); // source and destination are the same object here
+ return s;
+}
+
+/// Replaces several consecutive space symbols with one (processing is limited to maxLen bytes)
+inline TString Collapse(const TString& s, size_t maxLen = 0) Y_WARN_UNUSED_RESULT; // this declaration carries the default argument and the attribute
+inline TString Collapse(const TString& s, size_t maxLen) {
+ TString ret;
+ Collapse(s, ret, maxLen);
+ return ret;
+}
+
+void CollapseText(const TString& from, TString& to, size_t maxLen); // out-of-line two-string form; note that maxLen has no default here
+
+/// The same as Collapse() + truncates the string to maxLen.
+/// @details An ellipsis is inserted at the end of the truncated line.
+inline void CollapseText(TString& s, size_t maxLen) {
+ TString to; // separate destination, assigned back to `s` afterwards
+ CollapseText(s, to, maxLen);
+ s = to;
+}
diff --git a/util/string/strip_ut.cpp b/util/string/strip_ut.cpp
new file mode 100644
index 0000000000..d1029d1498
--- /dev/null
+++ b/util/string/strip_ut.cpp
@@ -0,0 +1,138 @@
+#include "strip.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/charset/wide.h>
+
+Y_UNIT_TEST_SUITE(TStripStringTest) {
+ Y_UNIT_TEST(TestStrip) { // default (IsAsciiSpace-based) stripping for TString and TStringBuf
+ struct TTest {
+ const char* Str;
+ const char* StripLeftRes;
+ const char* StripRightRes;
+ const char* StripRes;
+ };
+ static const TTest tests[] = {
+ {" 012 ", "012 ", " 012", "012"},
+ {" 012", "012", " 012", "012"},
+ {"012\t\t", "012\t\t", "012", "012"},
+ {"\t012\t", "012\t", "\t012", "012"},
+ {"012", "012", "012", "012"},
+ {"012\r\n", "012\r\n", "012", "012"},
+ {"\n012\r", "012\r", "\n012", "012"},
+ {"\n \t\r", "", "", ""},
+ {"", "", "", ""},
+ {"abc", "abc", "abc", "abc"},
+ {"a c", "a c", "a c", "a c"},
+ };
+
+ for (const auto& test : tests) {
+ TString inputStr(test.Str);
+
+ TString s;
+ Strip(inputStr, s);
+ UNIT_ASSERT_EQUAL(s, test.StripRes);
+
+ UNIT_ASSERT_EQUAL(StripString(inputStr), test.StripRes);
+ UNIT_ASSERT_EQUAL(StripStringLeft(inputStr), test.StripLeftRes);
+ UNIT_ASSERT_EQUAL(StripStringRight(inputStr), test.StripRightRes);
+
+ TStringBuf inputStrBuf(test.Str); // the same expectations must hold for the non-owning view type
+ UNIT_ASSERT_EQUAL(StripString(inputStrBuf), test.StripRes);
+ UNIT_ASSERT_EQUAL(StripStringLeft(inputStrBuf), test.StripLeftRes);
+ UNIT_ASSERT_EQUAL(StripStringRight(inputStrBuf), test.StripRightRes);
+ }
+ }
+
+ Y_UNIT_TEST(TestCustomStrip) { // two-sided stripping with a custom single-character criterion
+ struct TTest {
+ const char* Str;
+ const char* Result;
+ };
+ static const TTest tests[] = {
+ {"//012//", "012"},
+ {"//012", "012"},
+ {"012", "012"},
+ {"012//", "012"},
+ };
+
+ for (const auto& test : tests) {
+ UNIT_ASSERT_EQUAL(
+ StripString(TString(test.Str), EqualsStripAdapter('/')),
+ test.Result);
+ }
+ }
+
+ Y_UNIT_TEST(TestCustomStripLeftRight) { // one-sided stripping with a custom single-character criterion
+ struct TTest {
+ const char* Str;
+ const char* ResultLeft;
+ const char* ResultRight;
+ };
+ static const TTest tests[] = {
+ {"//012//", "012//", "//012"},
+ {"//012", "012", "//012"},
+ {"012", "012", "012"},
+ {"012//", "012//", "012"},
+ };
+
+ for (const auto& test : tests) {
+ UNIT_ASSERT_EQUAL(
+ StripStringLeft(TString(test.Str), EqualsStripAdapter('/')),
+ test.ResultLeft);
+ UNIT_ASSERT_EQUAL(
+ StripStringRight(TString(test.Str), EqualsStripAdapter('/')),
+ test.ResultRight);
+ }
+ }
+
+ Y_UNIT_TEST(TestNullStringStrip) { // a view built from two null pointers strips to an empty string
+ TStringBuf nullString(nullptr, nullptr);
+ UNIT_ASSERT_EQUAL(
+ StripString(nullString),
+ TString());
+ }
+
+ Y_UNIT_TEST(TestWtrokaStrip) { // default stripping over UTF-16 views
+ UNIT_ASSERT_EQUAL(StripString(TWtringBuf(u" abc ")), u"abc");
+ UNIT_ASSERT_EQUAL(StripStringLeft(TWtringBuf(u" abc ")), u"abc ");
+ UNIT_ASSERT_EQUAL(StripStringRight(TWtringBuf(u" abc ")), u" abc");
+ }
+
+ Y_UNIT_TEST(TestWtrokaCustomStrip) { // custom criterion over UTF-16 views
+ UNIT_ASSERT_EQUAL(
+ StripString(
+ TWtringBuf(u"/abc/"),
+ EqualsStripAdapter(u'/')),
+ u"abc");
+ }
+
+ Y_UNIT_TEST(TestCollapse) { // NOTE(review): the literals below look whitespace-collapsed in this rendering - confirm against the repository
+ TString s;
+ Collapse(TString(" 123 456 "), s);
+ UNIT_ASSERT(s == " 123 456 ");
+ Collapse(TString(" 123 456 "), s, 10);
+ UNIT_ASSERT(s == " 123 456 ");
+
+ s = TString(" a b c ");
+ TString s2 = s;
+ CollapseInPlace(s2);
+
+ UNIT_ASSERT(s == s2);
+#ifndef TSTRING_IS_STD_STRING
+ UNIT_ASSERT(s.c_str() == s2.c_str()); // Collapse() does not change the string at all
+#endif
+ }
+
+ Y_UNIT_TEST(TestCollapseText) { // truncation with an ellipsis for three different limits
+ TString abs1("Very long description string written in unknown language.");
+ TString abs2(abs1);
+ TString abs3(abs1);
+ CollapseText(abs1, 204); // limit above the text length: the text stays as it is
+ CollapseText(abs2, 54);
+ CollapseText(abs3, 49);
+ UNIT_ASSERT_EQUAL(abs1 == "Very long description string written in unknown language.", true);
+ UNIT_ASSERT_EQUAL(abs2 == "Very long description string written in unknown ...", true);
+ UNIT_ASSERT_EQUAL(abs3 == "Very long description string written in ...", true);
+ }
+}
diff --git a/util/string/strspn.cpp b/util/string/strspn.cpp
new file mode 100644
index 0000000000..cdb8d7ca9b
--- /dev/null
+++ b/util/string/strspn.cpp
@@ -0,0 +1 @@
+#include "strspn.h"
diff --git a/util/string/strspn.h b/util/string/strspn.h
new file mode 100644
index 0000000000..8229e74a9c
--- /dev/null
+++ b/util/string/strspn.h
@@ -0,0 +1,65 @@
+#pragma once
+
+#include "cstriter.h"
+
+#include <util/generic/bitmap.h>
+
+template <class TSetType>
+class TStrSpnImpl {
+public:
+ inline TStrSpnImpl(const char* b, const char* e) {
+ Init(b, e);
+ }
+
+ inline TStrSpnImpl(const char* s) {
+ Init(s, TCStringEndIterator());
+ }
+
+ // first position whose byte IS in the set (non-members are skipped)
+ template <class It>
+ inline It FindFirstOf(It b, const char* e) const noexcept {
+ return FindFirst<false>(b, e);
+ }
+
+ template <class It>
+ inline It FindFirstOf(It s) const noexcept {
+ return FindFirst<false>(s, TCStringEndIterator());
+ }
+
+ // first position whose byte is NOT in the set (members are skipped)
+ template <class It>
+ inline It FindFirstNotOf(It b, const char* e) const noexcept {
+ return FindFirst<true>(b, e);
+ }
+
+ template <class It>
+ inline It FindFirstNotOf(It s) const noexcept {
+ return FindFirst<true>(s, TCStringEndIterator());
+ }
+
+ inline void Set(ui8 b) noexcept {
+ S_.Set(b);
+ }
+
+private:
+ template <bool SkipMembers, class TCur, class TEnd>
+ inline TCur FindFirst(TCur cur, TEnd end) const noexcept {
+ // keep walking while the membership of the current byte equals SkipMembers
+ for (; cur != end && (S_.Get((ui8)*cur) == SkipMembers); ++cur) {
+ }
+
+ return cur;
+ }
+
+ template <class TCur, class TEnd>
+ inline void Init(TCur cur, TEnd end) {
+ for (; cur != end; ++cur) {
+ this->Set((ui8)*cur);
+ }
+ }
+
+private:
+ TSetType S_;
+};
+
+using TCompactStrSpn = TStrSpnImpl<TBitMap<256>>; // 256 entries: one per possible ui8 value
diff --git a/util/string/subst.cpp b/util/string/subst.cpp
new file mode 100644
index 0000000000..b2df328dc1
--- /dev/null
+++ b/util/string/subst.cpp
@@ -0,0 +1,201 @@
+#include "subst.h"
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/system/compiler.h>
+
+#include <string>
+#include <type_traits>
+
+// Helper of the in-place replacement: moves the untouched run [srcPos, off) to dstPos and, when Main is set, writes `to` after it.
+template <class TStringType, class TTo, bool Main>
+static Y_FORCE_INLINE void MoveBlock(typename TStringType::value_type* ptr, size_t& srcPos, size_t& dstPos, const size_t off, const TTo to, const size_t toSize) {
+ const size_t keep = off - srcPos;
+ if (dstPos >= srcPos) {
+ // the write cursor has not fallen behind the read cursor: just skip over the run
+ srcPos += keep;
+ dstPos += keep;
+ } else {
+ for (size_t left = keep; left != 0; --left) {
+ ptr[dstPos++] = ptr[srcPos++];
+ }
+ }
+ if (Main) {
+ for (size_t k = 0; k != toSize; ++k) {
+ ptr[dstPos++] = to[k];
+ }
+ }
+}
+
+template <typename T, typename U>
+static bool IsIntersect(const T& a, const U& b) noexcept {
+ // normalize the order so that `a` is the range that does not start after `b`
+ if (b.data() < a.data()) {
+ return IsIntersect(b, a);
+ }
+
+ // with that order, two non-empty ranges overlap exactly when `b` starts before `a` ends
+ return !a.empty() && !b.empty() && b.data() < a.data() + a.size();
+}
+
+/**
+ * Replaces all occurrences of substring @c from in string @c s with string @c to, searching from @c fromPos.
+ * Uses two separate implementations (inplace for shrink and append for grow case), see IGNIETFERRO-394.
+ * Returns the number of replacements made; an empty @c from replaces nothing.
+ **/
+template <class TStringType, typename TStringViewType = TBasicStringBuf<typename TStringType::value_type>>
+static inline size_t SubstGlobalImpl(TStringType& s, const TStringViewType from, const TStringViewType to, size_t fromPos = 0) {
+ if (from.empty()) {
+ return 0;
+ }
+
+ Y_ASSERT(!IsIntersect(s, from)); // the pattern must not point into the string being rewritten
+ Y_ASSERT(!IsIntersect(s, to)); // neither must the replacement
+
+ const size_t fromSize = from.size();
+ const size_t toSize = to.size();
+ size_t replacementsCount = 0;
+ size_t off = fromPos; // position of the current match
+ size_t srcPos = 0; // first character of `s` not yet copied or skipped
+
+ if (toSize > fromSize) {
+ // string will grow: append to another string
+ TStringType result;
+ for (; (off = TStringViewType(s).find(from, off)) != TStringType::npos; off += fromSize) {
+ if (!replacementsCount) {
+ // first replacement occurred, we can prepare result string
+ result.reserve(s.size() + s.size() / 3);
+ }
+ result.append(s.begin() + srcPos, s.begin() + off);
+ result.append(to.data(), to.size());
+ srcPos = off + fromSize;
+ ++replacementsCount;
+ }
+ if (replacementsCount) {
+ // append tail
+ result.append(s.begin() + srcPos, s.end());
+ s = std::move(result);
+ }
+ return replacementsCount; // `s` is left untouched when nothing matched
+ }
+
+ // string will not grow: use inplace algo
+ size_t dstPos = 0;
+ typename TStringType::value_type* ptr = &*s.begin();
+ for (; (off = TStringViewType(s).find(from, off)) != TStringType::npos; off += fromSize) {
+ Y_ASSERT(dstPos <= srcPos);
+ MoveBlock<TStringType, TStringViewType, true>(ptr, srcPos, dstPos, off, to, toSize);
+ srcPos = off + fromSize;
+ ++replacementsCount;
+ }
+
+ if (replacementsCount) {
+ // append tail
+ MoveBlock<TStringType, TStringViewType, false>(ptr, srcPos, dstPos, s.size(), to, toSize);
+ s.resize(dstPos); // drop the characters left over after compaction
+ }
+ return replacementsCount;
+}
+
+/// Replaces all occurrences of the 'from' symbol in a string with the 'to' symbol, starting at fromPos; returns the number of replacements.
+template <class TStringType>
+inline size_t SubstCharGlobalImpl(TStringType& s, typename TStringType::value_type from, typename TStringType::value_type to, size_t fromPos = 0) {
+ if (fromPos >= s.size()) {
+ return 0;
+ }
+
+ size_t result = 0;
+ fromPos = s.find(from, fromPos);
+
+ // s.begin() might cause memory copying, so call it only if needed
+ if (fromPos != TStringType::npos) {
+ auto* it = &*s.begin() + fromPos;
+ // the end pointer is derived from `it`, so the end() iterator is never dereferenced
+ const auto* const sEnd = it + (s.size() - fromPos);
+ *it++ = to; // step past the first match: re-checking it would count it twice when from == to
+ ++result;
+ // unrolled loop goes first because it is more likely that `it` will be properly aligned
+ for (const auto* const end = sEnd - (sEnd - it) % 4; it < end;) {
+ if (*it == from) {
+ *it = to;
+ ++result;
+ }
+ ++it;
+ if (*it == from) {
+ *it = to;
+ ++result;
+ }
+ ++it;
+ if (*it == from) {
+ *it = to;
+ ++result;
+ }
+ ++it;
+ if (*it == from) {
+ *it = to;
+ ++result;
+ }
+ ++it;
+ }
+ for (; it < sEnd; ++it) {
+ if (*it == from) {
+ *it = to;
+ ++result;
+ }
+ }
+ }
+
+ return result;
+}
+
+/* Standard says that `char16_t` is a distinct type and has same size, signedness and alignment as
+ * `std::uint_least16_t` (and `char32_t` likewise relates to `std::uint_least32_t`), so we check that they match
+ * `wchar16` and `wchar32` in size and signedness to be sure that casts between values of these types and pointers are safe.
+ */
+static_assert(sizeof(wchar16) == sizeof(char16_t), "");
+static_assert(sizeof(wchar32) == sizeof(char32_t), "");
+static_assert(std::is_unsigned<wchar16>::value == std::is_unsigned<char16_t>::value, "");
+static_assert(std::is_unsigned<wchar32>::value == std::is_unsigned<char32_t>::value, "");
+
+size_t SubstGlobal(TString& text, const TStringBuf what, const TStringBuf with, size_t from) { // TString overload: forwards to the shared substring implementation
+ return SubstGlobalImpl(text, what, with, from);
+}
+
+size_t SubstGlobal(std::string& text, const TStringBuf what, const TStringBuf with, size_t from) { // std::string overload: forwards to the shared substring implementation
+ return SubstGlobalImpl(text, what, with, from);
+}
+
+size_t SubstGlobal(TUtf16String& text, const TWtringBuf what, const TWtringBuf with, size_t from) { // TUtf16String overload: forwards to the shared substring implementation
+ return SubstGlobalImpl(text, what, with, from);
+}
+
+size_t SubstGlobal(TUtf32String& text, const TUtf32StringBuf what, const TUtf32StringBuf with, size_t from) { // TUtf32String overload: forwards to the shared substring implementation
+ return SubstGlobalImpl(text, what, with, from);
+}
+
+size_t SubstGlobal(std::u16string& text, const TWtringBuf what, const TWtringBuf with, size_t from) { // std::u16string overload
+ return SubstGlobalImpl(text,
+ std::u16string_view(reinterpret_cast<const char16_t*>(what.data()), what.size()), // re-viewed as char16_t; the static_asserts in this file check size and signedness
+ std::u16string_view(reinterpret_cast<const char16_t*>(with.data()), with.size()),
+ from);
+}
+
+size_t SubstGlobal(TString& text, char what, char with, size_t from) { // TString overload: forwards to the shared single-character implementation
+ return SubstCharGlobalImpl(text, what, with, from);
+}
+
+size_t SubstGlobal(std::string& text, char what, char with, size_t from) { // std::string overload: forwards to the shared single-character implementation
+ return SubstCharGlobalImpl(text, what, with, from);
+}
+
+size_t SubstGlobal(TUtf16String& text, wchar16 what, wchar16 with, size_t from) { // TUtf16String overload of the single-character replacement
+ return SubstCharGlobalImpl(text, static_cast<char16_t>(what), static_cast<char16_t>(with), from);
+}
+
+size_t SubstGlobal(std::u16string& text, wchar16 what, wchar16 with, size_t from) { // std::u16string overload of the single-character replacement
+ return SubstCharGlobalImpl(text, static_cast<char16_t>(what), static_cast<char16_t>(with), from);
+}
+
+size_t SubstGlobal(TUtf32String& text, wchar32 what, wchar32 with, size_t from) { // TUtf32String overload of the single-character replacement
+ return SubstCharGlobalImpl(text, static_cast<char32_t>(what), static_cast<char32_t>(with), from);
+}
diff --git a/util/string/subst.h b/util/string/subst.h
new file mode 100644
index 0000000000..45b622fbef
--- /dev/null
+++ b/util/string/subst.h
@@ -0,0 +1,56 @@
+#pragma once
+
+#include <util/generic/fwd.h>
+
+#include <stlfwd>
+
+/* Replace all occurrences of substring `what` with string `with` starting from position `from`.
+ *
+ * @param text String to modify.
+ * @param what Substring to replace.
+ * @param with Substring to use as replacement.
+ * @param from Position at which to start replacement.
+ *
+ * @return Number of replacements that occurred.
+ */
+size_t SubstGlobal(TString& text, TStringBuf what, TStringBuf with, size_t from = 0);
+size_t SubstGlobal(std::string& text, TStringBuf what, TStringBuf with, size_t from = 0);
+size_t SubstGlobal(TUtf16String& text, TWtringBuf what, TWtringBuf with, size_t from = 0);
+size_t SubstGlobal(std::u16string& text, TWtringBuf what, TWtringBuf with, size_t from = 0);
+size_t SubstGlobal(TUtf32String& text, TUtf32StringBuf what, TUtf32StringBuf with, size_t from = 0);
+
+/* Replace all occurrences of character `what` with character `with` starting from position `from`.
+ *
+ * @param text String to modify.
+ * @param what Character to replace.
+ * @param with Character to use as replacement.
+ * @param from Position at which to start replacement.
+ *
+ * @return Number of replacements that occurred.
+ */
+size_t SubstGlobal(TString& text, char what, char with, size_t from = 0);
+size_t SubstGlobal(std::string& text, char what, char with, size_t from = 0);
+size_t SubstGlobal(TUtf16String& text, wchar16 what, wchar16 with, size_t from = 0);
+size_t SubstGlobal(std::u16string& text, wchar16 what, wchar16 with, size_t from = 0);
+size_t SubstGlobal(TUtf32String& text, wchar32 what, wchar32 with, size_t from = 0);
+
+// TODO(yazevnul):
+// - rename `SubstGlobal` to `ReplaceAll` for convenience
+// - add `SubstGlobalCopy(TStringBuf)` for convenience
+// - add `RemoveAll(text, what, from)` as a shortcut for `SubstGlobal(text, what, "", from)`
+// - rename file to `replace.h`
+
+/* Replace all occurrences of substring or character `what` with string or character `with` starting from position `from`, and return result string.
+ *
+ * @param result Source string; taken by value, so the caller's string is left untouched.
+ * @param what Substring/character to replace.
+ * @param with Substring/character to use as replacement.
+ * @param from Position at which to start replacement.
+ *
+ * @return Result string
+ */
+template <class TStringType, class TPatternType>
+Y_WARN_UNUSED_RESULT TStringType SubstGlobalCopy(TStringType result, TPatternType what, TPatternType with, size_t from = 0) {
+ SubstGlobal(result, what, with, from); // the replacement count is not reported by this form
+ return result;
+}
diff --git a/util/string/subst_ut.cpp b/util/string/subst_ut.cpp
new file mode 100644
index 0000000000..21eccef779
--- /dev/null
+++ b/util/string/subst_ut.cpp
@@ -0,0 +1,253 @@
+#include "join.h"
+#include "subst.h"
+#include <string>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(TStringSubst) {
+ static const size_t MIN_FROM_CTX = 4; // extra characters allowed per pattern when sizing the generated test strings
+ static const TVector<TString> ALL_FROM{TString("F"), TString("FF")}; // patterns to search for
+ static const TVector<TString> ALL_TO{TString(""), TString("T"), TString("TT"), TString("TTT")}; // replacements of length 0..3
+
+    static void AssertSubstGlobal(const TString& sFrom, const TString& sTo, const TString& from, const TString& to, const size_t fromPos, const size_t numSubst) {
+        // One diagnostic string serves every assertion below.
+        const TString ctx = TStringBuilder() << "numSubst=" << numSubst << ", fromPos=" << fromPos << ", " << sFrom << " -> " << sTo;
+
+        TString s = sFrom;
+        const size_t res = SubstGlobal(s, from, to, fromPos);
+        UNIT_ASSERT_VALUES_EQUAL_C(res, numSubst, ctx);
+        if (!numSubst) {
+            // ensure s didn't trigger copy-on-write
+            UNIT_ASSERT_VALUES_EQUAL_C(s.c_str(), sFrom.c_str(), ctx);
+            return;
+        }
+        UNIT_ASSERT_STRINGS_EQUAL_C(s, sTo, ctx);
+    }
+
+ Y_UNIT_TEST(TestSubstGlobalNoSubstA) { // the text consists of '.' only, so the pattern never occurs
+ for (const auto& from : ALL_FROM) {
+ const size_t fromSz = from.size();
+ const size_t minSz = fromSz;
+ const size_t maxSz = fromSz + MIN_FROM_CTX;
+ for (size_t sz = minSz; sz <= maxSz; ++sz) {
+ for (size_t fromPos = 0; fromPos < sz; ++fromPos) {
+ TString s{sz, '.'};
+ for (const auto& to : ALL_TO) {
+ AssertSubstGlobal(s, s, from, to, fromPos, 0); // expect zero replacements
+ }
+ }
+ }
+ }
+ }
+
+ Y_UNIT_TEST(TestSubstGlobalNoSubstB) { // the pattern occurs once, but before the start position
+ for (const auto& from : ALL_FROM) {
+ const size_t fromSz = from.size();
+ const size_t minSz = fromSz;
+ const size_t maxSz = fromSz + MIN_FROM_CTX;
+ for (size_t sz = minSz; sz <= maxSz; ++sz) {
+ for (size_t fromPos = 0; fromPos <= sz - fromSz; ++fromPos) {
+ for (size_t fromBeg = 0; fromBeg < fromPos; ++fromBeg) { // fromBeg < fromPos: the occurrence starts before the search start
+ const auto parts = {
+ TString{fromBeg, '.'},
+ TString{sz - fromSz - fromBeg, '.'}};
+ TString s = JoinSeq(from, parts); // the two runs of dots joined with `from` as the separator
+ for (const auto& to : ALL_TO) {
+ AssertSubstGlobal(s, s, from, to, fromPos, 0); // expect zero replacements
+ }
+ }
+ }
+ }
+ }
+ }
+
+ static void DoTestSubstGlobal(TVector<TString>& parts, const size_t minBeg, const size_t sz,
+ const TString& from, const size_t fromPos, const size_t numSubst) {
+ const size_t numLeft = numSubst - parts.size(); // occurrences of `from` still to be placed
+ for (size_t fromBeg = minBeg; fromBeg <= sz - numLeft * from.size(); ++fromBeg) {
+ if (parts.empty()) {
+ parts.emplace_back(fromBeg, '.'); // first run of dots: everything before the first occurrence
+ } else {
+ parts.emplace_back(fromBeg - minBeg, '.'); // dots between the previous occurrence (ending at minBeg) and this one
+ }
+
+ if (numLeft == 1) { // last occurrence: add the trailing dots and run the checks
+ parts.emplace_back(sz - fromBeg - from.size(), '.');
+ TString sFrom = JoinSeq(from, parts);
+ UNIT_ASSERT_VALUES_EQUAL_C(sFrom.size(), sz, sFrom);
+ for (const auto& to : ALL_TO) {
+ TString sTo = JoinSeq(to, parts); // same layout with `to` in place of every `from`
+ AssertSubstGlobal(sFrom, sTo, from, to, fromPos, numSubst);
+ }
+ parts.pop_back(); // drop the trailing run of dots
+ } else {
+ DoTestSubstGlobal(parts, fromBeg + from.size(), sz, from, fromPos, numSubst); // place the next occurrence after this one
+ }
+
+ parts.pop_back(); // backtrack: drop this level's run before trying the next fromBeg
+ }
+ }
+
+ static void DoTestSubstGlobal(size_t numSubst) { // driver: enumerates patterns, text sizes and start positions
+ TVector<TString> parts;
+ for (const auto& from : ALL_FROM) {
+ const size_t fromSz = from.size();
+ const size_t minSz = numSubst * fromSz; // text made of the occurrences only
+ const size_t maxSz = numSubst * (fromSz + MIN_FROM_CTX);
+ for (size_t sz = minSz; sz <= maxSz; ++sz) {
+ const size_t maxPos = sz - numSubst * fromSz; // last start position that still leaves room for all occurrences
+ for (size_t fromPos = 0; fromPos <= maxPos; ++fromPos) {
+ DoTestSubstGlobal(parts, fromPos, sz, from, fromPos, numSubst); // minBeg == fromPos: every occurrence lies at or after fromPos
+ }
+ }
+ }
+ }
+
+ Y_UNIT_TEST(TestSubstGlobalSubst1) {
+ DoTestSubstGlobal(1); // texts with exactly one occurrence
+ }
+
+ Y_UNIT_TEST(TestSubstGlobalSubst2) {
+ DoTestSubstGlobal(2); // texts with exactly two occurrences
+ }
+
+ Y_UNIT_TEST(TestSubstGlobalSubst3) {
+ DoTestSubstGlobal(3); // texts with exactly three occurrences
+ }
+
+ Y_UNIT_TEST(TestSubstGlobalSubst4) {
+ DoTestSubstGlobal(4); // texts with exactly four occurrences
+ }
+
+ Y_UNIT_TEST(TestSubstGlobalOld) {
+ TString s;
+ s = "aaa";
+ SubstGlobal(s, "a", "bb"); // replacement longer than the pattern
+ UNIT_ASSERT_EQUAL(s, TString("bbbbbb"));
+ s = "aaa";
+ SubstGlobal(s, "a", "b"); // replacement of the same length
+ UNIT_ASSERT_EQUAL(s, TString("bbb"));
+ s = "aaa";
+ SubstGlobal(s, "a", ""); // empty replacement removes the pattern
+ UNIT_ASSERT_EQUAL(s, TString(""));
+ s = "abcdefbcbcdfb";
+ SubstGlobal(s, "bc", "bbc", 2); // from = 2 skips the occurrence at index 1
+ UNIT_ASSERT_EQUAL(s, TString("abcdefbbcbbcdfb"));
+ s = "Москва ~ Париж";
+ SubstGlobal(s, " ~ ", " "); // non-ASCII text around the pattern
+ UNIT_ASSERT_EQUAL(s, TString("Москва Париж"));
+ }
+
+ Y_UNIT_TEST(TestSubstGlobalOldRet) { // same cases as TestSubstGlobalOld, through SubstGlobalCopy on const inputs
+ const TString s1 = "aaa";
+ const TString s2 = SubstGlobalCopy(s1, "a", "bb");
+ UNIT_ASSERT_EQUAL(s2, TString("bbbbbb"));
+
+ const TString s3 = "aaa";
+ const TString s4 = SubstGlobalCopy(s3, "a", "b");
+ UNIT_ASSERT_EQUAL(s4, TString("bbb"));
+
+ const TString s5 = "aaa";
+ const TString s6 = SubstGlobalCopy(s5, "a", "");
+ UNIT_ASSERT_EQUAL(s6, TString(""));
+
+ const TString s7 = "abcdefbcbcdfb";
+ const TString s8 = SubstGlobalCopy(s7, "bc", "bbc", 2); // from = 2 skips the occurrence at index 1
+ UNIT_ASSERT_EQUAL(s8, TString("abcdefbbcbbcdfb"));
+
+ const TString s9 = "Москва ~ Париж";
+ const TString s10 = SubstGlobalCopy(s9, " ~ ", " ");
+ UNIT_ASSERT_EQUAL(s10, TString("Москва Париж"));
+ }
+
+ Y_UNIT_TEST(TestSubstCharGlobal) {
+ TUtf16String w = u"abcdabcd";
+ SubstGlobal(w, wchar16('b'), wchar16('B'), 3); // from = 3 leaves the 'b' at index 1 untouched
+ UNIT_ASSERT_EQUAL(w, u"abcdaBcd");
+
+ TString s = "aaa";
+ SubstGlobal(s, 'a', 'b', 1); // from = 1 leaves the first 'a' untouched
+ UNIT_ASSERT_EQUAL(s, TString("abb"));
+ }
+
+ Y_UNIT_TEST(TestSubstCharGlobalRet) { // character overloads through SubstGlobalCopy on const inputs
+ const TUtf16String w1 = u"abcdabcd";
+ const TUtf16String w2 = SubstGlobalCopy(w1, wchar16('b'), wchar16('B'), 3);
+ UNIT_ASSERT_EQUAL(w2, u"abcdaBcd");
+
+ const TString s1 = "aaa";
+ const TString s2 = SubstGlobalCopy(s1, 'a', 'b', 1);
+ UNIT_ASSERT_EQUAL(s2, TString("abb"));
+ }
+
+ Y_UNIT_TEST(TestSubstStdString) { // std::string as the text type
+ std::string s = "aaa";
+ SubstGlobal(s, "a", "b", 1);
+ UNIT_ASSERT_EQUAL(s, "abb");
+ }
+
+ Y_UNIT_TEST(TestSubstStdStringRet) { // std::string through SubstGlobalCopy
+ const std::string s1 = "aaa";
+ const std::string s2 = SubstGlobalCopy(s1, "a", "b", 1);
+ UNIT_ASSERT_EQUAL(s2, "abb");
+ }
+
+    Y_UNIT_TEST(TestSubstGlobalChar) {
+        // The same check for every length from 1 to 8: an all-'a' string must become
+        // all-'b', and the reported number of replacements must equal the length.
+        for (size_t len = 1; len <= 8; ++len) {
+            const TString s(len, 'a');
+            const TString st(len, 'b');
+            TString ss = s;
+            UNIT_ASSERT_VALUES_EQUAL(s.size(), SubstGlobal(ss, 'a', 'b'));
+            UNIT_ASSERT_VALUES_EQUAL(st, ss);
+        }
+    }
+}
diff --git a/util/string/type.cpp b/util/string/type.cpp
new file mode 100644
index 0000000000..49671c02c2
--- /dev/null
+++ b/util/string/type.cpp
@@ -0,0 +1,86 @@
+#include "type.h"
+#include "ascii.h"
+
+#include <array>
+
+bool IsSpace(const char* s, size_t len) noexcept {
+    if (!len) {
+        return false; // an empty buffer is not treated as "space"
+    }
+    const char* const end = s + len;
+    const char* cur = s;
+    while (cur != end && IsAsciiSpace(*cur)) {
+        ++cur;
+    }
+    return cur == end; // the end is reached only when no non-space character was met
+}
+
+template <typename TStringType>
+static bool IsNumberT(const TStringType& s) noexcept {
+    using TChar = typename TStringType::value_type;
+
+    // [0-9]+ : at least one character, and every character an ASCII digit.
+    const bool allDigits = std::all_of(s.begin(), s.end(), IsAsciiDigit<TChar>);
+    return !s.empty() && allDigits;
+}
+
+bool IsNumber(const TStringBuf s) noexcept {
+ return IsNumberT(s); // narrow-character instantiation of the shared template
+}
+
+bool IsNumber(const TWtringBuf s) noexcept {
+ return IsNumberT(s); // wide-character instantiation of the shared template
+}
+
+template <typename TStringType>
+static bool IsHexNumberT(const TStringType& s) noexcept {
+    using TChar = typename TStringType::value_type;
+
+    // At least one character, and every character accepted by IsAsciiHex.
+    const bool allHex = std::all_of(s.begin(), s.end(), IsAsciiHex<TChar>);
+    return !s.empty() && allHex;
+}
+
+bool IsHexNumber(const TStringBuf s) noexcept {
+ return IsHexNumberT(s); // narrow-character instantiation of the shared template
+}
+
+bool IsHexNumber(const TWtringBuf s) noexcept {
+ return IsHexNumberT(s); // wide-character instantiation of the shared template
+}
+
+namespace {
+    template <size_t N>
+    bool IsCaseInsensitiveAnyOf(TStringBuf str, const std::array<TStringBuf, N>& options) {
+        // A candidate matches when it has the same length and equals `str` ignoring case.
+        const auto matches = [str](const TStringBuf candidate) {
+            return str.size() == candidate.size() && ::strnicmp(str.data(), candidate.data(), str.size()) == 0;
+        };
+
+        return std::any_of(options.begin(), options.end(), matches);
+    }
+} //anonymous namespace
+
+bool IsTrue(const TStringBuf v) noexcept {
+ static constexpr std::array<TStringBuf, 7> trueOptions{ // accepted spellings of "true"
+ "true",
+ "t",
+ "yes",
+ "y",
+ "on",
+ "1",
+ "da"};
+ return IsCaseInsensitiveAnyOf(v, trueOptions); // whole-string, case-insensitive comparison
+}
+
+bool IsFalse(const TStringBuf v) noexcept {
+ static constexpr std::array<TStringBuf, 7> falseOptions{ // accepted spellings of "false"
+ "false",
+ "f",
+ "no",
+ "n",
+ "off",
+ "0",
+ "net"};
+ return IsCaseInsensitiveAnyOf(v, falseOptions); // whole-string, case-insensitive comparison
+}
diff --git a/util/string/type.h b/util/string/type.h
new file mode 100644
index 0000000000..d6cb29ea58
--- /dev/null
+++ b/util/string/type.h
@@ -0,0 +1,42 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+
+Y_PURE_FUNCTION bool IsSpace(const char* s, size_t len) noexcept;
+
+/// Checks if a string is non-empty and is a set of only ASCII space symbols (an empty string yields false).
+Y_PURE_FUNCTION static inline bool IsSpace(const TStringBuf s) noexcept {
+ return IsSpace(s.data(), s.size());
+}
+
+/// Returns "true" if the given string is an arabic number ([0-9]+)
+Y_PURE_FUNCTION bool IsNumber(const TStringBuf s) noexcept;
+
+Y_PURE_FUNCTION bool IsNumber(const TWtringBuf s) noexcept;
+
+/// Returns "true" if the given string is a hex number ([0-9a-fA-F]+)
+Y_PURE_FUNCTION bool IsHexNumber(const TStringBuf s) noexcept;
+
+Y_PURE_FUNCTION bool IsHexNumber(const TWtringBuf s) noexcept;
+
+/* Tests if the given string is case insensitive equal to one of:
+ * - "true",
+ * - "t",
+ * - "yes",
+ * - "y",
+ * - "on",
+ * - "1",
+ * - "da".
+ */
+Y_PURE_FUNCTION bool IsTrue(const TStringBuf value) noexcept;
+
+/* Tests if the given string is case insensitive equal to one of:
+ * - "false",
+ * - "f",
+ * - "no",
+ * - "n",
+ * - "off",
+ * - "0",
+ * - "net".
+ */
+Y_PURE_FUNCTION bool IsFalse(const TStringBuf value) noexcept;
diff --git a/util/string/type_ut.cpp b/util/string/type_ut.cpp
new file mode 100644
index 0000000000..03e7af62bd
--- /dev/null
+++ b/util/string/type_ut.cpp
@@ -0,0 +1,76 @@
+#include "type.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/charset/wide.h>
+
+Y_UNIT_TEST_SUITE(TStringClassify) {
+ Y_UNIT_TEST(TestIsSpace) {
+ UNIT_ASSERT_EQUAL(IsSpace(" "), true);
+ UNIT_ASSERT_EQUAL(IsSpace("\t\r\n"), true);
+ UNIT_ASSERT_EQUAL(IsSpace(""), false); // empty input is not "space"
+ UNIT_ASSERT_EQUAL(IsSpace(" a"), false);
+ }
+
+ Y_UNIT_TEST(TestIsTrue) {
+ UNIT_ASSERT(IsTrue("1"));
+ UNIT_ASSERT(IsTrue("yes"));
+ UNIT_ASSERT(IsTrue("YeS")); // matching is case-insensitive
+ UNIT_ASSERT(IsTrue("on"));
+ UNIT_ASSERT(IsTrue("true"));
+ UNIT_ASSERT(IsTrue("t"));
+ UNIT_ASSERT(IsTrue("da"));
+
+ UNIT_ASSERT(!IsTrue(""));
+ UNIT_ASSERT(!IsTrue("tr")); // a prefix of an accepted word does not count
+ UNIT_ASSERT(!IsTrue("foobar"));
+ }
+
+ Y_UNIT_TEST(TestIsFalse) {
+ UNIT_ASSERT(IsFalse("0"));
+ UNIT_ASSERT(IsFalse("no"));
+ UNIT_ASSERT(IsFalse("off"));
+ UNIT_ASSERT(IsFalse("false"));
+ UNIT_ASSERT(IsFalse("f"));
+ UNIT_ASSERT(IsFalse("net"));
+
+ UNIT_ASSERT(!IsFalse(""));
+ UNIT_ASSERT(!IsFalse("fa")); // a prefix of an accepted word does not count
+ UNIT_ASSERT(!IsFalse("foobar"));
+ }
+
+ Y_UNIT_TEST(TestIsNumber) {
+ UNIT_ASSERT(IsNumber("0"));
+ UNIT_ASSERT(IsNumber("12345678901234567890"));
+ UNIT_ASSERT(!IsNumber("1234567890a"));
+ UNIT_ASSERT(!IsNumber("12345xx67890a"));
+ UNIT_ASSERT(!IsNumber("foobar"));
+ UNIT_ASSERT(!IsNumber("")); // empty input is not a number
+
+ UNIT_ASSERT(IsNumber(u"0")); // the same cases for the wide-character overload
+ UNIT_ASSERT(IsNumber(u"12345678901234567890"));
+ UNIT_ASSERT(!IsNumber(u"1234567890a"));
+ UNIT_ASSERT(!IsNumber(u"12345xx67890a"));
+ UNIT_ASSERT(!IsNumber(u"foobar"));
+ }
+
+ Y_UNIT_TEST(TestIsHexNumber) {
+ UNIT_ASSERT(IsHexNumber("0"));
+ UNIT_ASSERT(IsHexNumber("aaaadddAAAAA"));
+ UNIT_ASSERT(IsHexNumber("0123456789ABCDEFabcdef"));
+ UNIT_ASSERT(IsHexNumber("12345678901234567890"));
+ UNIT_ASSERT(IsHexNumber("1234567890a")); // 'a' is a hex digit, so this passes here but not in IsNumber
+ UNIT_ASSERT(!IsHexNumber("12345xx67890a"));
+ UNIT_ASSERT(!IsHexNumber("foobar"));
+ UNIT_ASSERT(!IsHexNumber(TString())); // empty input is not a hex number
+
+ UNIT_ASSERT(IsHexNumber(u"0")); // the same cases for the wide-character overload
+ UNIT_ASSERT(IsHexNumber(u"aaaadddAAAAA"));
+ UNIT_ASSERT(IsHexNumber(u"0123456789ABCDEFabcdef"));
+ UNIT_ASSERT(IsHexNumber(u"12345678901234567890"));
+ UNIT_ASSERT(IsHexNumber(u"1234567890a"));
+ UNIT_ASSERT(!IsHexNumber(u"12345xx67890a"));
+ UNIT_ASSERT(!IsHexNumber(u"foobar"));
+ UNIT_ASSERT(!IsHexNumber(TUtf16String()));
+ }
+}
diff --git a/util/string/ut/ya.make b/util/string/ut/ya.make
new file mode 100644
index 0000000000..6e80812825
--- /dev/null
+++ b/util/string/ut/ya.make
@@ -0,0 +1,24 @@
+UNITTEST_FOR(util)
+
+OWNER(g:util)
+SUBSCRIBER(g:util-subscribers)
+
+SRCS(
+ string/builder_ut.cpp
+ string/cast_ut.cpp
+ string/escape_ut.cpp
+ string/join_ut.cpp
+ string/hex_ut.cpp
+ string/printf_ut.cpp
+ string/split_ut.cpp
+ string/strip_ut.cpp
+ string/subst_ut.cpp
+ string/type_ut.cpp
+ string/util_ut.cpp
+ string/vector_ut.cpp
+ string/ascii_ut.cpp
+)
+
+INCLUDE(${ARCADIA_ROOT}/util/tests/ya_util_tests.inc)
+
+END()
diff --git a/util/string/util.cpp b/util/string/util.cpp
new file mode 100644
index 0000000000..b14f20bf75
--- /dev/null
+++ b/util/string/util.cpp
@@ -0,0 +1,72 @@
+#include "util.h"
+
+#include <util/generic/utility.h>
+
+#include <cstdio>
+#include <cstdarg>
+#include <cstdlib>
+
+int a2i(const TString& s) {
+ return atoi(s.c_str()); // plain atoi(): malformed input is not reported to the caller
+}
+
+//============================== span =====================================
+
+void str_spn::init(const char* charset, bool extended) {
+ // chars_table_1 is necessary to avoid some unexpected
+ // multi-threading issues (the tables are built here and copied into the members afterwards)
+ ui8 chars_table_1[256];
+ memset(chars_table_1, 0, sizeof(chars_table_1));
+ if (extended) {
+ for (const char* cs = charset; *cs; cs++) {
+ if (cs[1] == '-' && cs[2] != 0) { // "x-y" range; a '-' right before the terminator is taken literally
+ for (int c = (ui8)*cs; c <= (ui8)cs[2]; c++) {
+ chars_table_1[c] = 1;
+ }
+ cs += 2; // together with the loop's cs++ this skips all three characters of the range
+ continue;
+ }
+ chars_table_1[(ui8)*cs] = 1;
+ }
+ } else {
+ for (; *charset; charset++) {
+ chars_table_1[(ui8)*charset] = 1;
+ }
+ }
+ memcpy(chars_table, chars_table_1, 256); // forward table: 1 for characters in the set
+ chars_table_1[0] = 1; // treat '\0' as a member so that its complement entry below becomes 0
+ for (int n = 0; n < 256; n++) {
+ c_chars_table[n] = !chars_table_1[n]; // complement table: 1 for characters outside the set, 0 for '\0'
+ }
+}
+
+Tr::Tr(const char* from, const char* to) {
+    for (size_t code = 0; code != 256; ++code) {
+        Map[code] = static_cast<char>(code); // identity mapping by default
+    }
+    while (*from && *to) { // pair the characters up until the shorter string ends
+        Map[static_cast<ui8>(*from++)] = *to++;
+    }
+}
+
+size_t Tr::FindFirstChangePosition(const TString& str) const {
+    const auto first = str.begin();
+    const auto last = str.end();
+
+    // The first character that the translation table would actually alter.
+    const auto hit = std::find_if(first, last, [this](char ch) { return ConvertChar(ch) != ch; });
+
+    return hit == last ? TString::npos : static_cast<size_t>(hit - first);
+}
+
+void Tr::Do(TString& str) const {
+    // Probe through the const helper first, so that a string which needs no
+    // changes is never accessed through its mutable iterators.
+    const size_t firstChanged = FindFirstChangePosition(str);
+
+    if (firstChanged != TString::npos) {
+        for (auto cur = str.begin() + firstChanged; cur != str.end(); ++cur) {
+            *cur = ConvertChar(*cur);
+        }
+    }
+}
diff --git a/util/string/util.h b/util/string/util.h
new file mode 100644
index 0000000000..0d77a5042b
--- /dev/null
+++ b/util/string/util.h
@@ -0,0 +1,195 @@
+#pragma once
+
+//THIS FILE IS A COMPAT STUB HEADER
+
+#include <cstring>
+#include <cstdarg>
+#include <algorithm>
+
+#include <util/system/defaults.h>
+#include <util/generic/string.h>
+#include <util/generic/strbuf.h>
+
+/// @addtogroup Strings_Miscellaneous
+/// @{
+int a2i(const TString& s);
+
+/// Removes the last character if it is equal to c (does nothing for an empty string).
+template <class T>
+inline void RemoveIfLast(T& s, int c) {
+ const size_t length = s.length();
+ if (length && s[length - 1] == c)
+ s.remove(length - 1);
+}
+
+/// Adds lastCh symbol to the end of the string if it is not already there (an empty string always gets it).
+inline void addIfNotLast(TString& s, int lastCh) {
+ size_t len = s.length();
+ if (!len || s[len - 1] != lastCh) {
+ s.append(char(lastCh));
+ }
+}
+
+/// @details Appends lastCh1 if lastCh2 is not present in the string and lastCh1 is not already at the end of the string.
+/// Otherwise, if the first occurrence of lastCh2 is not the last symbol, appends lastCh2 unless the string already ends with it.
+/// @todo ?? Define when to apply the function. It is used several times for URL parsing.
+inline void addIfAbsent(TString& s, char lastCh1, char lastCh2) {
+ size_t pos = s.find(lastCh2);
+ if (pos == TString::npos) {
+ //s.append((char)lastCh1);
+ addIfNotLast(s, lastCh1);
+ } else if (pos < s.length() - 1) { // s is non-empty here because lastCh2 was found
+ addIfNotLast(s, lastCh2);
+ }
+}
+
+/// @}
+
+/*
+ * ------------------------------------------------------------------
+ *
+ * A fast implementation of glibc's functions;
+ * strspn, strcspn and strpbrk.
+ *
+ * ------------------------------------------------------------------
+ */
+struct ui8_256 {
+ // forward chars table
+ ui8 chars_table[256];
+ // reverse (for c* functions) chars table
+ ui8 c_chars_table[256];
+};
+
+class str_spn: public ui8_256 {
+public:
+ explicit str_spn(const char* charset, bool extended = false) {
+ // extended: if true, treat charset string more like
+ // interior of brackets [ ], e.g. "a-z0-9"
+ init(charset, extended);
+ }
+
+ /// Return pointer to the first character that is in the table, like strpbrk()
+ /// That is, skip all characters not in table
+ /// [DIFFERENCE FOR NOT_FOUND CASE: Returns end of string, not NULL]
+ const char* brk(const char* s) const {
+ while (c_chars_table[(ui8)*s]) // also stops at '\0': init() leaves c_chars_table[0] == 0
+ ++s;
+ return s;
+ }
+
+ /// Bounded variant: scans [s, e) and returns e when nothing is found.
+ const char* brk(const char* s, const char* e) const {
+ while (s < e && c_chars_table[(ui8)*s])
+ ++s;
+ return s;
+ }
+
+ /// Return first character not in table, like strpbrk() for inverted table.
+ /// That is, skip all characters in table
+ const char* cbrk(const char* s) const {
+ while (chars_table[(ui8)*s]) // also stops at '\0': init() never marks character 0 in chars_table
+ ++s;
+ return s;
+ }
+
+ /// Bounded variant: scans [s, e) and returns e when every character is in the table.
+ const char* cbrk(const char* s, const char* e) const {
+ while (s < e && chars_table[(ui8)*s])
+ ++s;
+ return s;
+ }
+
+ /// Offset of the first character not in table, like strspn().
+ size_t spn(const char* s) const {
+ return cbrk(s) - s;
+ }
+
+ size_t spn(const char* s, const char* e) const {
+ return cbrk(s, e) - s;
+ }
+
+ /// Offset of the first character in table, like strcspn().
+ size_t cspn(const char* s) const {
+ return brk(s) - s;
+ }
+
+ size_t cspn(const char* s, const char* e) const {
+ return brk(s, e) - s;
+ }
+
+ /// Non-const overloads: same search, returning a mutable pointer.
+ char* brk(char* s) const {
+ return const_cast<char*>(brk((const char*)s));
+ }
+
+ char* cbrk(char* s) const {
+ return const_cast<char*>(cbrk((const char*)s));
+ }
+
+ /// See strsep [BUT argument is *&, not **]
+ char* sep(char*& s) const {
+ char sep_char; // unused;
+ return sep(s, sep_char);
+ }
+
+ /// strsep + remember character that was destroyed
+ char* sep(char*& s, char& sep_char) const {
+ if (!s)
+ return nullptr; // the previous call consumed the last token
+ char* ret = s;
+ char* next = brk(ret);
+ if (*next) {
+ sep_char = *next;
+ *next = 0; // terminate the token in place
+ s = next + 1; // continue right after the separator
+ } else {
+ sep_char = 0;
+ s = nullptr; // end of string reached: the next call returns nullptr
+ }
+ return ret;
+ }
+
+protected:
+ void init(const char* charset, bool extended);
+ str_spn() = default;
+};
+
+// an analogue of tr/$from/$to/: a per-character translation table
+class Tr {
+public:
+ Tr(const char* from, const char* to);
+
+ char ConvertChar(char ch) const {
+ return Map[(ui8)ch]; // cast so that the character is used as an index in 0..255
+ }
+
+ void Do(char* s) const { // in place, up to the terminating '\0'
+ for (; *s; s++)
+ *s = ConvertChar(*s);
+ }
+ void Do(const char* src, char* dst) const { // writes the converted characters of src plus a terminating 0 into dst
+ for (; *src; src++)
+ *dst++ = ConvertChar(*src);
+ *dst = 0;
+ }
+ void Do(char* s, size_t l) const { // in place, at most l characters, stopping early at '\0'
+ for (size_t i = 0; i < l && s[i]; i++)
+ s[i] = ConvertChar(s[i]);
+ }
+ void Do(TString& str) const;
+
+private:
+ char Map[256]; // replacement for every possible character value
+
+ size_t FindFirstChangePosition(const TString& str) const;
+};
+
+// Removes all occurrences of given character from string (the string is left untouched when the character is absent)
+template <typename TStringType>
+void RemoveAll(TStringType& str, typename TStringType::char_type ch) {
+ size_t pos = str.find(ch); // 'find' to avoid cloning of string in 'TString.begin()'
+ if (pos == TStringType::npos)
+ return;
+
+ typename TStringType::iterator begin = str.begin();
+ typename TStringType::iterator end = begin + str.length();
+ typename TStringType::iterator it = std::remove(begin + pos, end, ch); // compact the tail, starting at the first match
+ str.erase(it, end); // cut off the leftover characters after the compacted part
+}
diff --git a/util/string/util_ut.cpp b/util/string/util_ut.cpp
new file mode 100644
index 0000000000..18a2d8e195
--- /dev/null
+++ b/util/string/util_ut.cpp
@@ -0,0 +1,46 @@
+#include "util.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+class TStrUtilTest: public TTestBase {
+ UNIT_TEST_SUITE(TStrUtilTest);
+ UNIT_TEST(TestSpn);
+ UNIT_TEST(TestRemoveAll);
+ UNIT_TEST_SUITE_END();
+
+public:
+ void TestSpn() {
+ str_spn rul("a-z", true); // extended syntax: the set is the range 'a'..'z'
+ char s[] = "!@#$ab%^&c+-";
+ UNIT_ASSERT_EQUAL(rul.brk(s), s + 4); // the first letter is 'a' at index 4
+ UNIT_ASSERT_EQUAL(rul.brk(s + 4), s + 4);
+ UNIT_ASSERT_EQUAL(rul.brk(s + 10), s + 12); // no letter left: points at the terminator
+ char* s1 = s;
+ UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), "!@#$"), 0);
+ UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), ""), 0); // empty token between the adjacent separators 'a' and 'b'
+ UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), "%^&"), 0);
+ UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), "+-"), 0);
+ UNIT_ASSERT_EQUAL(rul.sep(s1), nullptr); // input exhausted
+ }
+
+ void TestRemoveAll() {
+ static const struct T {
+ const char* Str;
+ char Ch;
+ const char* Result;
+ } tests[] = {
+ {"", 'x', ""},
+ {"hello world", 'h', "ello world"},
+ {"hello world", 'l', "heo word"},
+ {"hello world", 'x', "hello world"},
+ };
+
+ for (const T* t = tests; t != std::end(tests); ++t) {
+ TString str(t->Str);
+ RemoveAll(str, t->Ch);
+ UNIT_ASSERT_EQUAL(t->Result, str);
+ }
+ }
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TStrUtilTest);
diff --git a/util/string/vector.cpp b/util/string/vector.cpp
new file mode 100644
index 0000000000..9ba401f0a2
--- /dev/null
+++ b/util/string/vector.cpp
@@ -0,0 +1,91 @@
+#include "util.h"
+#include "split.h"
+#include "vector.h"
+
+#include <util/system/defaults.h>
+
+template <class TConsumer, class TDelim, typename TChr>
+static inline void DoSplit2(TConsumer& c, TDelim& d, const TBasicStringBuf<TChr> str, int) { // the options argument is unused at this level
+ SplitString(str.data(), str.data() + str.size(), d, c);
+}
+
+template <class TConsumer, class TDelim, typename TChr>
+static inline void DoSplit1(TConsumer& cc, TDelim& d, const TBasicStringBuf<TChr> str, int opts) {
+    const bool keepEmpty = (opts & KEEP_EMPTY_TOKENS) != 0;
+    if (!keepEmpty) {
+        TSkipEmptyTokens<TConsumer> nonEmptyOnly(&cc); // wrapper used when empty tokens are not to be kept
+        DoSplit2(nonEmptyOnly, d, str, opts);
+        return;
+    }
+    DoSplit2(cc, d, str, opts);
+}
+
+template <class C, class TDelim, typename TChr>
+static inline void DoSplit0(C* res, const TBasicStringBuf<TChr> str, TDelim& d, size_t maxFields, int options) {
+ using TStringType = std::conditional_t<std::is_same<TChr, wchar16>::value, TUtf16String, TString>;
+ res->clear(); // the output always starts empty
+
+ if (!str.data()) { // null input: leave the result empty
+ return;
+ }
+
+ using TConsumer = TContainerConsumer<C>;
+ TConsumer cc(res);
+
+ if (maxFields) { // non-zero maxFields: split through a limiting consumer
+ TLimitingConsumer<TConsumer, const TChr> lc(maxFields, &cc);
+
+ DoSplit1(lc, d, str, options);
+
+ if (lc.Last) {
+ res->push_back(TStringType(lc.Last, str.data() + str.size() - lc.Last)); // everything from lc.Last to the end of the input becomes the final field
+ }
+ } else { // maxFields == 0: no limit
+ DoSplit1(cc, d, str, options);
+ }
+}
+
+template <typename TChr>
+static void SplitStringImplT(TVector<std::conditional_t<std::is_same<TChr, wchar16>::value, TUtf16String, TString>>* res,
+ const TBasicStringBuf<TChr> str, const TChr* delim, size_t maxFields, int options) {
+ if (!*delim) { // empty delimiter: nothing is done and *res is left as it was
+ return;
+ }
+
+ if (*(delim + 1)) { // two or more characters: the whole delimiter string is the separator
+ TStringDelimiter<const TChr> d(delim, std::char_traits<TChr>::length(delim));
+
+ DoSplit0(res, str, d, maxFields, options);
+ } else { // exactly one character: use the single-character delimiter
+ TCharDelimiter<const TChr> d(*delim);
+
+ DoSplit0(res, str, d, maxFields, options);
+ }
+}
+
+void ::NPrivate::SplitStringImpl(TVector<TString>* res, const char* ptr, const char* delim, size_t maxFields, int options) {
+ return SplitStringImplT<char>(res, TStringBuf(ptr), delim, maxFields, options); // input given as a pointer only, without a length
+}
+
+void ::NPrivate::SplitStringImpl(TVector<TString>* res, const char* ptr, size_t len, const char* delim, size_t maxFields, int options) {
+ return SplitStringImplT<char>(res, TStringBuf(ptr, len), delim, maxFields, options); // explicit-length input
+}
+
+void ::NPrivate::SplitStringImpl(TVector<TUtf16String>* res, const wchar16* ptr, const wchar16* delimiter, size_t maxFields, int options) {
+ return SplitStringImplT<wchar16>(res, TWtringBuf(ptr), delimiter, maxFields, options); // wide input given as a pointer only, without a length
+}
+
+void ::NPrivate::SplitStringImpl(TVector<TUtf16String>* res, const wchar16* ptr, size_t len, const wchar16* delimiter, size_t maxFields, int options) {
+ return SplitStringImplT<wchar16>(res, TWtringBuf(ptr, len), delimiter, maxFields, options); // wide explicit-length input
+}
+
+TUtf16String JoinStrings(const TVector<TUtf16String>& v, const TWtringBuf delim) {
+    return JoinStrings(v.begin(), v.end(), delim); // whole vector, through the iterator overload
+}
+
+TUtf16String JoinStrings(const TVector<TUtf16String>& v, size_t index, size_t count, const TWtringBuf delim) {
+    const size_t first = Min(index, v.size()); // clamp the start to the vector
+    const size_t taken = Min(count, v.size() - first); // and the length to what remains
+    const auto from = v.begin() + first;
+    return JoinStrings(from, from + taken, delim);
+}
diff --git a/util/string/vector.h b/util/string/vector.h
new file mode 100644
index 0000000000..e36c348bbe
--- /dev/null
+++ b/util/string/vector.h
@@ -0,0 +1,132 @@
+#pragma once
+
+#include "cast.h"
+#include "split.h"
+
+#include <util/generic/map.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/string/cast.h>
+#include <util/system/yassert.h>
+
+#define KEEP_EMPTY_TOKENS 0x01
+
+//
+// NOTE: Check StringSplitter below to get more convenient split string interface.
+
+namespace NPrivate {
+
+ void SplitStringImpl(TVector<TString>* res, const char* ptr,
+ const char* delimiter, size_t maxFields, int options); // narrow input without an explicit length
+ void SplitStringImpl(TVector<TString>* res, const char* ptr, size_t len,
+ const char* delimiter, size_t maxFields, int options); // narrow input with an explicit length
+
+ void SplitStringImpl(TVector<TUtf16String>* res, const wchar16* ptr,
+ const wchar16* delimiter, size_t maxFields, int options); // wide input without an explicit length
+ void SplitStringImpl(TVector<TUtf16String>* res, const wchar16* ptr, size_t len,
+ const wchar16* delimiter, size_t maxFields, int options); // wide input with an explicit length
+
+ template <typename C>
+ struct TStringDeducer; // maps a character type to the owning string type; only the specializations below are defined
+
+ template <>
+ struct TStringDeducer<char> {
+ using type = TString;
+ };
+
+ template <>
+ struct TStringDeducer<wchar16> {
+ using type = TUtf16String;
+ };
+}
+
+template <typename C>
+TVector<typename ::NPrivate::TStringDeducer<C>::type>
+SplitString(const C* ptr, const C* delimiter,
+ size_t maxFields = 0, int options = 0) { // input passed as a pointer only; options may include KEEP_EMPTY_TOKENS
+ TVector<typename ::NPrivate::TStringDeducer<C>::type> res;
+ ::NPrivate::SplitStringImpl(&res, ptr, delimiter, maxFields, options);
+ return res;
+}
+
+template <typename C>
+TVector<typename ::NPrivate::TStringDeducer<C>::type>
+SplitString(const C* ptr, size_t len, const C* delimiter,
+ size_t maxFields = 0, int options = 0) { // explicit-length input
+ TVector<typename ::NPrivate::TStringDeducer<C>::type> res;
+ ::NPrivate::SplitStringImpl(&res, ptr, len, delimiter, maxFields, options);
+ return res;
+}
+
+template <typename C>
+TVector<typename ::NPrivate::TStringDeducer<C>::type>
+SplitString(const typename ::NPrivate::TStringDeducer<C>::type& str, const C* delimiter,
+ size_t maxFields = 0, int options = 0) { // string-object input; C is deduced from the delimiter
+ return SplitString(str.data(), str.size(), delimiter, maxFields, options);
+}
+
+template <class TIter>
+inline TString JoinStrings(TIter begin, TIter end, const TStringBuf delim) {
+    TString joined;
+    bool needDelim = false;
+    // The delimiter is written between elements only, never before the first one.
+    for (; begin != end; ++begin) {
+        if (needDelim) {
+            joined.append(delim);
+        }
+        joined.append(ToString(*begin));
+        needDelim = true;
+    }
+    return joined;
+}
+
+template <class TIter>
+inline TUtf16String JoinStrings(TIter begin, TIter end, const TWtringBuf delim) {
+    TUtf16String joined;
+    bool needDelim = false;
+    // The delimiter is written between elements only, never before the first one.
+    for (; begin != end; ++begin) {
+        if (needDelim) {
+            joined.append(delim);
+        }
+        joined.append(ToWtring(*begin));
+        needDelim = true;
+    }
+    return joined;
+}
+
+/// Concatenates elements of given TVector<TString>; the second overload joins only `count` elements starting at `index`.
+inline TString JoinStrings(const TVector<TString>& v, const TStringBuf delim) {
+    return JoinStrings(v.begin(), v.end(), delim);
+}
+
+inline TString JoinStrings(const TVector<TString>& v, size_t index, size_t count, const TStringBuf delim) {
+    Y_ASSERT(index <= v.size() && count <= v.size() - index && "JoinStrings(): index or count out of range"); // written so that index + count cannot wrap around
+    return JoinStrings(v.begin() + index, v.begin() + index + count, delim);
+}
+
+template <typename T>
+inline TString JoinVectorIntoString(const TVector<T>& v, const TStringBuf delim) { // joins every element of v
+    return JoinStrings(v.begin(), v.end(), delim);
+}
+
+template <typename T>
+inline TString JoinVectorIntoString(const TVector<T>& v, size_t index, size_t count, const TStringBuf delim) { // joins `count` elements starting at `index`
+    Y_ASSERT(index <= v.size() && count <= v.size() - index && "JoinVectorIntoString(): index or count out of range"); // written so that index + count cannot wrap around
+    return JoinStrings(v.begin() + index, v.begin() + index + count, delim);
+}
+
+TUtf16String JoinStrings(const TVector<TUtf16String>& v, const TWtringBuf delim);
+TUtf16String JoinStrings(const TVector<TUtf16String>& v, size_t index, size_t count, const TWtringBuf delim);
+
+//! Converts vector of strings to vector of type T variables by applying FromString<T>() to each element, in order
+template <typename T, typename TStringType>
+TVector<T> Scan(const TVector<TStringType>& input) {
+    TVector<T> output;
+    output.reserve(input.size());
+    for (const auto& item : input) { // range-for: no signed int index that could fall short of the vector size
+        output.push_back(FromString<T>(item));
+    }
+    return output;
+}
diff --git a/util/string/vector_ut.cpp b/util/string/vector_ut.cpp
new file mode 100644
index 0000000000..817120f268
--- /dev/null
+++ b/util/string/vector_ut.cpp
@@ -0,0 +1,38 @@
+#include <library/cpp/testing/unittest/registar.h>
+#include <util/charset/wide.h>
+
+#include "cast.h"
+#include "vector.h"
+
+Y_UNIT_TEST_SUITE(TStringJoinTest) {
+ Y_UNIT_TEST(Test1) { // an empty vector joins into an empty string
+ TVector<TUtf16String> v;
+
+ UNIT_ASSERT_EQUAL(JoinStrings(v, ToWtring("")), ToWtring(""));
+ }
+
+ Y_UNIT_TEST(Test2) { // the delimiter is placed between two elements
+ TVector<TUtf16String> v;
+
+ v.push_back(ToWtring("1"));
+ v.push_back(ToWtring("2"));
+
+ UNIT_ASSERT_EQUAL(JoinStrings(v, ToWtring(" ")), ToWtring("1 2"));
+ }
+
+ Y_UNIT_TEST(Test3) {
+ TVector<TUtf16String> v;
+
+ v.push_back(ToWtring("1"));
+ v.push_back(ToWtring("2"));
+
+ UNIT_ASSERT_EQUAL(JoinStrings(v, 1, 10, ToWtring(" ")), ToWtring("2")); // count (10) exceeds the remaining elements; only "2" is joined
+ }
+
+ Y_UNIT_TEST(TestJoinWStrings) { // a single non-ASCII element with an empty delimiter comes back unchanged
+ const TUtf16String str = u"Яндекс";
+ const TVector<TUtf16String> v(1, str);
+
+ UNIT_ASSERT_EQUAL(JoinStrings(v, TUtf16String()), str);
+ }
+}
diff --git a/util/string/ya.make b/util/string/ya.make
new file mode 100644
index 0000000000..79c9498ddd
--- /dev/null
+++ b/util/string/ya.make
@@ -0,0 +1,6 @@
+OWNER(g:util)
+SUBSCRIBER(g:util-subscribers)
+
+RECURSE_FOR_TESTS(
+ ut
+)