diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:17 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:17 +0300 |
commit | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch) | |
tree | dd4bd3ca0f36b817e96812825ffaf10d645803f2 /util/string | |
parent | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff) | |
download | ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'util/string')
56 files changed, 2244 insertions, 2244 deletions
diff --git a/util/string/ascii.cpp b/util/string/ascii.cpp index 4a90a4ae06..95edb95cc8 100644 --- a/util/string/ascii.cpp +++ b/util/string/ascii.cpp @@ -1,48 +1,48 @@ -#include "ascii.h" - +#include "ascii.h" + #include <util/system/yassert.h> #include <util/system/compat.h> -// clang-format off -extern const unsigned char NPrivate::ASCII_CLASS[256] = { - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// clang-format off +extern const unsigned char NPrivate::ASCII_CLASS[256] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x68, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x72, 0x72, 0x72, 0x72, 0x72, 0x72, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x32, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x74, 0x74, 0x74, 0x74, 0x74, 0x74, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x80, 0x80, 0x80, 0x80, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, -}; + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +}; + +extern const unsigned char NPrivate::ASCII_LOWER[256] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, + 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, +}; +// clang-format on -extern const unsigned char NPrivate::ASCII_LOWER[256] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95, - 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, - 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, - 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, - 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, - 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, - 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, - 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, -}; -// clang-format on - int AsciiCompareIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept { if (s1.size() <= s2.size()) { if (int cmp = strnicmp(s1.data(), s2.data(), s1.size())) { diff --git a/util/string/ascii.h b/util/string/ascii.h index adf81b093f..10344384d3 100644 --- a/util/string/ascii.h +++ b/util/string/ascii.h @@ -1,9 +1,9 @@ -#pragma once - -#include <util/system/defaults.h> +#pragma once + +#include <util/system/defaults.h> #include <util/system/compat.h> #include <util/generic/string.h> - + // ctype.h-like functions, locale-independent: // IsAscii{Upper,Lower,Digit,Alpha,Alnum,Space} and // AsciiTo{Upper,Lower} @@ -11,149 +11,149 @@ // standard functions from <ctype.h> are locale dependent, // and cause undefined behavior when called on chars outside [0..127] range -namespace NPrivate { - enum ECharClass { - CC_SPACE = 1, - CC_UPPER = 2, - CC_LOWER = 4, - CC_DIGIT = 8, - CC_ALPHA = 16, - CC_ALNUM = 32, +namespace NPrivate { + enum ECharClass { + CC_SPACE = 1, + CC_UPPER = 2, + CC_LOWER = 4, + CC_DIGIT = 8, + CC_ALPHA = 16, + CC_ALNUM = 32, CC_ISHEX = 64, CC_PUNCT = 128, - }; - - extern const unsigned char ASCII_CLASS[256]; - extern const unsigned char ASCII_LOWER[256]; - - template <class T> + }; + + extern const unsigned char ASCII_CLASS[256]; + extern const unsigned char ASCII_LOWER[256]; + + template <class T> struct TDereference { using type = T; - }; - + }; + #ifndef TSTRING_IS_STD_STRING - template <class String> + template <class String> struct TDereference<TBasicCharRef<String>> { using type = typename String::value_type; - }; + }; #endif - - template <class T> + + template <class T> using TDereferenced = typename TDereference<T>::type; - template <class T> + template <class T> bool RangeOk(T c) noexcept { static_assert(std::is_integral<T>::value, "Integral type character expected"); - if (sizeof(T) == 1) { + if (sizeof(T) == 1) { return true; - } + } return c >= static_cast<T>(0) && c <= static_cast<T>(127); - } + } #ifndef TSTRING_IS_STD_STRING - template <class String> + template <class String> bool RangeOk(const TBasicCharRef<String>& c) { return RangeOk(static_cast<typename String::value_type>(c)); } #endif -} - +} + constexpr bool IsAscii(const int c) noexcept { return !(c & ~0x7f); } -inline bool IsAsciiSpace(unsigned char c) { - return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_SPACE; -} - -inline bool IsAsciiUpper(unsigned char c) { - return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_UPPER; -} +inline bool IsAsciiSpace(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_SPACE; +} + +inline bool IsAsciiUpper(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_UPPER; +} -inline bool IsAsciiLower(unsigned char c) { - return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_LOWER; +inline bool IsAsciiLower(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_LOWER; } -inline bool IsAsciiDigit(unsigned char c) { - return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_DIGIT; +inline bool IsAsciiDigit(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_DIGIT; } -inline bool IsAsciiAlpha(unsigned char c) { - return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALPHA; +inline bool IsAsciiAlpha(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALPHA; } -inline bool IsAsciiAlnum(unsigned char c) { - return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALNUM; +inline bool IsAsciiAlnum(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ALNUM; } -inline bool IsAsciiHex(unsigned char c) { - return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ISHEX; +inline bool IsAsciiHex(unsigned char c) { + return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_ISHEX; } inline bool IsAsciiPunct(unsigned char c) { return ::NPrivate::ASCII_CLASS[c] & ::NPrivate::CC_PUNCT; } -// some overloads - -template <class T> -inline bool IsAsciiSpace(T c) { +// some overloads + +template <class T> +inline bool IsAsciiSpace(T c) { return ::NPrivate::RangeOk(c) && IsAsciiSpace(static_cast<unsigned char>(c)); } template <class T> -inline bool IsAsciiUpper(T c) { +inline bool IsAsciiUpper(T c) { return ::NPrivate::RangeOk(c) && IsAsciiUpper(static_cast<unsigned char>(c)); -} - -template <class T> -inline bool IsAsciiLower(T c) { +} + +template <class T> +inline bool IsAsciiLower(T c) { return ::NPrivate::RangeOk(c) && IsAsciiLower(static_cast<unsigned char>(c)); -} - -template <class T> -inline bool IsAsciiDigit(T c) { +} + +template <class T> +inline bool IsAsciiDigit(T c) { return ::NPrivate::RangeOk(c) && IsAsciiDigit(static_cast<unsigned char>(c)); -} - -template <class T> -inline bool IsAsciiAlpha(T c) { +} + +template <class T> +inline bool IsAsciiAlpha(T c) { return ::NPrivate::RangeOk(c) && IsAsciiAlpha(static_cast<unsigned char>(c)); -} - -template <class T> -inline bool IsAsciiAlnum(T c) { +} + +template <class T> +inline bool IsAsciiAlnum(T c) { return ::NPrivate::RangeOk(c) && IsAsciiAlnum(static_cast<unsigned char>(c)); -} - -template <class T> -inline bool IsAsciiHex(T c) { +} + +template <class T> +inline bool IsAsciiHex(T c) { return ::NPrivate::RangeOk(c) && IsAsciiHex(static_cast<unsigned char>(c)); -} - +} + template <class T> inline bool IsAsciiPunct(T c) { return ::NPrivate::RangeOk(c) && IsAsciiPunct(static_cast<unsigned char>(c)); } -// some extra helpers -inline ui8 AsciiToLower(ui8 c) noexcept { - return ::NPrivate::ASCII_LOWER[c]; -} - -inline char AsciiToLower(char c) noexcept { - return (char)AsciiToLower((ui8)c); -} - -template <class T> -inline ::NPrivate::TDereferenced<T> AsciiToLower(T c) noexcept { - return (c >= 0 && c <= 127) ? (::NPrivate::TDereferenced<T>)AsciiToLower((ui8)c) : c; +// some extra helpers +inline ui8 AsciiToLower(ui8 c) noexcept { + return ::NPrivate::ASCII_LOWER[c]; +} + +inline char AsciiToLower(char c) noexcept { + return (char)AsciiToLower((ui8)c); +} + +template <class T> +inline ::NPrivate::TDereferenced<T> AsciiToLower(T c) noexcept { + return (c >= 0 && c <= 127) ? (::NPrivate::TDereferenced<T>)AsciiToLower((ui8)c) : c; } template <class T> -inline ::NPrivate::TDereferenced<T> AsciiToUpper(T c) noexcept { +inline ::NPrivate::TDereferenced<T> AsciiToUpper(T c) noexcept { return IsAsciiLower(c) ? (c + ('A' - 'a')) : c; } @@ -211,7 +211,7 @@ static inline int AsciiCompareIgnoreCase(const char* s1, const char* s2) noexcep * - positive otherwise, * similar to stricmp. */ -Y_PURE_FUNCTION int AsciiCompareIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept; +Y_PURE_FUNCTION int AsciiCompareIgnoreCase(const TStringBuf s1, const TStringBuf s2) noexcept; /** * ASCII case-sensitive string comparison (for proper UTF8 strings diff --git a/util/string/ascii_ut.cpp b/util/string/ascii_ut.cpp index 33f531694f..89069fee50 100644 --- a/util/string/ascii_ut.cpp +++ b/util/string/ascii_ut.cpp @@ -37,30 +37,30 @@ Y_UNIT_TEST_SUITE(TAsciiTest) { UNIT_ASSERT_VALUES_EQUAL((bool)ispunct(i), IsAsciiPunct((char)i)); } } - + Y_UNIT_TEST(Test1) { - for (int i = 128; i < 1000; ++i) { - UNIT_ASSERT(!IsAsciiHex(i)); - UNIT_ASSERT(!IsAsciiSpace(i)); - UNIT_ASSERT(!IsAsciiAlnum(i)); - UNIT_ASSERT(!IsAsciiAlpha(i)); - UNIT_ASSERT(!IsAsciiUpper(i)); - UNIT_ASSERT(!IsAsciiLower(i)); - UNIT_ASSERT(!IsAsciiDigit(i)); + for (int i = 128; i < 1000; ++i) { + UNIT_ASSERT(!IsAsciiHex(i)); + UNIT_ASSERT(!IsAsciiSpace(i)); + UNIT_ASSERT(!IsAsciiAlnum(i)); + UNIT_ASSERT(!IsAsciiAlpha(i)); + UNIT_ASSERT(!IsAsciiUpper(i)); + UNIT_ASSERT(!IsAsciiLower(i)); + UNIT_ASSERT(!IsAsciiDigit(i)); UNIT_ASSERT(!IsAsciiPunct(i)); - } - - for (int i = -1000; i < 0; ++i) { - UNIT_ASSERT(!IsAsciiHex(i)); - UNIT_ASSERT(!IsAsciiSpace(i)); - UNIT_ASSERT(!IsAsciiAlnum(i)); - UNIT_ASSERT(!IsAsciiAlpha(i)); - UNIT_ASSERT(!IsAsciiUpper(i)); - UNIT_ASSERT(!IsAsciiLower(i)); - UNIT_ASSERT(!IsAsciiDigit(i)); + } + + for (int i = -1000; i < 0; ++i) { + UNIT_ASSERT(!IsAsciiHex(i)); + UNIT_ASSERT(!IsAsciiSpace(i)); + UNIT_ASSERT(!IsAsciiAlnum(i)); + UNIT_ASSERT(!IsAsciiAlpha(i)); + UNIT_ASSERT(!IsAsciiUpper(i)); + UNIT_ASSERT(!IsAsciiLower(i)); + UNIT_ASSERT(!IsAsciiDigit(i)); UNIT_ASSERT(!IsAsciiPunct(i)); - } - } + } + } Y_UNIT_TEST(CompareTest) { UNIT_ASSERT(AsciiEqualsIgnoreCase("qqq", "qQq")); diff --git a/util/string/benchmark/ascii/main.cpp b/util/string/benchmark/ascii/main.cpp index 789300bde2..673047025d 100644 --- a/util/string/benchmark/ascii/main.cpp +++ b/util/string/benchmark/ascii/main.cpp @@ -1,22 +1,22 @@ #include <library/cpp/testing/benchmark/bench.h> - -#include <util/generic/xrange.h> -#include <util/string/ascii.h> -#include <util/generic/bitmap.h> -#include <util/generic/singleton.h> - -namespace { - struct TUpperMap: public TBitMap<256> { - inline TUpperMap() noexcept { - for (unsigned i = 'A'; i <= 'Z'; ++i) { - Set((ui8)i); - } - } - - inline char ToLower(char x) const noexcept { - return Get((ui8)x) ? x + ('a' - 'A') : x; - } - }; + +#include <util/generic/xrange.h> +#include <util/string/ascii.h> +#include <util/generic/bitmap.h> +#include <util/generic/singleton.h> + +namespace { + struct TUpperMap: public TBitMap<256> { + inline TUpperMap() noexcept { + for (unsigned i = 'A'; i <= 'Z'; ++i) { + Set((ui8)i); + } + } + + inline char ToLower(char x) const noexcept { + return Get((ui8)x) ? x + ('a' - 'A') : x; + } + }; struct TToLowerLookup { char Table[256]; @@ -31,66 +31,66 @@ namespace { return Table[(ui8)x]; } }; -} - -static inline char FastAsciiToLower(char c) { - return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; -} - -static inline char FastAsciiToLower2(char c) { - return c + ('a' - 'A') * (int)(c >= 'A' && c <= 'Z'); -} - -Y_CPU_BENCHMARK(AsciiToLower, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); - - for (int j = 0; j < 256; ++j) { - Y_DO_NOT_OPTIMIZE_AWAY(AsciiToLower(j)); - } - } -} - -Y_CPU_BENCHMARK(AsciiToLowerChar, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); - - for (int j = 0; j < 256; ++j) { - Y_DO_NOT_OPTIMIZE_AWAY(AsciiToLower((char)j)); - } - } -} - -Y_CPU_BENCHMARK(FastAsciiToLower, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); - - for (int j = 0; j < 256; ++j) { - Y_DO_NOT_OPTIMIZE_AWAY(FastAsciiToLower(j)); - } - } -} - -Y_CPU_BENCHMARK(FastAsciiToLower2, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); - - for (int j = 0; j < 256; ++j) { - Y_DO_NOT_OPTIMIZE_AWAY(FastAsciiToLower2(j)); - } - } -} - -Y_CPU_BENCHMARK(BitMapAsciiToLower, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); - - for (int j = 0; j < 256; ++j) { - Y_DO_NOT_OPTIMIZE_AWAY(Singleton<TUpperMap>()->ToLower(j)); - } - } -} - +} + +static inline char FastAsciiToLower(char c) { + return (c >= 'A' && c <= 'Z') ? (c + ('a' - 'A')) : c; +} + +static inline char FastAsciiToLower2(char c) { + return c + ('a' - 'A') * (int)(c >= 'A' && c <= 'Z'); +} + +Y_CPU_BENCHMARK(AsciiToLower, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); + + for (int j = 0; j < 256; ++j) { + Y_DO_NOT_OPTIMIZE_AWAY(AsciiToLower(j)); + } + } +} + +Y_CPU_BENCHMARK(AsciiToLowerChar, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); + + for (int j = 0; j < 256; ++j) { + Y_DO_NOT_OPTIMIZE_AWAY(AsciiToLower((char)j)); + } + } +} + +Y_CPU_BENCHMARK(FastAsciiToLower, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); + + for (int j = 0; j < 256; ++j) { + Y_DO_NOT_OPTIMIZE_AWAY(FastAsciiToLower(j)); + } + } +} + +Y_CPU_BENCHMARK(FastAsciiToLower2, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); + + for (int j = 0; j < 256; ++j) { + Y_DO_NOT_OPTIMIZE_AWAY(FastAsciiToLower2(j)); + } + } +} + +Y_CPU_BENCHMARK(BitMapAsciiToLower, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); + + for (int j = 0; j < 256; ++j) { + Y_DO_NOT_OPTIMIZE_AWAY(Singleton<TUpperMap>()->ToLower(j)); + } + } +} + Y_CPU_BENCHMARK(LookupAsciiToLower, iface) { for (const auto i : xrange(iface.Iterations())) { Y_UNUSED(i); @@ -112,12 +112,12 @@ Y_CPU_BENCHMARK(LookupAsciiToLowerNoSingleton, iface) { } } -Y_CPU_BENCHMARK(tolower, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); - - for (int j = 0; j < 256; ++j) { - Y_DO_NOT_OPTIMIZE_AWAY(tolower(j)); - } - } -} +Y_CPU_BENCHMARK(tolower, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); + + for (int j = 0; j < 256; ++j) { + Y_DO_NOT_OPTIMIZE_AWAY(tolower(j)); + } + } +} diff --git a/util/string/benchmark/ascii/ya.make b/util/string/benchmark/ascii/ya.make index e973ba183a..f95b9e0fa8 100644 --- a/util/string/benchmark/ascii/ya.make +++ b/util/string/benchmark/ascii/ya.make @@ -1,9 +1,9 @@ Y_BENCHMARK() - -OWNER(pg) - -SRCS( - main.cpp -) - -END() + +OWNER(pg) + +SRCS( + main.cpp +) + +END() diff --git a/util/string/benchmark/cast/main.cpp b/util/string/benchmark/cast/main.cpp index 341c8a55d3..f604712ab6 100644 --- a/util/string/benchmark/cast/main.cpp +++ b/util/string/benchmark/cast/main.cpp @@ -1,66 +1,66 @@ #include <library/cpp/testing/benchmark/bench.h> - -#include <util/string/cast.h> -#include <util/generic/xrange.h> - + +#include <util/string/cast.h> +#include <util/generic/xrange.h> + char str1[] = "1"; char str12[] = "12"; char str1234[] = "1234"; char str12345678[] = "12345678"; -Y_CPU_BENCHMARK(Parse_1, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); +Y_CPU_BENCHMARK(Parse_1, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); Y_DO_NOT_OPTIMIZE_AWAY(FromString<ui32>(str1, 1)); - } -} - -Y_CPU_BENCHMARK(Parse_12, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); + } +} + +Y_CPU_BENCHMARK(Parse_12, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); Y_DO_NOT_OPTIMIZE_AWAY(FromString<ui32>(str12, 2)); - } -} - -Y_CPU_BENCHMARK(Parse_1234, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); + } +} + +Y_CPU_BENCHMARK(Parse_1234, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); Y_DO_NOT_OPTIMIZE_AWAY(FromString<ui32>(str1234, 4)); - } -} - -Y_CPU_BENCHMARK(Parse_12345678, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); + } +} + +Y_CPU_BENCHMARK(Parse_12345678, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); Y_DO_NOT_OPTIMIZE_AWAY(FromString<ui32>(str12345678, 8)); - } -} - -//atoi -Y_CPU_BENCHMARK(Atoi_1, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); + } +} + +//atoi +Y_CPU_BENCHMARK(Atoi_1, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); Y_DO_NOT_OPTIMIZE_AWAY(atoi(str1)); - } -} - -Y_CPU_BENCHMARK(Atoi_12, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); + } +} + +Y_CPU_BENCHMARK(Atoi_12, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); Y_DO_NOT_OPTIMIZE_AWAY(atoi(str12)); - } -} - -Y_CPU_BENCHMARK(Atoi_1234, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); + } +} + +Y_CPU_BENCHMARK(Atoi_1234, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); Y_DO_NOT_OPTIMIZE_AWAY(atoi(str1234)); - } -} - -Y_CPU_BENCHMARK(Atoi_12345678, iface) { - for (const auto i : xrange(iface.Iterations())) { - Y_UNUSED(i); + } +} + +Y_CPU_BENCHMARK(Atoi_12345678, iface) { + for (const auto i : xrange(iface.Iterations())) { + Y_UNUSED(i); Y_DO_NOT_OPTIMIZE_AWAY(atoi(str12345678)); - } -} + } +} diff --git a/util/string/benchmark/cast/ya.make b/util/string/benchmark/cast/ya.make index e973ba183a..f95b9e0fa8 100644 --- a/util/string/benchmark/cast/ya.make +++ b/util/string/benchmark/cast/ya.make @@ -1,9 +1,9 @@ Y_BENCHMARK() - -OWNER(pg) - -SRCS( - main.cpp -) - -END() + +OWNER(pg) + +SRCS( + main.cpp +) + +END() diff --git a/util/string/benchmark/float_to_string/main.cpp b/util/string/benchmark/float_to_string/main.cpp index fed0de4b8f..1c7c0684a3 100644 --- a/util/string/benchmark/float_to_string/main.cpp +++ b/util/string/benchmark/float_to_string/main.cpp @@ -37,8 +37,8 @@ namespace { TVector<TExample<T>> Examples; TExamplesHolder() - : Examples(N) - { + : Examples(N) + { TFastRng<ui64> prng{N * sizeof(T) * 42}; for (auto& x : Examples) { x.Value = prng.GenRandReal4() + prng.Uniform(Max<ui16>()); @@ -52,8 +52,8 @@ namespace { TVector<TExample<T>> Examples; TNearZeroExamplesHolder() - : Examples(N) - { + : Examples(N) + { TFastRng<ui64> prng{N * sizeof(T) * 42}; for (auto& x : Examples) { x.Value = prng.GenRandReal4(); diff --git a/util/string/benchmark/join/main.cpp b/util/string/benchmark/join/main.cpp index 4742089bf8..1a8633d3a8 100644 --- a/util/string/benchmark/join/main.cpp +++ b/util/string/benchmark/join/main.cpp @@ -34,7 +34,7 @@ namespace { s = ::ToString(Prng.GenRand()); } - template <typename T, typename... TArgs> + template <typename T, typename... TArgs> void Randomize(T& t, TArgs&... args) { Randomize(t); Randomize(args...); @@ -44,7 +44,7 @@ namespace { TFastRng<ui64> Prng; }; - template <size_t N, typename... T> + template <size_t N, typename... T> struct TExamplesHolder { using TExamples = TVector<std::tuple<T...>>; TExamples Examples; @@ -54,26 +54,26 @@ namespace { { TRandomizer r{N * sizeof(typename TExamples::value_type) * 42}; for (auto& x : Examples) { - Apply([&r](T&... t) { r.Randomize(t...); }, x); + Apply([&r](T&... t) { r.Randomize(t...); }, x); } } }; - template <typename... TArgs> + template <typename... TArgs> TString JoinTuple(std::tuple<TArgs...> t) { return Apply([](TArgs... x) -> TString { return Join("-", x...); }, t); } } -#define DEFINE_BENCHMARK(count, types, ...) \ - Y_CPU_BENCHMARK(Join_##count##_##types, iface) { \ - const auto& examples = Default<TExamplesHolder<count, __VA_ARGS__>>().Examples; \ - for (const auto i : xrange(iface.Iterations())) { \ - Y_UNUSED(i); \ - for (const auto e : examples) { \ - Y_DO_NOT_OPTIMIZE_AWAY(JoinTuple(e)); \ - } \ - } \ +#define DEFINE_BENCHMARK(count, types, ...) \ + Y_CPU_BENCHMARK(Join_##count##_##types, iface) { \ + const auto& examples = Default<TExamplesHolder<count, __VA_ARGS__>>().Examples; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + for (const auto e : examples) { \ + Y_DO_NOT_OPTIMIZE_AWAY(JoinTuple(e)); \ + } \ + } \ } DEFINE_BENCHMARK(100, SS, TString, TString); diff --git a/util/string/benchmark/subst_global/main.cpp b/util/string/benchmark/subst_global/main.cpp index 000f5b47e0..e0decfa042 100644 --- a/util/string/benchmark/subst_global/main.cpp +++ b/util/string/benchmark/subst_global/main.cpp @@ -154,7 +154,7 @@ namespace { for (const auto dummy : xrange(i.Iterations())) { \ Y_UNUSED(dummy); \ auto s = str; \ - NBench::Escape(s.data()); \ + NBench::Escape(s.data()); \ Y_DO_NOT_OPTIMIZE_AWAY(SubstGlobal(s, ToUnderlying(D::WHAT), ToUnderlying(D::WITH))); \ NBench::Clobber(); \ } \ @@ -167,7 +167,7 @@ namespace { for (const auto dummy : xrange(i.Iterations())) { \ Y_UNUSED(dummy); \ auto s = str; \ - NBench::Escape(s.data()); \ + NBench::Escape(s.data()); \ Y_DO_NOT_OPTIMIZE_AWAY(SubstGlobal(s, ToUnderlying(D::WHAT), ToUnderlying(D::WITH))); \ NBench::Clobber(); \ } \ diff --git a/util/string/benchmark/ya.make b/util/string/benchmark/ya.make index 4410edabaf..266b53c7b3 100644 --- a/util/string/benchmark/ya.make +++ b/util/string/benchmark/ya.make @@ -5,8 +5,8 @@ OWNER( SUBSCRIBER(g:util-subscribers) RECURSE( - ascii - cast + ascii + cast float_to_string float_to_string/metrics join diff --git a/util/string/builder.h b/util/string/builder.h index 0b95e72a5d..7b54821151 100644 --- a/util/string/builder.h +++ b/util/string/builder.h @@ -6,11 +6,11 @@ namespace NPrivateStringBuilder { class TStringBuilder: public TString { - public: - inline TStringBuilder() - : Out(*this) - { - } + public: + inline TStringBuilder() + : Out(*this) + { + } TStringBuilder(TStringBuilder&& rhs) : TString(std::move(rhs)) @@ -18,22 +18,22 @@ namespace NPrivateStringBuilder { { } - TStringOutput Out; - }; + TStringOutput Out; + }; - template <class T> - static inline TStringBuilder& operator<<(TStringBuilder& builder, const T& t) { - builder.Out << t; + template <class T> + static inline TStringBuilder& operator<<(TStringBuilder& builder, const T& t) { + builder.Out << t; - return builder; - } + return builder; + } - template <class T> - static inline TStringBuilder&& operator<<(TStringBuilder&& builder, const T& t) { - builder.Out << t; + template <class T> + static inline TStringBuilder&& operator<<(TStringBuilder&& builder, const T& t) { + builder.Out << t; return std::move(builder); - } + } } using TStringBuilder = NPrivateStringBuilder::TStringBuilder; diff --git a/util/string/cast.cpp b/util/string/cast.cpp index 3b1e050e53..aa1e65a8e9 100644 --- a/util/string/cast.cpp +++ b/util/string/cast.cpp @@ -1,41 +1,41 @@ #include <util/system/defaults.h> - -#if defined(_freebsd_) && !defined(__LONG_LONG_SUPPORTED) - #define __LONG_LONG_SUPPORTED -#endif - -#include <cstdio> -#include <string> + +#if defined(_freebsd_) && !defined(__LONG_LONG_SUPPORTED) + #define __LONG_LONG_SUPPORTED +#endif + +#include <cstdio> +#include <string> #include <cmath> - + #include <util/string/type.h> #include <util/string/cast.h> #include <util/string/escape.h> - -#include <contrib/libs/double-conversion/double-conversion.h> - + +#include <contrib/libs/double-conversion/double-conversion.h> + #include <util/generic/string.h> #include <util/system/yassert.h> #include <util/generic/yexception.h> -#include <util/generic/typetraits.h> +#include <util/generic/typetraits.h> #include <util/generic/ylimits.h> -#include <util/generic/singleton.h> -#include <util/generic/utility.h> - -using double_conversion::DoubleToStringConverter; -using double_conversion::StringBuilder; -using double_conversion::StringToDoubleConverter; - -/* - * ------------------------------ formatters ------------------------------ - */ +#include <util/generic/singleton.h> +#include <util/generic/utility.h> + +using double_conversion::DoubleToStringConverter; +using double_conversion::StringBuilder; +using double_conversion::StringToDoubleConverter; + +/* + * ------------------------------ formatters ------------------------------ + */ namespace { constexpr char IntToChar[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; static_assert(Y_ARRAY_SIZE(IntToChar) == 16, "expect Y_ARRAY_SIZE(IntToChar) == 16"); - // clang-format off + // clang-format off constexpr int LetterToIntMap[] = { 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, @@ -49,23 +49,23 @@ namespace { 20, 20, 20, 20, 20, 20, 20, 10, 11, 12, 13, 14, 15, }; - // clang-format on + // clang-format on template <class T> std::enable_if_t<std::is_signed<T>::value, std::make_unsigned_t<T>> NegateNegativeSigned(T value) noexcept { - return std::make_unsigned_t<T>(-(value + 1)) + std::make_unsigned_t<T>(1); + return std::make_unsigned_t<T>(-(value + 1)) + std::make_unsigned_t<T>(1); } template <class T> std::enable_if_t<std::is_unsigned<T>::value, std::make_unsigned_t<T>> NegateNegativeSigned(T) noexcept { - Y_UNREACHABLE(); + Y_UNREACHABLE(); } - template <class T> + template <class T> std::make_signed_t<T> NegatePositiveSigned(T value) noexcept { - return value > 0 ? (-std::make_signed_t<T>(value - 1) - 1) : 0; - } - + return value > 0 ? (-std::make_signed_t<T>(value - 1) - 1) : 0; + } + template <class T, unsigned base, class TChar> struct TBasicIntFormatter { static_assert(1 < base && base < 17, "expect 1 < base && base < 17"); @@ -73,7 +73,7 @@ namespace { static inline size_t Format(T value, TChar* buf, size_t len) { Y_ENSURE(len, TStringBuf("zero length")); - + TChar* tmp = buf; do { @@ -99,46 +99,46 @@ namespace { } return result; - } + } }; - + template <class T, unsigned base, class TChar> struct TIntFormatter { static_assert(1 < base && base < 17, "expect 1 < base && base < 17"); static_assert(std::is_integral<T>::value, "T must be an integral type."); - + static inline size_t Format(T value, TChar* buf, size_t len) { - using TUFmt = TBasicIntFormatter<std::make_unsigned_t<T>, base, TChar>; + using TUFmt = TBasicIntFormatter<std::make_unsigned_t<T>, base, TChar>; - if (std::is_signed<T>::value && value < 0) { + if (std::is_signed<T>::value && value < 0) { Y_ENSURE(len >= 2, TStringBuf("not enough room in buffer")); *buf = '-'; - - return 1 + TUFmt::Format(NegateNegativeSigned(value), buf + 1, len - 1); + + return 1 + TUFmt::Format(NegateNegativeSigned(value), buf + 1, len - 1); } - return TUFmt::Format(value, buf, len); - } + return TUFmt::Format(value, buf, len); + } }; - + template <class T> struct TFltModifiers; - + template <class T, int base, class TChar> Y_NO_INLINE size_t FormatInt(T value, TChar* buf, size_t len) { return TIntFormatter<T, base, TChar>::Format(value, buf, len); } - + template <class T> inline size_t FormatFlt(T t, char* buf, size_t len) { const int ret = snprintf(buf, len, TFltModifiers<T>::ModifierWrite, t); - + Y_ENSURE(ret >= 0 && (size_t)ret <= len, TStringBuf("cannot format float")); - + return (size_t)ret; - } - + } + enum EParseStatus { PS_OK = 0, PS_EMPTY_STRING, @@ -147,97 +147,97 @@ namespace { PS_BAD_SYMBOL, PS_OVERFLOW, }; - + constexpr ui8 SAFE_LENS[4][17] = { - {0, 0, 7, 5, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1}, - {0, 0, 15, 10, 7, 6, 6, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3}, - {0, 0, 31, 20, 15, 13, 12, 11, 10, 10, 9, 9, 8, 8, 8, 8, 7}, - {0, 0, 63, 40, 31, 27, 24, 22, 21, 20, 19, 18, 17, 17, 16, 16, 15}, - }; - - inline constexpr ui8 ConstLog2(ui8 x) noexcept { - return x == 1 ? 0 : 1 + ConstLog2(x / 2); - } - - template <unsigned BASE, class TChar, class T> + {0, 0, 7, 5, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1}, + {0, 0, 15, 10, 7, 6, 6, 5, 5, 5, 4, 4, 4, 4, 4, 4, 3}, + {0, 0, 31, 20, 15, 13, 12, 11, 10, 10, 9, 9, 8, 8, 8, 8, 7}, + {0, 0, 63, 40, 31, 27, 24, 22, 21, 20, 19, 18, 17, 17, 16, 16, 15}, + }; + + inline constexpr ui8 ConstLog2(ui8 x) noexcept { + return x == 1 ? 0 : 1 + ConstLog2(x / 2); + } + + template <unsigned BASE, class TChar, class T> inline std::enable_if_t<(BASE > 10), bool> CharToDigit(TChar c, T* digit) noexcept { - unsigned uc = c; - - if (uc >= Y_ARRAY_SIZE(LetterToIntMap)) { - return false; - } - - *digit = LetterToIntMap[uc]; - - return *digit < BASE; - } - - template <unsigned BASE, class TChar, class T> + unsigned uc = c; + + if (uc >= Y_ARRAY_SIZE(LetterToIntMap)) { + return false; + } + + *digit = LetterToIntMap[uc]; + + return *digit < BASE; + } + + template <unsigned BASE, class TChar, class T> inline std::enable_if_t<(BASE <= 10), bool> CharToDigit(TChar c, T* digit) noexcept { - return (c >= '0') && ((*digit = (c - '0')) < BASE); - } - + return (c >= '0') && ((*digit = (c - '0')) < BASE); + } + template <class T, unsigned base, class TChar> struct TBasicIntParser { static_assert(1 < base && base < 17, "Expect 1 < base && base < 17."); static_assert(std::is_unsigned<T>::value, "TBasicIntParser can only handle unsigned integers."); - enum : unsigned { - BASE_POW_2 = base * base, - }; - - static inline EParseStatus Parse(const TChar** ppos, const TChar* end, T max, T* target) noexcept { + enum : unsigned { + BASE_POW_2 = base * base, + }; + + static inline EParseStatus Parse(const TChar** ppos, const TChar* end, T max, T* target) noexcept { Y_ASSERT(*ppos != end); /* This check should be somewhere up the stack. */ - const size_t maxSafeLen = SAFE_LENS[ConstLog2(sizeof(T))][base]; - - // can parse without overflow - if (size_t(end - *ppos) <= maxSafeLen) { - T result; - - if (ParseFast(*ppos, end, &result) && result <= max) { - *target = result; - - return PS_OK; - } - } - - return ParseSlow(ppos, end, max, target); - } - - static inline bool ParseFast(const TChar* pos, const TChar* end, T* target) noexcept { + const size_t maxSafeLen = SAFE_LENS[ConstLog2(sizeof(T))][base]; + + // can parse without overflow + if (size_t(end - *ppos) <= maxSafeLen) { + T result; + + if (ParseFast(*ppos, end, &result) && result <= max) { + *target = result; + + return PS_OK; + } + } + + return ParseSlow(ppos, end, max, target); + } + + static inline bool ParseFast(const TChar* pos, const TChar* end, T* target) noexcept { + T result = T(); + T d1; + T d2; + + // we have end > pos + auto beforeEnd = end - 1; + + while (pos < beforeEnd && CharToDigit<base>(*pos, &d1) && CharToDigit<base>(*(pos + 1), &d2)) { + result = result * BASE_POW_2 + d1 * base + d2; + pos += 2; + } + + while (pos != end && CharToDigit<base>(*pos, &d1)) { + result = result * base + d1; + ++pos; + } + + *target = result; + + return pos == end; + } + + static inline EParseStatus ParseSlow(const TChar** ppos, const TChar* end, T max, T* target) noexcept { T result = T(); - T d1; - T d2; - - // we have end > pos - auto beforeEnd = end - 1; - - while (pos < beforeEnd && CharToDigit<base>(*pos, &d1) && CharToDigit<base>(*(pos + 1), &d2)) { - result = result * BASE_POW_2 + d1 * base + d2; - pos += 2; - } - - while (pos != end && CharToDigit<base>(*pos, &d1)) { - result = result * base + d1; - ++pos; - } - - *target = result; - - return pos == end; - } - - static inline EParseStatus ParseSlow(const TChar** ppos, const TChar* end, T max, T* target) noexcept { - T result = T(); T preMulMax = max / base; const TChar* pos = *ppos; while (pos != end) { T digit; - - if (!CharToDigit<base>(*pos, &digit)) { + + if (!CharToDigit<base>(*pos, &digit)) { *ppos = pos; - + return PS_BAD_SYMBOL; } @@ -256,7 +256,7 @@ namespace { } *target = result; - + return PS_OK; } }; @@ -282,8 +282,8 @@ namespace { const TChar* pos = *ppos; if (pos == end) { return PS_EMPTY_STRING; - } - + } + bool negative = false; TUnsigned max; if (*pos == '+') { @@ -313,14 +313,14 @@ namespace { } if (IsSigned) { - *target = negative ? NegatePositiveSigned(result) : static_cast<T>(result); + *target = negative ? NegatePositiveSigned(result) : static_cast<T>(result); } else { *target = result; } return PS_OK; - } + } }; - + template <class TChar> [[noreturn]] static Y_NO_INLINE void ThrowParseError(EParseStatus status, const TChar* data, size_t len, const TChar* pos) { Y_ASSERT(status != PS_OK); @@ -341,21 +341,21 @@ namespace { default: ythrow yexception() << TStringBuf("Unknown error code in string converter. "); } - } - + } + template <typename T, typename TUnsigned, int base, typename TChar> Y_NO_INLINE T ParseInt(const TChar* data, size_t len, const TBounds<TUnsigned>& bounds) { T result; const TChar* pos = data; EParseStatus status = TIntParser<T, base, TChar>::Parse(&pos, pos + len, bounds, &result); - + if (status == PS_OK) { return result; } else { ThrowParseError(status, data, len, pos); } } - + template <typename T, typename TUnsigned, int base, typename TChar> Y_NO_INLINE bool TryParseInt(const TChar* data, size_t len, const TBounds<TUnsigned>& bounds, T* result) { return TIntParser<T, base, TChar>::Parse(&data, data + len, bounds, result) == PS_OK; @@ -387,20 +387,20 @@ namespace { ythrow TFromStringException() << TStringBuf("cannot parse float(") << TStringBuf(data, len) << TStringBuf(")"); } -#define DEF_FLT_MOD(type, modifierWrite, modifierRead) \ - template <> \ - struct TFltModifiers<type> { \ - static const char* const ModifierWrite; \ - static const char* const ModifierReadAndChar; \ - }; \ - \ - const char* const TFltModifiers<type>::ModifierWrite = modifierWrite; \ - const char* const TFltModifiers<type>::ModifierReadAndChar = modifierRead "%c"; - +#define DEF_FLT_MOD(type, modifierWrite, modifierRead) \ + template <> \ + struct TFltModifiers<type> { \ + static const char* const ModifierWrite; \ + static const char* const ModifierReadAndChar; \ + }; \ + \ + const char* const TFltModifiers<type>::ModifierWrite = modifierWrite; \ + const char* const TFltModifiers<type>::ModifierReadAndChar = modifierRead "%c"; + DEF_FLT_MOD(long double, "%.10Lg", "%Lg") - -#undef DEF_FLT_MOD - + +#undef DEF_FLT_MOD + /* The following constants are initialized in terms of <climits> constants to make * sure they go into binary as actual values and there is no associated * initialization code. @@ -415,34 +415,34 @@ namespace { constexpr TBounds<ui64> lUBounds = {static_cast<ui64>(ULONG_MAX), 0}; constexpr TBounds<ui64> llSBounds = {static_cast<ui64>(LLONG_MAX), static_cast<ui64>(ULLONG_MAX - LLONG_MAX)}; constexpr TBounds<ui64> llUBounds = {static_cast<ui64>(ULLONG_MAX), 0}; -} - -#define DEF_INT_SPEC_II(TYPE, ITYPE, BASE) \ - template <> \ - size_t IntToString<BASE, TYPE>(TYPE value, char* buf, size_t len) { \ - return FormatInt<ITYPE, BASE, char>(value, buf, len); \ - } - -#define DEF_INT_SPEC_I(TYPE, ITYPE) \ - template <> \ - size_t ToStringImpl<TYPE>(TYPE value, char* buf, size_t len) { \ - return FormatInt<ITYPE, 10, char>(value, buf, len); \ - } \ - DEF_INT_SPEC_II(TYPE, ITYPE, 2) \ - DEF_INT_SPEC_II(TYPE, ITYPE, 8) \ - DEF_INT_SPEC_II(TYPE, ITYPE, 10) \ +} + +#define DEF_INT_SPEC_II(TYPE, ITYPE, BASE) \ + template <> \ + size_t IntToString<BASE, TYPE>(TYPE value, char* buf, size_t len) { \ + return FormatInt<ITYPE, BASE, char>(value, buf, len); \ + } + +#define DEF_INT_SPEC_I(TYPE, ITYPE) \ + template <> \ + size_t ToStringImpl<TYPE>(TYPE value, char* buf, size_t len) { \ + return FormatInt<ITYPE, 10, char>(value, buf, len); \ + } \ + DEF_INT_SPEC_II(TYPE, ITYPE, 2) \ + DEF_INT_SPEC_II(TYPE, ITYPE, 8) \ + DEF_INT_SPEC_II(TYPE, ITYPE, 10) \ DEF_INT_SPEC_II(TYPE, ITYPE, 16) - -#define DEF_INT_SPEC(TYPE) \ - DEF_INT_SPEC_I(signed TYPE, i64) \ + +#define DEF_INT_SPEC(TYPE) \ + DEF_INT_SPEC_I(signed TYPE, i64) \ DEF_INT_SPEC_I(unsigned TYPE, ui64) - -DEF_INT_SPEC(char) -DEF_INT_SPEC(short) -DEF_INT_SPEC(int) -DEF_INT_SPEC(long) -DEF_INT_SPEC(long long) - + +DEF_INT_SPEC(char) +DEF_INT_SPEC(short) +DEF_INT_SPEC(int) +DEF_INT_SPEC(long) +DEF_INT_SPEC(long long) + #ifdef __cpp_char8_t template <> size_t ToStringImpl<char8_t>(char8_t value, char* buf, size_t len) { @@ -458,20 +458,20 @@ DEF_INT_SPEC_I(wchar_t, TWCharIType) DEF_INT_SPEC_I(wchar16, ui64) // wchar16 is always unsigned DEF_INT_SPEC_I(wchar32, ui64) // wchar32 is always unsigned -#undef DEF_INT_SPEC +#undef DEF_INT_SPEC #undef DEF_INT_SPEC_I #undef DEF_INT_SPEC_II - -#define DEF_FLT_SPEC(type) \ - template <> \ - size_t ToStringImpl<type>(type t, char* buf, size_t len) { \ - return FormatFlt<type>(t, buf, len); \ - } - + +#define DEF_FLT_SPEC(type) \ + template <> \ + size_t ToStringImpl<type>(type t, char* buf, size_t len) { \ + return FormatFlt<type>(t, buf, len); \ + } + DEF_FLT_SPEC(long double) -#undef DEF_FLT_SPEC - +#undef DEF_FLT_SPEC + template <> size_t ToStringImpl<bool>(bool t, char* buf, size_t len) { Y_ENSURE(len, TStringBuf("zero length")); @@ -479,9 +479,9 @@ size_t ToStringImpl<bool>(bool t, char* buf, size_t len) { return 1; } -/* - * ------------------------------ parsers ------------------------------ - */ +/* + * ------------------------------ parsers ------------------------------ + */ template <> bool TryFromStringImpl<bool>(const char* data, size_t len, bool& result) { @@ -508,11 +508,11 @@ bool TryFromStringImpl<bool>(const char* data, size_t len, bool& result) { template <> bool FromStringImpl<bool>(const char* data, size_t len) { bool result; - - if (!TryFromStringImpl<bool>(data, len, result)) { + + if (!TryFromStringImpl<bool>(data, len, result)) { ythrow TFromStringException() << TStringBuf("Cannot parse bool(") << TStringBuf(data, len) << TStringBuf("). "); - } - + } + return result; } @@ -527,8 +527,8 @@ TStringBuf FromStringImpl<TStringBuf>(const char* data, size_t len) { } template <> -std::string FromStringImpl<std::string>(const char* data, size_t len) { - return std::string(data, len); +std::string FromStringImpl<std::string>(const char* data, size_t len) { + return std::string(data, len); } template <> @@ -555,7 +555,7 @@ bool TryFromStringImpl<TString>(const char* data, size_t len, TString& result) { } template <> -bool TryFromStringImpl<std::string>(const char* data, size_t len, std::string& result) { +bool TryFromStringImpl<std::string>(const char* data, size_t len, std::string& result) { result.assign(data, len); return true; } @@ -572,46 +572,46 @@ bool TryFromStringImpl<TUtf16String>(const wchar16* data, size_t len, TUtf16Stri return true; } -#define DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, BASE) \ - template <> \ - TYPE IntFromString<TYPE, BASE>(const CHAR* data, size_t len) { \ - return ParseInt<ITYPE, ui64, BASE>(data, len, BOUNDS); \ - } \ - template <> \ - bool TryIntFromString<BASE>(const CHAR* data, size_t len, TYPE& result) { \ - ITYPE tmp; \ - bool status = TryParseInt<ITYPE, ui64, BASE>(data, len, BOUNDS, &tmp); \ - if (status) { \ - result = tmp; \ - } \ - return status; \ - } - -#define DEF_INT_SPEC_II(CHAR, TYPE, ITYPE, BOUNDS) \ - template <> \ - TYPE FromStringImpl<TYPE>(const CHAR* data, size_t len) { \ - return ParseInt<ITYPE, ui64, 10>(data, len, BOUNDS); \ - } \ - template <> \ - bool TryFromStringImpl<TYPE>(const CHAR* data, size_t len, TYPE& result) { \ - ITYPE tmp; \ - bool status = TryParseInt<ITYPE, ui64, 10>(data, len, BOUNDS, &tmp); \ - if (status) { \ - result = tmp; \ - } \ - return status; \ - } \ - DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 2) \ - DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 8) \ - DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 10) \ +#define DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, BASE) \ + template <> \ + TYPE IntFromString<TYPE, BASE>(const CHAR* data, size_t len) { \ + return ParseInt<ITYPE, ui64, BASE>(data, len, BOUNDS); \ + } \ + template <> \ + bool TryIntFromString<BASE>(const CHAR* data, size_t len, TYPE& result) { \ + ITYPE tmp; \ + bool status = TryParseInt<ITYPE, ui64, BASE>(data, len, BOUNDS, &tmp); \ + if (status) { \ + result = tmp; \ + } \ + return status; \ + } + +#define DEF_INT_SPEC_II(CHAR, TYPE, ITYPE, BOUNDS) \ + template <> \ + TYPE FromStringImpl<TYPE>(const CHAR* data, size_t len) { \ + return ParseInt<ITYPE, ui64, 10>(data, len, BOUNDS); \ + } \ + template <> \ + bool TryFromStringImpl<TYPE>(const CHAR* data, size_t len, TYPE& result) { \ + ITYPE tmp; \ + bool status = TryParseInt<ITYPE, ui64, 10>(data, len, BOUNDS, &tmp); \ + if (status) { \ + result = tmp; \ + } \ + return status; \ + } \ + DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 2) \ + DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 8) \ + DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 10) \ DEF_INT_SPEC_III(CHAR, TYPE, ITYPE, BOUNDS, 16) - -#define DEF_INT_SPEC_I(TYPE, ITYPE, BOUNDS) \ - DEF_INT_SPEC_II(char, TYPE, ITYPE, BOUNDS) \ - DEF_INT_SPEC_II(wchar16, TYPE, ITYPE, BOUNDS) -#define DEF_INT_SPEC(TYPE, ID) \ - DEF_INT_SPEC_I(signed TYPE, i64, ID##SBounds) \ +#define DEF_INT_SPEC_I(TYPE, ITYPE, BOUNDS) \ + DEF_INT_SPEC_II(char, TYPE, ITYPE, BOUNDS) \ + DEF_INT_SPEC_II(wchar16, TYPE, ITYPE, BOUNDS) + +#define DEF_INT_SPEC(TYPE, ID) \ + DEF_INT_SPEC_I(signed TYPE, i64, ID##SBounds) \ DEF_INT_SPEC_I(unsigned TYPE, ui64, ID##UBounds) #define DEF_INT_SPEC_FIXED_WIDTH(TYPE, ID) \ @@ -625,21 +625,21 @@ DEF_INT_SPEC(long, l) DEF_INT_SPEC(long long, ll) #undef DEF_INT_SPEC_FIXED_WIDTH -#undef DEF_INT_SPEC +#undef DEF_INT_SPEC #undef DEF_INT_SPEC_I #undef DEF_INT_SPEC_II #undef DEF_INT_SPEC_III - -#define DEF_FLT_SPEC(type) \ - template <> \ - type FromStringImpl<type>(const char* data, size_t len) { \ - return ParseFlt<type>(data, len); \ - } - -DEF_FLT_SPEC(long double) - -#undef DEF_FLT_SPEC - + +#define DEF_FLT_SPEC(type) \ + template <> \ + type FromStringImpl<type>(const char* data, size_t len) { \ + return ParseFlt<type>(data, len); \ + } + +DEF_FLT_SPEC(long double) + +#undef DEF_FLT_SPEC + // Using StrToD for float and double because it is faster than sscanf. // Exception-free, specialized for float types template <> @@ -693,152 +693,152 @@ float FromStringImpl<float>(const char* data, size_t len) { return static_cast<float>(FromStringImpl<double>(data, len)); } -double StrToD(const char* b, const char* e, char** se) { - struct TCvt: public StringToDoubleConverter { - inline TCvt() - : StringToDoubleConverter(ALLOW_TRAILING_JUNK | ALLOW_HEX | ALLOW_LEADING_SPACES, 0.0, NAN, nullptr, nullptr) - { - } - }; - - int out = 0; - - const auto res = SingletonWithPriority<TCvt, 0>()->StringToDouble(b, e - b, &out); - - if (se) { - *se = (char*)(b + out); - } - - return res; -} - -double StrToD(const char* b, char** se) { - return StrToD(b, b + strlen(b), se); -} - -namespace { - static inline DoubleToStringConverter& ToStringConverterNoPad() noexcept { - struct TCvt: public DoubleToStringConverter { - inline TCvt() noexcept - : DoubleToStringConverter(EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan", 'e', -10, 21, 4, 0) - { - } - }; - - return *SingletonWithPriority<TCvt, 0>(); - } - - struct TBuilder { - alignas(StringBuilder) char Store[sizeof(StringBuilder)]; - StringBuilder* SB; - - inline TBuilder(char* buf, size_t len) noexcept - : SB(new (Store) StringBuilder(buf, len)) - { - } - }; - - static inline size_t FixZeros(char* buf, size_t len) noexcept { - auto end = buf + len; - auto point = (char*)memchr(buf, '.', len); - - if (!point) { - return len; - } - - auto exp = (char*)memchr(point, 'e', end - point); - - if (!exp) { - exp = end; - } - - auto c = exp; - - c -= 1; - - while (point < c && *c == '0') { - --c; - } - - if (*c == '.') { - --c; - } - - memmove(c + 1, exp, end - exp); - - return c - buf + 1 + end - exp; - } - - static inline size_t FixEnd(char* buf, size_t len) noexcept { - if (len > 2) { - auto sign = buf[len - 2]; - - if (sign == '-' || sign == '+') { - buf[len] = buf[len - 1]; - buf[len - 1] = '0'; - ++len; - } - } - - buf[len] = 0; - - return len; - } - - static inline size_t DoDtoa(double d, char* buf, size_t len, int prec) noexcept { - TBuilder sb(buf, len); - +double StrToD(const char* b, const char* e, char** se) { + struct TCvt: public StringToDoubleConverter { + inline TCvt() + : StringToDoubleConverter(ALLOW_TRAILING_JUNK | ALLOW_HEX | ALLOW_LEADING_SPACES, 0.0, NAN, nullptr, nullptr) + { + } + }; + + int out = 0; + + const auto res = SingletonWithPriority<TCvt, 0>()->StringToDouble(b, e - b, &out); + + if (se) { + *se = (char*)(b + out); + } + + return res; +} + +double StrToD(const char* b, char** se) { + return StrToD(b, b + strlen(b), se); +} + +namespace { + static inline DoubleToStringConverter& ToStringConverterNoPad() noexcept { + struct TCvt: public DoubleToStringConverter { + inline TCvt() noexcept + : DoubleToStringConverter(EMIT_POSITIVE_EXPONENT_SIGN, "inf", "nan", 'e', -10, 21, 4, 0) + { + } + }; + + return *SingletonWithPriority<TCvt, 0>(); + } + + struct TBuilder { + alignas(StringBuilder) char Store[sizeof(StringBuilder)]; + StringBuilder* SB; + + inline TBuilder(char* buf, size_t len) noexcept + : SB(new (Store) StringBuilder(buf, len)) + { + } + }; + + static inline size_t FixZeros(char* buf, size_t len) noexcept { + auto end = buf + len; + auto point = (char*)memchr(buf, '.', len); + + if (!point) { + return len; + } + + auto exp = (char*)memchr(point, 'e', end - point); + + if (!exp) { + exp = end; + } + + auto c = exp; + + c -= 1; + + while (point < c && *c == '0') { + --c; + } + + if (*c == '.') { + --c; + } + + memmove(c + 1, exp, end - exp); + + return c - buf + 1 + end - exp; + } + + static inline size_t FixEnd(char* buf, size_t len) noexcept { + if (len > 2) { + auto sign = buf[len - 2]; + + if (sign == '-' || sign == '+') { + buf[len] = buf[len - 1]; + buf[len - 1] = '0'; + ++len; + } + } + + buf[len] = 0; + + return len; + } + + static inline size_t DoDtoa(double d, char* buf, size_t len, int prec) noexcept { + TBuilder sb(buf, len); + Y_VERIFY(ToStringConverterNoPad().ToPrecision(d, prec, sb.SB), "conversion failed"); - - return FixEnd(buf, FixZeros(buf, sb.SB->position())); - } -} - -template <> -size_t ToStringImpl<double>(double d, char* buf, size_t len) { - return DoDtoa(d, buf, len, 10); -} - -template <> -size_t ToStringImpl<float>(float f, char* buf, size_t len) { - return DoDtoa(f, buf, len, 6); -} - -size_t FloatToString(float t, char* buf, size_t len, EFloatToStringMode mode, int ndigits) { - if (mode == PREC_AUTO) { - TBuilder sb(buf, len); - + + return FixEnd(buf, FixZeros(buf, sb.SB->position())); + } +} + +template <> +size_t ToStringImpl<double>(double d, char* buf, size_t len) { + return DoDtoa(d, buf, len, 10); +} + +template <> +size_t ToStringImpl<float>(float f, char* buf, size_t len) { + return DoDtoa(f, buf, len, 6); +} + +size_t FloatToString(float t, char* buf, size_t len, EFloatToStringMode mode, int ndigits) { + if (mode == PREC_AUTO) { + TBuilder sb(buf, len); + Y_VERIFY(ToStringConverterNoPad().ToShortestSingle(t, sb.SB), "conversion failed"); - - return FixEnd(buf, sb.SB->position()); - } - - return FloatToString((double)t, buf, len, mode, ndigits); -} - -size_t FloatToString(double t, char* buf, size_t len, EFloatToStringMode mode, int ndigits) { - if (mode == PREC_NDIGITS) { - auto minDigits = DoubleToStringConverter::kMinPrecisionDigits; - auto maxDigits = DoubleToStringConverter::kMaxPrecisionDigits; - - return DoDtoa(t, buf, len, ClampVal(ndigits, minDigits, maxDigits)); - } - - TBuilder sb(buf, len); - - if (mode == PREC_AUTO) { + + return FixEnd(buf, sb.SB->position()); + } + + return FloatToString((double)t, buf, len, mode, ndigits); +} + +size_t FloatToString(double t, char* buf, size_t len, EFloatToStringMode mode, int ndigits) { + if (mode == PREC_NDIGITS) { + auto minDigits = DoubleToStringConverter::kMinPrecisionDigits; + auto maxDigits = DoubleToStringConverter::kMaxPrecisionDigits; + + return DoDtoa(t, buf, len, ClampVal(ndigits, minDigits, maxDigits)); + } + + TBuilder sb(buf, len); + + if (mode == PREC_AUTO) { Y_VERIFY(ToStringConverterNoPad().ToShortest(t, sb.SB), "conversion failed"); - - return FixEnd(buf, sb.SB->position()); - } - - if (!ToStringConverterNoPad().ToFixed(t, ndigits, sb.SB)) { - return FloatToString(t, buf, len, PREC_AUTO); - } - - if (mode == PREC_POINT_DIGITS_STRIP_ZEROES) { - return FixZeros(buf, sb.SB->position()); - } - - return sb.SB->position(); -} + + return FixEnd(buf, sb.SB->position()); + } + + if (!ToStringConverterNoPad().ToFixed(t, ndigits, sb.SB)) { + return FloatToString(t, buf, len, PREC_AUTO); + } + + if (mode == PREC_POINT_DIGITS_STRIP_ZEROES) { + return FixZeros(buf, sb.SB->position()); + } + + return sb.SB->position(); +} diff --git a/util/string/cast.h b/util/string/cast.h index ad4228f2cc..90e925c194 100644 --- a/util/string/cast.h +++ b/util/string/cast.h @@ -1,31 +1,31 @@ #pragma once - + #include <util/system/defaults.h> -#include <util/stream/str.h> +#include <util/stream/str.h> #include <util/generic/string.h> #include <util/generic/strbuf.h> -#include <util/generic/typetraits.h> +#include <util/generic/typetraits.h> #include <util/generic/yexception.h> - -/* - * specialized for all arithmetic types - */ - -template <class T> -size_t ToStringImpl(T t, char* buf, size_t len); - + +/* + * specialized for all arithmetic types + */ + +template <class T> +size_t ToStringImpl(T t, char* buf, size_t len); + /** * Converts @c t to string writing not more than @c len bytes to output buffer @c buf. * No NULL terminator appended! Throws exception on buffer overflow. * @return number of bytes written */ -template <class T> -inline size_t ToString(const T& t, char* buf, size_t len) { +template <class T> +inline size_t ToString(const T& t, char* buf, size_t len) { using TParam = typename TTypeTraits<T>::TFuncParam; - - return ToStringImpl<TParam>(t, buf, len); -} - + + return ToStringImpl<TParam>(t, buf, len); +} + /** * Floating point to string conversion mode, values are enforced by `dtoa_impl.cpp`. */ @@ -49,108 +49,108 @@ enum EFloatToStringMode { size_t FloatToString(float t, char* buf, size_t len, EFloatToStringMode mode = PREC_AUTO, int ndigits = 0); size_t FloatToString(double t, char* buf, size_t len, EFloatToStringMode mode = PREC_AUTO, int ndigits = 0); -template <typename T> +template <typename T> inline TString FloatToString(const T& t, EFloatToStringMode mode = PREC_AUTO, int ndigits = 0) { char buf[512]; // Max<double>() with mode = PREC_POINT_DIGITS has 309 digits before the decimal point size_t count = FloatToString(t, buf, sizeof(buf), mode, ndigits); return TString(buf, count); } -namespace NPrivate { - template <class T, bool isSimple> - struct TToString { +namespace NPrivate { + template <class T, bool isSimple> + struct TToString { static inline TString Cvt(const T& t) { - char buf[512]; - + char buf[512]; + return TString(buf, ToString<T>(t, buf, sizeof(buf))); - } - }; - - template <class T> - struct TToString<T, false> { + } + }; + + template <class T> + struct TToString<T, false> { static inline TString Cvt(const T& t) { TString s; TStringOutput o(s); o << t; return s; - } - }; -} - -/* - * some clever implementations... - */ -template <class T> + } + }; +} + +/* + * some clever implementations... + */ +template <class T> inline TString ToString(const T& t) { using TR = std::remove_cv_t<T>; - + return ::NPrivate::TToString<TR, std::is_arithmetic<TR>::value>::Cvt((const TR&)t); -} - +} + inline const TString& ToString(const TString& s) noexcept { - return s; -} - + return s; +} + inline const TString& ToString(TString& s) noexcept { - return s; -} - + return s; +} + inline TString ToString(const char* s) { - return s; -} - + return s; +} + inline TString ToString(char* s) { - return s; -} - -/* + return s; +} + +/* * Wrapper for wide strings. */ -template <class T> +template <class T> inline TUtf16String ToWtring(const T& t) { return TUtf16String::FromAscii(ToString(t)); } inline const TUtf16String& ToWtring(const TUtf16String& w) { - return w; -} - + return w; +} + inline const TUtf16String& ToWtring(TUtf16String& w) { - return w; -} - + return w; +} + struct TFromStringException: public TBadCastException { }; /* - * specialized for: + * specialized for: * bool - * short - * unsigned short - * int - * unsigned int - * long - * unsigned long - * long long - * unsigned long long - * float - * double - * long double - */ + * short + * unsigned short + * int + * unsigned int + * long + * unsigned long + * long long + * unsigned long long + * float + * double + * long double + */ template <typename T, typename TChar> T FromStringImpl(const TChar* data, size_t len); - + template <typename T, typename TChar> inline T FromString(const TChar* data, size_t len) { - return ::FromStringImpl<T>(data, len); -} - + return ::FromStringImpl<T>(data, len); +} + template <typename T, typename TChar> inline T FromString(const TChar* data) { return ::FromString<T>(data, std::char_traits<TChar>::length(data)); -} - -template <class T> +} + +template <class T> inline T FromString(const TStringBuf& s) { return ::FromString<T>(s.data(), s.size()); } @@ -158,13 +158,13 @@ inline T FromString(const TStringBuf& s) { template <class T> inline T FromString(const TString& s) { return ::FromString<T>(s.data(), s.size()); -} - -template <class T> -inline T FromString(const std::string& s) { - return ::FromString<T>(s.data(), s.size()); -} - +} + +template <class T> +inline T FromString(const std::string& s) { + return ::FromString<T>(s.data(), s.size()); +} + template <> inline TString FromString<TString>(const TString& s) { return s; @@ -256,11 +256,11 @@ inline bool TryFromString(const TString& s, T& result) { } template <class T> -inline bool TryFromString(const std::string& s, T& result) { - return TryFromString<T>(s.data(), s.size(), result); -} - -template <class T> +inline bool TryFromString(const std::string& s, T& result) { + return TryFromString<T>(s.data(), s.size(), result); +} + +template <class T> inline bool TryFromString(const TWtringBuf& s, T& result) { return TryFromString<T>(s.data(), s.size(), result); } @@ -308,9 +308,9 @@ inline T FromStringWithDefault(const TStringType& s) { return FromStringWithDefault<T>(s, T()); } -double StrToD(const char* b, char** se); -double StrToD(const char* b, const char* e, char** se); - +double StrToD(const char* b, char** se); +double StrToD(const char* b, const char* e, char** se); + template <int base, class T> size_t IntToString(T t, char* buf, size_t len); diff --git a/util/string/cast.py b/util/string/cast.py index 7c5daa1212..4787f6ef44 100644 --- a/util/string/cast.py +++ b/util/string/cast.py @@ -1,27 +1,27 @@ -print 'static const ui8 SAFE_LENS[4][15] = {' - - -def nb(n, b): - if n == 0: - return [0] - - digits = [] - - while n: - digits.append(int(n % b)) - n /= b - - return digits[::-1] - - -for p in (1, 2, 4, 8): - - def it1(): - for base in range(2, 17): - m = 2 ** (8 * p) - 1 - - yield len(nb(m, base)) - 1 - - print ' {0, 0, ' + ', '.join(str(x) for x in it1()) + '},' - -print '};' +print 'static const ui8 SAFE_LENS[4][15] = {' + + +def nb(n, b): + if n == 0: + return [0] + + digits = [] + + while n: + digits.append(int(n % b)) + n /= b + + return digits[::-1] + + +for p in (1, 2, 4, 8): + + def it1(): + for base in range(2, 17): + m = 2 ** (8 * p) - 1 + + yield len(nb(m, base)) - 1 + + print ' {0, 0, ' + ', '.join(str(x) for x in it1()) + '},' + +print '};' diff --git a/util/string/cast_ut.cpp b/util/string/cast_ut.cpp index 2b7a8bc98b..033450c38c 100644 --- a/util/string/cast_ut.cpp +++ b/util/string/cast_ut.cpp @@ -1,44 +1,44 @@ -#include "cast.h" - +#include "cast.h" + #include <library/cpp/testing/unittest/registar.h> - + #include <util/charset/wide.h> #include <util/system/defaults.h> #include <limits> // positive test (return true or no exception) -#define test1(t, v) \ - F<t>().CheckTryOK(v); \ - F<t>().CheckOK(v) - +#define test1(t, v) \ + F<t>().CheckTryOK(v); \ + F<t>().CheckOK(v) + // negative test (return false or exception) -#define test2(t, v) \ - F<t>().CheckTryFail(v); \ - F<t>().CheckExc(v) - -#define EPS 10E-7 - +#define test2(t, v) \ + F<t>().CheckTryFail(v); \ + F<t>().CheckExc(v) + +#define EPS 10E-7 + #define HEX_MACROS_MAP(mac, type, val) mac(type, val, 2) mac(type, val, 8) mac(type, val, 10) mac(type, val, 16) -#define OK_HEX_CHECK(type, val, base) UNIT_ASSERT_EQUAL((IntFromStringForCheck<base>(IntToString<base>(val))), val); +#define OK_HEX_CHECK(type, val, base) UNIT_ASSERT_EQUAL((IntFromStringForCheck<base>(IntToString<base>(val))), val); #define EXC_HEX_CHECK(type, val, base) UNIT_ASSERT_EXCEPTION((IntFromString<type, base>(IntToString<base>(val))), yexception); #define TRY_HEX_MACROS_MAP(mac, type, val, result, def) \ - mac(type, val, result, def, 2) \ - mac(type, val, result, def, 8) \ - mac(type, val, result, def, 10) \ - mac(type, val, result, def, 16) + mac(type, val, result, def, 2) \ + mac(type, val, result, def, 8) \ + mac(type, val, result, def, 10) \ + mac(type, val, result, def, 16) -#define TRY_OK_HEX_CHECK(type, val, result, def, base) \ - result = def; \ +#define TRY_OK_HEX_CHECK(type, val, result, def, base) \ + result = def; \ UNIT_ASSERT_EQUAL(TryIntFromStringForCheck<base>(IntToString<base>(val), result), true); \ UNIT_ASSERT_EQUAL(result, val); -#define TRY_FAIL_HEX_CHECK(type, val, result, def, base) \ - result = def; \ - UNIT_ASSERT_VALUES_EQUAL(TryIntFromStringForCheck<base>(IntToString<base>(val), result), false); \ - UNIT_ASSERT_VALUES_EQUAL(result, def); +#define TRY_FAIL_HEX_CHECK(type, val, result, def, base) \ + result = def; \ + UNIT_ASSERT_VALUES_EQUAL(TryIntFromStringForCheck<base>(IntToString<base>(val), result), false); \ + UNIT_ASSERT_VALUES_EQUAL(result, def); template <class A> struct TRet { @@ -46,7 +46,7 @@ struct TRet { inline A IntFromStringForCheck(const TString& str) { return IntFromString<A, base>(str); } - + template <int base> inline bool TryIntFromStringForCheck(const TString& str, A& result) { return TryIntFromString<base>(str, result); @@ -54,11 +54,11 @@ struct TRet { template <class B> inline void CheckOK(B v) { - UNIT_ASSERT_VALUES_EQUAL(FromString<A>(ToString(v)), v); // char - UNIT_ASSERT_VALUES_EQUAL(FromString<A>(ToWtring(v)), v); // wide char + UNIT_ASSERT_VALUES_EQUAL(FromString<A>(ToString(v)), v); // char + UNIT_ASSERT_VALUES_EQUAL(FromString<A>(ToWtring(v)), v); // wide char HEX_MACROS_MAP(OK_HEX_CHECK, A, v); } - + template <class B> inline void CheckExc(B v) { UNIT_ASSERT_EXCEPTION(FromString<A>(ToString(v)), yexception); // char @@ -70,10 +70,10 @@ struct TRet { inline void CheckTryOK(B v) { static const A defaultV = 42; A convV; - UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToString(v), convV), true); // char - UNIT_ASSERT_VALUES_EQUAL(v, convV); - UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToWtring(v), convV), true); // wide char - UNIT_ASSERT_VALUES_EQUAL(v, convV); + UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToString(v), convV), true); // char + UNIT_ASSERT_VALUES_EQUAL(v, convV); + UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToWtring(v), convV), true); // wide char + UNIT_ASSERT_VALUES_EQUAL(v, convV); TRY_HEX_MACROS_MAP(TRY_OK_HEX_CHECK, A, v, convV, defaultV); } @@ -81,10 +81,10 @@ struct TRet { template <class B> inline void CheckTryFail(B v) { static const A defaultV = 42; - A convV = defaultV; // to check that original value is not trashed on bad cast - UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToString(v), convV), false); // char + A convV = defaultV; // to check that original value is not trashed on bad cast + UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToString(v), convV), false); // char UNIT_ASSERT_VALUES_EQUAL(defaultV, convV); - UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToWtring(v), convV), false); // wide char + UNIT_ASSERT_VALUES_EQUAL(TryFromString<A>(ToWtring(v), convV), false); // wide char UNIT_ASSERT_VALUES_EQUAL(defaultV, convV); TRY_HEX_MACROS_MAP(TRY_FAIL_HEX_CHECK, A, v, convV, defaultV); @@ -95,14 +95,14 @@ template <> struct TRet<bool> { template <class B> inline void CheckOK(B v) { - UNIT_ASSERT_VALUES_EQUAL(FromString<bool>(ToString(v)), v); + UNIT_ASSERT_VALUES_EQUAL(FromString<bool>(ToString(v)), v); } template <class B> inline void CheckTryOK(B v) { B convV; - UNIT_ASSERT_VALUES_EQUAL(TryFromString<bool>(ToString(v), convV), true); - UNIT_ASSERT_VALUES_EQUAL(v, convV); + UNIT_ASSERT_VALUES_EQUAL(TryFromString<bool>(ToString(v), convV), true); + UNIT_ASSERT_VALUES_EQUAL(v, convV); } template <class B> @@ -114,7 +114,7 @@ struct TRet<bool> { inline void CheckTryFail(B v) { static const bool defaultV = false; bool convV = defaultV; - UNIT_ASSERT_VALUES_EQUAL(TryFromString<bool>(ToString(v), convV), false); + UNIT_ASSERT_VALUES_EQUAL(TryFromString<bool>(ToString(v), convV), false); UNIT_ASSERT_VALUES_EQUAL(defaultV, convV); } }; @@ -124,7 +124,7 @@ inline TRet<A> F() { return TRet<A>(); }; -#if 0 +#if 0 template <class T> inline void CheckConvertToBuffer(const T& value, const size_t size, const TString& canonValue) { const size_t maxSize = 256; @@ -136,51 +136,51 @@ inline void CheckConvertToBuffer(const T& value, const size_t size, const TStrin UNIT_ASSERT_EXCEPTION(length = ToString(value, buffer, size), yexception); // check that no bytes after size was trashed for (size_t i = size; i < maxSize; ++i) - UNIT_ASSERT_VALUES_EQUAL(buffer[i], magic); + UNIT_ASSERT_VALUES_EQUAL(buffer[i], magic); } else { length = ToString(value, buffer, size); UNIT_ASSERT(length < maxSize); // check that no bytes after length was trashed for (size_t i = length; i < maxSize; ++i) - UNIT_ASSERT_VALUES_EQUAL(buffer[i], magic); + UNIT_ASSERT_VALUES_EQUAL(buffer[i], magic); TStringBuf result(buffer, length); - UNIT_ASSERT_VALUES_EQUAL(result, TStringBuf(canonValue)); + UNIT_ASSERT_VALUES_EQUAL(result, TStringBuf(canonValue)); } } -#endif +#endif Y_UNIT_TEST_SUITE(TCastTest) { - template <class A> - inline TRet<A> F() { - return TRet<A>(); - }; - - template <class TFloat> - void GoodFloatTester(const char* str, const TFloat canonValue, const double eps) { - TFloat f = canonValue + 42.0; // shift value to make it far from proper - UNIT_ASSERT_VALUES_EQUAL(TryFromString<TFloat>(str, f), true); - UNIT_ASSERT_DOUBLES_EQUAL(f, canonValue, eps); - f = FromString<TFloat>(str); - UNIT_ASSERT_DOUBLES_EQUAL(f, canonValue, eps); - } - - template <class TFloat> - void BadFloatTester(const char* str) { - const double eps = 10E-5; - TFloat f = 42.0; // make it far from proper - auto res = TryFromString<TFloat>(str, f); - - UNIT_ASSERT_VALUES_EQUAL(res, false); - UNIT_ASSERT_DOUBLES_EQUAL(f, 42.0, eps); // check value was not trashed - UNIT_ASSERT_EXCEPTION(f = FromString<TFloat>(str), TFromStringException); + template <class A> + inline TRet<A> F() { + return TRet<A>(); + }; + + template <class TFloat> + void GoodFloatTester(const char* str, const TFloat canonValue, const double eps) { + TFloat f = canonValue + 42.0; // shift value to make it far from proper + UNIT_ASSERT_VALUES_EQUAL(TryFromString<TFloat>(str, f), true); + UNIT_ASSERT_DOUBLES_EQUAL(f, canonValue, eps); + f = FromString<TFloat>(str); + UNIT_ASSERT_DOUBLES_EQUAL(f, canonValue, eps); + } + + template <class TFloat> + void BadFloatTester(const char* str) { + const double eps = 10E-5; + TFloat f = 42.0; // make it far from proper + auto res = TryFromString<TFloat>(str, f); + + UNIT_ASSERT_VALUES_EQUAL(res, false); + UNIT_ASSERT_DOUBLES_EQUAL(f, 42.0, eps); // check value was not trashed + UNIT_ASSERT_EXCEPTION(f = FromString<TFloat>(str), TFromStringException); Y_UNUSED(f); // shut up compiler about 'assigned value that is not used' - } + } Y_UNIT_TEST(TestToFrom) { - test1(bool, true); - test1(bool, false); - test2(bool, ""); - test2(bool, "a"); + test1(bool, true); + test1(bool, false); + test2(bool, ""); + test2(bool, "a"); test2(ui8, -1); test1(i8, -1); @@ -194,91 +194,91 @@ Y_UNIT_TEST_SUITE(TCastTest) { test1(ui8, UCHAR_MAX - 1); test2(ui8, (int)UCHAR_MAX + 1); test2(ui8, -1); - test1(int, -1); - test2(unsigned int, -1); - test1(short int, -1); - test2(unsigned short int, -1); - test1(long int, -1); - test2(unsigned long int, -1); - test1(int, INT_MAX); - test1(int, INT_MIN); - test1(int, INT_MAX - 1); - test1(int, INT_MIN + 1); - test2(int, (long long int)INT_MAX + 1); - test2(int, (long long int)INT_MIN - 1); - test1(unsigned int, UINT_MAX); - test1(unsigned int, UINT_MAX - 1); - test2(unsigned int, (long long int)UINT_MAX + 1); - test1(short int, SHRT_MAX); - test1(short int, SHRT_MIN); - test1(short int, SHRT_MAX - 1); - test1(short int, SHRT_MIN + 1); - test2(short int, (long long int)SHRT_MAX + 1); - test2(short int, (long long int)SHRT_MIN - 1); - test1(unsigned short int, USHRT_MAX); - test1(unsigned short int, USHRT_MAX - 1); - test2(unsigned short int, (long long int)USHRT_MAX + 1); - test1(long int, LONG_MAX); - test1(long int, LONG_MIN); - test1(long int, LONG_MAX - 1); - test1(long int, LONG_MIN + 1); + test1(int, -1); + test2(unsigned int, -1); + test1(short int, -1); + test2(unsigned short int, -1); + test1(long int, -1); + test2(unsigned long int, -1); + test1(int, INT_MAX); + test1(int, INT_MIN); + test1(int, INT_MAX - 1); + test1(int, INT_MIN + 1); + test2(int, (long long int)INT_MAX + 1); + test2(int, (long long int)INT_MIN - 1); + test1(unsigned int, UINT_MAX); + test1(unsigned int, UINT_MAX - 1); + test2(unsigned int, (long long int)UINT_MAX + 1); + test1(short int, SHRT_MAX); + test1(short int, SHRT_MIN); + test1(short int, SHRT_MAX - 1); + test1(short int, SHRT_MIN + 1); + test2(short int, (long long int)SHRT_MAX + 1); + test2(short int, (long long int)SHRT_MIN - 1); + test1(unsigned short int, USHRT_MAX); + test1(unsigned short int, USHRT_MAX - 1); + test2(unsigned short int, (long long int)USHRT_MAX + 1); + test1(long int, LONG_MAX); + test1(long int, LONG_MIN); + test1(long int, LONG_MAX - 1); + test1(long int, LONG_MIN + 1); test1(long long int, LLONG_MAX); test1(long long int, LLONG_MIN); test1(long long int, LLONG_MAX - 1); test1(long long int, LLONG_MIN + 1); - } - + } + Y_UNIT_TEST(TestVolatile) { - volatile int x = 1; - UNIT_ASSERT_VALUES_EQUAL(ToString(x), "1"); - } - + volatile int x = 1; + UNIT_ASSERT_VALUES_EQUAL(ToString(x), "1"); + } + Y_UNIT_TEST(TestStrToD) { UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.1", nullptr), 1.1, EPS); UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.12345678", nullptr), 1.12345678, EPS); UNIT_ASSERT_DOUBLES_EQUAL(StrToD("10E-5", nullptr), 10E-5, EPS); UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.1E+5", nullptr), 1.1E+5, EPS); - + char* ret = nullptr; - - UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.1y", &ret), 1.1, EPS); - UNIT_ASSERT_VALUES_EQUAL(*ret, 'y'); - UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.12345678z", &ret), 1.12345678, EPS); - UNIT_ASSERT_VALUES_EQUAL(*ret, 'z'); - UNIT_ASSERT_DOUBLES_EQUAL(StrToD("10E-5y", &ret), 10E-5, EPS); - UNIT_ASSERT_VALUES_EQUAL(*ret, 'y'); - UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.1E+5z", &ret), 1.1E+5, EPS); - UNIT_ASSERT_VALUES_EQUAL(*ret, 'z'); - } - + + UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.1y", &ret), 1.1, EPS); + UNIT_ASSERT_VALUES_EQUAL(*ret, 'y'); + UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.12345678z", &ret), 1.12345678, EPS); + UNIT_ASSERT_VALUES_EQUAL(*ret, 'z'); + UNIT_ASSERT_DOUBLES_EQUAL(StrToD("10E-5y", &ret), 10E-5, EPS); + UNIT_ASSERT_VALUES_EQUAL(*ret, 'y'); + UNIT_ASSERT_DOUBLES_EQUAL(StrToD("1.1E+5z", &ret), 1.1E+5, EPS); + UNIT_ASSERT_VALUES_EQUAL(*ret, 'z'); + } + Y_UNIT_TEST(TestFloats) { // "%g" mode - UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f, PREC_NDIGITS, 6), "0.1"); // drop trailing zeroes - UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f, PREC_NDIGITS, 6), "0.123457"); - UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-20f, PREC_NDIGITS, 6), "1e-20"); + UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f, PREC_NDIGITS, 6), "0.1"); // drop trailing zeroes + UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f, PREC_NDIGITS, 6), "0.123457"); + UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-20f, PREC_NDIGITS, 6), "1e-20"); // "%f" mode - UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f, PREC_POINT_DIGITS, 6), "0.100000"); - UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f, PREC_POINT_DIGITS, 6), "0.123457"); - UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-20f, PREC_POINT_DIGITS, 6), "0.000000"); - UNIT_ASSERT_VALUES_EQUAL(FloatToString(12.34f, PREC_POINT_DIGITS, 0), "12"); // rounding to integers drops '.' + UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f, PREC_POINT_DIGITS, 6), "0.100000"); + UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f, PREC_POINT_DIGITS, 6), "0.123457"); + UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-20f, PREC_POINT_DIGITS, 6), "0.000000"); + UNIT_ASSERT_VALUES_EQUAL(FloatToString(12.34f, PREC_POINT_DIGITS, 0), "12"); // rounding to integers drops '.' // strip trailing zeroes - UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f, PREC_POINT_DIGITS_STRIP_ZEROES, 6), "0.1"); - UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f, PREC_POINT_DIGITS_STRIP_ZEROES, 6), "0.123457"); - UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-20f, PREC_POINT_DIGITS_STRIP_ZEROES, 6), "0"); - UNIT_ASSERT_VALUES_EQUAL(FloatToString(12.34f, PREC_POINT_DIGITS_STRIP_ZEROES, 0), "12"); // rounding to integers drops '.' - UNIT_ASSERT_VALUES_EQUAL(FloatToString(10000.0f, PREC_POINT_DIGITS_STRIP_ZEROES, 0), "10000"); + UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f, PREC_POINT_DIGITS_STRIP_ZEROES, 6), "0.1"); + UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f, PREC_POINT_DIGITS_STRIP_ZEROES, 6), "0.123457"); + UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-20f, PREC_POINT_DIGITS_STRIP_ZEROES, 6), "0"); + UNIT_ASSERT_VALUES_EQUAL(FloatToString(12.34f, PREC_POINT_DIGITS_STRIP_ZEROES, 0), "12"); // rounding to integers drops '.' + UNIT_ASSERT_VALUES_EQUAL(FloatToString(10000.0f, PREC_POINT_DIGITS_STRIP_ZEROES, 0), "10000"); // automatic selection of ndigits - UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f), "0.1"); // drop trailing zeroes - UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f), "0.12345678"); // 8 valid digits - UNIT_ASSERT_VALUES_EQUAL(FloatToString(1000.00006f), "1000.00006"); // 9 valid digits - UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-45f), "1e-45"); // denormalized: 1 valid digit - UNIT_ASSERT_VALUES_EQUAL(FloatToString(-0.0f), "-0"); // sign must be preserved + UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.1f), "0.1"); // drop trailing zeroes + UNIT_ASSERT_VALUES_EQUAL(FloatToString(0.12345678f), "0.12345678"); // 8 valid digits + UNIT_ASSERT_VALUES_EQUAL(FloatToString(1000.00006f), "1000.00006"); // 9 valid digits + UNIT_ASSERT_VALUES_EQUAL(FloatToString(1e-45f), "1e-45"); // denormalized: 1 valid digit + UNIT_ASSERT_VALUES_EQUAL(FloatToString(-0.0f), "-0"); // sign must be preserved // version for double - UNIT_ASSERT_VALUES_EQUAL(FloatToString(1.0 / 10000), "0.0001"); // trailing zeroes - UNIT_ASSERT_VALUES_EQUAL(FloatToString(1.2345678901234567), "1.2345678901234567"); // no truncation - UNIT_ASSERT_VALUES_EQUAL(FloatToString(5e-324), "5e-324"); // denormalized - UNIT_ASSERT_VALUES_EQUAL(FloatToString(-0.0), "-0"); // sign must be preserved + UNIT_ASSERT_VALUES_EQUAL(FloatToString(1.0 / 10000), "0.0001"); // trailing zeroes + UNIT_ASSERT_VALUES_EQUAL(FloatToString(1.2345678901234567), "1.2345678901234567"); // no truncation + UNIT_ASSERT_VALUES_EQUAL(FloatToString(5e-324), "5e-324"); // denormalized + UNIT_ASSERT_VALUES_EQUAL(FloatToString(-0.0), "-0"); // sign must be preserved UNIT_ASSERT_STRINGS_EQUAL(FloatToString(std::numeric_limits<double>::quiet_NaN()), "nan"); UNIT_ASSERT_STRINGS_EQUAL(FloatToString(std::numeric_limits<double>::infinity()), "inf"); @@ -287,154 +287,154 @@ Y_UNIT_TEST_SUITE(TCastTest) { UNIT_ASSERT_STRINGS_EQUAL(FloatToString(std::numeric_limits<float>::quiet_NaN()), "nan"); UNIT_ASSERT_STRINGS_EQUAL(FloatToString(std::numeric_limits<float>::infinity()), "inf"); UNIT_ASSERT_STRINGS_EQUAL(FloatToString(-std::numeric_limits<float>::infinity()), "-inf"); - } - + } + Y_UNIT_TEST(TestReadFloats) { - GoodFloatTester<float>("0.0001", 0.0001f, EPS); - GoodFloatTester<double>("0.0001", 0.0001, EPS); - GoodFloatTester<long double>("0.0001", 0.0001, EPS); - GoodFloatTester<float>("10E-5", 10E-5f, EPS); - GoodFloatTester<double>("1.0001E5", 1.0001E5, EPS); - GoodFloatTester<long double>("1.0001e5", 1.0001e5, EPS); - GoodFloatTester<long double>(".0001e5", .0001e5, EPS); - BadFloatTester<float>("a10E-5"); - BadFloatTester<float>("10 "); - BadFloatTester<float>("10\t"); - //BadFloatTester<float>("10E"); - //BadFloatTester<float>("10.E"); - BadFloatTester<float>("..0"); + GoodFloatTester<float>("0.0001", 0.0001f, EPS); + GoodFloatTester<double>("0.0001", 0.0001, EPS); + GoodFloatTester<long double>("0.0001", 0.0001, EPS); + GoodFloatTester<float>("10E-5", 10E-5f, EPS); + GoodFloatTester<double>("1.0001E5", 1.0001E5, EPS); + GoodFloatTester<long double>("1.0001e5", 1.0001e5, EPS); + GoodFloatTester<long double>(".0001e5", .0001e5, EPS); + BadFloatTester<float>("a10E-5"); + BadFloatTester<float>("10 "); + BadFloatTester<float>("10\t"); + //BadFloatTester<float>("10E"); + //BadFloatTester<float>("10.E"); + BadFloatTester<float>("..0"); BadFloatTester<float>(""); // IGNIETFERRO-300 - BadFloatTester<double>("1.00.01"); - BadFloatTester<double>("1.0001E5b"); - BadFloatTester<double>("1.0001s"); - BadFloatTester<double>("1..01"); + BadFloatTester<double>("1.00.01"); + BadFloatTester<double>("1.0001E5b"); + BadFloatTester<double>("1.0001s"); + BadFloatTester<double>("1..01"); BadFloatTester<double>(""); // IGNIETFERRO-300 - BadFloatTester<long double>(".1.00"); - BadFloatTester<long double>("1.00."); - BadFloatTester<long double>("1.0001e5-"); - BadFloatTester<long double>("10e 2"); + BadFloatTester<long double>(".1.00"); + BadFloatTester<long double>("1.00."); + BadFloatTester<long double>("1.0001e5-"); + BadFloatTester<long double>("10e 2"); BadFloatTester<long double>(""); // IGNIETFERRO-300 - } + } Y_UNIT_TEST(TestLiteral) { - UNIT_ASSERT_VALUES_EQUAL(ToString("abc"), TString("abc")); - } + UNIT_ASSERT_VALUES_EQUAL(ToString("abc"), TString("abc")); + } Y_UNIT_TEST(TestFromStringStringBuf) { TString a = "xyz"; - TStringBuf b = FromString<TStringBuf>(a); - UNIT_ASSERT_VALUES_EQUAL(a, b); + TStringBuf b = FromString<TStringBuf>(a); + UNIT_ASSERT_VALUES_EQUAL(a, b); UNIT_ASSERT_VALUES_EQUAL((void*)a.data(), (void*)b.data()); - } + } -#if 0 +#if 0 Y_UNIT_TEST(TestBufferOverflow) { - CheckConvertToBuffer<float>(1.f, 5, "1"); - CheckConvertToBuffer<float>(1.005f, 3, "1.005"); - CheckConvertToBuffer<float>(1.00000000f, 3, "1"); + CheckConvertToBuffer<float>(1.f, 5, "1"); + CheckConvertToBuffer<float>(1.005f, 3, "1.005"); + CheckConvertToBuffer<float>(1.00000000f, 3, "1"); - CheckConvertToBuffer<double>(1.f, 5, "1"); - CheckConvertToBuffer<double>(1.005f, 3, "1.005"); - CheckConvertToBuffer<double>(1.00000000f, 3, "1"); + CheckConvertToBuffer<double>(1.f, 5, "1"); + CheckConvertToBuffer<double>(1.005f, 3, "1.005"); + CheckConvertToBuffer<double>(1.00000000f, 3, "1"); - CheckConvertToBuffer<int>(2, 5, "2"); - CheckConvertToBuffer<int>(1005, 3, "1005"); + CheckConvertToBuffer<int>(2, 5, "2"); + CheckConvertToBuffer<int>(1005, 3, "1005"); - CheckConvertToBuffer<size_t>(2, 5, "2"); - CheckConvertToBuffer<ui64>(1005000000000000ull, 32, "1005000000000000"); - CheckConvertToBuffer<ui64>(1005000000000000ull, 3, "1005000000000000"); + CheckConvertToBuffer<size_t>(2, 5, "2"); + CheckConvertToBuffer<ui64>(1005000000000000ull, 32, "1005000000000000"); + CheckConvertToBuffer<ui64>(1005000000000000ull, 3, "1005000000000000"); - // TString longNumber = TString("1.") + TString(1 << 20, '1'); - // UNIT_ASSERT_EXCEPTION(FromString<double>(longNumber), yexception); - } -#endif + // TString longNumber = TString("1.") + TString(1 << 20, '1'); + // UNIT_ASSERT_EXCEPTION(FromString<double>(longNumber), yexception); + } +#endif Y_UNIT_TEST(TestWide) { TUtf16String iw = u"-100500"; - int iv = 0; - UNIT_ASSERT_VALUES_EQUAL(TryFromString(iw, iv), true); - UNIT_ASSERT_VALUES_EQUAL(iv, -100500); + int iv = 0; + UNIT_ASSERT_VALUES_EQUAL(TryFromString(iw, iv), true); + UNIT_ASSERT_VALUES_EQUAL(iv, -100500); - ui64 uv = 0; + ui64 uv = 0; TUtf16String uw = u"21474836470"; - UNIT_ASSERT_VALUES_EQUAL(TryFromString(uw, uv), true); - UNIT_ASSERT_VALUES_EQUAL(uv, 21474836470ull); + UNIT_ASSERT_VALUES_EQUAL(TryFromString(uw, uv), true); + UNIT_ASSERT_VALUES_EQUAL(uv, 21474836470ull); TWtringBuf bw(uw.data(), uw.size()); - uv = 0; - UNIT_ASSERT_VALUES_EQUAL(TryFromString(uw, uv), true); - UNIT_ASSERT_VALUES_EQUAL(uv, 21474836470ull); + uv = 0; + UNIT_ASSERT_VALUES_EQUAL(TryFromString(uw, uv), true); + UNIT_ASSERT_VALUES_EQUAL(uv, 21474836470ull); const wchar16* beg = uw.data(); - uv = 0; + uv = 0; UNIT_ASSERT_VALUES_EQUAL(TryFromString(beg, uw.size(), uv), true); - UNIT_ASSERT_VALUES_EQUAL(uv, 21474836470ull); - } + UNIT_ASSERT_VALUES_EQUAL(uv, 21474836470ull); + } Y_UNIT_TEST(TestDefault) { size_t res = 0; const size_t def1 = 42; TString s1("100500"); - UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s1, res, def1), true); - UNIT_ASSERT_VALUES_EQUAL(res, 100500); + UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s1, res, def1), true); + UNIT_ASSERT_VALUES_EQUAL(res, 100500); - UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s1, res), true); - UNIT_ASSERT_VALUES_EQUAL(res, 100500); + UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s1, res), true); + UNIT_ASSERT_VALUES_EQUAL(res, 100500); - UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault("100500", res, def1), true); - UNIT_ASSERT_VALUES_EQUAL(res, 100500); + UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault("100500", res, def1), true); + UNIT_ASSERT_VALUES_EQUAL(res, 100500); UNIT_CHECK_GENERATED_NO_EXCEPTION(FromStringWithDefault(s1, def1), yexception); - UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s1, def1), 100500); - UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s1), 100500); - UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault("100500", def1), 100500); + UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s1, def1), 100500); + UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s1), 100500); + UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault("100500", def1), 100500); TString s2("100q500"); - UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s2, res), false); - UNIT_ASSERT_VALUES_EQUAL(res, size_t()); + UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s2, res), false); + UNIT_ASSERT_VALUES_EQUAL(res, size_t()); - UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s2, res, def1), false); - UNIT_ASSERT_VALUES_EQUAL(res, def1); + UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s2, res, def1), false); + UNIT_ASSERT_VALUES_EQUAL(res, def1); - UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault("100q500", res), false); - UNIT_ASSERT_VALUES_EQUAL(res, size_t()); + UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault("100q500", res), false); + UNIT_ASSERT_VALUES_EQUAL(res, size_t()); UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault("100 500", res), false); UNIT_ASSERT_VALUES_EQUAL(res, size_t()); UNIT_CHECK_GENERATED_NO_EXCEPTION(FromStringWithDefault(s2, def1), yexception); UNIT_CHECK_GENERATED_NO_EXCEPTION(FromStringWithDefault("100q500", def1), yexception); - UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s2, def1), def1); - UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s2), size_t()); - UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>("100q500"), size_t()); + UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s2, def1), def1); + UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s2), size_t()); + UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>("100q500"), size_t()); UNIT_CHECK_GENERATED_EXCEPTION(FromString<size_t>(s2), TFromStringException); int res2 = 0; const int def2 = -6; TUtf16String s3 = u"-100500"; - UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s3, res2, def2), true); - UNIT_ASSERT_VALUES_EQUAL(res2, -100500); + UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s3, res2, def2), true); + UNIT_ASSERT_VALUES_EQUAL(res2, -100500); - UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s3, res2), true); - UNIT_ASSERT_VALUES_EQUAL(res2, -100500); + UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s3, res2), true); + UNIT_ASSERT_VALUES_EQUAL(res2, -100500); UNIT_CHECK_GENERATED_NO_EXCEPTION(FromStringWithDefault(s3, def1), yexception); - UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s3, def2), -100500); - UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s3), size_t()); + UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s3, def2), -100500); + UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s3), size_t()); TUtf16String s4 = u"-f100500"; - UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s4, res2, def2), false); - UNIT_ASSERT_VALUES_EQUAL(res2, def2); + UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s4, res2, def2), false); + UNIT_ASSERT_VALUES_EQUAL(res2, def2); - UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s4, res2), false); - UNIT_ASSERT_VALUES_EQUAL(res2, size_t()); + UNIT_ASSERT_VALUES_EQUAL(TryFromStringWithDefault(s4, res2), false); + UNIT_ASSERT_VALUES_EQUAL(res2, size_t()); UNIT_CHECK_GENERATED_NO_EXCEPTION(FromStringWithDefault(s4, def2), yexception); - UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s4, def2), def2); + UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault(s4, def2), def2); UNIT_CHECK_GENERATED_EXCEPTION(FromString<size_t>(s4), yexception); - UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s4), size_t()); + UNIT_ASSERT_VALUES_EQUAL(FromStringWithDefault<size_t>(s4), size_t()); } Y_UNIT_TEST(TestBool) { @@ -450,21 +450,21 @@ Y_UNIT_TEST_SUITE(TCastTest) { } Y_UNIT_TEST(TestAutoDetectType) { - UNIT_ASSERT_DOUBLES_EQUAL((float)FromString("0.0001"), 0.0001, EPS); - UNIT_ASSERT_DOUBLES_EQUAL((double)FromString("0.0015", sizeof("0.0015") - 2), 0.001, EPS); + UNIT_ASSERT_DOUBLES_EQUAL((float)FromString("0.0001"), 0.0001, EPS); + UNIT_ASSERT_DOUBLES_EQUAL((double)FromString("0.0015", sizeof("0.0015") - 2), 0.001, EPS); UNIT_ASSERT_DOUBLES_EQUAL((long double)FromString(TStringBuf("0.0001")), 0.0001, EPS); UNIT_ASSERT_DOUBLES_EQUAL((float)FromString(TString("10E-5")), 10E-5, EPS); - UNIT_ASSERT_VALUES_EQUAL((bool)FromString("da"), true); - UNIT_ASSERT_VALUES_EQUAL((bool)FromString("no"), false); + UNIT_ASSERT_VALUES_EQUAL((bool)FromString("da"), true); + UNIT_ASSERT_VALUES_EQUAL((bool)FromString("no"), false); UNIT_ASSERT_VALUES_EQUAL((short)FromString(u"9000"), 9000); UNIT_ASSERT_VALUES_EQUAL((int)FromString(u"-100500"), -100500); UNIT_ASSERT_VALUES_EQUAL((unsigned long long)FromString(TWtringBuf(u"42", 1)), 4); - int integer = FromString("125"); + int integer = FromString("125"); ui16 wideCharacterCode = FromString(u"125"); UNIT_ASSERT_VALUES_EQUAL(integer, wideCharacterCode); - } + } - static void CheckMessage(TFromStringException& exc, const TString& phrase) { + static void CheckMessage(TFromStringException& exc, const TString& phrase) { TString message = exc.what(); if (!message.Contains(phrase)) { Cerr << message << Endl; @@ -476,14 +476,14 @@ Y_UNIT_TEST_SUITE(TCastTest) { try { FromString<ui32>(""); UNIT_ASSERT(false); - } catch (TFromStringException& e) { + } catch (TFromStringException& e) { CheckMessage(e, "empty string as number"); } try { FromString<ui32>("-"); UNIT_ASSERT(false); - } catch (TFromStringException& e) { + } catch (TFromStringException& e) { // Unsigned should have no sign at all, so - is not expected CheckMessage(e, "Unexpected symbol \"-\" at pos 0 in string \"-\""); } @@ -491,21 +491,21 @@ Y_UNIT_TEST_SUITE(TCastTest) { try { FromString<i32>("-"); UNIT_ASSERT(false); - } catch (TFromStringException& e) { + } catch (TFromStringException& e) { CheckMessage(e, "Cannot parse string \"-\" as number"); } try { FromString<i32>("+"); UNIT_ASSERT(false); - } catch (TFromStringException& e) { + } catch (TFromStringException& e) { CheckMessage(e, "Cannot parse string \"+\" as number"); } try { FromString<ui32>("0.328413745072"); UNIT_ASSERT(false); - } catch (TFromStringException& e) { + } catch (TFromStringException& e) { CheckMessage(e, "Unexpected symbol \".\" at pos 1 in string \"0.328413745072\""); } } @@ -515,69 +515,69 @@ Y_UNIT_TEST_SUITE(TCastTest) { constexpr TStringBuf hello = "hello"; TStringBuf out; UNIT_ASSERT(TryFromString(hello, out)); - UNIT_ASSERT_VALUES_EQUAL(hello, out); + UNIT_ASSERT_VALUES_EQUAL(hello, out); } { constexpr TStringBuf empty = ""; TStringBuf out; UNIT_ASSERT(TryFromString(empty, out)); - UNIT_ASSERT_VALUES_EQUAL(empty, out); + UNIT_ASSERT_VALUES_EQUAL(empty, out); } { constexpr TStringBuf empty; TStringBuf out; UNIT_ASSERT(TryFromString(empty, out)); - UNIT_ASSERT_VALUES_EQUAL(empty, out); + UNIT_ASSERT_VALUES_EQUAL(empty, out); } { const auto hello = u"hello"; TWtringBuf out; UNIT_ASSERT(TryFromString(hello, out)); - UNIT_ASSERT_VALUES_EQUAL(hello, out); + UNIT_ASSERT_VALUES_EQUAL(hello, out); } { const TUtf16String empty; TWtringBuf out; UNIT_ASSERT(TryFromString(empty, out)); - UNIT_ASSERT_VALUES_EQUAL(empty, out); + UNIT_ASSERT_VALUES_EQUAL(empty, out); } { constexpr TWtringBuf empty; TWtringBuf out; UNIT_ASSERT(TryFromString(empty, out)); - UNIT_ASSERT_VALUES_EQUAL(empty, out); + UNIT_ASSERT_VALUES_EQUAL(empty, out); } } - + Y_UNIT_TEST(Nan) { - double xx = 0; - - UNIT_ASSERT(!TryFromString("NaN", xx)); - UNIT_ASSERT(!TryFromString("NAN", xx)); - UNIT_ASSERT(!TryFromString("nan", xx)); - } - + double xx = 0; + + UNIT_ASSERT(!TryFromString("NaN", xx)); + UNIT_ASSERT(!TryFromString("NAN", xx)); + UNIT_ASSERT(!TryFromString("nan", xx)); + } + Y_UNIT_TEST(Infinity) { - double xx = 0; - - UNIT_ASSERT(!TryFromString("Infinity", xx)); - UNIT_ASSERT(!TryFromString("INFINITY", xx)); - UNIT_ASSERT(!TryFromString("infinity", xx)); - } - + double xx = 0; + + UNIT_ASSERT(!TryFromString("Infinity", xx)); + UNIT_ASSERT(!TryFromString("INFINITY", xx)); + UNIT_ASSERT(!TryFromString("infinity", xx)); + } + Y_UNIT_TEST(TestBorderCases) { - UNIT_ASSERT_VALUES_EQUAL(ToString(0.0), "0"); - UNIT_ASSERT_VALUES_EQUAL(ToString(1.0), "1"); - UNIT_ASSERT_VALUES_EQUAL(ToString(10.0), "10"); - UNIT_ASSERT_VALUES_EQUAL(ToString(NAN), "nan"); - UNIT_ASSERT_VALUES_EQUAL(ToString(-NAN), "nan"); - UNIT_ASSERT_VALUES_EQUAL(ToString(INFINITY), "inf"); - UNIT_ASSERT_VALUES_EQUAL(ToString(-INFINITY), "-inf"); - UNIT_ASSERT_VALUES_EQUAL(ToString(1.1e+100), "1.1e+100"); - UNIT_ASSERT_VALUES_EQUAL(ToString(1e+100), "1e+100"); - UNIT_ASSERT_VALUES_EQUAL(ToString(87423.2031250000001), "87423.20313"); - UNIT_ASSERT_VALUES_EQUAL(FloatToString(1.0e60, PREC_POINT_DIGITS_STRIP_ZEROES, 0), "1e+60"); - } + UNIT_ASSERT_VALUES_EQUAL(ToString(0.0), "0"); + UNIT_ASSERT_VALUES_EQUAL(ToString(1.0), "1"); + UNIT_ASSERT_VALUES_EQUAL(ToString(10.0), "10"); + UNIT_ASSERT_VALUES_EQUAL(ToString(NAN), "nan"); + UNIT_ASSERT_VALUES_EQUAL(ToString(-NAN), "nan"); + UNIT_ASSERT_VALUES_EQUAL(ToString(INFINITY), "inf"); + UNIT_ASSERT_VALUES_EQUAL(ToString(-INFINITY), "-inf"); + UNIT_ASSERT_VALUES_EQUAL(ToString(1.1e+100), "1.1e+100"); + UNIT_ASSERT_VALUES_EQUAL(ToString(1e+100), "1e+100"); + UNIT_ASSERT_VALUES_EQUAL(ToString(87423.2031250000001), "87423.20313"); + UNIT_ASSERT_VALUES_EQUAL(FloatToString(1.0e60, PREC_POINT_DIGITS_STRIP_ZEROES, 0), "1e+60"); + } Y_UNIT_TEST(TestChar) { // Given a character ch, ToString(ch) returns @@ -599,4 +599,4 @@ Y_UNIT_TEST_SUITE(TCastTest) { UNIT_ASSERT_VALUES_EQUAL(ToString(U'я'), "1103"); UNIT_ASSERT_VALUES_EQUAL(ToString(U'\U0001F600'), "128512"); // 'GRINNING FACE' (U+1F600) } -}; +}; diff --git a/util/string/cstriter.cpp b/util/string/cstriter.cpp index 6c41ef90e4..fd61359c3d 100644 --- a/util/string/cstriter.cpp +++ b/util/string/cstriter.cpp @@ -1 +1 @@ -#include "cstriter.h" +#include "cstriter.h" diff --git a/util/string/cstriter.h b/util/string/cstriter.h index b4e7c30a97..ca57728c39 100644 --- a/util/string/cstriter.h +++ b/util/string/cstriter.h @@ -1,14 +1,14 @@ -#pragma once - -struct TCStringEndIterator { -}; - -template <class It> -static inline bool operator==(It b, TCStringEndIterator) { - return !*b; -} - -template <class It> -static inline bool operator!=(It b, TCStringEndIterator) { +#pragma once + +struct TCStringEndIterator { +}; + +template <class It> +static inline bool operator==(It b, TCStringEndIterator) { + return !*b; +} + +template <class It> +static inline bool operator!=(It b, TCStringEndIterator) { return !!*b; -} +} diff --git a/util/string/escape.cpp b/util/string/escape.cpp index 2e0c2890fe..cd09a7dbd0 100644 --- a/util/string/escape.cpp +++ b/util/string/escape.cpp @@ -1,6 +1,6 @@ -#include "escape.h" -#include "cast.h" - +#include "escape.h" +#include "cast.h" + #include <util/system/defaults.h> #include <util/charset/utf8.h> #include <util/charset/wide.h> @@ -25,7 +25,7 @@ * Each octal or hexadecimal escape sequence is the longest sequence of characters that can * constitute the escape sequence. * - * THEREFORE: + * THEREFORE: * - Octal escape sequence spans until rightmost non-octal-digit character. * - Octal escape sequence always terminates after three octal digits. * - Hexadecimal escape sequence spans until rightmost non-hexadecimal-digit character. @@ -40,113 +40,113 @@ * Replacement: [ ] { } # \ ^ | ~ * */ -namespace { - template <typename TChar> - static inline char HexDigit(TChar value) { +namespace { + template <typename TChar> + static inline char HexDigit(TChar value) { Y_ASSERT(value < 16); - if (value < 10) { - return '0' + value; - } else { - return 'A' + value - 10; - } - } - - template <typename TChar> - static inline char OctDigit(TChar value) { + if (value < 10) { + return '0' + value; + } else { + return 'A' + value - 10; + } + } + + template <typename TChar> + static inline char OctDigit(TChar value) { Y_ASSERT(value < 8); return '0' + value; - } - - template <typename TChar> - static inline bool IsPrintable(TChar c) { - return c >= 32 && c <= 126; - } - - template <typename TChar> - static inline bool IsHexDigit(TChar c) { - return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); - } - - template <typename TChar> - static inline bool IsOctDigit(TChar c) { - return c >= '0' && c <= '7'; - } - - template <typename TChar> - struct TEscapeUtil; - - template <> - struct TEscapeUtil<char> { - static const size_t ESCAPE_C_BUFFER_SIZE = 4; - - template <typename TNextChar, typename TBufferChar> - static inline size_t EscapeC(unsigned char c, TNextChar next, TBufferChar r[ESCAPE_C_BUFFER_SIZE]) { - // (1) Printable characters go as-is, except backslash and double quote. - // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). - // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. - if (c == '\"') { - r[0] = '\\'; - r[1] = '\"'; - return 2; - } else if (c == '\\') { - r[0] = '\\'; - r[1] = '\\'; - return 2; - } else if (IsPrintable(c) && (!(c == '?' && next == '?'))) { - r[0] = c; - return 1; - } else if (c == '\r') { - r[0] = '\\'; - r[1] = 'r'; - return 2; - } else if (c == '\n') { - r[0] = '\\'; - r[1] = 'n'; - return 2; - } else if (c == '\t') { - r[0] = '\\'; - r[1] = 't'; - return 2; - } else if (c < 8 && !IsOctDigit(next)) { - r[0] = '\\'; - r[1] = OctDigit(c); - return 2; - } else if (!IsHexDigit(next)) { - r[0] = '\\'; - r[1] = 'x'; - r[2] = HexDigit((c & 0xF0) >> 4); - r[3] = HexDigit((c & 0x0F) >> 0); - return 4; - } else { - r[0] = '\\'; - r[1] = OctDigit((c & 0700) >> 6); - r[2] = OctDigit((c & 0070) >> 3); - r[3] = OctDigit((c & 0007) >> 0); - return 4; - } + } + + template <typename TChar> + static inline bool IsPrintable(TChar c) { + return c >= 32 && c <= 126; + } + + template <typename TChar> + static inline bool IsHexDigit(TChar c) { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); + } + + template <typename TChar> + static inline bool IsOctDigit(TChar c) { + return c >= '0' && c <= '7'; + } + + template <typename TChar> + struct TEscapeUtil; + + template <> + struct TEscapeUtil<char> { + static const size_t ESCAPE_C_BUFFER_SIZE = 4; + + template <typename TNextChar, typename TBufferChar> + static inline size_t EscapeC(unsigned char c, TNextChar next, TBufferChar r[ESCAPE_C_BUFFER_SIZE]) { + // (1) Printable characters go as-is, except backslash and double quote. + // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). + // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. + if (c == '\"') { + r[0] = '\\'; + r[1] = '\"'; + return 2; + } else if (c == '\\') { + r[0] = '\\'; + r[1] = '\\'; + return 2; + } else if (IsPrintable(c) && (!(c == '?' && next == '?'))) { + r[0] = c; + return 1; + } else if (c == '\r') { + r[0] = '\\'; + r[1] = 'r'; + return 2; + } else if (c == '\n') { + r[0] = '\\'; + r[1] = 'n'; + return 2; + } else if (c == '\t') { + r[0] = '\\'; + r[1] = 't'; + return 2; + } else if (c < 8 && !IsOctDigit(next)) { + r[0] = '\\'; + r[1] = OctDigit(c); + return 2; + } else if (!IsHexDigit(next)) { + r[0] = '\\'; + r[1] = 'x'; + r[2] = HexDigit((c & 0xF0) >> 4); + r[3] = HexDigit((c & 0x0F) >> 0); + return 4; + } else { + r[0] = '\\'; + r[1] = OctDigit((c & 0700) >> 6); + r[2] = OctDigit((c & 0070) >> 3); + r[3] = OctDigit((c & 0007) >> 0); + return 4; + } } - }; - - template <> - struct TEscapeUtil<wchar16> { - static const size_t ESCAPE_C_BUFFER_SIZE = 6; - - template <typename TNextChar, typename TBufferChar> - static inline size_t EscapeC(wchar16 c, TNextChar next, TBufferChar r[ESCAPE_C_BUFFER_SIZE]) { - if (c < 0x100) { - return TEscapeUtil<char>::EscapeC(char(c), next, r); - } else { - r[0] = '\\'; - r[1] = 'u'; - r[2] = HexDigit((c & 0xF000) >> 12); - r[3] = HexDigit((c & 0x0F00) >> 8); - r[4] = HexDigit((c & 0x00F0) >> 4); - r[5] = HexDigit((c & 0x000F) >> 0); - return 6; - } + }; + + template <> + struct TEscapeUtil<wchar16> { + static const size_t ESCAPE_C_BUFFER_SIZE = 6; + + template <typename TNextChar, typename TBufferChar> + static inline size_t EscapeC(wchar16 c, TNextChar next, TBufferChar r[ESCAPE_C_BUFFER_SIZE]) { + if (c < 0x100) { + return TEscapeUtil<char>::EscapeC(char(c), next, r); + } else { + r[0] = '\\'; + r[1] = 'u'; + r[2] = HexDigit((c & 0xF000) >> 12); + r[3] = HexDigit((c & 0x0F00) >> 8); + r[4] = HexDigit((c & 0x00F0) >> 4); + r[5] = HexDigit((c & 0x000F) >> 0); + return 6; + } } - }; -} + }; +} template <class TChar> TBasicString<TChar>& EscapeCImpl(const TChar* str, size_t len, TBasicString<TChar>& r) { @@ -176,204 +176,204 @@ TBasicString<TChar>& EscapeCImpl(const TChar* str, size_t len, TBasicString<TCha template TString& EscapeCImpl<TString::TChar>(const TString::TChar* str, size_t len, TString& r); template TUtf16String& EscapeCImpl<TUtf16String::TChar>(const TUtf16String::TChar* str, size_t len, TUtf16String& r); - -namespace { - template <class TStr> + +namespace { + template <class TStr> inline void AppendUnicode(TStr& s, wchar32 v) { - char buf[10]; - size_t sz = 0; - - WriteUTF8Char(v, sz, (ui8*)buf); - s.AppendNoAlias(buf, sz); - } + char buf[10]; + size_t sz = 0; + + WriteUTF8Char(v, sz, (ui8*)buf); + s.AppendNoAlias(buf, sz); + } inline void AppendUnicode(TUtf16String& s, wchar32 v) { - WriteSymbol(v, s); - } - - template <ui32 sz, typename TChar> - inline size_t CountHex(const TChar* p, const TChar* pe) { - auto b = p; - auto e = Min(p + sz, pe); - - while (b < e && IsHexDigit(*b)) { - ++b; - } - - return b - p; + WriteSymbol(v, s); } - template <size_t sz, typename TChar, typename T> - inline bool ParseHex(const TChar* p, const TChar* pe, T& t) noexcept { - return (p + sz <= pe) && TryIntFromString<16>(p, sz, t); - } - - template <ui32 sz, typename TChar> - inline size_t CountOct(const TChar* p, const TChar* pe) { - ui32 maxsz = Min<size_t>(sz, pe - p); - - if (3 == sz && 3 == maxsz && !(*p >= '0' && *p <= '3')) { - maxsz = 2; - } - - for (ui32 i = 0; i < maxsz; ++i, ++p) { - if (!IsOctDigit(*p)) { - return i; - } - } - - return maxsz; + template <ui32 sz, typename TChar> + inline size_t CountHex(const TChar* p, const TChar* pe) { + auto b = p; + auto e = Min(p + sz, pe); + + while (b < e && IsHexDigit(*b)) { + ++b; + } + + return b - p; + } + + template <size_t sz, typename TChar, typename T> + inline bool ParseHex(const TChar* p, const TChar* pe, T& t) noexcept { + return (p + sz <= pe) && TryIntFromString<16>(p, sz, t); + } + + template <ui32 sz, typename TChar> + inline size_t CountOct(const TChar* p, const TChar* pe) { + ui32 maxsz = Min<size_t>(sz, pe - p); + + if (3 == sz && 3 == maxsz && !(*p >= '0' && *p <= '3')) { + maxsz = 2; + } + + for (ui32 i = 0; i < maxsz; ++i, ++p) { + if (!IsOctDigit(*p)) { + return i; + } + } + + return maxsz; } } -template <class TChar, class TStr> -static TStr& DoUnescapeC(const TChar* p, size_t sz, TStr& res) { - const TChar* pe = p + sz; +template <class TChar, class TStr> +static TStr& DoUnescapeC(const TChar* p, size_t sz, TStr& res) { + const TChar* pe = p + sz; - while (p != pe) { + while (p != pe) { if ('\\' == *p) { ++p; - if (p == pe) { + if (p == pe) { return res; - } + } - switch (*p) { - default: + switch (*p) { + default: res.append(*p); - break; + break; case 'a': res.append('\a'); break; - case 'b': - res.append('\b'); - break; - case 'f': - res.append('\f'); - break; - case 'n': - res.append('\n'); - break; - case 'r': - res.append('\r'); - break; - case 't': - res.append('\t'); - break; + case 'b': + res.append('\b'); + break; + case 'f': + res.append('\f'); + break; + case 'n': + res.append('\n'); + break; + case 'r': + res.append('\r'); + break; + case 't': + res.append('\t'); + break; case 'v': res.append('\v'); break; - case 'u': { + case 'u': { ui16 cp[2]; - - if (ParseHex<4>(p + 1, pe, cp[0])) { - if (Y_UNLIKELY(cp[0] >= 0xD800 && cp[0] <= 0xDBFF && ParseHex<4>(p + 7, pe, cp[1]) && p[5] == '\\' && p[6] == 'u')) { + + if (ParseHex<4>(p + 1, pe, cp[0])) { + if (Y_UNLIKELY(cp[0] >= 0xD800 && cp[0] <= 0xDBFF && ParseHex<4>(p + 7, pe, cp[1]) && p[5] == '\\' && p[6] == 'u')) { const wchar16 wbuf[] = {wchar16(cp[0]), wchar16(cp[1])}; AppendUnicode(res, ReadSymbol(wbuf, wbuf + 2)); - p += 10; - } else { - AppendUnicode(res, (wchar32)cp[0]); - p += 4; - } - } else { - res.append(*p); - } - - break; - } - - case 'U': + p += 10; + } else { + AppendUnicode(res, (wchar32)cp[0]); + p += 4; + } + } else { + res.append(*p); + } + + break; + } + + case 'U': if (CountHex<8>(p + 1, pe) != 8) { - res.append(*p); + res.append(*p); } else { - AppendUnicode(res, IntFromString<ui32, 16>(p + 1, 8)); + AppendUnicode(res, IntFromString<ui32, 16>(p + 1, 8)); p += 8; } - break; - case 'x': - if (ui32 v = CountHex<2>(p + 1, pe)) { - res.append((TChar)IntFromString<ui32, 16>(p + 1, v)); - p += v; - } else { - res.append(*p); - } - - break; - case '0': - case '1': - case '2': - case '3': { - ui32 v = CountOct<3>(p, pe); // v is always positive - res.append((TChar)IntFromString<ui32, 8>(p, v)); - p += v - 1; - } break; - case '4': - case '5': - case '6': - case '7': { - ui32 v = CountOct<2>(p, pe); // v is always positive - res.append((TChar)IntFromString<ui32, 8>(p, v)); - p += v - 1; - } break; + break; + case 'x': + if (ui32 v = CountHex<2>(p + 1, pe)) { + res.append((TChar)IntFromString<ui32, 16>(p + 1, v)); + p += v; + } else { + res.append(*p); + } + + break; + case '0': + case '1': + case '2': + case '3': { + ui32 v = CountOct<3>(p, pe); // v is always positive + res.append((TChar)IntFromString<ui32, 8>(p, v)); + p += v - 1; + } break; + case '4': + case '5': + case '6': + case '7': { + ui32 v = CountOct<2>(p, pe); // v is always positive + res.append((TChar)IntFromString<ui32, 8>(p, v)); + p += v - 1; + } break; } - - ++p; + + ++p; } else { const auto r = std::basic_string_view<TChar>(p, pe - p).find('\\'); const auto n = r != std::string::npos ? p + r : pe; - - res.append(p, n); - p = n; + + res.append(p, n); + p = n; } } return res; } -template <class TChar> +template <class TChar> TBasicString<TChar>& UnescapeCImpl(const TChar* p, size_t sz, TBasicString<TChar>& res) { - return DoUnescapeC(p, sz, res); -} - -template <class TChar> -TChar* UnescapeC(const TChar* str, size_t len, TChar* buf) { - struct TUnboundedString { + return DoUnescapeC(p, sz, res); +} + +template <class TChar> +TChar* UnescapeC(const TChar* str, size_t len, TChar* buf) { + struct TUnboundedString { void append(TChar ch) noexcept { - *P++ = ch; - } - + *P++ = ch; + } + void append(const TChar* b, const TChar* e) noexcept { - while (b != e) { - append(*b++); - } - } - + while (b != e) { + append(*b++); + } + } + void AppendNoAlias(const TChar* s, size_t l) noexcept { - append(s, s + l); - } - - TChar* P; - } bufbuf = {buf}; - - return DoUnescapeC(str, len, bufbuf).P; -} - + append(s, s + l); + } + + TChar* P; + } bufbuf = {buf}; + + return DoUnescapeC(str, len, bufbuf).P; +} + template TString& UnescapeCImpl<TString::TChar>(const TString::TChar* str, size_t len, TString& r); template TUtf16String& UnescapeCImpl<TUtf16String::TChar>(const TUtf16String::TChar* str, size_t len, TUtf16String& r); - -template char* UnescapeC<char>(const char* str, size_t len, char* buf); - + +template char* UnescapeC<char>(const char* str, size_t len, char* buf); + template <class TChar> size_t UnescapeCCharLen(const TChar* begin, const TChar* end) { - if (begin >= end) { + if (begin >= end) { return 0; - } - if (*begin != '\\') { + } + if (*begin != '\\') { return 1; - } - if (++begin == end) { + } + if (++begin == end) { return 1; - } + } switch (*begin) { default: diff --git a/util/string/escape.h b/util/string/escape.h index 799eae6432..b01be65b0e 100644 --- a/util/string/escape.h +++ b/util/string/escape.h @@ -9,9 +9,9 @@ TBasicString<TChar>& EscapeCImpl(const TChar* str, size_t len, TBasicString<TCha template <class TChar> TBasicString<TChar>& UnescapeCImpl(const TChar* str, size_t len, TBasicString<TChar>&); -template <class TChar> -TChar* UnescapeC(const TChar* str, size_t len, TChar* buf); - +template <class TChar> +TChar* UnescapeC(const TChar* str, size_t len, TChar* buf); + template <typename TChar> static inline TBasicString<TChar>& EscapeC(const TChar* str, size_t len, TBasicString<TChar>& s) { return EscapeCImpl(str, len, s); @@ -21,8 +21,8 @@ template <typename TChar> static inline TBasicString<TChar> EscapeC(const TChar* str, size_t len) { TBasicString<TChar> s; return EscapeC(str, len, s); -} - +} + template <typename TChar> static inline TBasicString<TChar> EscapeC(const TBasicStringBuf<TChar>& str) { return EscapeC(str.data(), str.size()); @@ -41,14 +41,14 @@ static inline TBasicString<TChar> UnescapeC(const TChar* str, size_t len) { template <typename TChar> static inline TBasicString<TChar> EscapeC(TChar ch) { - return EscapeC(&ch, 1); -} - -template <typename TChar> + return EscapeC(&ch, 1); +} + +template <typename TChar> static inline TBasicString<TChar> EscapeC(const TChar* str) { return EscapeC(str, std::char_traits<TChar>::length(str)); -} - +} + TString& EscapeC(const TStringBuf str, TString& res); TUtf16String& EscapeC(const TWtringBuf str, TUtf16String& res); diff --git a/util/string/escape_ut.cpp b/util/string/escape_ut.cpp index ec15e52e18..cd38ecffd3 100644 --- a/util/string/escape_ut.cpp +++ b/util/string/escape_ut.cpp @@ -1,5 +1,5 @@ -#include "escape.h" - +#include "escape.h" + #include <library/cpp/testing/unittest/registar.h> #include <util/generic/string.h> @@ -14,8 +14,8 @@ namespace { TExample(const TStringBuf expected, const TStringBuf source) : Expected{expected} - , Source{source} - { + , Source{source} + { } }; } @@ -27,15 +27,15 @@ static const TExample CommonTestData[] = { {"http://ya.ru/\\0", "http://ya.ru/\0"sv}, {"http://ya.ru/\\0\\0", "http://ya.ru/\0\0"sv}, - {"http://ya.ru/\\0\\0000", "http://ya.ru/\0\0" - "0"sv}, - {"http://ya.ru/\\0\\0001", "http://ya.ru/\0\x00" - "1"sv}, + {"http://ya.ru/\\0\\0000", "http://ya.ru/\0\0" + "0"sv}, + {"http://ya.ru/\\0\\0001", "http://ya.ru/\0\x00" + "1"sv}, - {R"(\2\4\00678)", "\2\4\6" - "78"sv}, // \6 -> \006 because next char '7' is "octal" - {R"(\2\4\689)", "\2\4\6" - "89"sv}, // \6 -> \6 because next char '8' is not "octal" + {R"(\2\4\00678)", "\2\4\6" + "78"sv}, // \6 -> \006 because next char '7' is "octal" + {R"(\2\4\689)", "\2\4\6" + "89"sv}, // \6 -> \6 because next char '8' is not "octal" {R"(\"Hello\", Alice said.)", "\"Hello\", Alice said."}, {"Slash\\\\dash!", "Slash\\dash!"}, @@ -44,7 +44,7 @@ static const TExample CommonTestData[] = { {"There are questions \\x3F\\x3F?", "There are questions ???"}, {"There are questions \\x3F?", "There are questions ??"}, -}; +}; Y_UNIT_TEST_SUITE(TEscapeCTest) { Y_UNIT_TEST(TestStrokaEscapeC) { @@ -66,14 +66,14 @@ Y_UNIT_TEST_SUITE(TEscapeCTest) { UNIT_ASSERT_VALUES_EQUAL("\xFF", UnescapeC(TString("\\xFF"))); UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeC(TString("\xff" - "f"))); - UNIT_ASSERT_VALUES_EQUAL("\xff" - "f", + "f"))); + UNIT_ASSERT_VALUES_EQUAL("\xff" + "f", UnescapeC(TString("\\377f"))); UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeC(TString("\xff" - "g"))); - UNIT_ASSERT_VALUES_EQUAL("\xff" - "g", + "g"))); + UNIT_ASSERT_VALUES_EQUAL("\xff" + "g", UnescapeC(TString("\\xFFg"))); UNIT_ASSERT_VALUES_EQUAL("\xEA\x9A\x96", UnescapeC(TString("\\uA696"))); UNIT_ASSERT_VALUES_EQUAL("Странный компроматтест", UnescapeC(TString("\\u0421\\u0442\\u0440\\u0430\\u043d\\u043d\\u044b\\u0439 \\u043a\\u043e\\u043c\\u043f\\u0440\\u043e\\u043c\\u0430\\u0442тест"))); @@ -130,16 +130,16 @@ Y_UNIT_TEST_SUITE(TEscapeCTest) { test("\\400\\1", 3); test("\\4xxx", 2); } - + Y_UNIT_TEST(TestUnbounded) { - char buf[100000]; - - for (const auto& x : CommonTestData) { + char buf[100000]; + + for (const auto& x : CommonTestData) { char* end = UnescapeC(x.Expected.data(), x.Expected.size(), buf); - - UNIT_ASSERT_VALUES_EQUAL(x.Source, TStringBuf(buf, end)); - } - } + + UNIT_ASSERT_VALUES_EQUAL(x.Source, TStringBuf(buf, end)); + } + } Y_UNIT_TEST(TestCapitalUEscapes) { UNIT_ASSERT_VALUES_EQUAL(UnescapeC("\\U00000020"), " "); diff --git a/util/string/fuzzing/collapse/main.cpp b/util/string/fuzzing/collapse/main.cpp index 04119f2267..e7b09f0f55 100644 --- a/util/string/fuzzing/collapse/main.cpp +++ b/util/string/fuzzing/collapse/main.cpp @@ -1,12 +1,12 @@ -#include <util/string/strip.h> -#include <util/charset/wide.h> - -extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) { +#include <util/string/strip.h> +#include <util/charset/wide.h> + +extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) { TUtf16String w((const wchar16*)data, size / 2); - Collapse(w); - + Collapse(w); + TString s((const char*)data, size); CollapseInPlace(s); - - return 0; // Non-zero return values are reserved for future use. -} + + return 0; // Non-zero return values are reserved for future use. +} diff --git a/util/string/fuzzing/collapse/ya.make b/util/string/fuzzing/collapse/ya.make index 14986170eb..b8614f6411 100644 --- a/util/string/fuzzing/collapse/ya.make +++ b/util/string/fuzzing/collapse/ya.make @@ -1,13 +1,13 @@ FUZZ() - + OWNER( pg g:util ) SUBSCRIBER(g:util-subscribers) - -SRCS( - main.cpp -) - -END() + +SRCS( + main.cpp +) + +END() diff --git a/util/string/fuzzing/strtod/main.cpp b/util/string/fuzzing/strtod/main.cpp index 91eb5ff506..50ea2a6afc 100644 --- a/util/string/fuzzing/strtod/main.cpp +++ b/util/string/fuzzing/strtod/main.cpp @@ -1,9 +1,9 @@ -#include <util/string/cast.h> - -extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) { - double res; - - TryFromString<double>((const char*)data, size, res); - - return 0; // Non-zero return values are reserved for future use. -} +#include <util/string/cast.h> + +extern "C" int LLVMFuzzerTestOneInput(const ui8* data, size_t size) { + double res; + + TryFromString<double>((const char*)data, size, res); + + return 0; // Non-zero return values are reserved for future use. +} diff --git a/util/string/fuzzing/strtod/ya.make b/util/string/fuzzing/strtod/ya.make index 14986170eb..b8614f6411 100644 --- a/util/string/fuzzing/strtod/ya.make +++ b/util/string/fuzzing/strtod/ya.make @@ -1,13 +1,13 @@ FUZZ() - + OWNER( pg g:util ) SUBSCRIBER(g:util-subscribers) - -SRCS( - main.cpp -) - -END() + +SRCS( + main.cpp +) + +END() diff --git a/util/string/fuzzing/ya.make b/util/string/fuzzing/ya.make index 5f34271d91..617e0f2b1d 100644 --- a/util/string/fuzzing/ya.make +++ b/util/string/fuzzing/ya.make @@ -4,8 +4,8 @@ OWNER( ) SUBSCRIBER(g:util-subscribers) -RECURSE( +RECURSE( collapse escape_c - strtod -) + strtod +) diff --git a/util/string/hex.cpp b/util/string/hex.cpp index 4d12ad691c..667397987f 100644 --- a/util/string/hex.cpp +++ b/util/string/hex.cpp @@ -1,21 +1,21 @@ #include "hex.h" -const char* const Char2DigitTable = ("\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9 - "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"); +const char* const Char2DigitTable = ("\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9 + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"); char* HexEncode(const void* in, size_t len, char* out) { const unsigned char* b = (const unsigned char*)in; diff --git a/util/string/hex.h b/util/string/hex.h index 84f22ae97e..af3d2d528f 100644 --- a/util/string/hex.h +++ b/util/string/hex.h @@ -1,35 +1,35 @@ #pragma once - + #include <util/generic/string.h> #include <util/generic/yexception.h> #include <util/system/yassert.h> - -inline static char DigitToChar(unsigned char digit) { - if (digit < 10) { - return (char)digit + '0'; - } - - return (char)(digit - 10) + 'A'; -} - -extern const char* const Char2DigitTable; - -inline static int Char2Digit(char ch) { + +inline static char DigitToChar(unsigned char digit) { + if (digit < 10) { + return (char)digit + '0'; + } + + return (char)(digit - 10) + 'A'; +} + +extern const char* const Char2DigitTable; + +inline static int Char2Digit(char ch) { char result = Char2DigitTable[(unsigned char)ch]; Y_ENSURE(result != '\xff', "invalid hex character " << (int)ch); return result; -} - +} + //! Convert a hex string of exactly 2 chars to int /*! @example String2Byte("10") => 16 */ inline static int String2Byte(const char* s) { - return Char2Digit(*s) * 16 + Char2Digit(*(s + 1)); -} - + return Char2Digit(*s) * 16 + Char2Digit(*(s + 1)); +} + char* HexEncode(const void* in, size_t len, char* out); - + TString HexEncode(const void* in, size_t len); - + inline TString HexEncode(const TStringBuf h) { return HexEncode(h.data(), h.size()); } diff --git a/util/string/hex_ut.cpp b/util/string/hex_ut.cpp index 17013109d5..39a83d5e62 100644 --- a/util/string/hex_ut.cpp +++ b/util/string/hex_ut.cpp @@ -1,19 +1,19 @@ -#include "hex.h" - +#include "hex.h" + #include <library/cpp/testing/unittest/registar.h> - + Y_UNIT_TEST_SUITE(THexCodingTest) { Y_UNIT_TEST(TestEncode) { - UNIT_ASSERT_EQUAL(HexEncode("i1634iqwbf,&msdb"), "693136333469717762662C266D736462"); - } - + UNIT_ASSERT_EQUAL(HexEncode("i1634iqwbf,&msdb"), "693136333469717762662C266D736462"); + } + Y_UNIT_TEST(TestDecode) { - UNIT_ASSERT_EQUAL(HexDecode("693136333469717762662C266D736462"), "i1634iqwbf,&msdb"); - } - + UNIT_ASSERT_EQUAL(HexDecode("693136333469717762662C266D736462"), "i1634iqwbf,&msdb"); + } + Y_UNIT_TEST(TestDecodeCase) { - UNIT_ASSERT_EQUAL(HexDecode("12ABCDEF"), HexDecode("12abcdef")); + UNIT_ASSERT_EQUAL(HexDecode("12ABCDEF"), HexDecode("12abcdef")); UNIT_ASSERT_EXCEPTION(HexDecode("Hello"), yexception); //< incorrect chars - UNIT_ASSERT_EXCEPTION(HexDecode("123"), yexception); //< odd length - } -} + UNIT_ASSERT_EXCEPTION(HexDecode("123"), yexception); //< odd length + } +} diff --git a/util/string/join.cpp b/util/string/join.cpp index 46c6491e1a..3f88e23128 100644 --- a/util/string/join.cpp +++ b/util/string/join.cpp @@ -1 +1 @@ -#include "join.h" +#include "join.h" diff --git a/util/string/join.h b/util/string/join.h index 9a735ceb29..b166fad1f3 100644 --- a/util/string/join.h +++ b/util/string/join.h @@ -3,7 +3,7 @@ #include <util/generic/string.h> #include <util/generic/typetraits.h> #include <util/string/cast.h> -#include "cast.h" +#include "cast.h" /* * Default implementation of AppendToString uses a temporary TString object which is inefficient. You can overload it @@ -201,43 +201,43 @@ JoinSeq(TCharType delim, const TContainer& data) { * Difference from JoinSeq, JoinRange, Join is the lack of TString object - all depends on operator<< for the type and * realization of IOutputStream */ -template <class TIterB, class TIterE> -struct TRangeJoiner { +template <class TIterB, class TIterE> +struct TRangeJoiner { friend constexpr IOutputStream& operator<<(IOutputStream& stream, const TRangeJoiner<TIterB, TIterE>& rangeJoiner) { - if (rangeJoiner.b != rangeJoiner.e) { + if (rangeJoiner.b != rangeJoiner.e) { stream << *rangeJoiner.b; - for (auto it = std::next(rangeJoiner.b); it != rangeJoiner.e; ++it) + for (auto it = std::next(rangeJoiner.b); it != rangeJoiner.e; ++it) stream << rangeJoiner.delim << *it; } return stream; } - constexpr TRangeJoiner(TStringBuf delim, TIterB&& b, TIterE&& e) - : delim(delim) - , b(std::forward<TIterB>(b)) - , e(std::forward<TIterE>(e)) - { - } - + constexpr TRangeJoiner(TStringBuf delim, TIterB&& b, TIterE&& e) + : delim(delim) + , b(std::forward<TIterB>(b)) + , e(std::forward<TIterE>(e)) + { + } + private: const TStringBuf delim; const TIterB b; const TIterE e; }; -template <class TIterB, class TIterE = TIterB> -constexpr auto MakeRangeJoiner(TStringBuf delim, TIterB&& b, TIterE&& e) { +template <class TIterB, class TIterE = TIterB> +constexpr auto MakeRangeJoiner(TStringBuf delim, TIterB&& b, TIterE&& e) { return TRangeJoiner<TIterB, TIterE>(delim, std::forward<TIterB>(b), std::forward<TIterE>(e)); } -template <class TContainer> -constexpr auto MakeRangeJoiner(TStringBuf delim, const TContainer& data) { +template <class TContainer> +constexpr auto MakeRangeJoiner(TStringBuf delim, const TContainer& data) { return MakeRangeJoiner(delim, std::cbegin(data), std::cend(data)); } -template <class TVal> -constexpr auto MakeRangeJoiner(TStringBuf delim, const std::initializer_list<TVal>& data) { +template <class TVal> +constexpr auto MakeRangeJoiner(TStringBuf delim, const std::initializer_list<TVal>& data) { return MakeRangeJoiner(delim, std::cbegin(data), std::cend(data)); } @@ -253,9 +253,9 @@ constexpr auto MakeRangeJoiner(TStringBuf delim, const std::initializer_list<TVa */ template <typename T> -inline std::enable_if_t< - !std::is_same<std::decay_t<T>, TString>::value && !std::is_same<std::decay_t<T>, const char*>::value, - TString> +inline std::enable_if_t< + !std::is_same<std::decay_t<T>, TString>::value && !std::is_same<std::decay_t<T>, const char*>::value, + TString> JoinSeq(const TStringBuf delim, const std::initializer_list<T>& data) { return JoinRange(delim, data.begin(), data.end()); } diff --git a/util/string/join_ut.cpp b/util/string/join_ut.cpp index 408f8e1658..3ed2b2459c 100644 --- a/util/string/join_ut.cpp +++ b/util/string/join_ut.cpp @@ -31,7 +31,7 @@ Y_UNIT_TEST_SUITE(JoinStringTest) { TVector<int> vv(v, v + 3); UNIT_ASSERT_EQUAL(JoinSeq(" ", vv), "1 2 3"); UNIT_ASSERT_EQUAL(JoinSeq(" ", vv), JoinRange(" ", vv.begin(), vv.end())); - UNIT_ASSERT_EQUAL(JoinRange(" ", v, v + 2), "1 2"); + UNIT_ASSERT_EQUAL(JoinRange(" ", v, v + 2), "1 2"); UNIT_ASSERT_EQUAL(JoinSeq(" ", {}), ""); UNIT_ASSERT_EQUAL(JoinSeq(" ", {42}), "42"); UNIT_ASSERT_EQUAL(JoinSeq(" ", {1, 2, 3}), "1 2 3"); diff --git a/util/string/printf.cpp b/util/string/printf.cpp index 6b49d80a9c..5b7c34d4e1 100644 --- a/util/string/printf.cpp +++ b/util/string/printf.cpp @@ -1,38 +1,38 @@ -#include "printf.h" - -#include <util/stream/printf.h> +#include "printf.h" + +#include <util/stream/printf.h> #include <util/stream/str.h> - + int vsprintf(TString& s, const char* c, va_list params) { - TStringOutput so(s.remove()); - - return Printf(so, c, params); -} - + TStringOutput so(s.remove()); + + return Printf(so, c, params); +} + int sprintf(TString& s, const char* c, ...) { - va_list params; - va_start(params, c); - const int k = vsprintf(s, c, params); - va_end(params); - return k; -} - + va_list params; + va_start(params, c); + const int k = vsprintf(s, c, params); + va_end(params); + return k; +} + TString Sprintf(const char* c, ...) { TString s; - va_list params; - va_start(params, c); - vsprintf(s, c, params); - va_end(params); - return s; -} - + va_list params; + va_start(params, c); + vsprintf(s, c, params); + va_end(params); + return s; +} + int fcat(TString& s, const char* c, ...) { - TStringOutput so(s); - - va_list params; - va_start(params, c); - const size_t ret = Printf(so, c, params); - va_end(params); - - return ret; -} + TStringOutput so(s); + + va_list params; + va_start(params, c); + const size_t ret = Printf(so, c, params); + va_end(params); + + return ret; +} diff --git a/util/string/printf.h b/util/string/printf.h index 841fa16e51..925c6edaff 100644 --- a/util/string/printf.h +++ b/util/string/printf.h @@ -1,13 +1,13 @@ -#pragma once - +#pragma once + #include <util/generic/fwd.h> #include <util/system/compiler.h> - + #include <cstdarg> -/// formatted print. return printed length: +/// formatted print. return printed length: int Y_PRINTF_FORMAT(2, 0) vsprintf(TString& s, const char* c, va_list params); -/// formatted print. return printed length: +/// formatted print. return printed length: int Y_PRINTF_FORMAT(2, 3) sprintf(TString& s, const char* c, ...); TString Y_PRINTF_FORMAT(1, 2) Sprintf(const char* c, ...); int Y_PRINTF_FORMAT(2, 3) fcat(TString& s, const char* c, ...); diff --git a/util/string/printf_ut.cpp b/util/string/printf_ut.cpp index 1913966e72..2b2f980b70 100644 --- a/util/string/printf_ut.cpp +++ b/util/string/printf_ut.cpp @@ -1,5 +1,5 @@ -#include "printf.h" - +#include "printf.h" + #include <library/cpp/testing/unittest/registar.h> Y_UNIT_TEST_SUITE(TStringPrintf) { @@ -7,24 +7,24 @@ Y_UNIT_TEST_SUITE(TStringPrintf) { TString s; int len = sprintf(s, "Hello %s", "world"); UNIT_ASSERT_EQUAL(s, TString("Hello world")); - UNIT_ASSERT_EQUAL(len, 11); + UNIT_ASSERT_EQUAL(len, 11); } - + Y_UNIT_TEST(TestFcat) { TString s; - int len = sprintf(s, "Hello %s", "world"); + int len = sprintf(s, "Hello %s", "world"); UNIT_ASSERT_EQUAL(s, TString("Hello world")); - UNIT_ASSERT_EQUAL(len, 11); - len = fcat(s, " qwqw%s", "as"); + UNIT_ASSERT_EQUAL(len, 11); + len = fcat(s, " qwqw%s", "as"); UNIT_ASSERT_EQUAL(s, TString("Hello world qwqwas")); - UNIT_ASSERT_EQUAL(len, 7); - } - + UNIT_ASSERT_EQUAL(len, 7); + } + Y_UNIT_TEST(TestSpecial) { UNIT_ASSERT_EQUAL("4294967295", Sprintf("%" PRIu32, (ui32)(-1))); - } + } Y_UNIT_TEST(TestExplicitPositions) { UNIT_ASSERT_EQUAL("abc xyz abc", Sprintf("%1$s %2$s %1$s", "abc", "xyz")); } -} +} diff --git a/util/string/split.cpp b/util/string/split.cpp index c5d05aa86c..7d26857cc7 100644 --- a/util/string/split.cpp +++ b/util/string/split.cpp @@ -1,6 +1,6 @@ #include "split.h" -template <class TValue> +template <class TValue> inline size_t Split(const char* ptr, const char* delim, TVector<TValue>& values) { values.erase(values.begin(), values.end()); while (ptr && *ptr) { diff --git a/util/string/split.h b/util/string/split.h index e568cab618..bc46d9e64c 100644 --- a/util/string/split.h +++ b/util/string/split.h @@ -1,8 +1,8 @@ #pragma once - -#include "strspn.h" + +#include "strspn.h" #include "cast.h" - + #include <util/generic/algorithm.h> #include <util/generic/fwd.h> #include <util/generic/iterator.h> @@ -15,7 +15,7 @@ #include <util/generic/ylimits.h> #include <util/system/compat.h> #include <util/system/defaults.h> - + #include <utility> #include <stlfwd> @@ -24,7 +24,7 @@ namespace NStringSplitPrivate { template <class T, class I, class = void> - struct TIsConsumer: std::false_type {}; + struct TIsConsumer: std::false_type {}; template <class T, class I> struct TIsConsumer< @@ -49,44 +49,44 @@ namespace NStringSplitPrivate { } -template <class I, class TDelim, class TConsumer> +template <class I, class TDelim, class TConsumer> std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>> SplitString(I b, I e, const TDelim& d, TConsumer&& c) { - I l, i; - - do { - l = b; - i = d.Find(b, e); - } while (c.Consume(l, i, b) && (b != i)); -} - -template <class I, class TDelim, class TConsumer> + I l, i; + + do { + l = b; + i = d.Find(b, e); + } while (c.Consume(l, i, b) && (b != i)); +} + +template <class I, class TDelim, class TConsumer> std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>> SplitString(I b, const TDelim& d, TConsumer&& c) { - I l, i; - - do { - l = b; - i = d.Find(b); - } while (c.Consume(l, i, b) && (b != i)); -} - + I l, i; + + do { + l = b; + i = d.Find(b); + } while (c.Consume(l, i, b) && (b != i)); +} + template <class I1, class I2> static inline I1* FastStrChr(I1* str, I2 f) noexcept { I1* ret = NStringSplitPrivate::Find(str, f); - - if (!ret) { + + if (!ret) { ret = str + std::char_traits<I1>::length(str); - } - - return ret; -} - -template <class I> + } + + return ret; +} + +template <class I> static inline I* FastStrStr(I* str, I* f, size_t l) noexcept { std::basic_string_view<I> strView(str); const auto ret = strView.find(*f); - + if (ret != std::string::npos) { std::basic_string_view<I> fView(f, l); strView = strView.substr(ret); @@ -95,89 +95,89 @@ static inline I* FastStrStr(I* str, I* f, size_t l) noexcept { break; } } - + return strView.size() >= l ? strView.data() : strView.data() + strView.size(); } else { return strView.data() + strView.size(); - } -} - + } +} + template <class Char> -struct TStringDelimiter { +struct TStringDelimiter { inline TStringDelimiter(Char* delim) noexcept - : Delim(delim) + : Delim(delim) , Len(std::char_traits<Char>::length(delim)) - { - } - + { + } + inline TStringDelimiter(Char* delim, size_t len) noexcept - : Delim(delim) - , Len(len) - { + : Delim(delim) + , Len(len) + { } inline Char* Find(Char*& b, Char* e) const noexcept { const auto ret = std::basic_string_view<Char>(b, e - b).find(Delim, 0, Len); - + if (ret != std::string::npos) { const auto result = b + ret; b = result + Len; return result; - } - - return (b = e); - } - + } + + return (b = e); + } + inline Char* Find(Char*& b) const noexcept { Char* ret = FastStrStr(b, Delim, Len); - + b = *ret ? ret + Len : ret; - - return ret; - } - + + return ret; + } + Char* Delim; - const size_t Len; -}; - + const size_t Len; +}; + template <class Char> -struct TCharDelimiter { +struct TCharDelimiter { inline TCharDelimiter(Char ch) noexcept - : Ch(ch) - { - } - + : Ch(ch) + { + } + inline Char* Find(Char*& b, Char* e) const noexcept { const auto ret = std::basic_string_view<Char>(b, e - b).find(Ch); - + if (ret != std::string::npos) { const auto result = b + ret; b = result + 1; return result; - } - - return (b = e); - } - + } + + return (b = e); + } + inline Char* Find(Char*& b) const noexcept { Char* ret = FastStrChr(b, Ch); - - if (*ret) { - b = ret + 1; - } else { - b = ret; - } - - return ret; - } - + + if (*ret) { + b = ret + 1; + } else { + b = ret; + } + + return ret; + } + Char Ch; -}; - +}; + template <class Iterator, class Condition> struct TFuncDelimiter { public: - template <class... Args> + template <class... Args> TFuncDelimiter(Args&&... args) : Fn(std::forward<Args>(args)...) { @@ -196,7 +196,7 @@ private: }; template <class Char> -struct TFindFirstOf { +struct TFindFirstOf { inline TFindFirstOf(Char* set) : Set(set) { @@ -221,17 +221,17 @@ struct TFindFirstOf { }; template <> -struct TFindFirstOf<const char>: public TCompactStrSpn { +struct TFindFirstOf<const char>: public TCompactStrSpn { inline TFindFirstOf(const char* set, const char* e) : TCompactStrSpn(set, e) { } - inline TFindFirstOf(const char* set) - : TCompactStrSpn(set) + inline TFindFirstOf(const char* set) + : TCompactStrSpn(set) { } -}; +}; template <class Char> struct TSetDelimiter: private TFindFirstOf<const Char> { @@ -239,7 +239,7 @@ struct TSetDelimiter: private TFindFirstOf<const Char> { inline Char* Find(Char*& b, Char* e) const noexcept { Char* ret = const_cast<Char*>(this->FindFirstOf(b, e)); - + if (ret != e) { b = ret + 1; return ret; @@ -267,37 +267,37 @@ namespace NSplitTargetHasPushBack { template <class T, class = void> struct TConsumerBackInserter; -template <class T> +template <class T> struct TConsumerBackInserter<T, std::enable_if_t<NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> { - static void DoInsert(T* C, const typename T::value_type& i) { + static void DoInsert(T* C, const typename T::value_type& i) { C->push_back(i); } }; template <class T> struct TConsumerBackInserter<T, std::enable_if_t<!NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> { - static void DoInsert(T* C, const typename T::value_type& i) { + static void DoInsert(T* C, const typename T::value_type& i) { C->insert(C->end(), i); } }; template <class T> -struct TContainerConsumer { +struct TContainerConsumer { inline TContainerConsumer(T* c) noexcept - : C(c) - { - } - - template <class I> - inline bool Consume(I* b, I* d, I* /*e*/) { + : C(c) + { + } + + template <class I> + inline bool Consume(I* b, I* d, I* /*e*/) { TConsumerBackInserter<T>::DoInsert(C, typename T::value_type(b, d)); - - return true; - } - - T* C; -}; - + + return true; + } + + T* C; +}; + template <class T> struct TContainerConvertingConsumer { inline TContainerConvertingConsumer(T* c) noexcept @@ -315,98 +315,98 @@ struct TContainerConvertingConsumer { T* C; }; -template <class S, class I> -struct TLimitingConsumer { +template <class S, class I> +struct TLimitingConsumer { inline TLimitingConsumer(size_t cnt, S* slave) noexcept - : Cnt(cnt ? cnt - 1 : Max<size_t>()) - , Slave(slave) - , Last(nullptr) - { - } - - inline bool Consume(I* b, I* d, I* e) { - if (!Cnt) { - Last = b; - - return false; - } - - --Cnt; - - return Slave->Consume(b, d, e); - } - - size_t Cnt; - S* Slave; - I* Last; -}; - -template <class S> -struct TSkipEmptyTokens { + : Cnt(cnt ? cnt - 1 : Max<size_t>()) + , Slave(slave) + , Last(nullptr) + { + } + + inline bool Consume(I* b, I* d, I* e) { + if (!Cnt) { + Last = b; + + return false; + } + + --Cnt; + + return Slave->Consume(b, d, e); + } + + size_t Cnt; + S* Slave; + I* Last; +}; + +template <class S> +struct TSkipEmptyTokens { inline TSkipEmptyTokens(S* slave) noexcept - : Slave(slave) - { - } - - template <class I> - inline bool Consume(I* b, I* d, I* e) { - if (b != d) { - return Slave->Consume(b, d, e); - } - - return true; - } - - S* Slave; -}; - -template <class S> -struct TKeepDelimiters { + : Slave(slave) + { + } + + template <class I> + inline bool Consume(I* b, I* d, I* e) { + if (b != d) { + return Slave->Consume(b, d, e); + } + + return true; + } + + S* Slave; +}; + +template <class S> +struct TKeepDelimiters { inline TKeepDelimiters(S* slave) noexcept - : Slave(slave) - { - } - - template <class I> - inline bool Consume(I* b, I* d, I* e) { - if (Slave->Consume(b, d, d)) { - if (d != e) { - return Slave->Consume(d, e, e); - } - - return true; - } - - return false; - } - - S* Slave; -}; - -template <class T> -struct TSimplePusher { - inline bool Consume(char* b, char* d, char*) { - *d = 0; - C->push_back(b); - - return true; - } - - T* C; -}; - -template <class T> + : Slave(slave) + { + } + + template <class I> + inline bool Consume(I* b, I* d, I* e) { + if (Slave->Consume(b, d, d)) { + if (d != e) { + return Slave->Consume(d, e, e); + } + + return true; + } + + return false; + } + + S* Slave; +}; + +template <class T> +struct TSimplePusher { + inline bool Consume(char* b, char* d, char*) { + *d = 0; + C->push_back(b); + + return true; + } + + T* C; +}; + +template <class T> static inline void Split(char* buf, char ch, T* res) { - res->resize(0); + res->resize(0); if (*buf == 0) return; TCharDelimiter<char> delim(ch); - TSimplePusher<T> pusher = {res}; - + TSimplePusher<T> pusher = {res}; + SplitString(buf, delim, pusher); -} - +} + /// Split string into res vector. Res vector is cleared before split. /// Old good slow split function. /// Field delimter is any number of symbols specified in delim (no empty strings in res vector) @@ -424,7 +424,7 @@ inline size_t Split(const TStringBuf s, const TSetDelimiter<const char>& delim, return res.size(); } -template <class P, class D> +template <class P, class D> void GetNext(TStringBuf& s, D delim, P& param) { TStringBuf next = s.NextTok(delim); Y_ENSURE(next.IsInited(), TStringBuf("Split: number of fields less than number of Split output arguments")); @@ -443,14 +443,14 @@ void GetNext(TStringBuf& s, D delim, TMaybe<P>& param) { // example: // Split(TStringBuf("Sherlock,2014,36.6"), ',', name, year, temperature); -template <class D, class P1, class P2> +template <class D, class P1, class P2> void Split(TStringBuf s, D delim, P1& p1, P2& p2) { GetNext(s, delim, p1); GetNext(s, delim, p2); Y_ENSURE(!s.IsInited(), TStringBuf("Split: number of fields more than number of Split output arguments")); } -template <class D, class P1, class P2, class... Other> +template <class D, class P1, class P2, class... Other> void Split(TStringBuf s, D delim, P1& p1, P2& p2, Other&... other) { GetNext(s, delim, p1); Split(s, delim, p2, other...); @@ -498,12 +498,12 @@ namespace NStringSplitPrivate { * This one is needed here so that `std::string_view -> std::string_view` * conversion works. */ - template <class Src, class Dst> + template <class Src, class Dst> inline void DoFromString(const Src& src, Dst* dst) { *dst = ::FromString<Dst>(src); } - template <class T> + template <class T> inline void DoFromString(const T& src, T* dst) noexcept { *dst = src; } @@ -513,12 +513,12 @@ namespace NStringSplitPrivate { *dst = src; } - template <class Src, class Dst> + template <class Src, class Dst> inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const Src& src, Dst* dst) noexcept { return ::TryFromString(src, *dst); } - template <class T> + template <class T> inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, T* dst) noexcept { *dst = src; return true; @@ -544,18 +544,18 @@ namespace NStringSplitPrivate { } // TODO: return bool (continue) - template <class StringBuf> + template <class StringBuf> void operator()(StringBuf e) const { this->operator()(C_, e); } private: - template <class OtherContainer, class StringBuf> + template <class OtherContainer, class StringBuf> auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) { return c->emplace_back(value_type(e)); } - template <class OtherContainer, class StringBuf> + template <class OtherContainer, class StringBuf> auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) { return c->emplace(value_type(e)); } @@ -582,14 +582,14 @@ namespace NStringSplitPrivate { } private: - template <class OtherContainer, class StringBuf> + template <class OtherContainer, class StringBuf> auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) { value_type v; DoFromString(e, &v); return c->emplace_back(std::move(v)); } - template <class OtherContainer, class StringBuf> + template <class OtherContainer, class StringBuf> auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) { value_type v; DoFromString(e, &v); @@ -604,7 +604,7 @@ namespace NStringSplitPrivate { using type = std::conditional_t< THasData<String>::value, TBasicStringBuf<typename String::value_type>, - TIteratorRange<typename String::const_iterator>>; + TIteratorRange<typename String::const_iterator>>; }; template <class Char, class Traits, class Allocator> @@ -621,36 +621,36 @@ namespace NStringSplitPrivate { * Metafunction that returns a string buffer for the given type. This is to * make sure that splitting `std::string` returns `std::string_view`. */ - template <class String> + template <class String> using TStringBufOf = typename TStringBufOfImpl<String>::type; - template <class StringBuf, class Iterator> + template <class StringBuf, class Iterator> StringBuf DoMakeStringBuf(Iterator b, Iterator e, StringBuf*) { return StringBuf(b, e); } - template <class Char, class Traits, class Iterator> + template <class Char, class Traits, class Iterator> std::basic_string_view<Char, Traits> DoMakeStringBuf(Iterator b, Iterator e, std::basic_string_view<Char, Traits>*) { return std::basic_string_view<Char, Traits>(b, e - b); } - template <class StringBuf, class Iterator> + template <class StringBuf, class Iterator> StringBuf MakeStringBuf(Iterator b, Iterator e) { return DoMakeStringBuf(b, e, static_cast<StringBuf*>(nullptr)); } - template <class String> + template <class String> struct TIteratorOfImpl { using type = std::conditional_t< THasData<String>::value, const typename String::value_type*, - typename String::const_iterator>; + typename String::const_iterator>; }; - template <class String> + template <class String> using TIteratorOf = typename TIteratorOfImpl<String>::type; - template <class String> + template <class String> class TStringSplitter; template <class String> @@ -667,7 +667,7 @@ namespace NStringSplitPrivate { { } - template < + template < typename Other, typename = std::enable_if_t< std::is_convertible<Other, TStringBufType>::value>> @@ -706,7 +706,7 @@ namespace NStringSplitPrivate { }; template <class Base> - class TSplitRange: public Base, public TInputRangeAdaptor<TSplitRange<Base>> { + class TSplitRange: public Base, public TInputRangeAdaptor<TSplitRange<Base>> { using TStringBufType = decltype(std::declval<Base>().Next()->Token()); public: @@ -733,7 +733,7 @@ namespace NStringSplitPrivate { return true; } - template <class Container, class = std::enable_if_t<THasInsert<Container>::value || THasPushBack<Container>::value>> + template <class Container, class = std::enable_if_t<THasInsert<Container>::value || THasPushBack<Container>::value>> operator Container() { Container result; AddTo(&result); @@ -790,7 +790,7 @@ namespace NStringSplitPrivate { } ++it; } - }, args...); + }, args...); return successfullyFilled == sizeof...(args) && it == this->end(); } @@ -864,7 +864,7 @@ namespace NStringSplitPrivate { }; template <class Base, class Filter> - struct TFilterRange: public Base { + struct TFilterRange: public Base { template <class... Args> inline TFilterRange(const Base& base, Args&&... args) : Base(base) @@ -896,7 +896,7 @@ namespace NStringSplitPrivate { struct TStopIteration; template <class Base> - struct TFilters: public Base { + struct TFilters: public Base { template <class TFilter> using TIt = TSplitRange<TStopIteration<TFilters<TFilterRange<Base, TFilter>>>>; @@ -907,12 +907,12 @@ namespace NStringSplitPrivate { } inline TIt<TNonEmptyFilter> SkipEmpty() const { - return {*this}; + return {*this}; } }; template <class Base, class Stopper> - struct TStopRange: public Base { + struct TStopRange: public Base { template <typename... Args> inline TStopRange(const Base& base, Args&&... args) : Base(base) @@ -978,7 +978,7 @@ namespace NStringSplitPrivate { }; template <class Base> - struct TStopIteration: public Base { + struct TStopIteration: public Base { template <class TStopper> using TIt = TSplitRange<TStopIteration<TFilters<TStopRange<Base, TStopper>>>>; @@ -989,11 +989,11 @@ namespace NStringSplitPrivate { } inline TIt<TTake> Take(size_t count) { - return {*this, count}; + return {*this, count}; } inline TIt<TLimit> Limit(size_t count) { - return {*this, count}; + return {*this, count}; } }; @@ -1001,7 +1001,7 @@ namespace NStringSplitPrivate { using TIt = TSplitRange<TStopIteration<TFilters<TSplitRangeBase<TPolicy>>>>; public: - template <class OtherString> + template <class OtherString> explicit TStringSplitter(OtherString&& s) : String_(std::forward<OtherString>(s)) { @@ -1010,31 +1010,31 @@ namespace NStringSplitPrivate { //does not own TDelim template <class TDelim> inline TIt<TPtrPolicy<const TDelim>> Split(const TDelim& d) const noexcept { - return {String_, &d}; + return {String_, &d}; } inline TIt<TEmbedPolicy<TCharDelimiter<const TChar>>> Split(TChar ch) const noexcept { - return {String_, ch}; + return {String_, ch}; } inline TIt<TSimpleRefPolicy<TSetDelimiter<const TChar>>> SplitBySet(const TChar* set) const noexcept { - return {String_, set}; + return {String_, set}; } inline TIt<TEmbedPolicy<TStringDelimiter<const TChar>>> SplitByString(const TStringBufType& str) const noexcept { - return {String_, str.data(), str.size()}; + return {String_, str.data(), str.size()}; } template <class TFunc> inline TIt<TEmbedPolicy<TFuncDelimiter<TIterator, TFunc>>> SplitByFunc(TFunc f) const noexcept { - return {String_, f}; + return {String_, f}; } private: TStringType String_; }; - template <class String> + template <class String> auto MakeStringSplitter(String&& s) { return TStringSplitter<std::remove_reference_t<String>>(std::forward<String>(s)); } diff --git a/util/string/split_ut.cpp b/util/string/split_ut.cpp index 1a2f8a766f..43e59f2d75 100644 --- a/util/string/split_ut.cpp +++ b/util/string/split_ut.cpp @@ -1,27 +1,27 @@ -#include "split.h" - +#include "split.h" + #include <library/cpp/testing/unittest/registar.h> - + #include <util/stream/output.h> -#include <util/charset/wide.h> -#include <util/datetime/cputimer.h> +#include <util/charset/wide.h> +#include <util/datetime/cputimer.h> #include <util/generic/maybe.h> - + #include <string> #include <string_view> -template <typename T> -static inline void OldSplit(char* pszBuf, T* pRes) { - pRes->resize(0); - pRes->push_back(pszBuf); - for (char* pszData = pszBuf; *pszData; ++pszData) { - if (*pszData == '\t') { - *pszData = 0; - pRes->push_back(pszData + 1); - } - } -} - +template <typename T> +static inline void OldSplit(char* pszBuf, T* pRes) { + pRes->resize(0); + pRes->push_back(pszBuf); + for (char* pszData = pszBuf; *pszData; ++pszData) { + if (*pszData == '\t') { + *pszData = 0; + pRes->push_back(pszData + 1); + } + } +} + template <class T1, class T2> inline void Cmp(const T1& t1, const T2& t2) { try { @@ -34,10 +34,10 @@ inline void Cmp(const T1& t1, const T2& t2) { throw; } - auto i = t1.begin(); - auto j = t2.begin(); - - for (; i != t1.end() && j != t2.end(); ++i, ++j) { + auto i = t1.begin(); + auto j = t2.begin(); + + for (; i != t1.end() && j != t2.end(); ++i, ++j) { try { UNIT_ASSERT_EQUAL(*i, *j); } catch (...) { @@ -72,7 +72,7 @@ void TestDelimiterOnRange(TResult& good, I* b, I* e, const TDelimiter& delim) { Cmp(good, test); UNIT_ASSERT_EQUAL(good, test); } - + template <typename TConsumer, typename TResult, typename I> void TestConsumerOnString(TResult& good, I* str, I* d) { TResult test; @@ -83,7 +83,7 @@ void TestConsumerOnString(TResult& good, I* str, I* d) { Cmp(good, test); UNIT_ASSERT_EQUAL(good, test); } - + template <typename TConsumer, typename TResult, typename I> void TestConsumerOnRange(TResult& good, I* b, I* e, I* d) { TResult test; @@ -94,9 +94,9 @@ void TestConsumerOnRange(TResult& good, I* b, I* e, I* d) { Cmp(good, test); UNIT_ASSERT_EQUAL(good, test); } - + using TStrokaConsumer = TContainerConsumer<TVector<TString>>; - + void TestLimitingConsumerOnString(TVector<TString>& good, const char* str, const char* d, size_t n, const char* last) { TVector<TString> test; TStrokaConsumer consumer(&test); @@ -107,7 +107,7 @@ void TestLimitingConsumerOnString(TVector<TString>& good, const char* str, const UNIT_ASSERT_EQUAL(good, test); UNIT_ASSERT_EQUAL(TString(limits.Last), TString(last)); // Quite unobvious behaviour. Why the last token is not added to slave consumer? } - + void TestLimitingConsumerOnRange(TVector<TString>& good, const char* b, const char* e, const char* d, size_t n, const char* last) { TVector<TString> test; TStrokaConsumer consumer(&test); @@ -118,28 +118,28 @@ void TestLimitingConsumerOnRange(TVector<TString>& good, const char* b, const ch UNIT_ASSERT_EQUAL(good, test); UNIT_ASSERT_EQUAL(TString(limits.Last), TString(last)); } - + Y_UNIT_TEST_SUITE(SplitStringTest) { Y_UNIT_TEST(TestCharSingleDelimiter) { TString data("qw ab qwabcab"); TString canonic[] = {"qw", "ab", "", "qwabcab"}; TVector<TString> good(canonic, canonic + 4); TCharDelimiter<const char> delim(' '); - + TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim); TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim); } - + Y_UNIT_TEST(TestWideSingleDelimiter) { TUtf16String data(u"qw ab qwabcab"); TUtf16String canonic[] = {u"qw", u"ab", TUtf16String(), u"qwabcab"}; TVector<TUtf16String> good(canonic, canonic + 4); TCharDelimiter<const wchar16> delim(' '); - + TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim); TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim); } - + Y_UNIT_TEST(TestConvertToIntCharSingleDelimiter) { TString data("42 4242 -12345 0"); i32 canonic[] = {42, 4242, -12345, 0}; @@ -154,70 +154,70 @@ Y_UNIT_TEST_SUITE(SplitStringTest) { TString data("qw ab qwabcab "); TString canonic[] = {"qw", "ab", "qwabcab"}; TVector<TString> good(canonic, canonic + 3); - + TestConsumerOnString<TSkipEmptyTokens<TStrokaConsumer>>(good, data.data(), " "); TestConsumerOnRange<TSkipEmptyTokens<TStrokaConsumer>>(good, data.data(), data.end(), " "); } - + Y_UNIT_TEST(TestCharKeepDelimiters) { TString data("qw ab qwabcab "); TString canonic[] = {"qw", " ", "ab", " ", "", " ", "qwabcab", " ", ""}; TVector<TString> good(canonic, canonic + 9); - + TestConsumerOnString<TKeepDelimiters<TStrokaConsumer>>(good, data.data(), " "); TestConsumerOnRange<TKeepDelimiters<TStrokaConsumer>>(good, data.data(), data.end(), " "); } - + Y_UNIT_TEST(TestCharLimit) { TString data("qw ab qwabcab "); TString canonic[] = {"qw", "ab"}; TVector<TString> good(canonic, canonic + 2); - + TestLimitingConsumerOnString(good, data.data(), " ", 3, " qwabcab "); TestLimitingConsumerOnRange(good, data.data(), data.end(), " ", 3, " qwabcab "); } - + Y_UNIT_TEST(TestCharStringDelimiter) { TString data("qw ab qwababcab"); TString canonic[] = {"qw ", " qw", "", "c", ""}; TVector<TString> good(canonic, canonic + 5); TStringDelimiter<const char> delim("ab"); - + TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim); TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim); } - + Y_UNIT_TEST(TestWideStringDelimiter) { TUtf16String data(u"qw ab qwababcab"); TUtf16String canonic[] = {u"qw ", u" qw", TUtf16String(), u"c", TUtf16String()}; TVector<TUtf16String> good(canonic, canonic + 5); TUtf16String wideDelim(u"ab"); TStringDelimiter<const wchar16> delim(wideDelim.data()); - + TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim); TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim); } - + Y_UNIT_TEST(TestCharSetDelimiter) { TString data("qw ab qwababccab"); TString canonic[] = {"q", " ab q", "abab", "", "ab"}; TVector<TString> good(canonic, canonic + 5); TSetDelimiter<const char> delim("wc"); - + TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim); TestDelimiterOnRange<TContainerConsumer>(good, data.data(), data.end(), delim); } - + Y_UNIT_TEST(TestWideSetDelimiter) { TUtf16String data(u"qw ab qwababccab"); TUtf16String canonic[] = {u"q", u" ab q", u"abab", TUtf16String(), u"ab"}; TVector<TUtf16String> good(canonic, canonic + 5); TUtf16String wideDelim(u"wc"); TSetDelimiter<const wchar16> delim(wideDelim.data()); - + TestDelimiterOnString<TContainerConsumer>(good, data.data(), delim); } - + Y_UNIT_TEST(TestWideSetDelimiterRange) { TUtf16String data(u"qw ab qwababccab"); TUtf16String canonic[] = {u"q", u" ab q", u"abab", TUtf16String(), u"ab"}; @@ -403,9 +403,9 @@ Y_UNIT_TEST_SUITE(StringSplitter) { TVector<TString> tokens; auto f = [](char a) { return a == ' ' || a == '\t' || a == '\n'; }; for (auto v : StringSplitter(s).SplitByFunc(f)) { - if (v) { + if (v) { tokens.emplace_back(v); - } + } } UNIT_ASSERT(tokens == pattern); @@ -461,9 +461,9 @@ Y_UNIT_TEST_SUITE(StringSplitter) { } Y_UNIT_TEST(TestCompile) { - (void)StringSplitter(TString()); - (void)StringSplitter(TStringBuf()); - (void)StringSplitter("", 0); + (void)StringSplitter(TString()); + (void)StringSplitter(TStringBuf()); + (void)StringSplitter("", 0); } Y_UNIT_TEST(TestStringSplitterCountEmpty) { @@ -497,12 +497,12 @@ Y_UNIT_TEST_SUITE(StringSplitter) { } Y_UNIT_TEST(TestStringSplitterConsumeConditional) { - TVector<TString> expected = {"1", "2"}; + TVector<TString> expected = {"1", "2"}; TVector<TString> actual; auto func = [&actual](const TBasicStringBuf<char>& token) { - if (token == "3") { + if (token == "3") { return false; - } + } actual.push_back(TString(token)); return true; }; @@ -622,37 +622,37 @@ Y_UNIT_TEST_SUITE(StringSplitter) { } Y_UNIT_TEST(TestAssigment) { - TVector<TString> expected0 = {"1", "2", "3", "4"}; + TVector<TString> expected0 = {"1", "2", "3", "4"}; TVector<TString> actual0 = StringSplitter("1 2 3 4").Split(' '); UNIT_ASSERT_VALUES_EQUAL(expected0, actual0); - TSet<TString> expected1 = {"11", "22", "33", "44"}; + TSet<TString> expected1 = {"11", "22", "33", "44"}; TSet<TString> actual1 = StringSplitter("11 22 33 44").Split(' '); UNIT_ASSERT_VALUES_EQUAL(expected1, actual1); - TSet<TString> expected2 = {"11", "aa"}; + TSet<TString> expected2 = {"11", "aa"}; auto actual2 = static_cast<TSet<TString>>(StringSplitter("11 aa 11 11 aa").Split(' ')); UNIT_ASSERT_VALUES_EQUAL(expected2, actual2); - TVector<TString> expected3 = {"dd", "bb"}; + TVector<TString> expected3 = {"dd", "bb"}; auto actual3 = TVector<TString>(StringSplitter("dd\tbb").Split('\t')); UNIT_ASSERT_VALUES_EQUAL(expected3, actual3); } Y_UNIT_TEST(TestRangeBasedFor) { - TVector<TString> actual0 = {"11", "22", "33", "44"}; + TVector<TString> actual0 = {"11", "22", "33", "44"}; size_t num = 0; for (TStringBuf elem : StringSplitter("11 22 33 44").Split(' ')) { UNIT_ASSERT_VALUES_EQUAL(elem, actual0[num++]); } - TVector<TString> actual1 = {"another", "one,", "and", "another", "one"}; + TVector<TString> actual1 = {"another", "one,", "and", "another", "one"}; num = 0; for (TStringBuf elem : StringSplitter(TStringBuf("another one, and \n\n another one")).SplitBySet(" \n").SkipEmpty()) { UNIT_ASSERT_VALUES_EQUAL(elem, actual1[num++]); } - TVector<TUtf16String> actual2 = {u"привет,", u"как", u"дела"}; + TVector<TUtf16String> actual2 = {u"привет,", u"как", u"дела"}; num = 0; for (TWtringBuf elem : StringSplitter(u"привет, как дела").Split(wchar16(' '))) { UNIT_ASSERT_VALUES_EQUAL(elem, actual2[num++]); @@ -665,21 +665,21 @@ Y_UNIT_TEST_SUITE(StringSplitter) { } Y_UNIT_TEST(TestParseInto) { - TVector<int> actual0 = {1, 2, 3, 4}; + TVector<int> actual0 = {1, 2, 3, 4}; TVector<int> answer0; StringSplitter("1 2 3 4").Split(' ').ParseInto(&answer0); UNIT_ASSERT_VALUES_EQUAL(actual0, answer0); - TVector<int> actual1 = {42, 1, 2, 3, 4}; - TVector<int> answer1 = {42}; + TVector<int> actual1 = {42, 1, 2, 3, 4}; + TVector<int> answer1 = {42}; StringSplitter("1 2 3 4").Split(' ').ParseInto(&answer1); UNIT_ASSERT_VALUES_EQUAL(actual1, answer1); answer1.clear(); UNIT_ASSERT_EXCEPTION(StringSplitter("1 2 3 4").Split(' ').ParseInto(&answer1), yexception); - answer1 = {42}; + answer1 = {42}; StringSplitter(" 1 2 3 4").Split(' ').SkipEmpty().ParseInto(&answer1); UNIT_ASSERT_VALUES_EQUAL(actual1, answer1); @@ -709,7 +709,7 @@ Y_UNIT_TEST_SUITE(StringSplitter) { Y_UNIT_TEST(TestStdSplitAfterSplit) { std::string_view input = "a*b+a*b"; - for (std::string_view summand : StringSplitter(input).Split('+')) { + for (std::string_view summand : StringSplitter(input).Split('+')) { //FIXME: std::string is used to workaround MSVC ICE UNIT_ASSERT_VALUES_EQUAL(std::string(summand), "a*b"); std::string_view multiplier1, multiplier2; @@ -729,8 +729,8 @@ Y_UNIT_TEST_SUITE(StringSplitter) { } Y_UNIT_TEST(TestArcadiaStdInterop) { - TVector<TString> expected0 = {"a", "b"}; - TVector<TStringBuf> expected1 = {"a", "b"}; + TVector<TString> expected0 = {"a", "b"}; + TVector<TStringBuf> expected1 = {"a", "b"}; std::string src1("a b"); std::string_view src2("a b"); TVector<TString> actual0 = StringSplitter(src1).Split(' ').SkipEmpty(); @@ -750,7 +750,7 @@ Y_UNIT_TEST_SUITE(StringSplitter) { std::vector<TStringBuf> v; StringSplitter(b, e).Split(';').AddTo(&v); - std::vector<TStringBuf> expected = {"a", "b"}; + std::vector<TStringBuf> expected = {"a", "b"}; UNIT_ASSERT_VALUES_EQUAL(v, expected); } @@ -759,16 +759,16 @@ Y_UNIT_TEST_SUITE(StringSplitter) { char* str = s.Detach(); std::vector<TStringBuf> v = StringSplitter(str).Split('o'); - std::vector<TStringBuf> expected = {"l", "l"}; + std::vector<TStringBuf> expected = {"l", "l"}; UNIT_ASSERT_VALUES_EQUAL(v, expected); } Y_UNIT_TEST(TestSplitVector) { - std::vector<char> buffer = {'a', ';', 'b'}; + std::vector<char> buffer = {'a', ';', 'b'}; std::vector<TStringBuf> v = StringSplitter(buffer).Split(';'); - std::vector<TStringBuf> expected = {"a", "b"}; + std::vector<TStringBuf> expected = {"a", "b"}; UNIT_ASSERT_VALUES_EQUAL(v, expected); } @@ -783,10 +783,10 @@ Y_UNIT_TEST_SUITE(StringSplitter) { TDoubleIterator() = default; - TDoubleIterator(const char* ptr) - : Ptr_(ptr) - { - } + TDoubleIterator(const char* ptr) + : Ptr_(ptr) + { + } TDoubleIterator operator++() { Ptr_ += 2; @@ -819,7 +819,7 @@ Y_UNIT_TEST_SUITE(StringSplitter) { const char* beg = "1213002233000011"; const char* end = beg + strlen(beg); - std::vector<std::vector<int>> expected = {{12, 13}, {22, 33}, {}, {11}}; + std::vector<std::vector<int>> expected = {{12, 13}, {22, 33}, {}, {11}}; int i = 0; for (TIteratorRange<TDoubleIterator> part : StringSplitter(TDoubleIterator(beg), TDoubleIterator(end)).SplitByFunc([](int value) { return value == 0; })) { diff --git a/util/string/strip.cpp b/util/string/strip.cpp index b5e853cbb2..c921571cf0 100644 --- a/util/string/strip.cpp +++ b/util/string/strip.cpp @@ -1,23 +1,23 @@ -#include "strip.h" -#include "ascii.h" - +#include "strip.h" +#include "ascii.h" + #include <util/string/reverse.h> bool Collapse(const TString& from, TString& to, size_t maxLen) { return CollapseImpl<TString, bool (*)(unsigned char)>(from, to, maxLen, IsAsciiSpace); -} - +} + void CollapseText(const TString& from, TString& to, size_t maxLen) { Collapse(from, to, maxLen); StripInPlace(to); if (to.size() >= maxLen) { to.remove(maxLen - 5); // " ..." ReverseInPlace(to); - size_t pos = to.find_first_of(" .,;"); - if (pos != TString::npos && pos < 32) { - to.remove(0, pos + 1); - } + size_t pos = to.find_first_of(" .,;"); + if (pos != TString::npos && pos < 32) { + to.remove(0, pos + 1); + } ReverseInPlace(to); - to.append(" ..."); - } -} + to.append(" ..."); + } +} diff --git a/util/string/strip.h b/util/string/strip.h index f797dcea09..d5ef6da96d 100644 --- a/util/string/strip.h +++ b/util/string/strip.h @@ -1,11 +1,11 @@ -#pragma once - -#include "ascii.h" - +#pragma once + +#include "ascii.h" + #include <util/generic/string.h> -#include <util/generic/strbuf.h> +#include <util/generic/strbuf.h> #include <utility> - + template <class It> struct TIsAsciiSpaceAdapter { bool operator()(const It& it) const noexcept { @@ -15,7 +15,7 @@ struct TIsAsciiSpaceAdapter { template <class It> TIsAsciiSpaceAdapter<It> IsAsciiSpaceAdapter(It) { - return {}; + return {}; } template <class TChar> @@ -24,47 +24,47 @@ struct TEqualsStripAdapter { : Ch(ch) { } - + template <class It> bool operator()(const It& it) const noexcept { return *it == Ch; } - const TChar Ch; + const TChar Ch; }; template <class TChar> TEqualsStripAdapter<TChar> EqualsStripAdapter(TChar ch) { - return {ch}; + return {ch}; } -template <class It, class TStripCriterion> +template <class It, class TStripCriterion> inline void StripRangeBegin(It& b, const It& e, TStripCriterion&& criterion) noexcept { while (b < e && criterion(b)) { - ++b; - } -} - -template <class It> + ++b; + } +} + +template <class It> inline void StripRangeBegin(It& b, const It& e) noexcept { StripRangeBegin(b, e, IsAsciiSpaceAdapter(b)); } -template <class It, class TStripCriterion> +template <class It, class TStripCriterion> inline void StripRangeEnd(const It& b, It& e, TStripCriterion&& criterion) noexcept { while (b < e && criterion(e - 1)) { - --e; - } -} - -template <class It> + --e; + } +} + +template <class It> inline void StripRangeEnd(const It& b, It& e) noexcept { StripRangeEnd(b, e, IsAsciiSpaceAdapter(b)); } template <bool stripBeg, bool stripEnd> struct TStripImpl { - template <class It, class TStripCriterion> + template <class It, class TStripCriterion> static inline bool StripRange(It& b, It& e, TStripCriterion&& criterion) noexcept { const size_t oldLen = e - b; @@ -80,10 +80,10 @@ struct TStripImpl { return newLen != oldLen; } - template <class T, class TStripCriterion> - static inline bool StripString(const T& from, T& to, TStripCriterion&& criterion) { - auto b = from.begin(); - auto e = from.end(); + template <class T, class TStripCriterion> + static inline bool StripString(const T& from, T& to, TStripCriterion&& criterion) { + auto b = from.begin(); + auto e = from.end(); if (StripRange(b, e, criterion)) { to = T(b, e - b); @@ -96,8 +96,8 @@ struct TStripImpl { return false; } - template <class T, class TStripCriterion> - static inline T StripString(const T& from, TStripCriterion&& criterion) { + template <class T, class TStripCriterion> + static inline T StripString(const T& from, TStripCriterion&& criterion) { T ret; StripString(from, ret, criterion); return ret; @@ -109,49 +109,49 @@ struct TStripImpl { } }; -template <class It, class TStripCriterion> +template <class It, class TStripCriterion> inline bool StripRange(It& b, It& e, TStripCriterion&& criterion) noexcept { return TStripImpl<true, true>::StripRange(b, e, criterion); -} - -template <class It> +} + +template <class It> inline bool StripRange(It& b, It& e) noexcept { return StripRange(b, e, IsAsciiSpaceAdapter(b)); } -template <class It, class TStripCriterion> +template <class It, class TStripCriterion> inline bool Strip(It& b, size_t& len, TStripCriterion&& criterion) noexcept { - It e = b + len; - + It e = b + len; + if (StripRange(b, e, criterion)) { - len = e - b; - - return true; - } - - return false; -} - + len = e - b; + + return true; + } + + return false; +} + template <class It> inline bool Strip(It& b, size_t& len) noexcept { return Strip(b, len, IsAsciiSpaceAdapter(b)); } -template <class T, class TStripCriterion> -static inline bool StripString(const T& from, T& to, TStripCriterion&& criterion) { +template <class T, class TStripCriterion> +static inline bool StripString(const T& from, T& to, TStripCriterion&& criterion) { return TStripImpl<true, true>::StripString(from, to, criterion); -} - -template <class T> +} + +template <class T> static inline bool StripString(const T& from, T& to) { return StripString(from, to, IsAsciiSpaceAdapter(from.begin())); } -template <class T, class TStripCriterion> -static inline T StripString(const T& from, TStripCriterion&& criterion) { +template <class T, class TStripCriterion> +static inline T StripString(const T& from, TStripCriterion&& criterion) { return TStripImpl<true, true>::StripString(from, criterion); -} - +} + template <class T> static inline T StripString(const T& from) { return TStripImpl<true, true>::StripString(from); @@ -167,13 +167,13 @@ static inline T StripStringRight(const T& from) { return TStripImpl<false, true>::StripString(from); } -template <class T, class TStripCriterion> -static inline T StripStringLeft(const T& from, TStripCriterion&& criterion) { +template <class T, class TStripCriterion> +static inline T StripStringLeft(const T& from, TStripCriterion&& criterion) { return TStripImpl<true, false>::StripString(from, criterion); } -template <class T, class TStripCriterion> -static inline T StripStringRight(const T& from, TStripCriterion&& criterion) { +template <class T, class TStripCriterion> +static inline T StripStringRight(const T& from, TStripCriterion&& criterion) { return TStripImpl<false, true>::StripString(from, criterion); } @@ -181,21 +181,21 @@ static inline T StripStringRight(const T& from, TStripCriterion&& criterion) { static inline bool Strip(const TString& from, TString& to) { return StripString(from, to); } - + /// Removes leading and trailing spaces from the string. inline TString& StripInPlace(TString& s) { Strip(s, s); return s; } - + /// Returns a copy of the given string with removed leading and trailing spaces. inline TString Strip(const TString& s) Y_WARN_UNUSED_RESULT; inline TString Strip(const TString& s) { TString ret = s; Strip(ret, ret); - return ret; -} - + return ret; +} + template <class TChar, class TWhitespaceFunc> size_t CollapseImpl(TChar* s, size_t n, const TWhitespaceFunc& isWhitespace) { size_t newLen = 0; @@ -231,27 +231,27 @@ bool CollapseImpl(const TStringType& from, TStringType& to, size_t maxLen, const } bool Collapse(const TString& from, TString& to, size_t maxLen = 0); - + /// Replaces several consequtive space symbols with one (processing is limited to maxLen bytes) inline TString& CollapseInPlace(TString& s, size_t maxLen = 0) { Collapse(s, s, maxLen); - return s; -} + return s; +} /// Replaces several consequtive space symbols with one (processing is limited to maxLen bytes) inline TString Collapse(const TString& s, size_t maxLen = 0) Y_WARN_UNUSED_RESULT; inline TString Collapse(const TString& s, size_t maxLen) { TString ret; Collapse(s, ret, maxLen); - return ret; -} - + return ret; +} + void CollapseText(const TString& from, TString& to, size_t maxLen); - + /// The same as Collapse() + truncates the string to maxLen. /// @details An ellipsis is inserted at the end of the truncated line. inline void CollapseText(TString& s, size_t maxLen) { TString to; CollapseText(s, to, maxLen); - s = to; -} + s = to; +} diff --git a/util/string/strip_ut.cpp b/util/string/strip_ut.cpp index 6e3ef90499..d1029d1498 100644 --- a/util/string/strip_ut.cpp +++ b/util/string/strip_ut.cpp @@ -1,9 +1,9 @@ -#include "strip.h" - +#include "strip.h" + #include <library/cpp/testing/unittest/registar.h> -#include <util/charset/wide.h> - +#include <util/charset/wide.h> + Y_UNIT_TEST_SUITE(TStripStringTest) { Y_UNIT_TEST(TestStrip) { struct TTest { @@ -43,7 +43,7 @@ Y_UNIT_TEST_SUITE(TStripStringTest) { UNIT_ASSERT_EQUAL(StripStringRight(inputStrBuf), test.StripRightRes); }; } - + Y_UNIT_TEST(TestCustomStrip) { struct TTest { const char* Str; @@ -89,7 +89,7 @@ Y_UNIT_TEST_SUITE(TStripStringTest) { Y_UNIT_TEST(TestNullStringStrip) { TStringBuf nullString(nullptr, nullptr); UNIT_ASSERT_EQUAL( - StripString(nullString), + StripString(nullString), TString()); } @@ -97,11 +97,11 @@ Y_UNIT_TEST_SUITE(TStripStringTest) { UNIT_ASSERT_EQUAL(StripString(TWtringBuf(u" abc ")), u"abc"); UNIT_ASSERT_EQUAL(StripStringLeft(TWtringBuf(u" abc ")), u"abc "); UNIT_ASSERT_EQUAL(StripStringRight(TWtringBuf(u" abc ")), u" abc"); - } - + } + Y_UNIT_TEST(TestWtrokaCustomStrip) { UNIT_ASSERT_EQUAL( - StripString( + StripString( TWtringBuf(u"/abc/"), EqualsStripAdapter(u'/')), u"abc"); @@ -123,7 +123,7 @@ Y_UNIT_TEST_SUITE(TStripStringTest) { UNIT_ASSERT(s.c_str() == s2.c_str()); // Collapse() does not change the string at all #endif } - + Y_UNIT_TEST(TestCollapseText) { TString abs1("Very long description string written in unknown language."); TString abs2(abs1); @@ -135,4 +135,4 @@ Y_UNIT_TEST_SUITE(TStripStringTest) { UNIT_ASSERT_EQUAL(abs2 == "Very long description string written in unknown ...", true); UNIT_ASSERT_EQUAL(abs3 == "Very long description string written in ...", true); } -} +} diff --git a/util/string/strspn.cpp b/util/string/strspn.cpp index a12c24b3bd..cdb8d7ca9b 100644 --- a/util/string/strspn.cpp +++ b/util/string/strspn.cpp @@ -1 +1 @@ -#include "strspn.h" +#include "strspn.h" diff --git a/util/string/strspn.h b/util/string/strspn.h index a6e4ff0fd6..8229e74a9c 100644 --- a/util/string/strspn.h +++ b/util/string/strspn.h @@ -1,65 +1,65 @@ -#pragma once - -#include "cstriter.h" - -#include <util/generic/bitmap.h> - +#pragma once + +#include "cstriter.h" + +#include <util/generic/bitmap.h> + template <class TSetType> -class TStrSpnImpl { -public: - inline TStrSpnImpl(const char* b, const char* e) { - Init(b, e); - } - - inline TStrSpnImpl(const char* s) { - Init(s, TCStringEndIterator()); - } - - //FirstOf - template <class It> +class TStrSpnImpl { +public: + inline TStrSpnImpl(const char* b, const char* e) { + Init(b, e); + } + + inline TStrSpnImpl(const char* s) { + Init(s, TCStringEndIterator()); + } + + //FirstOf + template <class It> inline It FindFirstOf(It b, const char* e) const noexcept { - return FindFirst<false>(b, e); - } - - template <class It> + return FindFirst<false>(b, e); + } + + template <class It> inline It FindFirstOf(It s) const noexcept { - return FindFirst<false>(s, TCStringEndIterator()); - } - - //FirstNotOf - template <class It> + return FindFirst<false>(s, TCStringEndIterator()); + } + + //FirstNotOf + template <class It> inline It FindFirstNotOf(It b, const char* e) const noexcept { - return FindFirst<true>(b, e); - } - - template <class It> + return FindFirst<true>(b, e); + } + + template <class It> inline It FindFirstNotOf(It s) const noexcept { - return FindFirst<true>(s, TCStringEndIterator()); - } - + return FindFirst<true>(s, TCStringEndIterator()); + } + inline void Set(ui8 b) noexcept { - S_.Set(b); - } - -private: - template <bool Result, class It1, class It2> + S_.Set(b); + } + +private: + template <bool Result, class It1, class It2> inline It1 FindFirst(It1 b, It2 e) const noexcept { - while (b != e && (S_.Get((ui8)*b) == Result)) { - ++b; - } - - return b; - } - - template <class It1, class It2> - inline void Init(It1 b, It2 e) { - while (b != e) { - this->Set((ui8)*b++); - } - } - -private: + while (b != e && (S_.Get((ui8)*b) == Result)) { + ++b; + } + + return b; + } + + template <class It1, class It2> + inline void Init(It1 b, It2 e) { + while (b != e) { + this->Set((ui8)*b++); + } + } + +private: TSetType S_; -}; - +}; + using TCompactStrSpn = TStrSpnImpl<TBitMap<256>>; diff --git a/util/string/subst.cpp b/util/string/subst.cpp index 9a19d702c6..b2df328dc1 100644 --- a/util/string/subst.cpp +++ b/util/string/subst.cpp @@ -1,4 +1,4 @@ -#include "subst.h" +#include "subst.h" #include <util/generic/strbuf.h> #include <util/generic/string.h> @@ -175,9 +175,9 @@ size_t SubstGlobal(TUtf32String& text, const TUtf32StringBuf what, const TUtf32S size_t SubstGlobal(std::u16string& text, const TWtringBuf what, const TWtringBuf with, size_t from) { return SubstGlobalImpl(text, - std::u16string_view(reinterpret_cast<const char16_t*>(what.data()), what.size()), - std::u16string_view(reinterpret_cast<const char16_t*>(with.data()), with.size()), - from); + std::u16string_view(reinterpret_cast<const char16_t*>(what.data()), what.size()), + std::u16string_view(reinterpret_cast<const char16_t*>(with.data()), with.size()), + from); } size_t SubstGlobal(TString& text, char what, char with, size_t from) { @@ -197,5 +197,5 @@ size_t SubstGlobal(std::u16string& text, wchar16 what, wchar16 with, size_t from } size_t SubstGlobal(TUtf32String& text, wchar32 what, wchar32 with, size_t from) { - return SubstCharGlobalImpl(text, (char32_t)what, (char32_t)with, from); + return SubstCharGlobalImpl(text, (char32_t)what, (char32_t)with, from); } diff --git a/util/string/subst.h b/util/string/subst.h index 3745a52b5a..45b622fbef 100644 --- a/util/string/subst.h +++ b/util/string/subst.h @@ -1,7 +1,7 @@ -#pragma once - +#pragma once + #include <util/generic/fwd.h> - + #include <stlfwd> /* Replace all occurences of substring `what` with string `with` starting from position `from`. @@ -33,7 +33,7 @@ size_t SubstGlobal(std::string& text, char what, char with, size_t from = 0); size_t SubstGlobal(TUtf16String& text, wchar16 what, wchar16 with, size_t from = 0); size_t SubstGlobal(std::u16string& text, wchar16 what, wchar16 with, size_t from = 0); size_t SubstGlobal(TUtf32String& text, wchar32 what, wchar32 with, size_t from = 0); - + // TODO(yazevnul): // - rename `SubstGlobal` to `ReplaceAll` for convenience // - add `SubstGlobalCopy(TStringBuf)` for convenience diff --git a/util/string/subst_ut.cpp b/util/string/subst_ut.cpp index e0bb45f039..21eccef779 100644 --- a/util/string/subst_ut.cpp +++ b/util/string/subst_ut.cpp @@ -1,7 +1,7 @@ #include "join.h" -#include "subst.h" +#include "subst.h" #include <string> - + #include <library/cpp/testing/unittest/registar.h> Y_UNIT_TEST_SUITE(TStringSubst) { @@ -61,7 +61,7 @@ Y_UNIT_TEST_SUITE(TStringSubst) { } } - static void DoTestSubstGlobal(TVector<TString>& parts, const size_t minBeg, const size_t sz, + static void DoTestSubstGlobal(TVector<TString>& parts, const size_t minBeg, const size_t sz, const TString& from, const size_t fromPos, const size_t numSubst) { const size_t numLeft = numSubst - parts.size(); for (size_t fromBeg = minBeg; fromBeg <= sz - numLeft * from.size(); ++fromBeg) { @@ -122,13 +122,13 @@ Y_UNIT_TEST_SUITE(TStringSubst) { Y_UNIT_TEST(TestSubstGlobalOld) { TString s; s = "aaa"; - SubstGlobal(s, "a", "bb"); + SubstGlobal(s, "a", "bb"); UNIT_ASSERT_EQUAL(s, TString("bbbbbb")); s = "aaa"; - SubstGlobal(s, "a", "b"); + SubstGlobal(s, "a", "b"); UNIT_ASSERT_EQUAL(s, TString("bbb")); s = "aaa"; - SubstGlobal(s, "a", ""); + SubstGlobal(s, "a", ""); UNIT_ASSERT_EQUAL(s, TString("")); s = "abcdefbcbcdfb"; SubstGlobal(s, "bc", "bbc", 2); @@ -250,4 +250,4 @@ Y_UNIT_TEST_SUITE(TStringSubst) { UNIT_ASSERT_VALUES_EQUAL(st, ss); } } -} +} diff --git a/util/string/type.cpp b/util/string/type.cpp index 4f7ef1eaae..49671c02c2 100644 --- a/util/string/type.cpp +++ b/util/string/type.cpp @@ -1,20 +1,20 @@ -#include "type.h" -#include "ascii.h" - +#include "type.h" +#include "ascii.h" + #include <array> bool IsSpace(const char* s, size_t len) noexcept { - if (len == 0) { - return false; - } - for (const char* p = s; p < s + len; ++p) { - if (!IsAsciiSpace(*p)) { - return false; - } - } - return true; -} - + if (len == 0) { + return false; + } + for (const char* p = s; p < s + len; ++p) { + if (!IsAsciiSpace(*p)) { + return false; + } + } + return true; +} + template <typename TStringType> static bool IsNumberT(const TStringType& s) noexcept { if (s.empty()) { @@ -35,12 +35,12 @@ bool IsNumber(const TWtringBuf s) noexcept { template <typename TStringType> static bool IsHexNumberT(const TStringType& s) noexcept { if (s.empty()) { - return false; + return false; } - + return std::all_of(s.begin(), s.end(), IsAsciiHex<typename TStringType::value_type>); -} - +} + bool IsHexNumber(const TStringBuf s) noexcept { return IsHexNumberT(s); } @@ -57,10 +57,10 @@ namespace { return true; } } - return false; + return false; } } //anonymous namespace - + bool IsTrue(const TStringBuf v) noexcept { static constexpr std::array<TStringBuf, 7> trueOptions{ "true", @@ -71,7 +71,7 @@ bool IsTrue(const TStringBuf v) noexcept { "1", "da"}; return IsCaseInsensitiveAnyOf(v, trueOptions); -} +} bool IsFalse(const TStringBuf v) noexcept { static constexpr std::array<TStringBuf, 7> falseOptions{ diff --git a/util/string/type.h b/util/string/type.h index 29ede3550d..d6cb29ea58 100644 --- a/util/string/type.h +++ b/util/string/type.h @@ -1,23 +1,23 @@ -#pragma once - -#include <util/generic/strbuf.h> - -Y_PURE_FUNCTION bool IsSpace(const char* s, size_t len) noexcept; - +#pragma once + +#include <util/generic/strbuf.h> + +Y_PURE_FUNCTION bool IsSpace(const char* s, size_t len) noexcept; + /// Checks if a string is a set of only space symbols. -Y_PURE_FUNCTION static inline bool IsSpace(const TStringBuf s) noexcept { +Y_PURE_FUNCTION static inline bool IsSpace(const TStringBuf s) noexcept { return IsSpace(s.data(), s.size()); -} - +} + /// Returns "true" if the given string is an arabic number ([0-9]+) -Y_PURE_FUNCTION bool IsNumber(const TStringBuf s) noexcept; +Y_PURE_FUNCTION bool IsNumber(const TStringBuf s) noexcept; + +Y_PURE_FUNCTION bool IsNumber(const TWtringBuf s) noexcept; -Y_PURE_FUNCTION bool IsNumber(const TWtringBuf s) noexcept; - /// Returns "true" if the given string is a hex number ([0-9a-fA-F]+) -Y_PURE_FUNCTION bool IsHexNumber(const TStringBuf s) noexcept; +Y_PURE_FUNCTION bool IsHexNumber(const TStringBuf s) noexcept; -Y_PURE_FUNCTION bool IsHexNumber(const TWtringBuf s) noexcept; +Y_PURE_FUNCTION bool IsHexNumber(const TWtringBuf s) noexcept; /* Tests if the given string is case insensitive equal to one of: * - "true", @@ -28,8 +28,8 @@ Y_PURE_FUNCTION bool IsHexNumber(const TWtringBuf s) noexcept; * - "1", * - "da". */ -Y_PURE_FUNCTION bool IsTrue(const TStringBuf value) noexcept; - +Y_PURE_FUNCTION bool IsTrue(const TStringBuf value) noexcept; + /* Tests if the given string is case insensitive equal to one of: * - "false", * - "f", @@ -39,4 +39,4 @@ Y_PURE_FUNCTION bool IsTrue(const TStringBuf value) noexcept; * - "0", * - "net". */ -Y_PURE_FUNCTION bool IsFalse(const TStringBuf value) noexcept; +Y_PURE_FUNCTION bool IsFalse(const TStringBuf value) noexcept; diff --git a/util/string/type_ut.cpp b/util/string/type_ut.cpp index e3473b00a0..03e7af62bd 100644 --- a/util/string/type_ut.cpp +++ b/util/string/type_ut.cpp @@ -1,5 +1,5 @@ -#include "type.h" - +#include "type.h" + #include <library/cpp/testing/unittest/registar.h> #include <util/charset/wide.h> @@ -11,7 +11,7 @@ Y_UNIT_TEST_SUITE(TStringClassify) { UNIT_ASSERT_EQUAL(IsSpace(""), false); UNIT_ASSERT_EQUAL(IsSpace(" a"), false); } - + Y_UNIT_TEST(TestIsTrue) { UNIT_ASSERT(IsTrue("1")); UNIT_ASSERT(IsTrue("yes")); @@ -73,4 +73,4 @@ Y_UNIT_TEST_SUITE(TStringClassify) { UNIT_ASSERT(!IsHexNumber(u"foobar")); UNIT_ASSERT(!IsHexNumber(TUtf16String())); } -} +} diff --git a/util/string/ut/ya.make b/util/string/ut/ya.make index ade2a4508b..6e80812825 100644 --- a/util/string/ut/ya.make +++ b/util/string/ut/ya.make @@ -1,22 +1,22 @@ -UNITTEST_FOR(util) +UNITTEST_FOR(util) OWNER(g:util) SUBSCRIBER(g:util-subscribers) SRCS( - string/builder_ut.cpp - string/cast_ut.cpp - string/escape_ut.cpp - string/join_ut.cpp - string/hex_ut.cpp - string/printf_ut.cpp - string/split_ut.cpp - string/strip_ut.cpp - string/subst_ut.cpp - string/type_ut.cpp - string/util_ut.cpp - string/vector_ut.cpp - string/ascii_ut.cpp + string/builder_ut.cpp + string/cast_ut.cpp + string/escape_ut.cpp + string/join_ut.cpp + string/hex_ut.cpp + string/printf_ut.cpp + string/split_ut.cpp + string/strip_ut.cpp + string/subst_ut.cpp + string/type_ut.cpp + string/util_ut.cpp + string/vector_ut.cpp + string/ascii_ut.cpp ) INCLUDE(${ARCADIA_ROOT}/util/tests/ya_util_tests.inc) diff --git a/util/string/util.cpp b/util/string/util.cpp index 27aa11dcd5..b14f20bf75 100644 --- a/util/string/util.cpp +++ b/util/string/util.cpp @@ -1,72 +1,72 @@ -#include "util.h" - -#include <util/generic/utility.h> - +#include "util.h" + +#include <util/generic/utility.h> + #include <cstdio> #include <cstdarg> #include <cstdlib> int a2i(const TString& s) { - return atoi(s.c_str()); -} - + return atoi(s.c_str()); +} + //============================== span ===================================== -void str_spn::init(const char* charset, bool extended) { +void str_spn::init(const char* charset, bool extended) { // chars_table_1 is necessary to avoid some unexpected // multi-threading issues ui8 chars_table_1[256]; memset(chars_table_1, 0, sizeof(chars_table_1)); if (extended) { - for (const char* cs = charset; *cs; cs++) { + for (const char* cs = charset; *cs; cs++) { if (cs[1] == '-' && cs[2] != 0) { - for (int c = (ui8)*cs; c <= (ui8)cs[2]; c++) { + for (int c = (ui8)*cs; c <= (ui8)cs[2]; c++) { chars_table_1[c] = 1; - } + } cs += 2; continue; } chars_table_1[(ui8)*cs] = 1; } } else { - for (; *charset; charset++) { + for (; *charset; charset++) { chars_table_1[(ui8)*charset] = 1; - } + } } memcpy(chars_table, chars_table_1, 256); chars_table_1[0] = 1; - for (int n = 0; n < 256; n++) { + for (int n = 0; n < 256; n++) { c_chars_table[n] = !chars_table_1[n]; - } + } } -Tr::Tr(const char* from, const char* to) { - for (size_t n = 0; n < 256; n++) { +Tr::Tr(const char* from, const char* to) { + for (size_t n = 0; n < 256; n++) { Map[n] = (char)n; - } - for (; *from && *to; from++, to++) { + } + for (; *from && *to; from++, to++) { Map[(ui8)*from] = *to; - } + } } size_t Tr::FindFirstChangePosition(const TString& str) const { - for (auto it = str.begin(); it != str.end(); ++it) { + for (auto it = str.begin(); it != str.end(); ++it) { if (ConvertChar(*it) != *it) { return it - str.begin(); } } - + return TString::npos; } void Tr::Do(TString& str) const { const size_t changePosition = FindFirstChangePosition(str); - + if (changePosition == TString::npos) { return; - } - - for (auto it = str.begin() + changePosition; it != str.end(); ++it) { + } + + for (auto it = str.begin() + changePosition; it != str.end(); ++it) { *it = ConvertChar(*it); - } + } } diff --git a/util/string/util.h b/util/string/util.h index bcc989d67e..0d77a5042b 100644 --- a/util/string/util.h +++ b/util/string/util.h @@ -1,7 +1,7 @@ #pragma once -//THIS FILE A COMPAT STUB HEADER - +//THIS FILE A COMPAT STUB HEADER + #include <cstring> #include <cstdarg> #include <algorithm> @@ -9,7 +9,7 @@ #include <util/system/defaults.h> #include <util/generic/string.h> #include <util/generic/strbuf.h> - + /// @addtogroup Strings_Miscellaneous /// @{ int a2i(const TString& s); @@ -24,23 +24,23 @@ inline void RemoveIfLast(T& s, int c) { /// Adds lastCh symbol to the the of the string if it is not already there. inline void addIfNotLast(TString& s, int lastCh) { - size_t len = s.length(); - if (!len || s[len - 1] != lastCh) { - s.append(char(lastCh)); - } + size_t len = s.length(); + if (!len || s[len - 1] != lastCh) { + s.append(char(lastCh)); + } } /// @details Finishes the string with lastCh1 if lastCh2 is not present in the string and lastCh1 is not already at the end of the string. /// Else, if lastCh2 is not equal to the symbol before the last, it finishes the string with lastCh2. /// @todo ?? Define, when to apply the function. Is in use several times for URLs parsing. inline void addIfAbsent(TString& s, char lastCh1, char lastCh2) { - size_t pos = s.find(lastCh2); + size_t pos = s.find(lastCh2); if (pos == TString::npos) { - //s.append((char)lastCh1); - addIfNotLast(s, lastCh1); - } else if (pos < s.length() - 1) { - addIfNotLast(s, lastCh2); - } + //s.append((char)lastCh1); + addIfNotLast(s, lastCh1); + } else if (pos < s.length() - 1) { + addIfNotLast(s, lastCh2); + } } /// @} @@ -60,9 +60,9 @@ struct ui8_256 { ui8 c_chars_table[256]; }; -class str_spn: public ui8_256 { -public: - explicit str_spn(const char* charset, bool extended = false) { +class str_spn: public ui8_256 { +public: + explicit str_spn(const char* charset, bool extended = false) { // exteneded: if true, treat charset string more like // interior of brackets [ ], e.g. "a-z0-9" init(charset, extended); @@ -71,8 +71,8 @@ public: /// Return first character in table, like strpbrk() /// That is, skip all characters not in table /// [DIFFERENCE FOR NOT_FOUND CASE: Returns end of string, not NULL] - const char* brk(const char* s) const { - while (c_chars_table[(ui8)*s]) + const char* brk(const char* s) const { + while (c_chars_table[(ui8)*s]) ++s; return s; } @@ -85,8 +85,8 @@ public: /// Return first character not in table, like strpbrk() for inverted table. /// That is, skip all characters in table - const char* cbrk(const char* s) const { - while (chars_table[(ui8)*s]) + const char* cbrk(const char* s) const { + while (chars_table[(ui8)*s]) ++s; return s; } @@ -98,43 +98,43 @@ public: } /// Offset of the first character not in table, like strspn(). - size_t spn(const char* s) const { - return cbrk(s) - s; - } + size_t spn(const char* s) const { + return cbrk(s) - s; + } size_t spn(const char* s, const char* e) const { return cbrk(s, e) - s; } /// Offset of the first character in table, like strcspn(). - size_t cspn(const char* s) const { - return brk(s) - s; - } + size_t cspn(const char* s) const { + return brk(s) - s; + } size_t cspn(const char* s, const char* e) const { return brk(s, e) - s; } - char* brk(char* s) const { + char* brk(char* s) const { return const_cast<char*>(brk((const char*)s)); - } + } - char* cbrk(char* s) const { + char* cbrk(char* s) const { return const_cast<char*>(cbrk((const char*)s)); - } - + } + /// See strsep [BUT argument is *&, not **] - char* sep(char*& s) const { + char* sep(char*& s) const { char sep_char; // unused; return sep(s, sep_char); } /// strsep + remember character that was destroyed - char* sep(char*& s, char& sep_char) const { + char* sep(char*& s, char& sep_char) const { if (!s) return nullptr; - char* ret = s; - char* next = brk(ret); + char* ret = s; + char* next = brk(ret); if (*next) { sep_char = *next; *next = 0; @@ -146,30 +146,30 @@ public: return ret; } -protected: - void init(const char* charset, bool extended); +protected: + void init(const char* charset, bool extended); str_spn() = default; }; // an analogue of tr/$from/$to/ class Tr { public: - Tr(const char* from, const char* to); + Tr(const char* from, const char* to); char ConvertChar(char ch) const { return Map[(ui8)ch]; } - void Do(char* s) const { + void Do(char* s) const { for (; *s; s++) *s = ConvertChar(*s); } - void Do(const char* src, char* dst) const { + void Do(const char* src, char* dst) const { for (; *src; src++) *dst++ = ConvertChar(*src); *dst = 0; } - void Do(char* s, size_t l) const { + void Do(char* s, size_t l) const { for (size_t i = 0; i < l && s[i]; i++) s[i] = ConvertChar(s[i]); } diff --git a/util/string/util_ut.cpp b/util/string/util_ut.cpp index 3e8dcc3b3d..18a2d8e195 100644 --- a/util/string/util_ut.cpp +++ b/util/string/util_ut.cpp @@ -1,25 +1,25 @@ -#include "util.h" - +#include "util.h" + #include <library/cpp/testing/unittest/registar.h> -class TStrUtilTest: public TTestBase { - UNIT_TEST_SUITE(TStrUtilTest); - UNIT_TEST(TestSpn); - UNIT_TEST(TestRemoveAll); - UNIT_TEST_SUITE_END(); - +class TStrUtilTest: public TTestBase { + UNIT_TEST_SUITE(TStrUtilTest); + UNIT_TEST(TestSpn); + UNIT_TEST(TestRemoveAll); + UNIT_TEST_SUITE_END(); + public: - void TestSpn() { + void TestSpn() { str_spn rul("a-z", true); char s[] = "!@#$ab%^&c+-"; - UNIT_ASSERT_EQUAL(rul.brk(s), s + 4); - UNIT_ASSERT_EQUAL(rul.brk(s + 4), s + 4); - UNIT_ASSERT_EQUAL(rul.brk(s + 10), s + 12); - char* s1 = s; - UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), "!@#$"), 0); - UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), ""), 0); - UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), "%^&"), 0); - UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), "+-"), 0); + UNIT_ASSERT_EQUAL(rul.brk(s), s + 4); + UNIT_ASSERT_EQUAL(rul.brk(s + 4), s + 4); + UNIT_ASSERT_EQUAL(rul.brk(s + 10), s + 12); + char* s1 = s; + UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), "!@#$"), 0); + UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), ""), 0); + UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), "%^&"), 0); + UNIT_ASSERT_EQUAL(strcmp(rul.sep(s1), "+-"), 0); UNIT_ASSERT_EQUAL(rul.sep(s1), nullptr); } @@ -28,12 +28,12 @@ public: const char* Str; char Ch; const char* Result; - } tests[] = { - {"", 'x', ""}, - {"hello world", 'h', "ello world"}, - {"hello world", 'l', "heo word"}, - {"hello world", 'x', "hello world"}, - }; + } tests[] = { + {"", 'x', ""}, + {"hello world", 'h', "ello world"}, + {"hello world", 'l', "heo word"}, + {"hello world", 'x', "hello world"}, + }; for (const T* t = tests; t != std::end(tests); ++t) { TString str(t->Str); @@ -43,4 +43,4 @@ public: } }; -UNIT_TEST_SUITE_REGISTRATION(TStrUtilTest); +UNIT_TEST_SUITE_REGISTRATION(TStrUtilTest); diff --git a/util/string/vector.cpp b/util/string/vector.cpp index ebd7d94496..9ba401f0a2 100644 --- a/util/string/vector.cpp +++ b/util/string/vector.cpp @@ -1,9 +1,9 @@ #include "util.h" #include "split.h" -#include "vector.h" - -#include <util/system/defaults.h> - +#include "vector.h" + +#include <util/system/defaults.h> + template <class TConsumer, class TDelim, typename TChr> static inline void DoSplit2(TConsumer& c, TDelim& d, const TBasicStringBuf<TChr> str, int) { SplitString(str.data(), str.data() + str.size(), d, c); @@ -47,7 +47,7 @@ static inline void DoSplit0(C* res, const TBasicStringBuf<TChr> str, TDelim& d, template <typename TChr> static void SplitStringImplT(TVector<std::conditional_t<std::is_same<TChr, wchar16>::value, TUtf16String, TString>>* res, - const TBasicStringBuf<TChr> str, const TChr* delim, size_t maxFields, int options) { + const TBasicStringBuf<TChr> str, const TChr* delim, size_t maxFields, int options) { if (!*delim) { return; } @@ -86,6 +86,6 @@ TUtf16String JoinStrings(const TVector<TUtf16String>& v, const TWtringBuf delim) TUtf16String JoinStrings(const TVector<TUtf16String>& v, size_t index, size_t count, const TWtringBuf delim) { const size_t f = Min(index, v.size()); const size_t l = f + Min(count, v.size() - f); - + return JoinStrings(v.begin() + f, v.begin() + l, delim); } diff --git a/util/string/vector.h b/util/string/vector.h index fc8c78e2e2..e36c348bbe 100644 --- a/util/string/vector.h +++ b/util/string/vector.h @@ -10,7 +10,7 @@ #include <util/string/cast.h> #include <util/system/yassert.h> -#define KEEP_EMPTY_TOKENS 0x01 +#define KEEP_EMPTY_TOKENS 0x01 // // NOTE: Check StringSplitter below to get more convenient split string interface. @@ -66,36 +66,36 @@ SplitString(const typename ::NPrivate::TStringDeducer<C>::type& str, const C* de return SplitString(str.data(), str.size(), delimiter, maxFields, options); } -template <class TIter> +template <class TIter> inline TString JoinStrings(TIter begin, TIter end, const TStringBuf delim) { if (begin == end) return TString(); - + TString result = ToString(*begin); - + for (++begin; begin != end; ++begin) { result.append(delim); - result.append(ToString(*begin)); + result.append(ToString(*begin)); } - + return result; } -template <class TIter> +template <class TIter> inline TUtf16String JoinStrings(TIter begin, TIter end, const TWtringBuf delim) { - if (begin == end) + if (begin == end) return TUtf16String(); - + TUtf16String result = ToWtring(*begin); - - for (++begin; begin != end; ++begin) { - result.append(delim); - result.append(ToWtring(*begin)); - } - - return result; -} - + + for (++begin; begin != end; ++begin) { + result.append(delim); + result.append(ToWtring(*begin)); + } + + return result; +} + /// Concatenates elements of given TVector<TString>. inline TString JoinStrings(const TVector<TString>& v, const TStringBuf delim) { return JoinStrings(v.begin(), v.end(), delim); @@ -125,7 +125,7 @@ template <typename T, typename TStringType> TVector<T> Scan(const TVector<TStringType>& input) { TVector<T> output; output.reserve(input.size()); - for (int i = 0; i < input.ysize(); ++i) { + for (int i = 0; i < input.ysize(); ++i) { output.push_back(FromString<T>(input[i])); } return output; diff --git a/util/string/vector_ut.cpp b/util/string/vector_ut.cpp index fe7ad32b76..817120f268 100644 --- a/util/string/vector_ut.cpp +++ b/util/string/vector_ut.cpp @@ -1,33 +1,33 @@ #include <library/cpp/testing/unittest/registar.h> #include <util/charset/wide.h> - -#include "cast.h" -#include "vector.h" - + +#include "cast.h" +#include "vector.h" + Y_UNIT_TEST_SUITE(TStringJoinTest) { Y_UNIT_TEST(Test1) { TVector<TUtf16String> v; - + UNIT_ASSERT_EQUAL(JoinStrings(v, ToWtring("")), ToWtring("")); - } - + } + Y_UNIT_TEST(Test2) { TVector<TUtf16String> v; - - v.push_back(ToWtring("1")); - v.push_back(ToWtring("2")); - + + v.push_back(ToWtring("1")); + v.push_back(ToWtring("2")); + UNIT_ASSERT_EQUAL(JoinStrings(v, ToWtring(" ")), ToWtring("1 2")); - } - + } + Y_UNIT_TEST(Test3) { TVector<TUtf16String> v; - - v.push_back(ToWtring("1")); - v.push_back(ToWtring("2")); - + + v.push_back(ToWtring("1")); + v.push_back(ToWtring("2")); + UNIT_ASSERT_EQUAL(JoinStrings(v, 1, 10, ToWtring(" ")), ToWtring("2")); - } + } Y_UNIT_TEST(TestJoinWStrings) { const TUtf16String str = u"Яндекс"; @@ -35,4 +35,4 @@ Y_UNIT_TEST_SUITE(TStringJoinTest) { UNIT_ASSERT_EQUAL(JoinStrings(v, TUtf16String()), str); } -} +} |