diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:17 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:17 +0300 |
commit | d3a398281c6fd1d3672036cb2d63f842d2cb28c5 (patch) | |
tree | dd4bd3ca0f36b817e96812825ffaf10d645803f2 /library/cpp/string_utils | |
parent | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (diff) | |
download | ydb-d3a398281c6fd1d3672036cb2d63f842d2cb28c5.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/string_utils')
21 files changed, 654 insertions, 654 deletions
diff --git a/library/cpp/string_utils/base64/base64.cpp b/library/cpp/string_utils/base64/base64.cpp index 08533d2b47..05c201f0de 100644 --- a/library/cpp/string_utils/base64/base64.cpp +++ b/library/cpp/string_utils/base64/base64.cpp @@ -1,5 +1,5 @@ -#include "base64.h" - +#include "base64.h" + #include <contrib/libs/base64/avx2/libbase64.h> #include <contrib/libs/base64/ssse3/libbase64.h> #include <contrib/libs/base64/neon32/libbase64.h> @@ -7,10 +7,10 @@ #include <contrib/libs/base64/plain32/libbase64.h> #include <contrib/libs/base64/plain64/libbase64.h> -#include <util/generic/yexception.h> +#include <util/generic/yexception.h> #include <util/system/cpu_id.h> #include <util/system/platform.h> - + #include <cstdlib> namespace { @@ -71,7 +71,7 @@ namespace { } } -static const char base64_etab_std[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +static const char base64_etab_std[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; static const char base64_bkw[] = { '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 0..15 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', // 16..31 @@ -93,7 +93,7 @@ static const char base64_bkw[] = { static_assert(Y_ARRAY_SIZE(base64_bkw) == 256, "wrong size"); // Base64 for url encoding, RFC3548 -static const char base64_etab_url[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; +static const char base64_etab_url[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; static inline unsigned char GetBase64EncodedIndex0(unsigned char octet0) { return (octet0 >> 2); @@ -111,13 +111,13 @@ static inline unsigned char GetBase64EncodedIndex3(unsigned char octet2) { return (octet2 & 0x3f); } -template <bool urlVersion> -static inline char* Base64EncodeImpl(char* outstr, const unsigned char* instr, size_t len) { +template <bool urlVersion> +static inline char* Base64EncodeImpl(char* outstr, const unsigned char* instr, size_t len) { const char* const base64_etab = (urlVersion ? base64_etab_url : base64_etab_std); const char pad = (urlVersion ? ',' : '='); - size_t idx = 0; - + size_t idx = 0; + while (idx + 2 < len) { *outstr++ = base64_etab[GetBase64EncodedIndex0(instr[idx])]; *outstr++ = base64_etab[GetBase64EncodedIndex1(instr[idx], instr[idx + 1])]; @@ -136,44 +136,44 @@ static inline char* Base64EncodeImpl(char* outstr, const unsigned char* instr, s } *outstr++ = pad; } - *outstr = 0; - + *outstr = 0; + return outstr; } static char* Base64EncodePlain(char* outstr, const unsigned char* instr, size_t len) { - return Base64EncodeImpl<false>(outstr, instr, len); + return Base64EncodeImpl<false>(outstr, instr, len); } -char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len) { - return Base64EncodeImpl<true>(outstr, instr, len); +char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len) { + return Base64EncodeImpl<true>(outstr, instr, len); } -inline void uudecode_1(char* dst, unsigned char* src) { +inline void uudecode_1(char* dst, unsigned char* src) { dst[0] = char((base64_bkw[src[0]] << 2) | (base64_bkw[src[1]] >> 4)); dst[1] = char((base64_bkw[src[1]] << 4) | (base64_bkw[src[2]] >> 2)); dst[2] = char((base64_bkw[src[2]] << 6) | base64_bkw[src[3]]); } static size_t Base64DecodePlain(void* dst, const char* b, const char* e) { - size_t n = 0; - while (b < e) { - uudecode_1((char*)dst + n, (unsigned char*)b); - - b += 4; - n += 3; - } - + size_t n = 0; + while (b < e) { + uudecode_1((char*)dst + n, (unsigned char*)b); + + b += 4; + n += 3; + } + if (n > 0) { - if (b[-1] == ',' || b[-1] == '=') { + if (b[-1] == ',' || b[-1] == '=') { n--; - - if (b[-2] == ',' || b[-2] == '=') { + + if (b[-2] == ',' || b[-2] == '=') { n--; - } + } } } - + return n; } @@ -189,9 +189,9 @@ static const char base64_bkw_strict[] = "\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100\100"; size_t Base64StrictDecode(void* out, const char* b, const char* e) { - char* dst = (char*)out; - const unsigned char* src = (unsigned char*)b; - const unsigned char* const end = (unsigned char*)e; + char* dst = (char*)out; + const unsigned char* src = (unsigned char*)b; + const unsigned char* const end = (unsigned char*)e; Y_ENSURE(!((e - b) % 4), "incorrect input length for base64 decode"); @@ -228,7 +228,7 @@ size_t Base64StrictDecode(void* out, const char* b, const char* e) { } } - return dst - (char*)out; + return dst - (char*)out; } size_t Base64Decode(void* dst, const char* b, const char* e) { diff --git a/library/cpp/string_utils/base64/base64.h b/library/cpp/string_utils/base64/base64.h index 2f59572c51..f778a6425a 100644 --- a/library/cpp/string_utils/base64/base64.h +++ b/library/cpp/string_utils/base64/base64.h @@ -1,15 +1,15 @@ #pragma once - + #include <util/system/defaults.h> -#include <util/generic/strbuf.h> +#include <util/generic/strbuf.h> #include <util/generic/string.h> - + /* @return Size of the buffer required to decode Base64 encoded data of size `len`. */ constexpr size_t Base64DecodeBufSize(const size_t len) noexcept { return (len + 3) / 4 * 3; -} - +} + /* Decode Base64 encoded data. Can decode both regular Base64 and Base64URL encoded data. Can decode * only valid Base64[URL] data, behaviour for invalid data is unspecified. * @@ -21,12 +21,12 @@ constexpr size_t Base64DecodeBufSize(const size_t len) noexcept { * * @return Return number of bytes decoded. */ -size_t Base64Decode(void* dst, const char* b, const char* e); - +size_t Base64Decode(void* dst, const char* b, const char* e); + inline TStringBuf Base64Decode(const TStringBuf src, void* dst) { - return TStringBuf((const char*)dst, Base64Decode(dst, src.begin(), src.end())); -} - + return TStringBuf((const char*)dst, Base64Decode(dst, src.begin(), src.end())); +} + inline void Base64Decode(const TStringBuf src, TString& dst) { dst.ReserveAndResize(Base64DecodeBufSize(src.size())); dst.resize(Base64Decode(src, dst.begin()).size()); @@ -36,9 +36,9 @@ inline void Base64Decode(const TStringBuf src, TString& dst) { inline TString Base64Decode(const TStringBuf s) { TString ret; Base64Decode(s, ret); - return ret; -} - + return ret; +} + /// /// @brief Decodes Base64 string with strict verification /// of invalid symbols, also tries to decode Base64 string with padding @@ -64,7 +64,7 @@ size_t Base64StrictDecode(void* dst, const char* b, const char* e); /// @return Returns dst wrapped into TStringBuf. /// inline TStringBuf Base64StrictDecode(const TStringBuf src, void* dst) { - return TStringBuf((const char*)dst, Base64StrictDecode(dst, src.begin(), src.end())); + return TStringBuf((const char*)dst, Base64StrictDecode(dst, src.begin(), src.end())); } /// @@ -91,22 +91,22 @@ inline TString Base64StrictDecode(const TStringBuf src) { /// Works with strings which length is not divisible by 4. TString Base64DecodeUneven(const TStringBuf s); -//encode +//encode constexpr size_t Base64EncodeBufSize(const size_t len) noexcept { return (len + 2) / 3 * 4 + 1; -} - -char* Base64Encode(char* outstr, const unsigned char* instr, size_t len); -char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len); - +} + +char* Base64Encode(char* outstr, const unsigned char* instr, size_t len); +char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len); + inline TStringBuf Base64Encode(const TStringBuf src, void* tmp) { return TStringBuf((const char*)tmp, Base64Encode((char*)tmp, (const unsigned char*)src.data(), src.size())); -} - +} + inline TStringBuf Base64EncodeUrl(const TStringBuf src, void* tmp) { return TStringBuf((const char*)tmp, Base64EncodeUrl((char*)tmp, (const unsigned char*)src.data(), src.size())); -} - +} + inline void Base64Encode(const TStringBuf src, TString& dst) { dst.ReserveAndResize(Base64EncodeBufSize(src.size())); dst.resize(Base64Encode(src, dst.begin()).size()); @@ -120,11 +120,11 @@ inline void Base64EncodeUrl(const TStringBuf src, TString& dst) { inline TString Base64Encode(const TStringBuf s) { TString ret; Base64Encode(s, ret); - return ret; -} - + return ret; +} + inline TString Base64EncodeUrl(const TStringBuf s) { TString ret; Base64EncodeUrl(s, ret); - return ret; -} + return ret; +} diff --git a/library/cpp/string_utils/base64/base64_ut.cpp b/library/cpp/string_utils/base64/base64_ut.cpp index 16eb9c1bf2..bcc1e65879 100644 --- a/library/cpp/string_utils/base64/base64_ut.cpp +++ b/library/cpp/string_utils/base64/base64_ut.cpp @@ -1,5 +1,5 @@ -#include "base64.h" - +#include "base64.h" + #include <contrib/libs/base64/avx2/libbase64.h> #include <contrib/libs/base64/neon32/libbase64.h> #include <contrib/libs/base64/neon64/libbase64.h> @@ -197,11 +197,11 @@ static void TestEncodeStrictDecodeIntoString(const TString& plain, const TString Y_UNIT_TEST_SUITE(TBase64) { Y_UNIT_TEST(TestEncode) { - UNIT_ASSERT_VALUES_EQUAL(Base64Encode("12z"), "MTJ6"); - UNIT_ASSERT_VALUES_EQUAL(Base64Encode("123"), "MTIz"); - UNIT_ASSERT_VALUES_EQUAL(Base64Encode("12"), "MTI="); - UNIT_ASSERT_VALUES_EQUAL(Base64Encode("1"), "MQ=="); - } + UNIT_ASSERT_VALUES_EQUAL(Base64Encode("12z"), "MTJ6"); + UNIT_ASSERT_VALUES_EQUAL(Base64Encode("123"), "MTIz"); + UNIT_ASSERT_VALUES_EQUAL(Base64Encode("12"), "MTI="); + UNIT_ASSERT_VALUES_EQUAL(Base64Encode("1"), "MQ=="); + } Y_UNIT_TEST(TestIntoString) { { @@ -248,7 +248,7 @@ Y_UNIT_TEST_SUITE(TBase64) { UNIT_ASSERT_VALUES_EQUAL(Base64Decode(""), ""); UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode(""), ""); - UNIT_ASSERT_VALUES_EQUAL(Base64Decode("MTI="), "12"); + UNIT_ASSERT_VALUES_EQUAL(Base64Decode("MTI="), "12"); UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode("MTI="), "12"); UNIT_ASSERT_VALUES_EQUAL(Base64Decode("QQ=="), "A"); @@ -265,7 +265,7 @@ Y_UNIT_TEST_SUITE(TBase64) { UNIT_ASSERT_EXCEPTION(Base64StrictDecode("aHh=="), yexception); UNIT_ASSERT_EXCEPTION(Base64StrictDecode("\1\1\1\2"), yexception); - } + } Y_UNIT_TEST(TestDecodeUneven) { UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven(""), ""); @@ -494,4 +494,4 @@ Y_UNIT_TEST_SUITE(TBase64) { const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9hYQ,,"); UNIT_ASSERT_VALUES_EQUAL(x, xDec); } -} +} diff --git a/library/cpp/string_utils/indent_text/indent_text.cpp b/library/cpp/string_utils/indent_text/indent_text.cpp index e2fb1d69d0..09a4f6bca8 100644 --- a/library/cpp/string_utils/indent_text/indent_text.cpp +++ b/library/cpp/string_utils/indent_text/indent_text.cpp @@ -1,5 +1,5 @@ -#include "indent_text.h" - +#include "indent_text.h" + #include <util/stream/str.h> TString IndentText(TStringBuf text, TStringBuf indent) { diff --git a/library/cpp/string_utils/indent_text/ya.make b/library/cpp/string_utils/indent_text/ya.make index 9757fde208..cd0ed9ec61 100644 --- a/library/cpp/string_utils/indent_text/ya.make +++ b/library/cpp/string_utils/indent_text/ya.make @@ -1,9 +1,9 @@ -LIBRARY() - -OWNER(nga) - -SRCS( - indent_text.cpp -) - -END() +LIBRARY() + +OWNER(nga) + +SRCS( + indent_text.cpp +) + +END() diff --git a/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h index e3d3f5247e..8a240bfed8 100644 --- a/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h +++ b/library/cpp/string_utils/levenshtein_diff/levenshtein_diff.h @@ -9,34 +9,34 @@ #include <utility> namespace NLevenshtein { - enum EEditMoveType { - EMT_SPECIAL, - EMT_PRESERVE, - EMT_REPLACE, - EMT_DELETE, - EMT_INSERT - }; - - inline bool IsImportantEditMove(EEditMoveType p) { - return (p != EMT_SPECIAL && p != EMT_PRESERVE); - } - - inline void MakeMove(EEditMoveType t, int& p1, int& p2) { - switch (t) { - case EMT_PRESERVE: - case EMT_REPLACE: - p1++; - p2++; - break; - case EMT_DELETE: - p1++; - break; - case EMT_INSERT: - p2++; - break; - default: - break; - } + enum EEditMoveType { + EMT_SPECIAL, + EMT_PRESERVE, + EMT_REPLACE, + EMT_DELETE, + EMT_INSERT + }; + + inline bool IsImportantEditMove(EEditMoveType p) { + return (p != EMT_SPECIAL && p != EMT_PRESERVE); + } + + inline void MakeMove(EEditMoveType t, int& p1, int& p2) { + switch (t) { + case EMT_PRESERVE: + case EMT_REPLACE: + p1++; + p2++; + break; + case EMT_DELETE: + p1++; + break; + case EMT_INSERT: + p2++; + break; + default: + break; + } } using TEditChain = TVector<EEditMoveType>; @@ -58,7 +58,7 @@ namespace NLevenshtein { template <typename TStringType> using TCharType = typename std::decay_t<decltype(std::add_const_t<TStringType>()[0])>; - /// Finds sequence of "edit moves" for two strings + /// Finds sequence of "edit moves" for two strings template <class TStringType, class TWeightType = int, class TReplaceWeigher = TWeightOneBinaryGetter<TCharType<TStringType>>, class TDeleteWeigher = TWeightOneUnaryGetter<TCharType<TStringType>>, @@ -69,20 +69,20 @@ namespace NLevenshtein { const TDeleteWeigher& deleteWeigher = TDeleteWeigher(), const TInsertWeigher& insertWeigher = TInsertWeigher()) { - int l1 = (int)str1.size(); - int l2 = (int)str2.size(); + int l1 = (int)str1.size(); + int l2 = (int)str2.size(); TMatrix<std::pair<TWeightType, EEditMoveType>> ma(l1 + 1, l2 + 1); /// ma[i][j].first = diff(str1[0..i-1], str2[0..j-1]) - ma[0][0] = std::make_pair(0, EMT_SPECIAL); // starting point - for (int i = 1; i <= l1; i++) { + ma[0][0] = std::make_pair(0, EMT_SPECIAL); // starting point + for (int i = 1; i <= l1; i++) { ma[i][0] = std::make_pair(ma[i - 1][0].first + deleteWeigher(str1[i - 1]), EMT_DELETE); } - for (int i = 1; i <= l2; i++) { + for (int i = 1; i <= l2; i++) { ma[0][i] = std::make_pair(ma[0][i - 1].first + insertWeigher(str2[i - 1]), EMT_INSERT); } - // Here goes basic Levestein's algorithm - for (int i = 1; i <= l1; i++) { - for (int j = 1; j <= l2; j++) { + // Here goes basic Levestein's algorithm + for (int i = 1; i <= l1; i++) { + for (int j = 1; j <= l2; j++) { if (str1[i - 1] == str2[j - 1]) { ma[i][j] = std::make_pair(ma[i - 1][j - 1].first, EMT_PRESERVE); } else { @@ -108,31 +108,31 @@ namespace NLevenshtein { ma[i][j] = std::make_pair(insertPathWeight, EMT_INSERT); } } - } - } - // Tracing the path from final point - res.clear(); + } + } + // Tracing the path from final point + res.clear(); res.reserve(Max<size_t>(l1, l2)); - for (int i = l1, j = l2; ma[i][j].second != EMT_SPECIAL;) { - res.push_back(ma[i][j].second); - switch (ma[i][j].second) { - case EMT_PRESERVE: - case EMT_REPLACE: - --i; - --j; - break; - case EMT_DELETE: - --i; - break; - case EMT_INSERT: - --j; - break; - default: - // TODO: throw exception - break; - } - } - std::reverse(res.begin(), res.end()); + for (int i = l1, j = l2; ma[i][j].second != EMT_SPECIAL;) { + res.push_back(ma[i][j].second); + switch (ma[i][j].second) { + case EMT_PRESERVE: + case EMT_REPLACE: + --i; + --j; + break; + case EMT_DELETE: + --i; + break; + case EMT_INSERT: + --j; + break; + default: + // TODO: throw exception + break; + } + } + std::reverse(res.begin(), res.end()); if (weight != nullptr) { *weight = ma[l1][l2].first; @@ -151,9 +151,9 @@ namespace NLevenshtein { return result; } - /// Calculates substrings to be replaced for str1->str2 transformation - struct TReplacement { - int CorrectOffset, CorrectLength, MisspelledOffset, MisspelledLength; + /// Calculates substrings to be replaced for str1->str2 transformation + struct TReplacement { + int CorrectOffset, CorrectLength, MisspelledOffset, MisspelledLength; TReplacement() : CorrectOffset(0) , CorrectLength(0) @@ -161,32 +161,32 @@ namespace NLevenshtein { , MisspelledLength(0) { } - TReplacement(int correctOffset, int correctLength, int misspelledOffset, int misspelledLength) - : CorrectOffset(correctOffset) - , CorrectLength(correctLength) - , MisspelledOffset(misspelledOffset) - , MisspelledLength(misspelledLength) - { - } - }; + TReplacement(int correctOffset, int correctLength, int misspelledOffset, int misspelledLength) + : CorrectOffset(correctOffset) + , CorrectLength(correctLength) + , MisspelledOffset(misspelledOffset) + , MisspelledLength(misspelledLength) + { + } + }; template <class TStringType> void GetStringReplacements(const TStringType& str1, const TStringType& str2, TVector<TReplacement>& res) { - TEditChain editChain; - GetEditChain(str1, str2, editChain); - editChain.push_back(EMT_SPECIAL); - int c1 = 0, c2 = 0; - res.clear(); - for (TEditChain::const_iterator it = editChain.begin(); it != editChain.end(); it++) { - if (IsImportantEditMove(*it)) { - int sc1 = c1, sc2 = c2; - do { - MakeMove(*it, c1, c2); - ++it; - } while (IsImportantEditMove(*it)); - res.push_back(TReplacement(sc1, c1 - sc1, sc2, c2 - sc2)); - } - MakeMove(*it, c1, c2); - } + TEditChain editChain; + GetEditChain(str1, str2, editChain); + editChain.push_back(EMT_SPECIAL); + int c1 = 0, c2 = 0; + res.clear(); + for (TEditChain::const_iterator it = editChain.begin(); it != editChain.end(); it++) { + if (IsImportantEditMove(*it)) { + int sc1 = c1, sc2 = c2; + do { + MakeMove(*it, c1, c2); + ++it; + } while (IsImportantEditMove(*it)); + res.push_back(TReplacement(sc1, c1 - sc1, sc2, c2 - sc2)); + } + MakeMove(*it, c1, c2); + } } } diff --git a/library/cpp/string_utils/levenshtein_diff/ya.make b/library/cpp/string_utils/levenshtein_diff/ya.make index e51c08aa1e..bafefe5365 100644 --- a/library/cpp/string_utils/levenshtein_diff/ya.make +++ b/library/cpp/string_utils/levenshtein_diff/ya.make @@ -1,13 +1,13 @@ -LIBRARY() - +LIBRARY() + OWNER(g:mt) - -SRCS( + +SRCS( levenshtein_diff.cpp -) - -PEERDIR( - util/draft -) - -END() +) + +PEERDIR( + util/draft +) + +END() diff --git a/library/cpp/string_utils/parse_size/parse_size.cpp b/library/cpp/string_utils/parse_size/parse_size.cpp index 1195f89a51..39188d560b 100644 --- a/library/cpp/string_utils/parse_size/parse_size.cpp +++ b/library/cpp/string_utils/parse_size/parse_size.cpp @@ -2,89 +2,89 @@ #include <util/generic/yexception.h> #include <util/generic/ylimits.h> -#include <util/string/cast.h> +#include <util/string/cast.h> #include <util/stream/output.h> namespace { - enum ESuffixShifts { - ESS_KILO_BYTES = 10, - ESS_MEGA_BYTES = 20, - ESS_GIGA_BYTES = 30, - ESS_TERA_BYTES = 40, - }; - - bool TryShiftValue(ui64& value, ui64 shift) { - if (value > (Max<ui64>() >> shift)) { - return false; - } - - value <<= shift; - return true; + enum ESuffixShifts { + ESS_KILO_BYTES = 10, + ESS_MEGA_BYTES = 20, + ESS_GIGA_BYTES = 30, + ESS_TERA_BYTES = 40, + }; + + bool TryShiftValue(ui64& value, ui64 shift) { + if (value > (Max<ui64>() >> shift)) { + return false; + } + + value <<= shift; + return true; } - ui64 ShiftValue(ui64 value, ui64 shift) { - if (!TryShiftValue(value, shift)) { - ythrow yexception() << "value overflow '" << value << " << " << shift << "'"; - } else { - return value; - } + ui64 ShiftValue(ui64 value, ui64 shift) { + if (!TryShiftValue(value, shift)) { + ythrow yexception() << "value overflow '" << value << " << " << shift << "'"; + } else { + return value; + } } } namespace NSize { - ui64 ParseSize(TStringBuf str) { + ui64 ParseSize(TStringBuf str) { if (! str.size()) - ythrow yexception() << "Wrong size " << str; + ythrow yexception() << "Wrong size " << str; char suff = tolower(str[str.size() - 1]); - if (isdigit(suff)) - return FromString<ui64>(str); - ui64 shift = 1; - switch (suff) { - case 'k': - shift = ESS_KILO_BYTES; - break; - case 'm': - shift = ESS_MEGA_BYTES; - break; - case 'g': - shift = ESS_GIGA_BYTES; - break; - case 't': - shift = ESS_TERA_BYTES; - break; - default: - ythrow yexception() << "Unknown suffix " << str; - } + if (isdigit(suff)) + return FromString<ui64>(str); + ui64 shift = 1; + switch (suff) { + case 'k': + shift = ESS_KILO_BYTES; + break; + case 'm': + shift = ESS_MEGA_BYTES; + break; + case 'g': + shift = ESS_GIGA_BYTES; + break; + case 't': + shift = ESS_TERA_BYTES; + break; + default: + ythrow yexception() << "Unknown suffix " << str; + } ui64 value = FromString<ui64>(str.substr(0, str.size() - 1)); - - if (!TryShiftValue(value, shift)) { - ythrow yexception() << "Value overflow " << str; - } else { - return value; - } + + if (!TryShiftValue(value, shift)) { + ythrow yexception() << "Value overflow " << str; + } else { + return value; + } } - TSize FromKiloBytes(ui64 value) { - return TSize(ShiftValue(value, ESS_KILO_BYTES)); - } + TSize FromKiloBytes(ui64 value) { + return TSize(ShiftValue(value, ESS_KILO_BYTES)); + } - TSize FromMegaBytes(ui64 value) { - return TSize(ShiftValue(value, ESS_MEGA_BYTES)); + TSize FromMegaBytes(ui64 value) { + return TSize(ShiftValue(value, ESS_MEGA_BYTES)); } - TSize FromGigaBytes(ui64 value) { - return TSize(ShiftValue(value, ESS_GIGA_BYTES)); - } + TSize FromGigaBytes(ui64 value) { + return TSize(ShiftValue(value, ESS_GIGA_BYTES)); + } - TSize FromTeraBytes(ui64 value) { - return TSize(ShiftValue(value, ESS_TERA_BYTES)); - } + TSize FromTeraBytes(ui64 value) { + return TSize(ShiftValue(value, ESS_TERA_BYTES)); + } } -template <> +template <> NSize::TSize FromStringImpl<NSize::TSize>(const char* data, size_t len) { return NSize::TSize(NSize::ParseSize(TStringBuf(data, len))); } diff --git a/library/cpp/string_utils/parse_size/parse_size.h b/library/cpp/string_utils/parse_size/parse_size.h index c9fa92980b..ad235ef02f 100644 --- a/library/cpp/string_utils/parse_size/parse_size.h +++ b/library/cpp/string_utils/parse_size/parse_size.h @@ -3,31 +3,31 @@ #include <util/generic/strbuf.h> namespace NSize { - ui64 ParseSize(TStringBuf size); - - // Convenient disk size representation with string parsing and integer comparison - class TSize { - public: - TSize(ui64 value = 0) - : Value(value) - { - } - - ui64 GetValue() const { - return Value; - } - - operator ui64() const { - return Value; - } - - private: - ui64 Value; - }; - - TSize FromKiloBytes(ui64 value); - TSize FromMegaBytes(ui64 value); - TSize FromGigaBytes(ui64 value); - TSize FromTeraBytes(ui64 value); + ui64 ParseSize(TStringBuf size); + + // Convenient disk size representation with string parsing and integer comparison + class TSize { + public: + TSize(ui64 value = 0) + : Value(value) + { + } + + ui64 GetValue() const { + return Value; + } + + operator ui64() const { + return Value; + } + + private: + ui64 Value; + }; + + TSize FromKiloBytes(ui64 value); + TSize FromMegaBytes(ui64 value); + TSize FromGigaBytes(ui64 value); + TSize FromTeraBytes(ui64 value); } diff --git a/library/cpp/string_utils/quote/quote.cpp b/library/cpp/string_utils/quote/quote.cpp index 02bcfaa6e1..e523350b80 100644 --- a/library/cpp/string_utils/quote/quote.cpp +++ b/library/cpp/string_utils/quote/quote.cpp @@ -1,18 +1,18 @@ -#include "quote.h" - -#include <util/memory/tempbuf.h> +#include "quote.h" + +#include <util/memory/tempbuf.h> #include <util/string/ascii.h> #include <util/string/cstriter.h> -#include <cctype> - +#include <cctype> + /* note: (x & 0xdf) makes x upper case */ -#define GETXC \ - do { \ - c *= 16; \ - c += (x[0] >= 'A' ? ((x[0] & 0xdf) - 'A') + 10 : (x[0] - '0')); \ - ++x; \ - } while (0) +#define GETXC \ + do { \ + c *= 16; \ + c += (x[0] >= 'A' ? ((x[0] & 0xdf) - 'A') + 10 : (x[0] - '0')); \ + ++x; \ + } while (0) #define GETSBXC \ do { \ @@ -25,18 +25,18 @@ namespace { class TFromHexZeroTerm { public: - static inline char x2c(const char*& x) { - if (!IsAsciiHex((ui8)x[0]) || !IsAsciiHex((ui8)x[1])) + static inline char x2c(const char*& x) { + if (!IsAsciiHex((ui8)x[0]) || !IsAsciiHex((ui8)x[1])) return '%'; ui8 c = 0; - GETXC; - GETXC; + GETXC; + GETXC; return c; } - static inline char x2c(TStringBuf& x) { - if (!IsAsciiHex((ui8)x[0]) || !IsAsciiHex((ui8)x[1])) + static inline char x2c(TStringBuf& x) { + if (!IsAsciiHex((ui8)x[0]) || !IsAsciiHex((ui8)x[1])) return '%'; ui8 c = 0; @@ -53,7 +53,7 @@ namespace { { } - inline char x2c(const char*& x) { + inline char x2c(const char*& x) { if (x + 2 > End) return '%'; return TFromHexZeroTerm::x2c(x); @@ -64,80 +64,80 @@ namespace { }; } -static inline char d2x(unsigned x) { - return (char)((x < 10) ? ('0' + x) : ('A' + x - 10)); +static inline char d2x(unsigned x) { + return (char)((x < 10) ? ('0' + x) : ('A' + x - 10)); } static inline const char* FixZero(const char* s) noexcept { - return s ? s : ""; -} - + return s ? s : ""; +} + // we escape: -// '\"', '|', '(', ')', +// '\"', '|', '(', ')', // '%', '&', '+', ',', -// '#', '<', '=', '>', +// '#', '<', '=', '>', // '[', '\\',']', '?', // ':', '{', '}', // all below ' ' (0x20) and above '~' (0x7E). // ' ' converted to '+' static const bool chars_to_url_escape[256] = { - // 0 1 2 3 4 5 6 7 8 9 A B C D E F - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //0 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //1 - 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, //2 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, //3 - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //4 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, //5 - 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //6 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, //7 - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //8 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //9 - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //A - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //B - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //C - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //D - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //E - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //F + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //0 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //1 + 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, //2 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, //3 + + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //4 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, //5 + 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //6 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, //7 + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //8 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //9 + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //A + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //B + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //C + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //D + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //E + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //F }; -template <class It1, class It2, class It3> -static inline It1 Escape(It1 to, It2 from, It3 end, const bool* escape_map = chars_to_url_escape) { - while (from != end) { +template <class It1, class It2, class It3> +static inline It1 Escape(It1 to, It2 from, It3 end, const bool* escape_map = chars_to_url_escape) { + while (from != end) { if (escape_map[(unsigned char)*from]) { *to++ = '%'; - *to++ = d2x((unsigned char)*from >> 4); - *to++ = d2x((unsigned char)*from & 0xF); + *to++ = d2x((unsigned char)*from >> 4); + *to++ = d2x((unsigned char)*from & 0xF); } else { *to++ = (*from == ' ' ? '+' : *from); } - + ++from; } - + *to = 0; - + return to; } template <class It1, class It2, class It3, class FromHex> static inline It1 Unescape(It1 to, It2 from, It3 end, FromHex fromHex) { - (void)fromHex; - + (void)fromHex; + while (from != end) { switch (*from) { - case '%': + case '%': ++from; - *to++ = fromHex.x2c(from); - break; - case '+': - *to++ = ' '; + *to++ = fromHex.x2c(from); + break; + case '+': + *to++ = ' '; ++from; - break; - default: - *to++ = *from++; + break; + default: + *to++ = *from++; } } *to = 0; @@ -147,18 +147,18 @@ static inline It1 Unescape(It1 to, It2 from, It3 end, FromHex fromHex) { // CGIEscape returns pointer to the end of the result string // so as it could be possible to populate single long buffer // with several calls to CGIEscape in a row. -char* CGIEscape(char* to, const char* from) { - return Escape(to, FixZero(from), TCStringEndIterator()); +char* CGIEscape(char* to, const char* from) { + return Escape(to, FixZero(from), TCStringEndIterator()); +} + +char* CGIEscape(char* to, const char* from, size_t len) { + return Escape(to, from, from + len); } -char* CGIEscape(char* to, const char* from, size_t len) { - return Escape(to, from, from + len); -} - void CGIEscape(TString& url) { TTempBuf tempBuf(CgiEscapeBufLen(url.size())); - char* to = tempBuf.Data(); - + char* to = tempBuf.Data(); + url.AssignNoAlias(to, CGIEscape(to, url.data(), url.size())); } @@ -178,7 +178,7 @@ TString& AppendCgiEscaped(const TStringBuf value, TString& to) { // More general version of CGIEscape. The optional safe parameter specifies // additional characters that should not be quoted — its default value is '/'. - + // Also returns pointer to the end of result string. template <class It1, class It2, class It3> @@ -210,14 +210,14 @@ char* Quote(char* to, const TStringBuf s, const char* safe) { void Quote(TString& url, const char* safe) { TTempBuf tempBuf(CgiEscapeBufLen(url.size())); char* to = tempBuf.Data(); - + url.AssignNoAlias(to, Quote(to, url, safe)); } -char* CGIUnescape(char* to, const char* from) { +char* CGIUnescape(char* to, const char* from) { return Unescape(to, FixZero(from), TCStringEndIterator(), TFromHexZeroTerm()); } - + char* CGIUnescape(char* to, const char* from, size_t len) { return Unescape(to, from, from + len, TFromHexLenLimited(from + len)); } @@ -250,10 +250,10 @@ char* UrlUnescape(char* to, TStringBuf from) { ch = TFromHexZeroTerm::x2c(from); *to++ = ch; } - + *to = 0; - - return to; + + return to; } void UrlUnescape(TString& url) { @@ -277,29 +277,29 @@ TString UrlUnescapeRet(const TStringBuf from) { } char* UrlEscape(char* to, const char* from, bool forceEscape) { - from = FixZero(from); - - while (*from) { + from = FixZero(from); + + while (*from) { const bool escapePercent = (*from == '%') && (forceEscape || !((*(from + 1) && IsAsciiHex(*(from + 1)) && *(from + 2) && IsAsciiHex(*(from + 2))))); if (escapePercent || (unsigned char)*from <= ' ' || (unsigned char)*from > '~') { - *to++ = '%'; - *to++ = d2x((unsigned char)*from >> 4); - *to++ = d2x((unsigned char)*from & 0xF); - } else + *to++ = '%'; + *to++ = d2x((unsigned char)*from >> 4); + *to++ = d2x((unsigned char)*from & 0xF); + } else *to++ = *from; ++from; } - + *to = 0; - - return to; + + return to; } void UrlEscape(TString& url, bool forceEscape) { TTempBuf tempBuf(CgiEscapeBufLen(url.size())); - char* to = tempBuf.Data(); + char* to = tempBuf.Data(); url.AssignNoAlias(to, UrlEscape(to, url.data(), forceEscape)); } diff --git a/library/cpp/string_utils/quote/quote.h b/library/cpp/string_utils/quote/quote.h index 29c2bf7f89..3b7221154e 100644 --- a/library/cpp/string_utils/quote/quote.h +++ b/library/cpp/string_utils/quote/quote.h @@ -1,29 +1,29 @@ -#pragma once - -#include <util/generic/strbuf.h> +#pragma once + +#include <util/generic/strbuf.h> #include <util/generic/string.h> - + //CGIEscape*: // ' ' converted to '+', // Some punctuation and chars outside [32, 126] range are converted to %xx // Use function CgiEscapeBufLen to determine number of characters needed for 'char* to' parameter. // Returns pointer to the end of the result string -char* CGIEscape(char* to, const char* from); -char* CGIEscape(char* to, const char* from, size_t len); +char* CGIEscape(char* to, const char* from); +char* CGIEscape(char* to, const char* from, size_t len); inline char* CGIEscape(char* to, const TStringBuf from) { return CGIEscape(to, from.data(), from.size()); } void CGIEscape(TString& url); TString CGIEscapeRet(const TStringBuf url); TString& AppendCgiEscaped(const TStringBuf value, TString& to); - + inline TStringBuf CgiEscapeBuf(char* to, const TStringBuf from) { return TStringBuf(to, CGIEscape(to, from.data(), from.size())); } inline TStringBuf CgiEscape(void* tmp, const TStringBuf s) { - return CgiEscapeBuf(static_cast<char*>(tmp), s); -} - + return CgiEscapeBuf(static_cast<char*>(tmp), s); +} + //CgiUnescape*: // Decodes '%xx' to bytes, '+' to space. // Use function CgiUnescapeBufLen to determine number of characters needed for 'char* to' parameter. @@ -37,15 +37,15 @@ inline TStringBuf CgiUnescapeBuf(char* to, const TStringBuf from) { return TStringBuf(to, CGIUnescape(to, from.data(), from.size())); } inline TStringBuf CgiUnescape(void* tmp, const TStringBuf s) { - return CgiUnescapeBuf(static_cast<char*>(tmp), s); -} - + return CgiUnescapeBuf(static_cast<char*>(tmp), s); +} + //Quote: // Is like CGIEscape, also skips encoding of user-supplied 'safe' characters. -char* Quote(char* to, const char* from, const char* safe = "/"); +char* Quote(char* to, const char* from, const char* safe = "/"); char* Quote(char* to, const TStringBuf s, const char* safe = "/"); void Quote(TString& url, const char* safe = "/"); - + //UrlEscape: // Can't be used for cgi parameters ('&' character is not escaped)! // escapes only '%' not followed by two hex-digits or if forceEscape set to ture, @@ -61,12 +61,12 @@ TString UrlEscapeRet(const TStringBuf from, bool forceEscape = false); char* UrlUnescape(char* to, TStringBuf from); void UrlUnescape(TString& url); TString UrlUnescapeRet(const TStringBuf from); - + //*BufLen: how much characters you should allocate for 'char* to' buffers. constexpr size_t CgiEscapeBufLen(const size_t len) noexcept { - return 3 * len + 1; -} - + return 3 * len + 1; +} + constexpr size_t CgiUnescapeBufLen(const size_t len) noexcept { - return len + 1; -} + return len + 1; +} diff --git a/library/cpp/string_utils/quote/quote_ut.cpp b/library/cpp/string_utils/quote/quote_ut.cpp index b89a9525a2..6c552b279e 100644 --- a/library/cpp/string_utils/quote/quote_ut.cpp +++ b/library/cpp/string_utils/quote/quote_ut.cpp @@ -1,7 +1,7 @@ -#include "quote.h" - +#include "quote.h" + #include <library/cpp/testing/unittest/registar.h> - + Y_UNIT_TEST_SUITE(TCGIEscapeTest) { Y_UNIT_TEST(ReturnsEndOfTo) { char r[10]; @@ -9,21 +9,21 @@ Y_UNIT_TEST_SUITE(TCGIEscapeTest) { UNIT_ASSERT_VALUES_EQUAL(r + strlen("123"), returned); UNIT_ASSERT_VALUES_EQUAL('\0', *returned); } - + Y_UNIT_TEST(NotZeroTerminated) { - char r[] = {'1', '2', '3', '4'}; - char buf[sizeof(r) * 3 + 2]; - + char r[] = {'1', '2', '3', '4'}; + char buf[sizeof(r) * 3 + 2]; + TString ret(buf, CGIEscape(buf, r, sizeof(r))); - - UNIT_ASSERT_EQUAL(ret, "1234"); - } - + + UNIT_ASSERT_EQUAL(ret, "1234"); + } + Y_UNIT_TEST(StringBuf) { - char tmp[100]; - + char tmp[100]; + UNIT_ASSERT_VALUES_EQUAL(CgiEscape(tmp, "!@#$%^&*(){}[]\" "), TStringBuf("!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+")); - } + } Y_UNIT_TEST(StrokaRet) { UNIT_ASSERT_VALUES_EQUAL(CGIEscapeRet("!@#$%^&*(){}[]\" "), TString("!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+")); @@ -49,11 +49,11 @@ Y_UNIT_TEST_SUITE(TCGIEscapeTest) { Y_UNIT_TEST_SUITE(TCGIUnescapeTest) { Y_UNIT_TEST(StringBuf) { - char tmp[100]; - + char tmp[100]; + UNIT_ASSERT_VALUES_EQUAL(CgiUnescape(tmp, "!@%23$%25^%26*%28%29"), TStringBuf("!@#$%^&*()")); - } - + } + Y_UNIT_TEST(TestValidZeroTerm) { char r[10]; diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp index 8a04323fac..ac624dca85 100644 --- a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp +++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp @@ -1 +1 @@ -#include "relaxed_escaper.h" +#include "relaxed_escaper.h" diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h index 999a75b601..d7ea7c1259 100644 --- a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h +++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h @@ -2,207 +2,207 @@ #include <util/stream/output.h> #include <util/string/escape.h> -#include <util/memory/tempbuf.h> -#include <util/generic/strbuf.h> +#include <util/memory/tempbuf.h> +#include <util/generic/strbuf.h> namespace NEscJ { - // almost copypaste from util/string/escape.h - // todo: move there (note difference in IsPrintable and handling of string) - - inline char HexDigit(char value) { - if (value < 10) - return '0' + value; - else - return 'A' + value - 10; - } - - inline char OctDigit(char value) { + // almost copypaste from util/string/escape.h + // todo: move there (note difference in IsPrintable and handling of string) + + inline char HexDigit(char value) { + if (value < 10) + return '0' + value; + else + return 'A' + value - 10; + } + + inline char OctDigit(char value) { return '0' + value; - } - - inline bool IsUTF8(ui8 c) { - return c < 0xf5 && c != 0xC0 && c != 0xC1; - } - - inline bool IsControl(ui8 c) { - return c < 0x20 || c == 0x7f; - } - - inline bool IsPrintable(ui8 c) { - return IsUTF8(c) && !IsControl(c); - } - - inline bool IsHexDigit(ui8 c) { - return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); - } - - inline bool IsOctDigit(ui8 c) { - return c >= '0' && c <= '7'; - } - - struct TEscapeUtil { - static const size_t ESCAPE_C_BUFFER_SIZE = 6; - - template <bool asunicode> - static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) { - // (1) Printable characters go as-is, except backslash and double quote. - // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). - // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. - if (safe.find(c) != TStringBuf::npos) { - r[0] = c; - return 1; - } - if (c == '\"') { - r[0] = '\\'; - r[1] = '\"'; - return 2; - } else if (c == '\\') { - r[0] = '\\'; - r[1] = '\\'; - return 2; - } else if (IsPrintable(c) && unsafe.find(c) == TStringBuf::npos) { - r[0] = c; - return 1; - } else if (c == '\b') { - r[0] = '\\'; - r[1] = 'b'; - return 2; - } else if (c == '\f') { - r[0] = '\\'; - r[1] = 'f'; - return 2; - } else if (c == '\r') { - r[0] = '\\'; - r[1] = 'r'; - return 2; - } else if (c == '\n') { - r[0] = '\\'; - r[1] = 'n'; - return 2; - } else if (c == '\t') { - r[0] = '\\'; - r[1] = 't'; - return 2; - } else if (asunicode && IsUTF8(c)) { // utf8 controls escape for json - r[0] = '\\'; - r[1] = 'u'; - r[2] = '0'; - r[3] = '0'; - r[4] = HexDigit((c & 0xF0) >> 4); - r[5] = HexDigit((c & 0x0F) >> 0); - return 6; - } else if (c < 8 && !IsOctDigit(next)) { - r[0] = '\\'; - r[1] = OctDigit(c); - return 2; - } else if (!IsHexDigit(next)) { - r[0] = '\\'; - r[1] = 'x'; - r[2] = HexDigit((c & 0xF0) >> 4); - r[3] = HexDigit((c & 0x0F) >> 0); - return 4; - } else { - r[0] = '\\'; - r[1] = OctDigit((c & 0700) >> 6); - r[2] = OctDigit((c & 0070) >> 3); - r[3] = OctDigit((c & 0007) >> 0); - return 4; - } - } - - static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) { - return EscapeJ<false>(c, next, r, safe, unsafe); + } + + inline bool IsUTF8(ui8 c) { + return c < 0xf5 && c != 0xC0 && c != 0xC1; + } + + inline bool IsControl(ui8 c) { + return c < 0x20 || c == 0x7f; + } + + inline bool IsPrintable(ui8 c) { + return IsUTF8(c) && !IsControl(c); + } + + inline bool IsHexDigit(ui8 c) { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); + } + + inline bool IsOctDigit(ui8 c) { + return c >= '0' && c <= '7'; + } + + struct TEscapeUtil { + static const size_t ESCAPE_C_BUFFER_SIZE = 6; + + template <bool asunicode> + static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) { + // (1) Printable characters go as-is, except backslash and double quote. + // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). + // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. + if (safe.find(c) != TStringBuf::npos) { + r[0] = c; + return 1; + } + if (c == '\"') { + r[0] = '\\'; + r[1] = '\"'; + return 2; + } else if (c == '\\') { + r[0] = '\\'; + r[1] = '\\'; + return 2; + } else if (IsPrintable(c) && unsafe.find(c) == TStringBuf::npos) { + r[0] = c; + return 1; + } else if (c == '\b') { + r[0] = '\\'; + r[1] = 'b'; + return 2; + } else if (c == '\f') { + r[0] = '\\'; + r[1] = 'f'; + return 2; + } else if (c == '\r') { + r[0] = '\\'; + r[1] = 'r'; + return 2; + } else if (c == '\n') { + r[0] = '\\'; + r[1] = 'n'; + return 2; + } else if (c == '\t') { + r[0] = '\\'; + r[1] = 't'; + return 2; + } else if (asunicode && IsUTF8(c)) { // utf8 controls escape for json + r[0] = '\\'; + r[1] = 'u'; + r[2] = '0'; + r[3] = '0'; + r[4] = HexDigit((c & 0xF0) >> 4); + r[5] = HexDigit((c & 0x0F) >> 0); + return 6; + } else if (c < 8 && !IsOctDigit(next)) { + r[0] = '\\'; + r[1] = OctDigit(c); + return 2; + } else if (!IsHexDigit(next)) { + r[0] = '\\'; + r[1] = 'x'; + r[2] = HexDigit((c & 0xF0) >> 4); + r[3] = HexDigit((c & 0x0F) >> 0); + return 4; + } else { + r[0] = '\\'; + r[1] = OctDigit((c & 0700) >> 6); + r[2] = OctDigit((c & 0070) >> 3); + r[3] = OctDigit((c & 0007) >> 0); + return 4; + } + } + + static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) { + return EscapeJ<false>(c, next, r, safe, unsafe); } - }; + }; - inline size_t SuggestBuffer(size_t len) { - return len * TEscapeUtil::ESCAPE_C_BUFFER_SIZE; + inline size_t SuggestBuffer(size_t len) { + return len * TEscapeUtil::ESCAPE_C_BUFFER_SIZE; } - template <bool tounicode> - inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { - char* out0 = out; - char buffer[TEscapeUtil::ESCAPE_C_BUFFER_SIZE]; + template <bool tounicode> + inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { + char* out0 = out; + char buffer[TEscapeUtil::ESCAPE_C_BUFFER_SIZE]; - size_t i, j; - for (i = 0, j = 0; i < len; ++i) { - size_t rlen = TEscapeUtil::EscapeJ<tounicode>(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe); + size_t i, j; + for (i = 0, j = 0; i < len; ++i) { + size_t rlen = TEscapeUtil::EscapeJ<tounicode>(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe); - if (rlen > 1) { - strncpy(out, str + j, i - j); - out += i - j; - j = i + 1; + if (rlen > 1) { + strncpy(out, str + j, i - j); + out += i - j; + j = i + 1; - strncpy(out, buffer, rlen); - out += rlen; - } - } + strncpy(out, buffer, rlen); + out += rlen; + } + } - if (j > 0) { - strncpy(out, str + j, len - j); - out += len - j; - } else { - strncpy(out, str, len); - out += len; + if (j > 0) { + strncpy(out, str + j, len - j); + out += len - j; + } else { + strncpy(out, str, len); + out += len; } - return out - out0; + return out - out0; } - template <bool quote, bool tounicode> + template <bool quote, bool tounicode> inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { TTempBuf b(SuggestBuffer(in.size()) + 2); - if (quote) - b.Append("\"", 1); + if (quote) + b.Append("\"", 1); b.Proceed(EscapeJ<tounicode>(in.data(), in.size(), b.Current(), safe, unsafe)); - if (quote) - b.Append("\"", 1); + if (quote) + b.Append("\"", 1); - out.Write(b.Data(), b.Filled()); - } + out.Write(b.Data(), b.Filled()); + } - template <bool quote, bool tounicode> + template <bool quote, bool tounicode> inline void EscapeJ(TStringBuf in, TString& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { TTempBuf b(SuggestBuffer(in.size()) + 2); - if (quote) - b.Append("\"", 1); + if (quote) + b.Append("\"", 1); b.Proceed(EscapeJ<tounicode>(in.data(), in.size(), b.Current(), safe, unsafe)); - if (quote) - b.Append("\"", 1); + if (quote) + b.Append("\"", 1); - out.append(b.Data(), b.Filled()); - } + out.append(b.Data(), b.Filled()); + } - template <bool quote, bool tounicode> + template <bool quote, bool tounicode> inline TString EscapeJ(TStringBuf in, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { TString s; - EscapeJ<quote, tounicode>(in, s, safe, unsafe); - return s; - } + EscapeJ<quote, tounicode>(in, s, safe, unsafe); + return s; + } - // If the template parameter "tounicode" is ommited, then use the default value false - inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { - return EscapeJ<false>(str, len, out, safe, unsafe); - } + // If the template parameter "tounicode" is ommited, then use the default value false + inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { + return EscapeJ<false>(str, len, out, safe, unsafe); + } - template <bool quote> + template <bool quote> inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { - EscapeJ<quote, false>(in, out, safe, unsafe); - } + EscapeJ<quote, false>(in, out, safe, unsafe); + } - template <bool quote> + template <bool quote> inline void EscapeJ(TStringBuf in, TString& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { - EscapeJ<quote, false>(in, out, safe, unsafe); - } + EscapeJ<quote, false>(in, out, safe, unsafe); + } - template <bool quote> + template <bool quote> inline TString EscapeJ(TStringBuf in, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { - return EscapeJ<quote, false>(in, safe, unsafe); - } + return EscapeJ<quote, false>(in, safe, unsafe); + } } diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp index 496c30ab01..768555ea3a 100644 --- a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp +++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp @@ -10,19 +10,19 @@ static const TStringBuf CommonTestData[] = { RESC_FIXED_STR("http://ya.ru/\\0"), RESC_FIXED_STR("http://ya.ru/\0"), RESC_FIXED_STR("http://ya.ru/\\0\\0"), RESC_FIXED_STR("http://ya.ru/\0\0"), - RESC_FIXED_STR("http://ya.ru/\\0\\0000"), RESC_FIXED_STR("http://ya.ru/\0\0" - "0"), - RESC_FIXED_STR("http://ya.ru/\\0\\0001"), RESC_FIXED_STR("http://ya.ru/\0\x00" - "1"), + RESC_FIXED_STR("http://ya.ru/\\0\\0000"), RESC_FIXED_STR("http://ya.ru/\0\0" + "0"), + RESC_FIXED_STR("http://ya.ru/\\0\\0001"), RESC_FIXED_STR("http://ya.ru/\0\x00" + "1"), - RESC_FIXED_STR("\\2\\4\\00678"), RESC_FIXED_STR("\2\4\6" - "78"), + RESC_FIXED_STR("\\2\\4\\00678"), RESC_FIXED_STR("\2\4\6" + "78"), RESC_FIXED_STR("\\2\\4\\689"), RESC_FIXED_STR("\2\4\689"), RESC_FIXED_STR("\\\"Hello\\\", Alice said."), RESC_FIXED_STR("\"Hello\", Alice said."), RESC_FIXED_STR("Slash\\\\dash!"), RESC_FIXED_STR("Slash\\dash!"), RESC_FIXED_STR("There\\nare\\r\\nnewlines."), RESC_FIXED_STR("There\nare\r\nnewlines."), - RESC_FIXED_STR("There\\tare\\ttabs."), RESC_FIXED_STR("There\tare\ttabs.")}; + RESC_FIXED_STR("There\\tare\\ttabs."), RESC_FIXED_STR("There\tare\ttabs.")}; #undef RESC_FIXED_STR Y_UNIT_TEST_SUITE(TRelaxedEscaperTest) { @@ -52,15 +52,15 @@ Y_UNIT_TEST_SUITE(TRelaxedEscaperTest) { UNIT_ASSERT_VALUES_EQUAL("\"\\xFF\"", EscapeJ<true>("\xFF")); UNIT_ASSERT_VALUES_EQUAL("\xFF", UnescapeC("\\xFF")); - UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeJ<false>("\xff" - "f")); - UNIT_ASSERT_VALUES_EQUAL("\xff" - "f", - UnescapeC("\\377f")); - UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeJ<false>("\xff" - "g")); - UNIT_ASSERT_VALUES_EQUAL("\xff" - "g", - UnescapeC("\\xFFg")); + UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeJ<false>("\xff" + "f")); + UNIT_ASSERT_VALUES_EQUAL("\xff" + "f", + UnescapeC("\\377f")); + UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeJ<false>("\xff" + "g")); + UNIT_ASSERT_VALUES_EQUAL("\xff" + "g", + UnescapeC("\\xFFg")); } } diff --git a/library/cpp/string_utils/relaxed_escaper/ut/ya.make b/library/cpp/string_utils/relaxed_escaper/ut/ya.make index 55a9611de1..7ebd393c48 100644 --- a/library/cpp/string_utils/relaxed_escaper/ut/ya.make +++ b/library/cpp/string_utils/relaxed_escaper/ut/ya.make @@ -1,9 +1,9 @@ UNITTEST_FOR(library/cpp/string_utils/relaxed_escaper) - -OWNER(velavokr) - -SRCS( - relaxed_escaper_ut.cpp -) - -END() + +OWNER(velavokr) + +SRCS( + relaxed_escaper_ut.cpp +) + +END() diff --git a/library/cpp/string_utils/relaxed_escaper/ya.make b/library/cpp/string_utils/relaxed_escaper/ya.make index 4e0364c633..3f0fa5bc07 100644 --- a/library/cpp/string_utils/relaxed_escaper/ya.make +++ b/library/cpp/string_utils/relaxed_escaper/ya.make @@ -1,9 +1,9 @@ -LIBRARY() - -OWNER(velavokr) - -SRCS( - relaxed_escaper.cpp -) - -END() +LIBRARY() + +OWNER(velavokr) + +SRCS( + relaxed_escaper.cpp +) + +END() diff --git a/library/cpp/string_utils/scan/scan.cpp b/library/cpp/string_utils/scan/scan.cpp index 5e418a0cb8..fbc1fdf08f 100644 --- a/library/cpp/string_utils/scan/scan.cpp +++ b/library/cpp/string_utils/scan/scan.cpp @@ -1 +1 @@ -#include "scan.h" +#include "scan.h" diff --git a/library/cpp/string_utils/scan/scan.h b/library/cpp/string_utils/scan/scan.h index 466ea46537..703db54321 100644 --- a/library/cpp/string_utils/scan/scan.h +++ b/library/cpp/string_utils/scan/scan.h @@ -1,22 +1,22 @@ -#pragma once - -#include <util/generic/strbuf.h> - +#pragma once + +#include <util/generic/strbuf.h> + template <bool addAll, char sep, char sepKeyVal, class F> -static inline void ScanKeyValue(TStringBuf s, F&& f) { +static inline void ScanKeyValue(TStringBuf s, F&& f) { TStringBuf key, val; - + while (!s.empty()) { val = s.NextTok(sep); - + if (val.empty()) { continue; // && case - } - + } + key = val.NextTok(sepKeyVal); - - if (addAll || val.IsInited()) { + + if (addAll || val.IsInited()) { f(key, val); // includes empty keys - } - } -} + } + } +} diff --git a/library/cpp/string_utils/url/url.cpp b/library/cpp/string_utils/url/url.cpp index 0e9e482ed6..85f4ac5d69 100644 --- a/library/cpp/string_utils/url/url.cpp +++ b/library/cpp/string_utils/url/url.cpp @@ -1,5 +1,5 @@ -#include "url.h" - +#include "url.h" + #include <util/string/cast.h> #include <util/string/util.h> #include <util/string/cstriter.h> @@ -11,7 +11,7 @@ #include <util/generic/algorithm.h> #include <util/generic/hash_set.h> #include <util/generic/yexception.h> -#include <util/generic/singleton.h> +#include <util/generic/singleton.h> #include <cstdlib> @@ -26,8 +26,8 @@ namespace { size_t MySize; explicit TKnownSize(size_t sz) : MySize(sz) - { - } + { + } bool Has(size_t sz) const { return sz <= MySize; } @@ -43,9 +43,9 @@ namespace { } template <typename TChar, typename TBounds> - inline size_t GetHttpPrefixSizeImpl(const TChar* url, const TBounds& urlSize, bool ignorehttps) { - const TChar httpPrefix[] = {'h', 't', 't', 'p', ':', '/', '/', 0}; - const TChar httpsPrefix[] = {'h', 't', 't', 'p', 's', ':', '/', '/', 0}; + inline size_t GetHttpPrefixSizeImpl(const TChar* url, const TBounds& urlSize, bool ignorehttps) { + const TChar httpPrefix[] = {'h', 't', 't', 'p', ':', '/', '/', 0}; + const TChar httpsPrefix[] = {'h', 't', 't', 'p', 's', ':', '/', '/', 0}; if (urlSize.Has(7) && Compare1Case2(url, httpPrefix, 7) == 0) return 7; if (!ignorehttps && urlSize.Has(8) && Compare1Case2(url, httpsPrefix, 8) == 0) @@ -98,20 +98,20 @@ TWtringBuf CutHttpPrefix(const TWtringBuf url, bool ignorehttps) noexcept { } size_t GetSchemePrefixSize(const TStringBuf url) noexcept { - struct TDelim: public str_spn { - inline TDelim() - : str_spn("!-/:-@[-`{|}", true) - { - } - }; - - const auto& delim = *Singleton<TDelim>(); + struct TDelim: public str_spn { + inline TDelim() + : str_spn("!-/:-@[-`{|}", true) + { + } + }; + + const auto& delim = *Singleton<TDelim>(); const char* n = delim.brk(url.data(), url.end()); - - if (n + 2 >= url.end() || *n != ':' || n[1] != '/' || n[2] != '/') { + + if (n + 2 >= url.end() || *n != ':' || n[1] != '/' || n[2] != '/') { return 0; - } - + } + return n + 3 - url.begin(); } @@ -123,27 +123,27 @@ TStringBuf CutSchemePrefix(const TStringBuf url) noexcept { return url.Tail(GetSchemePrefixSize(url)); } -template <bool KeepPort> +template <bool KeepPort> static inline TStringBuf GetHostAndPortImpl(const TStringBuf url) { TStringBuf urlNoScheme = url; - + urlNoScheme.Skip(GetHttpPrefixSize(url)); - struct TDelim: public str_spn { - inline TDelim() - : str_spn(KeepPort ? "/;?#" : "/:;?#") - { - } - }; - - const auto& nonHostCharacters = *Singleton<TDelim>(); + struct TDelim: public str_spn { + inline TDelim() + : str_spn(KeepPort ? "/;?#" : "/:;?#") + { + } + }; + + const auto& nonHostCharacters = *Singleton<TDelim>(); const char* firstNonHostCharacter = nonHostCharacters.brk(urlNoScheme.begin(), urlNoScheme.end()); - - if (firstNonHostCharacter != urlNoScheme.end()) { + + if (firstNonHostCharacter != urlNoScheme.end()) { return urlNoScheme.substr(0, firstNonHostCharacter - urlNoScheme.data()); - } - - return urlNoScheme; + } + + return urlNoScheme; } TStringBuf GetHost(const TStringBuf url) noexcept { @@ -337,27 +337,27 @@ TString AddSchemePrefix(const TString& url, TStringBuf scheme) { #define X(c) (c >= 'A' ? ((c & 0xdf) - 'A') + 10 : (c - '0')) static inline int x2c(unsigned char* x) { - if (!IsAsciiHex(x[0]) || !IsAsciiHex(x[1])) + if (!IsAsciiHex(x[0]) || !IsAsciiHex(x[1])) return -1; return X(x[0]) * 16 + X(x[1]); } #undef X -static inline int Unescape(char* str) { - char *to, *from; +static inline int Unescape(char* str) { + char *to, *from; int dlen = 0; if ((str = strchr(str, '%')) == nullptr) return dlen; for (to = str, from = str; *from; from++, to++) { if ((*to = *from) == '%') { - int c = x2c((unsigned char*)from + 1); + int c = x2c((unsigned char*)from + 1); *to = char((c > 0) ? c : '0'); from += 2; dlen += 2; } } - *to = 0; /* terminate it at the new length */ + *to = 0; /* terminate it at the new length */ return dlen; } @@ -379,8 +379,8 @@ size_t NormalizeHostName(char* dest, const TStringBuf source, size_t dest_size, char buf[8] = ":"; size_t buflen = 1 + ToString(defport, buf + 1, sizeof(buf) - 2); buf[buflen] = '\0'; - char* ptr = strstr(dest, buf); - if (ptr && ptr[buflen] == 0) { + char* ptr = strstr(dest, buf); + if (ptr && ptr[buflen] == 0) { len -= buflen; *ptr = 0; } diff --git a/library/cpp/string_utils/url/url_ut.cpp b/library/cpp/string_utils/url/url_ut.cpp index bf12597c98..1588013893 100644 --- a/library/cpp/string_utils/url/url_ut.cpp +++ b/library/cpp/string_utils/url/url_ut.cpp @@ -1,5 +1,5 @@ -#include "url.h" - +#include "url.h" + #include <util/string/cast.h> #include <library/cpp/testing/unittest/registar.h> @@ -100,7 +100,7 @@ Y_UNIT_TEST_SUITE(TUtilUrlTest) { UNIT_ASSERT_VALUES_EQUAL("ya.ru/zzz", CutHttpPrefix("http://ya.ru/zzz", true)); UNIT_ASSERT_VALUES_EQUAL("ya.ru/zzz", CutHttpPrefix("https://ya.ru/zzz")); UNIT_ASSERT_VALUES_EQUAL("https://ya.ru/zzz", CutHttpPrefix("https://ya.ru/zzz", true)); - UNIT_ASSERT_VALUES_EQUAL("", CutHttpPrefix("https://")); // is that right? + UNIT_ASSERT_VALUES_EQUAL("", CutHttpPrefix("https://")); // is that right? UNIT_ASSERT_VALUES_EQUAL("https://", CutHttpPrefix("https://", true)); // is that right? } |