diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/string_utils/relaxed_escaper | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/string_utils/relaxed_escaper')
5 files changed, 198 insertions, 198 deletions
diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp index ac624dca85..8a04323fac 100644 --- a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp +++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp @@ -1 +1 @@ -#include "relaxed_escaper.h" +#include "relaxed_escaper.h" diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h index d7ea7c1259..999a75b601 100644 --- a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h +++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h @@ -2,207 +2,207 @@ #include <util/stream/output.h> #include <util/string/escape.h> -#include <util/memory/tempbuf.h> -#include <util/generic/strbuf.h> +#include <util/memory/tempbuf.h> +#include <util/generic/strbuf.h> namespace NEscJ { - // almost copypaste from util/string/escape.h - // todo: move there (note difference in IsPrintable and handling of string) - - inline char HexDigit(char value) { - if (value < 10) - return '0' + value; - else - return 'A' + value - 10; - } - - inline char OctDigit(char value) { + // almost copypaste from util/string/escape.h + // todo: move there (note difference in IsPrintable and handling of string) + + inline char HexDigit(char value) { + if (value < 10) + return '0' + value; + else + return 'A' + value - 10; + } + + inline char OctDigit(char value) { return '0' + value; - } - - inline bool IsUTF8(ui8 c) { - return c < 0xf5 && c != 0xC0 && c != 0xC1; - } - - inline bool IsControl(ui8 c) { - return c < 0x20 || c == 0x7f; - } - - inline bool IsPrintable(ui8 c) { - return IsUTF8(c) && !IsControl(c); - } - - inline bool IsHexDigit(ui8 c) { - return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); - } - - inline bool IsOctDigit(ui8 c) { - return c >= '0' && c <= '7'; - } - - struct TEscapeUtil { - static const size_t ESCAPE_C_BUFFER_SIZE = 6; - - template <bool asunicode> - static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) { - // (1) Printable characters go as-is, except backslash and double quote. - // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). - // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. - if (safe.find(c) != TStringBuf::npos) { - r[0] = c; - return 1; - } - if (c == '\"') { - r[0] = '\\'; - r[1] = '\"'; - return 2; - } else if (c == '\\') { - r[0] = '\\'; - r[1] = '\\'; - return 2; - } else if (IsPrintable(c) && unsafe.find(c) == TStringBuf::npos) { - r[0] = c; - return 1; - } else if (c == '\b') { - r[0] = '\\'; - r[1] = 'b'; - return 2; - } else if (c == '\f') { - r[0] = '\\'; - r[1] = 'f'; - return 2; - } else if (c == '\r') { - r[0] = '\\'; - r[1] = 'r'; - return 2; - } else if (c == '\n') { - r[0] = '\\'; - r[1] = 'n'; - return 2; - } else if (c == '\t') { - r[0] = '\\'; - r[1] = 't'; - return 2; - } else if (asunicode && IsUTF8(c)) { // utf8 controls escape for json - r[0] = '\\'; - r[1] = 'u'; - r[2] = '0'; - r[3] = '0'; - r[4] = HexDigit((c & 0xF0) >> 4); - r[5] = HexDigit((c & 0x0F) >> 0); - return 6; - } else if (c < 8 && !IsOctDigit(next)) { - r[0] = '\\'; - r[1] = OctDigit(c); - return 2; - } else if (!IsHexDigit(next)) { - r[0] = '\\'; - r[1] = 'x'; - r[2] = HexDigit((c & 0xF0) >> 4); - r[3] = HexDigit((c & 0x0F) >> 0); - return 4; - } else { - r[0] = '\\'; - r[1] = OctDigit((c & 0700) >> 6); - r[2] = OctDigit((c & 0070) >> 3); - r[3] = OctDigit((c & 0007) >> 0); - return 4; - } - } - - static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) { - return EscapeJ<false>(c, next, r, safe, unsafe); + } + + inline bool IsUTF8(ui8 c) { + return c < 0xf5 && c != 0xC0 && c != 0xC1; + } + + inline bool IsControl(ui8 c) { + return c < 0x20 || c == 0x7f; + } + + inline bool IsPrintable(ui8 c) { + return IsUTF8(c) && !IsControl(c); + } + + inline bool IsHexDigit(ui8 c) { + return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f'); + } + + inline bool IsOctDigit(ui8 c) { + return c >= '0' && c <= '7'; + } + + struct TEscapeUtil { + static const size_t ESCAPE_C_BUFFER_SIZE = 6; + + template <bool asunicode> + static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) { + // (1) Printable characters go as-is, except backslash and double quote. + // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible). + // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal. + if (safe.find(c) != TStringBuf::npos) { + r[0] = c; + return 1; + } + if (c == '\"') { + r[0] = '\\'; + r[1] = '\"'; + return 2; + } else if (c == '\\') { + r[0] = '\\'; + r[1] = '\\'; + return 2; + } else if (IsPrintable(c) && unsafe.find(c) == TStringBuf::npos) { + r[0] = c; + return 1; + } else if (c == '\b') { + r[0] = '\\'; + r[1] = 'b'; + return 2; + } else if (c == '\f') { + r[0] = '\\'; + r[1] = 'f'; + return 2; + } else if (c == '\r') { + r[0] = '\\'; + r[1] = 'r'; + return 2; + } else if (c == '\n') { + r[0] = '\\'; + r[1] = 'n'; + return 2; + } else if (c == '\t') { + r[0] = '\\'; + r[1] = 't'; + return 2; + } else if (asunicode && IsUTF8(c)) { // utf8 controls escape for json + r[0] = '\\'; + r[1] = 'u'; + r[2] = '0'; + r[3] = '0'; + r[4] = HexDigit((c & 0xF0) >> 4); + r[5] = HexDigit((c & 0x0F) >> 0); + return 6; + } else if (c < 8 && !IsOctDigit(next)) { + r[0] = '\\'; + r[1] = OctDigit(c); + return 2; + } else if (!IsHexDigit(next)) { + r[0] = '\\'; + r[1] = 'x'; + r[2] = HexDigit((c & 0xF0) >> 4); + r[3] = HexDigit((c & 0x0F) >> 0); + return 4; + } else { + r[0] = '\\'; + r[1] = OctDigit((c & 0700) >> 6); + r[2] = OctDigit((c & 0070) >> 3); + r[3] = OctDigit((c & 0007) >> 0); + return 4; + } + } + + static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) { + return EscapeJ<false>(c, next, r, safe, unsafe); } - }; + }; - inline size_t SuggestBuffer(size_t len) { - return len * TEscapeUtil::ESCAPE_C_BUFFER_SIZE; + inline size_t SuggestBuffer(size_t len) { + return len * TEscapeUtil::ESCAPE_C_BUFFER_SIZE; } - template <bool tounicode> - inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { - char* out0 = out; - char buffer[TEscapeUtil::ESCAPE_C_BUFFER_SIZE]; + template <bool tounicode> + inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { + char* out0 = out; + char buffer[TEscapeUtil::ESCAPE_C_BUFFER_SIZE]; - size_t i, j; - for (i = 0, j = 0; i < len; ++i) { - size_t rlen = TEscapeUtil::EscapeJ<tounicode>(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe); + size_t i, j; + for (i = 0, j = 0; i < len; ++i) { + size_t rlen = TEscapeUtil::EscapeJ<tounicode>(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe); - if (rlen > 1) { - strncpy(out, str + j, i - j); - out += i - j; - j = i + 1; + if (rlen > 1) { + strncpy(out, str + j, i - j); + out += i - j; + j = i + 1; - strncpy(out, buffer, rlen); - out += rlen; - } - } + strncpy(out, buffer, rlen); + out += rlen; + } + } - if (j > 0) { - strncpy(out, str + j, len - j); - out += len - j; - } else { - strncpy(out, str, len); - out += len; + if (j > 0) { + strncpy(out, str + j, len - j); + out += len - j; + } else { + strncpy(out, str, len); + out += len; } - return out - out0; + return out - out0; } - template <bool quote, bool tounicode> + template <bool quote, bool tounicode> inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { TTempBuf b(SuggestBuffer(in.size()) + 2); - if (quote) - b.Append("\"", 1); + if (quote) + b.Append("\"", 1); b.Proceed(EscapeJ<tounicode>(in.data(), in.size(), b.Current(), safe, unsafe)); - if (quote) - b.Append("\"", 1); + if (quote) + b.Append("\"", 1); - out.Write(b.Data(), b.Filled()); - } + out.Write(b.Data(), b.Filled()); + } - template <bool quote, bool tounicode> + template <bool quote, bool tounicode> inline void EscapeJ(TStringBuf in, TString& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { TTempBuf b(SuggestBuffer(in.size()) + 2); - if (quote) - b.Append("\"", 1); + if (quote) + b.Append("\"", 1); b.Proceed(EscapeJ<tounicode>(in.data(), in.size(), b.Current(), safe, unsafe)); - if (quote) - b.Append("\"", 1); + if (quote) + b.Append("\"", 1); - out.append(b.Data(), b.Filled()); - } + out.append(b.Data(), b.Filled()); + } - template <bool quote, bool tounicode> + template <bool quote, bool tounicode> inline TString EscapeJ(TStringBuf in, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { TString s; - EscapeJ<quote, tounicode>(in, s, safe, unsafe); - return s; - } + EscapeJ<quote, tounicode>(in, s, safe, unsafe); + return s; + } - // If the template parameter "tounicode" is ommited, then use the default value false - inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { - return EscapeJ<false>(str, len, out, safe, unsafe); - } + // If the template parameter "tounicode" is ommited, then use the default value false + inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { + return EscapeJ<false>(str, len, out, safe, unsafe); + } - template <bool quote> + template <bool quote> inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { - EscapeJ<quote, false>(in, out, safe, unsafe); - } + EscapeJ<quote, false>(in, out, safe, unsafe); + } - template <bool quote> + template <bool quote> inline void EscapeJ(TStringBuf in, TString& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { - EscapeJ<quote, false>(in, out, safe, unsafe); - } + EscapeJ<quote, false>(in, out, safe, unsafe); + } - template <bool quote> + template <bool quote> inline TString EscapeJ(TStringBuf in, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) { - return EscapeJ<quote, false>(in, safe, unsafe); - } + return EscapeJ<quote, false>(in, safe, unsafe); + } } diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp index 768555ea3a..496c30ab01 100644 --- a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp +++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp @@ -10,19 +10,19 @@ static const TStringBuf CommonTestData[] = { RESC_FIXED_STR("http://ya.ru/\\0"), RESC_FIXED_STR("http://ya.ru/\0"), RESC_FIXED_STR("http://ya.ru/\\0\\0"), RESC_FIXED_STR("http://ya.ru/\0\0"), - RESC_FIXED_STR("http://ya.ru/\\0\\0000"), RESC_FIXED_STR("http://ya.ru/\0\0" - "0"), - RESC_FIXED_STR("http://ya.ru/\\0\\0001"), RESC_FIXED_STR("http://ya.ru/\0\x00" - "1"), + RESC_FIXED_STR("http://ya.ru/\\0\\0000"), RESC_FIXED_STR("http://ya.ru/\0\0" + "0"), + RESC_FIXED_STR("http://ya.ru/\\0\\0001"), RESC_FIXED_STR("http://ya.ru/\0\x00" + "1"), - RESC_FIXED_STR("\\2\\4\\00678"), RESC_FIXED_STR("\2\4\6" - "78"), + RESC_FIXED_STR("\\2\\4\\00678"), RESC_FIXED_STR("\2\4\6" + "78"), RESC_FIXED_STR("\\2\\4\\689"), RESC_FIXED_STR("\2\4\689"), RESC_FIXED_STR("\\\"Hello\\\", Alice said."), RESC_FIXED_STR("\"Hello\", Alice said."), RESC_FIXED_STR("Slash\\\\dash!"), RESC_FIXED_STR("Slash\\dash!"), RESC_FIXED_STR("There\\nare\\r\\nnewlines."), RESC_FIXED_STR("There\nare\r\nnewlines."), - RESC_FIXED_STR("There\\tare\\ttabs."), RESC_FIXED_STR("There\tare\ttabs.")}; + RESC_FIXED_STR("There\\tare\\ttabs."), RESC_FIXED_STR("There\tare\ttabs.")}; #undef RESC_FIXED_STR Y_UNIT_TEST_SUITE(TRelaxedEscaperTest) { @@ -52,15 +52,15 @@ Y_UNIT_TEST_SUITE(TRelaxedEscaperTest) { UNIT_ASSERT_VALUES_EQUAL("\"\\xFF\"", EscapeJ<true>("\xFF")); UNIT_ASSERT_VALUES_EQUAL("\xFF", UnescapeC("\\xFF")); - UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeJ<false>("\xff" - "f")); - UNIT_ASSERT_VALUES_EQUAL("\xff" - "f", - UnescapeC("\\377f")); - UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeJ<false>("\xff" - "g")); - UNIT_ASSERT_VALUES_EQUAL("\xff" - "g", - UnescapeC("\\xFFg")); + UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeJ<false>("\xff" + "f")); + UNIT_ASSERT_VALUES_EQUAL("\xff" + "f", + UnescapeC("\\377f")); + UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeJ<false>("\xff" + "g")); + UNIT_ASSERT_VALUES_EQUAL("\xff" + "g", + UnescapeC("\\xFFg")); } } diff --git a/library/cpp/string_utils/relaxed_escaper/ut/ya.make b/library/cpp/string_utils/relaxed_escaper/ut/ya.make index 7ebd393c48..55a9611de1 100644 --- a/library/cpp/string_utils/relaxed_escaper/ut/ya.make +++ b/library/cpp/string_utils/relaxed_escaper/ut/ya.make @@ -1,9 +1,9 @@ UNITTEST_FOR(library/cpp/string_utils/relaxed_escaper) - -OWNER(velavokr) - -SRCS( - relaxed_escaper_ut.cpp -) - -END() + +OWNER(velavokr) + +SRCS( + relaxed_escaper_ut.cpp +) + +END() diff --git a/library/cpp/string_utils/relaxed_escaper/ya.make b/library/cpp/string_utils/relaxed_escaper/ya.make index 3f0fa5bc07..4e0364c633 100644 --- a/library/cpp/string_utils/relaxed_escaper/ya.make +++ b/library/cpp/string_utils/relaxed_escaper/ya.make @@ -1,9 +1,9 @@ -LIBRARY() - -OWNER(velavokr) - -SRCS( - relaxed_escaper.cpp -) - -END() +LIBRARY() + +OWNER(velavokr) + +SRCS( + relaxed_escaper.cpp +) + +END() |