aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/string_utils/relaxed_escaper
diff options
context:
space:
mode:
author: Anton Samokhvalov <pg83@yandex.ru> 2022-02-10 16:45:15 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:15 +0300
commit: 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch)
tree: da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/string_utils/relaxed_escaper
parent: 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff)
download: ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/string_utils/relaxed_escaper')
-rw-r--r-- library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp 2
-rw-r--r-- library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h 326
-rw-r--r-- library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp 34
-rw-r--r-- library/cpp/string_utils/relaxed_escaper/ut/ya.make 16
-rw-r--r-- library/cpp/string_utils/relaxed_escaper/ya.make 18
5 files changed, 198 insertions, 198 deletions
diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp
index ac624dca85..8a04323fac 100644
--- a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp
+++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.cpp
@@ -1 +1 @@
-#include "relaxed_escaper.h"
+#include "relaxed_escaper.h"
diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h
index d7ea7c1259..999a75b601 100644
--- a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h
+++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h
@@ -2,207 +2,207 @@
#include <util/stream/output.h>
#include <util/string/escape.h>
-#include <util/memory/tempbuf.h>
-#include <util/generic/strbuf.h>
+#include <util/memory/tempbuf.h>
+#include <util/generic/strbuf.h>
namespace NEscJ {
- // almost copypaste from util/string/escape.h
- // todo: move there (note difference in IsPrintable and handling of string)
-
- inline char HexDigit(char value) {
- if (value < 10)
- return '0' + value;
- else
- return 'A' + value - 10;
- }
-
- inline char OctDigit(char value) {
+ // almost copypaste from util/string/escape.h
+ // todo: move there (note difference in IsPrintable and handling of string)
+
+ inline char HexDigit(char value) {
+ if (value < 10)
+ return '0' + value;
+ else
+ return 'A' + value - 10;
+ }
+
+ inline char OctDigit(char value) {
return '0' + value;
- }
-
- inline bool IsUTF8(ui8 c) {
- return c < 0xf5 && c != 0xC0 && c != 0xC1;
- }
-
- inline bool IsControl(ui8 c) {
- return c < 0x20 || c == 0x7f;
- }
-
- inline bool IsPrintable(ui8 c) {
- return IsUTF8(c) && !IsControl(c);
- }
-
- inline bool IsHexDigit(ui8 c) {
- return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
- }
-
- inline bool IsOctDigit(ui8 c) {
- return c >= '0' && c <= '7';
- }
-
- struct TEscapeUtil {
- static const size_t ESCAPE_C_BUFFER_SIZE = 6;
-
- template <bool asunicode>
- static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) {
- // (1) Printable characters go as-is, except backslash and double quote.
- // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible).
- // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal.
- if (safe.find(c) != TStringBuf::npos) {
- r[0] = c;
- return 1;
- }
- if (c == '\"') {
- r[0] = '\\';
- r[1] = '\"';
- return 2;
- } else if (c == '\\') {
- r[0] = '\\';
- r[1] = '\\';
- return 2;
- } else if (IsPrintable(c) && unsafe.find(c) == TStringBuf::npos) {
- r[0] = c;
- return 1;
- } else if (c == '\b') {
- r[0] = '\\';
- r[1] = 'b';
- return 2;
- } else if (c == '\f') {
- r[0] = '\\';
- r[1] = 'f';
- return 2;
- } else if (c == '\r') {
- r[0] = '\\';
- r[1] = 'r';
- return 2;
- } else if (c == '\n') {
- r[0] = '\\';
- r[1] = 'n';
- return 2;
- } else if (c == '\t') {
- r[0] = '\\';
- r[1] = 't';
- return 2;
- } else if (asunicode && IsUTF8(c)) { // utf8 controls escape for json
- r[0] = '\\';
- r[1] = 'u';
- r[2] = '0';
- r[3] = '0';
- r[4] = HexDigit((c & 0xF0) >> 4);
- r[5] = HexDigit((c & 0x0F) >> 0);
- return 6;
- } else if (c < 8 && !IsOctDigit(next)) {
- r[0] = '\\';
- r[1] = OctDigit(c);
- return 2;
- } else if (!IsHexDigit(next)) {
- r[0] = '\\';
- r[1] = 'x';
- r[2] = HexDigit((c & 0xF0) >> 4);
- r[3] = HexDigit((c & 0x0F) >> 0);
- return 4;
- } else {
- r[0] = '\\';
- r[1] = OctDigit((c & 0700) >> 6);
- r[2] = OctDigit((c & 0070) >> 3);
- r[3] = OctDigit((c & 0007) >> 0);
- return 4;
- }
- }
-
- static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) {
- return EscapeJ<false>(c, next, r, safe, unsafe);
+ }
+
+ inline bool IsUTF8(ui8 c) {
+ return c < 0xf5 && c != 0xC0 && c != 0xC1;
+ }
+
+ inline bool IsControl(ui8 c) {
+ return c < 0x20 || c == 0x7f;
+ }
+
+ inline bool IsPrintable(ui8 c) {
+ return IsUTF8(c) && !IsControl(c);
+ }
+
+ inline bool IsHexDigit(ui8 c) {
+ return (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f');
+ }
+
+ inline bool IsOctDigit(ui8 c) {
+ return c >= '0' && c <= '7';
+ }
+
+ struct TEscapeUtil {
+ static const size_t ESCAPE_C_BUFFER_SIZE = 6;
+
+ template <bool asunicode>
+ static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) {
+ // (1) Printable characters go as-is, except backslash and double quote.
+ // (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible).
+ // (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal.
+ if (safe.find(c) != TStringBuf::npos) {
+ r[0] = c;
+ return 1;
+ }
+ if (c == '\"') {
+ r[0] = '\\';
+ r[1] = '\"';
+ return 2;
+ } else if (c == '\\') {
+ r[0] = '\\';
+ r[1] = '\\';
+ return 2;
+ } else if (IsPrintable(c) && unsafe.find(c) == TStringBuf::npos) {
+ r[0] = c;
+ return 1;
+ } else if (c == '\b') {
+ r[0] = '\\';
+ r[1] = 'b';
+ return 2;
+ } else if (c == '\f') {
+ r[0] = '\\';
+ r[1] = 'f';
+ return 2;
+ } else if (c == '\r') {
+ r[0] = '\\';
+ r[1] = 'r';
+ return 2;
+ } else if (c == '\n') {
+ r[0] = '\\';
+ r[1] = 'n';
+ return 2;
+ } else if (c == '\t') {
+ r[0] = '\\';
+ r[1] = 't';
+ return 2;
+ } else if (asunicode && IsUTF8(c)) { // utf8 controls escape for json
+ r[0] = '\\';
+ r[1] = 'u';
+ r[2] = '0';
+ r[3] = '0';
+ r[4] = HexDigit((c & 0xF0) >> 4);
+ r[5] = HexDigit((c & 0x0F) >> 0);
+ return 6;
+ } else if (c < 8 && !IsOctDigit(next)) {
+ r[0] = '\\';
+ r[1] = OctDigit(c);
+ return 2;
+ } else if (!IsHexDigit(next)) {
+ r[0] = '\\';
+ r[1] = 'x';
+ r[2] = HexDigit((c & 0xF0) >> 4);
+ r[3] = HexDigit((c & 0x0F) >> 0);
+ return 4;
+ } else {
+ r[0] = '\\';
+ r[1] = OctDigit((c & 0700) >> 6);
+ r[2] = OctDigit((c & 0070) >> 3);
+ r[3] = OctDigit((c & 0007) >> 0);
+ return 4;
+ }
+ }
+
+ static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) {
+ return EscapeJ<false>(c, next, r, safe, unsafe);
}
- };
+ };
- inline size_t SuggestBuffer(size_t len) {
- return len * TEscapeUtil::ESCAPE_C_BUFFER_SIZE;
+ inline size_t SuggestBuffer(size_t len) {
+ return len * TEscapeUtil::ESCAPE_C_BUFFER_SIZE;
}
- template <bool tounicode>
- inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
- char* out0 = out;
- char buffer[TEscapeUtil::ESCAPE_C_BUFFER_SIZE];
+ template <bool tounicode>
+ inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+ char* out0 = out;
+ char buffer[TEscapeUtil::ESCAPE_C_BUFFER_SIZE];
- size_t i, j;
- for (i = 0, j = 0; i < len; ++i) {
- size_t rlen = TEscapeUtil::EscapeJ<tounicode>(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe);
+ size_t i, j;
+ for (i = 0, j = 0; i < len; ++i) {
+ size_t rlen = TEscapeUtil::EscapeJ<tounicode>(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe);
- if (rlen > 1) {
- strncpy(out, str + j, i - j);
- out += i - j;
- j = i + 1;
+ if (rlen > 1) {
+ strncpy(out, str + j, i - j);
+ out += i - j;
+ j = i + 1;
- strncpy(out, buffer, rlen);
- out += rlen;
- }
- }
+ strncpy(out, buffer, rlen);
+ out += rlen;
+ }
+ }
- if (j > 0) {
- strncpy(out, str + j, len - j);
- out += len - j;
- } else {
- strncpy(out, str, len);
- out += len;
+ if (j > 0) {
+ strncpy(out, str + j, len - j);
+ out += len - j;
+ } else {
+ strncpy(out, str, len);
+ out += len;
}
- return out - out0;
+ return out - out0;
}
- template <bool quote, bool tounicode>
+ template <bool quote, bool tounicode>
inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
TTempBuf b(SuggestBuffer(in.size()) + 2);
- if (quote)
- b.Append("\"", 1);
+ if (quote)
+ b.Append("\"", 1);
b.Proceed(EscapeJ<tounicode>(in.data(), in.size(), b.Current(), safe, unsafe));
- if (quote)
- b.Append("\"", 1);
+ if (quote)
+ b.Append("\"", 1);
- out.Write(b.Data(), b.Filled());
- }
+ out.Write(b.Data(), b.Filled());
+ }
- template <bool quote, bool tounicode>
+ template <bool quote, bool tounicode>
inline void EscapeJ(TStringBuf in, TString& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
TTempBuf b(SuggestBuffer(in.size()) + 2);
- if (quote)
- b.Append("\"", 1);
+ if (quote)
+ b.Append("\"", 1);
b.Proceed(EscapeJ<tounicode>(in.data(), in.size(), b.Current(), safe, unsafe));
- if (quote)
- b.Append("\"", 1);
+ if (quote)
+ b.Append("\"", 1);
- out.append(b.Data(), b.Filled());
- }
+ out.append(b.Data(), b.Filled());
+ }
- template <bool quote, bool tounicode>
+ template <bool quote, bool tounicode>
inline TString EscapeJ(TStringBuf in, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
TString s;
- EscapeJ<quote, tounicode>(in, s, safe, unsafe);
- return s;
- }
+ EscapeJ<quote, tounicode>(in, s, safe, unsafe);
+ return s;
+ }
- // If the template parameter "tounicode" is ommited, then use the default value false
- inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
- return EscapeJ<false>(str, len, out, safe, unsafe);
- }
+ // If the template parameter "tounicode" is omitted, then use the default value false
+ inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+ return EscapeJ<false>(str, len, out, safe, unsafe);
+ }
- template <bool quote>
+ template <bool quote>
inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
- EscapeJ<quote, false>(in, out, safe, unsafe);
- }
+ EscapeJ<quote, false>(in, out, safe, unsafe);
+ }
- template <bool quote>
+ template <bool quote>
inline void EscapeJ(TStringBuf in, TString& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
- EscapeJ<quote, false>(in, out, safe, unsafe);
- }
+ EscapeJ<quote, false>(in, out, safe, unsafe);
+ }
- template <bool quote>
+ template <bool quote>
inline TString EscapeJ(TStringBuf in, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
- return EscapeJ<quote, false>(in, safe, unsafe);
- }
+ return EscapeJ<quote, false>(in, safe, unsafe);
+ }
}
diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp
index 768555ea3a..496c30ab01 100644
--- a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp
+++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper_ut.cpp
@@ -10,19 +10,19 @@ static const TStringBuf CommonTestData[] = {
RESC_FIXED_STR("http://ya.ru/\\0"), RESC_FIXED_STR("http://ya.ru/\0"),
RESC_FIXED_STR("http://ya.ru/\\0\\0"), RESC_FIXED_STR("http://ya.ru/\0\0"),
- RESC_FIXED_STR("http://ya.ru/\\0\\0000"), RESC_FIXED_STR("http://ya.ru/\0\0"
- "0"),
- RESC_FIXED_STR("http://ya.ru/\\0\\0001"), RESC_FIXED_STR("http://ya.ru/\0\x00"
- "1"),
+ RESC_FIXED_STR("http://ya.ru/\\0\\0000"), RESC_FIXED_STR("http://ya.ru/\0\0"
+ "0"),
+ RESC_FIXED_STR("http://ya.ru/\\0\\0001"), RESC_FIXED_STR("http://ya.ru/\0\x00"
+ "1"),
- RESC_FIXED_STR("\\2\\4\\00678"), RESC_FIXED_STR("\2\4\6"
- "78"),
+ RESC_FIXED_STR("\\2\\4\\00678"), RESC_FIXED_STR("\2\4\6"
+ "78"),
RESC_FIXED_STR("\\2\\4\\689"), RESC_FIXED_STR("\2\4\689"),
RESC_FIXED_STR("\\\"Hello\\\", Alice said."), RESC_FIXED_STR("\"Hello\", Alice said."),
RESC_FIXED_STR("Slash\\\\dash!"), RESC_FIXED_STR("Slash\\dash!"),
RESC_FIXED_STR("There\\nare\\r\\nnewlines."), RESC_FIXED_STR("There\nare\r\nnewlines."),
- RESC_FIXED_STR("There\\tare\\ttabs."), RESC_FIXED_STR("There\tare\ttabs.")};
+ RESC_FIXED_STR("There\\tare\\ttabs."), RESC_FIXED_STR("There\tare\ttabs.")};
#undef RESC_FIXED_STR
Y_UNIT_TEST_SUITE(TRelaxedEscaperTest) {
@@ -52,15 +52,15 @@ Y_UNIT_TEST_SUITE(TRelaxedEscaperTest) {
UNIT_ASSERT_VALUES_EQUAL("\"\\xFF\"", EscapeJ<true>("\xFF"));
UNIT_ASSERT_VALUES_EQUAL("\xFF", UnescapeC("\\xFF"));
- UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeJ<false>("\xff"
- "f"));
- UNIT_ASSERT_VALUES_EQUAL("\xff"
- "f",
- UnescapeC("\\377f"));
- UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeJ<false>("\xff"
- "g"));
- UNIT_ASSERT_VALUES_EQUAL("\xff"
- "g",
- UnescapeC("\\xFFg"));
+ UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeJ<false>("\xff"
+ "f"));
+ UNIT_ASSERT_VALUES_EQUAL("\xff"
+ "f",
+ UnescapeC("\\377f"));
+ UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeJ<false>("\xff"
+ "g"));
+ UNIT_ASSERT_VALUES_EQUAL("\xff"
+ "g",
+ UnescapeC("\\xFFg"));
}
}
diff --git a/library/cpp/string_utils/relaxed_escaper/ut/ya.make b/library/cpp/string_utils/relaxed_escaper/ut/ya.make
index 7ebd393c48..55a9611de1 100644
--- a/library/cpp/string_utils/relaxed_escaper/ut/ya.make
+++ b/library/cpp/string_utils/relaxed_escaper/ut/ya.make
@@ -1,9 +1,9 @@
UNITTEST_FOR(library/cpp/string_utils/relaxed_escaper)
-
-OWNER(velavokr)
-
-SRCS(
- relaxed_escaper_ut.cpp
-)
-
-END()
+
+OWNER(velavokr)
+
+SRCS(
+ relaxed_escaper_ut.cpp
+)
+
+END()
diff --git a/library/cpp/string_utils/relaxed_escaper/ya.make b/library/cpp/string_utils/relaxed_escaper/ya.make
index 3f0fa5bc07..4e0364c633 100644
--- a/library/cpp/string_utils/relaxed_escaper/ya.make
+++ b/library/cpp/string_utils/relaxed_escaper/ya.make
@@ -1,9 +1,9 @@
-LIBRARY()
-
-OWNER(velavokr)
-
-SRCS(
- relaxed_escaper.cpp
-)
-
-END()
+LIBRARY()
+
+OWNER(velavokr)
+
+SRCS(
+ relaxed_escaper.cpp
+)
+
+END()