aboutsummaryrefslogtreecommitdiffstats
path: root/util/string/escape_ut.cpp
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /util/string/escape_ut.cpp
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'util/string/escape_ut.cpp')
-rw-r--r--util/string/escape_ut.cpp148
1 files changed, 148 insertions, 0 deletions
diff --git a/util/string/escape_ut.cpp b/util/string/escape_ut.cpp
new file mode 100644
index 0000000000..cd38ecffd3
--- /dev/null
+++ b/util/string/escape_ut.cpp
@@ -0,0 +1,148 @@
+#include "escape.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/string.h>
+#include <util/charset/wide.h>
+
+using namespace std::string_view_literals;
+
+namespace {
+ struct TExample {
+ TString Expected;
+ TString Source;
+
+ TExample(const TStringBuf expected, const TStringBuf source)
+ : Expected{expected}
+ , Source{source}
+ {
+ }
+ };
+}
+
+static const TExample CommonTestData[] = {
+ // Should be valid UTF-8.
+ {"http://ya.ru/", "http://ya.ru/"},
+ {"http://ya.ru/\\x17\\n", "http://ya.ru/\x17\n"},
+
+ {"http://ya.ru/\\0", "http://ya.ru/\0"sv},
+ {"http://ya.ru/\\0\\0", "http://ya.ru/\0\0"sv},
+ {"http://ya.ru/\\0\\0000", "http://ya.ru/\0\0"
+ "0"sv},
+ {"http://ya.ru/\\0\\0001", "http://ya.ru/\0\x00"
+ "1"sv},
+
+ {R"(\2\4\00678)", "\2\4\6"
+ "78"sv}, // \6 -> \006 because next char '7' is "octal"
+ {R"(\2\4\689)", "\2\4\6"
+ "89"sv}, // \6 -> \6 because next char '8' is not "octal"
+
+ {R"(\"Hello\", Alice said.)", "\"Hello\", Alice said."},
+ {"Slash\\\\dash!", "Slash\\dash!"},
+ {R"(There\nare\r\nnewlines.)", "There\nare\r\nnewlines."},
+ {"There\\tare\\ttabs.", "There\tare\ttabs."},
+
+ {"There are questions \\x3F\\x3F?", "There are questions ???"},
+ {"There are questions \\x3F?", "There are questions ??"},
+};
+
+Y_UNIT_TEST_SUITE(TEscapeCTest) {
+ Y_UNIT_TEST(TestStrokaEscapeC) {
+ for (const auto& e : CommonTestData) {
+ TString expected(e.Expected);
+ TString source(e.Source);
+ TString actual(EscapeC(e.Source));
+ TString actual2(UnescapeC(e.Expected));
+
+ UNIT_ASSERT_VALUES_EQUAL(e.Expected, actual);
+ UNIT_ASSERT_VALUES_EQUAL(e.Source, actual2);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\\x17\\n\\xAB", EscapeC(TString("http://ya.ru/\x17\n\xab")));
+ UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\x17\n\xab", UnescapeC(TString("http://ya.ru/\\x17\\n\\xAB")));
+ UNIT_ASSERT_VALUES_EQUAL("h", EscapeC('h'));
+ UNIT_ASSERT_VALUES_EQUAL("h", UnescapeC(TString("h")));
+ UNIT_ASSERT_VALUES_EQUAL("\\xFF", EscapeC('\xFF'));
+ UNIT_ASSERT_VALUES_EQUAL("\xFF", UnescapeC(TString("\\xFF")));
+
+ UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeC(TString("\xff"
+ "f")));
+ UNIT_ASSERT_VALUES_EQUAL("\xff"
+ "f",
+ UnescapeC(TString("\\377f")));
+ UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeC(TString("\xff"
+ "g")));
+ UNIT_ASSERT_VALUES_EQUAL("\xff"
+ "g",
+ UnescapeC(TString("\\xFFg")));
+ UNIT_ASSERT_VALUES_EQUAL("\xEA\x9A\x96", UnescapeC(TString("\\uA696")));
+ UNIT_ASSERT_VALUES_EQUAL("Странный компроматтест", UnescapeC(TString("\\u0421\\u0442\\u0440\\u0430\\u043d\\u043d\\u044b\\u0439 \\u043a\\u043e\\u043c\\u043f\\u0440\\u043e\\u043c\\u0430\\u0442тест")));
+ }
+
+ Y_UNIT_TEST(TestWtrokaEscapeC) {
+ for (const auto& e : CommonTestData) {
+ TUtf16String expected(UTF8ToWide(e.Expected));
+ TUtf16String source(UTF8ToWide(e.Source));
+ TUtf16String actual(EscapeC(source));
+ TUtf16String actual2(UnescapeC(expected));
+
+ UNIT_ASSERT_VALUES_EQUAL(expected, actual);
+ UNIT_ASSERT_VALUES_EQUAL(source, actual2);
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(u"http://ya.ru/\\x17\\n\\u1234", EscapeC(u"http://ya.ru/\x17\n\u1234"));
+ UNIT_ASSERT_VALUES_EQUAL(u"h", EscapeC(u'h'));
+ UNIT_ASSERT_VALUES_EQUAL(u"\\xFF", EscapeC(wchar16(255)));
+ }
+
+ Y_UNIT_TEST(TestEscapeTrigraphs) {
+ UNIT_ASSERT_VALUES_EQUAL("?", EscapeC(TString("?")));
+ UNIT_ASSERT_VALUES_EQUAL("\\x3F?", EscapeC(TString("??")));
+ UNIT_ASSERT_VALUES_EQUAL("\\x3F\\x3F?", EscapeC(TString("???")));
+ // ok but may cause warning about trigraphs
+ // UNIT_ASSERT_VALUES_EQUAL("[x]?z", EscapeC(TString("??(x??)?z")));
+ UNIT_ASSERT_VALUES_EQUAL("\\x3F?x\\x3F\\x3F?z", EscapeC(TString("??x???z")));
+ }
+
+ Y_UNIT_TEST(TestUnescapeCCharLen) {
+ auto test = [](const char* str, size_t len) {
+ UNIT_ASSERT_EQUAL(UnescapeCCharLen(str, str + strlen(str)), len);
+ };
+
+ test("", 0);
+ test("abc", 1);
+ test("\\", 1);
+ test("\\\\", 2);
+ test("\\#", 2);
+ test("\\n10", 2);
+ test("\\r\\n", 2);
+ test("\\x05abc", 4);
+ test("\\u11117777", 6);
+ test("\\u123yyy", 2);
+ test("\\U11117777cccc", 10);
+ test("\\U111yyy", 2);
+ test("\\0\\1", 2);
+ test("\\01\\1", 3);
+ test("\\012\\1", 4);
+ test("\\0123\\1", 4);
+ test("\\4\\1", 2);
+ test("\\40\\1", 3);
+ test("\\400\\1", 3);
+ test("\\4xxx", 2);
+ }
+
+ Y_UNIT_TEST(TestUnbounded) {
+ char buf[100000];
+
+ for (const auto& x : CommonTestData) {
+ char* end = UnescapeC(x.Expected.data(), x.Expected.size(), buf);
+
+ UNIT_ASSERT_VALUES_EQUAL(x.Source, TStringBuf(buf, end));
+ }
+ }
+
+ Y_UNIT_TEST(TestCapitalUEscapes) {
+ UNIT_ASSERT_VALUES_EQUAL(UnescapeC("\\U00000020"), " ");
+ UNIT_ASSERT_VALUES_EQUAL(UnescapeC("\\Uxxx"), "Uxxx");
+ }
+}