diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /util/string/escape_ut.cpp | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'util/string/escape_ut.cpp')
-rw-r--r-- | util/string/escape_ut.cpp | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/util/string/escape_ut.cpp b/util/string/escape_ut.cpp new file mode 100644 index 0000000000..cd38ecffd3 --- /dev/null +++ b/util/string/escape_ut.cpp @@ -0,0 +1,148 @@ +#include "escape.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/generic/string.h> +#include <util/charset/wide.h> + +using namespace std::string_view_literals; + +namespace { + struct TExample { + TString Expected; + TString Source; + + TExample(const TStringBuf expected, const TStringBuf source) + : Expected{expected} + , Source{source} + { + } + }; +} + +static const TExample CommonTestData[] = { + // Should be valid UTF-8. + {"http://ya.ru/", "http://ya.ru/"}, + {"http://ya.ru/\\x17\\n", "http://ya.ru/\x17\n"}, + + {"http://ya.ru/\\0", "http://ya.ru/\0"sv}, + {"http://ya.ru/\\0\\0", "http://ya.ru/\0\0"sv}, + {"http://ya.ru/\\0\\0000", "http://ya.ru/\0\0" + "0"sv}, + {"http://ya.ru/\\0\\0001", "http://ya.ru/\0\x00" + "1"sv}, + + {R"(\2\4\00678)", "\2\4\6" + "78"sv}, // \6 -> \006 because next char '7' is "octal" + {R"(\2\4\689)", "\2\4\6" + "89"sv}, // \6 -> \6 because next char '8' is not "octal" + + {R"(\"Hello\", Alice said.)", "\"Hello\", Alice said."}, + {"Slash\\\\dash!", "Slash\\dash!"}, + {R"(There\nare\r\nnewlines.)", "There\nare\r\nnewlines."}, + {"There\\tare\\ttabs.", "There\tare\ttabs."}, + + {"There are questions \\x3F\\x3F?", "There are questions ???"}, + {"There are questions \\x3F?", "There are questions ??"}, +}; + +Y_UNIT_TEST_SUITE(TEscapeCTest) { + Y_UNIT_TEST(TestStrokaEscapeC) { + for (const auto& e : CommonTestData) { + TString expected(e.Expected); + TString source(e.Source); + TString actual(EscapeC(e.Source)); + TString actual2(UnescapeC(e.Expected)); + + UNIT_ASSERT_VALUES_EQUAL(e.Expected, actual); + UNIT_ASSERT_VALUES_EQUAL(e.Source, actual2); + } + + UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\\x17\\n\\xAB", EscapeC(TString("http://ya.ru/\x17\n\xab"))); + UNIT_ASSERT_VALUES_EQUAL("http://ya.ru/\x17\n\xab", UnescapeC(TString("http://ya.ru/\\x17\\n\\xAB"))); + UNIT_ASSERT_VALUES_EQUAL("h", EscapeC('h')); + UNIT_ASSERT_VALUES_EQUAL("h", UnescapeC(TString("h"))); + UNIT_ASSERT_VALUES_EQUAL("\\xFF", EscapeC('\xFF')); + UNIT_ASSERT_VALUES_EQUAL("\xFF", UnescapeC(TString("\\xFF"))); + + UNIT_ASSERT_VALUES_EQUAL("\\377f", EscapeC(TString("\xff" + "f"))); + UNIT_ASSERT_VALUES_EQUAL("\xff" + "f", + UnescapeC(TString("\\377f"))); + UNIT_ASSERT_VALUES_EQUAL("\\xFFg", EscapeC(TString("\xff" + "g"))); + UNIT_ASSERT_VALUES_EQUAL("\xff" + "g", + UnescapeC(TString("\\xFFg"))); + UNIT_ASSERT_VALUES_EQUAL("\xEA\x9A\x96", UnescapeC(TString("\\uA696"))); + UNIT_ASSERT_VALUES_EQUAL("Странный компроматтест", UnescapeC(TString("\\u0421\\u0442\\u0440\\u0430\\u043d\\u043d\\u044b\\u0439 \\u043a\\u043e\\u043c\\u043f\\u0440\\u043e\\u043c\\u0430\\u0442тест"))); + } + + Y_UNIT_TEST(TestWtrokaEscapeC) { + for (const auto& e : CommonTestData) { + TUtf16String expected(UTF8ToWide(e.Expected)); + TUtf16String source(UTF8ToWide(e.Source)); + TUtf16String actual(EscapeC(source)); + TUtf16String actual2(UnescapeC(expected)); + + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + UNIT_ASSERT_VALUES_EQUAL(source, actual2); + } + + UNIT_ASSERT_VALUES_EQUAL(u"http://ya.ru/\\x17\\n\\u1234", EscapeC(u"http://ya.ru/\x17\n\u1234")); + UNIT_ASSERT_VALUES_EQUAL(u"h", EscapeC(u'h')); + UNIT_ASSERT_VALUES_EQUAL(u"\\xFF", EscapeC(wchar16(255))); + } + + Y_UNIT_TEST(TestEscapeTrigraphs) { + UNIT_ASSERT_VALUES_EQUAL("?", EscapeC(TString("?"))); + UNIT_ASSERT_VALUES_EQUAL("\\x3F?", EscapeC(TString("??"))); + UNIT_ASSERT_VALUES_EQUAL("\\x3F\\x3F?", EscapeC(TString("???"))); + // ok but may cause warning about trigraphs + // UNIT_ASSERT_VALUES_EQUAL("[x]?z", EscapeC(TString("??(x??)?z"))); + UNIT_ASSERT_VALUES_EQUAL("\\x3F?x\\x3F\\x3F?z", EscapeC(TString("??x???z"))); + } + + Y_UNIT_TEST(TestUnescapeCCharLen) { + auto test = [](const char* str, size_t len) { + UNIT_ASSERT_EQUAL(UnescapeCCharLen(str, str + strlen(str)), len); + }; + + test("", 0); + test("abc", 1); + test("\\", 1); + test("\\\\", 2); + test("\\#", 2); + test("\\n10", 2); + test("\\r\\n", 2); + test("\\x05abc", 4); + test("\\u11117777", 6); + test("\\u123yyy", 2); + test("\\U11117777cccc", 10); + test("\\U111yyy", 2); + test("\\0\\1", 2); + test("\\01\\1", 3); + test("\\012\\1", 4); + test("\\0123\\1", 4); + test("\\4\\1", 2); + test("\\40\\1", 3); + test("\\400\\1", 3); + test("\\4xxx", 2); + } + + Y_UNIT_TEST(TestUnbounded) { + char buf[100000]; + + for (const auto& x : CommonTestData) { + char* end = UnescapeC(x.Expected.data(), x.Expected.size(), buf); + + UNIT_ASSERT_VALUES_EQUAL(x.Source, TStringBuf(buf, end)); + } + } + + Y_UNIT_TEST(TestCapitalUEscapes) { + UNIT_ASSERT_VALUES_EQUAL(UnescapeC("\\U00000020"), " "); + UNIT_ASSERT_VALUES_EQUAL(UnescapeC("\\Uxxx"), "Uxxx"); + } +} |