diff options
author | mowgli <mowgli@yandex-team.ru> | 2022-02-10 16:49:25 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:25 +0300 |
commit | 89afbbe4ca0e02e386dd4df08f7945f190dc1b84 (patch) | |
tree | c4772201af6215d48734691b8796e4cfc77c2ac8 /library/cpp/charset/codepage_ut.cpp | |
parent | 7510cec1516d17cbc8d7749974e36aa45f547a26 (diff) | |
download | ydb-89afbbe4ca0e02e386dd4df08f7945f190dc1b84.tar.gz |
Restoring authorship annotation for <mowgli@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/charset/codepage_ut.cpp')
-rw-r--r-- | library/cpp/charset/codepage_ut.cpp | 134 |
1 files changed, 67 insertions, 67 deletions
diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp index c3ac3ac478..7df4d27196 100644 --- a/library/cpp/charset/codepage_ut.cpp +++ b/library/cpp/charset/codepage_ut.cpp @@ -53,8 +53,8 @@ public: void TestToLower(); void TestToUpper(); - void TestCanEncode(); - + void TestCanEncode(); + inline void TestUpperLower() { const CodePage* cp = CodePageByCharset(CODES_ASCII); char tmp[100]; @@ -343,82 +343,82 @@ void TCodepageTest::TestToUpper() { ToUpper(data, n - 1); UNIT_ASSERT(strcmp(data, yandexUpperCase) == 0); } - -static void TestCanEncodeEmpty() { - TWtringBuf empty; - UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN)); - UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX)); - UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8)); -} - -static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) { - // char by char - for (size_t i = 0; i < text.size(); ++i) { - if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult) - ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding) + +static void TestCanEncodeEmpty() { + TWtringBuf empty; + UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN)); + UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX)); + UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8)); +} + +static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) { + // char by char + for (size_t i = 0; i < text.size(); ++i) { + if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult) + ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding) << " on '" << text.SubStr(i, 1) << "' (expected " << expectedResult << ")"; - } - // whole text - UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult); -} - -void TCodepageTest::TestCanEncode() { - TestCanEncodeEmpty(); - + } + // whole text + UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult); +} + +void TCodepageTest::TestCanEncode() { + TestCanEncodeEmpty(); + const TUtf16String lat = u"AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz"; - TestCanEncodeEach(lat, CODES_WIN, true); - TestCanEncodeEach(lat, CODES_YANDEX, true); - TestCanEncodeEach(lat, CODES_UTF8, true); - + TestCanEncodeEach(lat, CODES_WIN, true); + TestCanEncodeEach(lat, CODES_YANDEX, true); + TestCanEncodeEach(lat, CODES_UTF8, true); + const TUtf16String rus = u"АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя"; - TestCanEncodeEach(rus, CODES_WIN, true); - TestCanEncodeEach(rus, CODES_YANDEX, true); - TestCanEncodeEach(rus, CODES_UTF8, true); - + TestCanEncodeEach(rus, CODES_WIN, true); + TestCanEncodeEach(rus, CODES_YANDEX, true); + TestCanEncodeEach(rus, CODES_UTF8, true); + const TUtf16String ukr = u"ҐґЄєІіЇї"; - TestCanEncodeEach(ukr, CODES_WIN, true); - TestCanEncodeEach(ukr, CODES_YANDEX, true); - TestCanEncodeEach(ukr, CODES_UTF8, true); - + TestCanEncodeEach(ukr, CODES_WIN, true); + TestCanEncodeEach(ukr, CODES_YANDEX, true); + TestCanEncodeEach(ukr, CODES_UTF8, true); + const TUtf16String pol = u"ĄĆĘŁŃÓŚŹŻąćęłńóśźż"; - TestCanEncodeEach(pol, CODES_WIN, false); - TestCanEncodeEach(pol, CODES_YANDEX, true); - TestCanEncodeEach(pol, CODES_UTF_16BE, true); - + TestCanEncodeEach(pol, CODES_WIN, false); + TestCanEncodeEach(pol, CODES_YANDEX, true); + TestCanEncodeEach(pol, CODES_UTF_16BE, true); + const TUtf16String ger = u"ÄäÖöÜüß"; - TestCanEncodeEach(ger, CODES_WIN, false); - TestCanEncodeEach(ger, CODES_YANDEX, true); - TestCanEncodeEach(ger, CODES_UTF_16LE, true); - + TestCanEncodeEach(ger, CODES_WIN, false); + TestCanEncodeEach(ger, CODES_YANDEX, true); + TestCanEncodeEach(ger, CODES_UTF_16LE, true); + const TUtf16String fra1 = u"éàèùâêîôûëïç"; // supported in yandex cp const TUtf16String fra2 = u"ÉÀÈÙÂÊÎÔÛËÏŸÿÇ"; const TUtf16String fra3 = u"Æ挜"; - TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false); - TestCanEncodeEach(fra1, CODES_YANDEX, true); - TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false); - TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true); - + TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false); + TestCanEncodeEach(fra1, CODES_YANDEX, true); + TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false); + TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true); + const TUtf16String kaz = u"ӘәҒғҚқҢңӨөҰұҮүҺһ"; - TestCanEncodeEach(kaz, CODES_WIN, false); - TestCanEncodeEach(kaz, CODES_YANDEX, false); - TestCanEncodeEach(kaz, CODES_UTF8, true); - TestCanEncodeEach(kaz, CODES_KAZWIN, true); - + TestCanEncodeEach(kaz, CODES_WIN, false); + TestCanEncodeEach(kaz, CODES_YANDEX, false); + TestCanEncodeEach(kaz, CODES_UTF8, true); + TestCanEncodeEach(kaz, CODES_KAZWIN, true); + const TUtf16String tur1 = u"ĞİŞğş"; const TUtf16String tur = tur1 + u"ı"; - TestCanEncodeEach(tur, CODES_WIN, false); - TestCanEncodeEach(tur, CODES_YANDEX, false); - TestCanEncodeEach(tur, CODES_UTF8, true); - + TestCanEncodeEach(tur, CODES_WIN, false); + TestCanEncodeEach(tur, CODES_YANDEX, false); + TestCanEncodeEach(tur, CODES_UTF8, true); + const TUtf16String chi = u"新隶体新隸體"; - TestCanEncodeEach(chi, CODES_WIN, false); - TestCanEncodeEach(chi, CODES_YANDEX, false); - TestCanEncodeEach(chi, CODES_UTF8, true); - TestCanEncodeEach(chi, CODES_UTF_16LE, true); - + TestCanEncodeEach(chi, CODES_WIN, false); + TestCanEncodeEach(chi, CODES_YANDEX, false); + TestCanEncodeEach(chi, CODES_UTF8, true); + TestCanEncodeEach(chi, CODES_UTF_16LE, true); + const TUtf16String jap = u"漢字仮字交じり文"; - TestCanEncodeEach(jap, CODES_WIN, false); - TestCanEncodeEach(jap, CODES_YANDEX, false); - TestCanEncodeEach(jap, CODES_UTF8, true); - TestCanEncodeEach(jap, CODES_UTF_16BE, true); -} + TestCanEncodeEach(jap, CODES_WIN, false); + TestCanEncodeEach(jap, CODES_YANDEX, false); + TestCanEncodeEach(jap, CODES_UTF8, true); + TestCanEncodeEach(jap, CODES_UTF_16BE, true); +} |