about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/charset/codepage_ut.cpp
diff options
context:
space:
mode:
author mowgli <mowgli@yandex-team.ru> 2022-02-10 16:49:25 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:49:25 +0300
commit 89afbbe4ca0e02e386dd4df08f7945f190dc1b84 (patch)
tree c4772201af6215d48734691b8796e4cfc77c2ac8 /library/cpp/charset/codepage_ut.cpp
parent 7510cec1516d17cbc8d7749974e36aa45f547a26 (diff)
download ydb-89afbbe4ca0e02e386dd4df08f7945f190dc1b84.tar.gz
Restoring authorship annotation for <mowgli@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/charset/codepage_ut.cpp')
-rw-r--r-- library/cpp/charset/codepage_ut.cpp 134
1 files changed, 67 insertions, 67 deletions
diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp
index c3ac3ac478..7df4d27196 100644
--- a/library/cpp/charset/codepage_ut.cpp
+++ b/library/cpp/charset/codepage_ut.cpp
@@ -53,8 +53,8 @@ public:
void TestToLower();
void TestToUpper();
- void TestCanEncode();
-
+ void TestCanEncode();
+
inline void TestUpperLower() {
const CodePage* cp = CodePageByCharset(CODES_ASCII);
char tmp[100];
@@ -343,82 +343,82 @@ void TCodepageTest::TestToUpper() {
ToUpper(data, n - 1);
UNIT_ASSERT(strcmp(data, yandexUpperCase) == 0);
}
-
-static void TestCanEncodeEmpty() {
- TWtringBuf empty;
- UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN));
- UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX));
- UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8));
-}
-
-static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) {
- // char by char
- for (size_t i = 0; i < text.size(); ++i) {
- if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult)
- ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding)
+
+static void TestCanEncodeEmpty() {
+ TWtringBuf empty;
+ UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN));
+ UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX));
+ UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8));
+}
+
+static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) {
+ // char by char
+ for (size_t i = 0; i < text.size(); ++i) {
+ if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult)
+ ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding)
<< " on '" << text.SubStr(i, 1) << "' (expected " << expectedResult << ")";
- }
- // whole text
- UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult);
-}
-
-void TCodepageTest::TestCanEncode() {
- TestCanEncodeEmpty();
-
+ }
+ // whole text
+ UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult);
+}
+
+void TCodepageTest::TestCanEncode() {
+ TestCanEncodeEmpty();
+
const TUtf16String lat = u"AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";
- TestCanEncodeEach(lat, CODES_WIN, true);
- TestCanEncodeEach(lat, CODES_YANDEX, true);
- TestCanEncodeEach(lat, CODES_UTF8, true);
-
+ TestCanEncodeEach(lat, CODES_WIN, true);
+ TestCanEncodeEach(lat, CODES_YANDEX, true);
+ TestCanEncodeEach(lat, CODES_UTF8, true);
+
const TUtf16String rus = u"АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя";
- TestCanEncodeEach(rus, CODES_WIN, true);
- TestCanEncodeEach(rus, CODES_YANDEX, true);
- TestCanEncodeEach(rus, CODES_UTF8, true);
-
+ TestCanEncodeEach(rus, CODES_WIN, true);
+ TestCanEncodeEach(rus, CODES_YANDEX, true);
+ TestCanEncodeEach(rus, CODES_UTF8, true);
+
const TUtf16String ukr = u"ҐґЄєІіЇї";
- TestCanEncodeEach(ukr, CODES_WIN, true);
- TestCanEncodeEach(ukr, CODES_YANDEX, true);
- TestCanEncodeEach(ukr, CODES_UTF8, true);
-
+ TestCanEncodeEach(ukr, CODES_WIN, true);
+ TestCanEncodeEach(ukr, CODES_YANDEX, true);
+ TestCanEncodeEach(ukr, CODES_UTF8, true);
+
const TUtf16String pol = u"ĄĆĘŁŃÓŚŹŻąćęłńóśźż";
- TestCanEncodeEach(pol, CODES_WIN, false);
- TestCanEncodeEach(pol, CODES_YANDEX, true);
- TestCanEncodeEach(pol, CODES_UTF_16BE, true);
-
+ TestCanEncodeEach(pol, CODES_WIN, false);
+ TestCanEncodeEach(pol, CODES_YANDEX, true);
+ TestCanEncodeEach(pol, CODES_UTF_16BE, true);
+
const TUtf16String ger = u"ÄäÖöÜüß";
- TestCanEncodeEach(ger, CODES_WIN, false);
- TestCanEncodeEach(ger, CODES_YANDEX, true);
- TestCanEncodeEach(ger, CODES_UTF_16LE, true);
-
+ TestCanEncodeEach(ger, CODES_WIN, false);
+ TestCanEncodeEach(ger, CODES_YANDEX, true);
+ TestCanEncodeEach(ger, CODES_UTF_16LE, true);
+
const TUtf16String fra1 = u"éàèùâêîôûëïç"; // supported in yandex cp
const TUtf16String fra2 = u"ÉÀÈÙÂÊÎÔÛËÏŸÿÇ";
const TUtf16String fra3 = u"Æ挜";
- TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false);
- TestCanEncodeEach(fra1, CODES_YANDEX, true);
- TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false);
- TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true);
-
+ TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false);
+ TestCanEncodeEach(fra1, CODES_YANDEX, true);
+ TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false);
+ TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true);
+
const TUtf16String kaz = u"ӘәҒғҚқҢңӨөҰұҮүҺһ";
- TestCanEncodeEach(kaz, CODES_WIN, false);
- TestCanEncodeEach(kaz, CODES_YANDEX, false);
- TestCanEncodeEach(kaz, CODES_UTF8, true);
- TestCanEncodeEach(kaz, CODES_KAZWIN, true);
-
+ TestCanEncodeEach(kaz, CODES_WIN, false);
+ TestCanEncodeEach(kaz, CODES_YANDEX, false);
+ TestCanEncodeEach(kaz, CODES_UTF8, true);
+ TestCanEncodeEach(kaz, CODES_KAZWIN, true);
+
const TUtf16String tur1 = u"ĞİŞğş";
const TUtf16String tur = tur1 + u"ı";
- TestCanEncodeEach(tur, CODES_WIN, false);
- TestCanEncodeEach(tur, CODES_YANDEX, false);
- TestCanEncodeEach(tur, CODES_UTF8, true);
-
+ TestCanEncodeEach(tur, CODES_WIN, false);
+ TestCanEncodeEach(tur, CODES_YANDEX, false);
+ TestCanEncodeEach(tur, CODES_UTF8, true);
+
const TUtf16String chi = u"新隶体新隸體";
- TestCanEncodeEach(chi, CODES_WIN, false);
- TestCanEncodeEach(chi, CODES_YANDEX, false);
- TestCanEncodeEach(chi, CODES_UTF8, true);
- TestCanEncodeEach(chi, CODES_UTF_16LE, true);
-
+ TestCanEncodeEach(chi, CODES_WIN, false);
+ TestCanEncodeEach(chi, CODES_YANDEX, false);
+ TestCanEncodeEach(chi, CODES_UTF8, true);
+ TestCanEncodeEach(chi, CODES_UTF_16LE, true);
+
const TUtf16String jap = u"漢字仮字交じり文";
- TestCanEncodeEach(jap, CODES_WIN, false);
- TestCanEncodeEach(jap, CODES_YANDEX, false);
- TestCanEncodeEach(jap, CODES_UTF8, true);
- TestCanEncodeEach(jap, CODES_UTF_16BE, true);
-}
+ TestCanEncodeEach(jap, CODES_WIN, false);
+ TestCanEncodeEach(jap, CODES_YANDEX, false);
+ TestCanEncodeEach(jap, CODES_UTF8, true);
+ TestCanEncodeEach(jap, CODES_UTF_16BE, true);
+}