diff options
author | Vlad Yaroslavlev <vladon@vladon.com> | 2022-02-10 16:46:25 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:25 +0300 |
commit | 344ea37b4a345701ab0e67de2266a1c1bd7baf2d (patch) | |
tree | 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /util/charset/utf8_ut.cpp | |
parent | 706b83ed7de5a473436620367af31fc0ceecde07 (diff) | |
download | ydb-344ea37b4a345701ab0e67de2266a1c1bd7baf2d.tar.gz |
Restoring authorship annotation for Vlad Yaroslavlev <vladon@vladon.com>. Commit 2 of 2.
Diffstat (limited to 'util/charset/utf8_ut.cpp')
-rw-r--r-- | util/charset/utf8_ut.cpp | 68 |
1 files changed, 34 insertions, 34 deletions
diff --git a/util/charset/utf8_ut.cpp b/util/charset/utf8_ut.cpp index fc379ff694..9e68881cca 100644 --- a/util/charset/utf8_ut.cpp +++ b/util/charset/utf8_ut.cpp @@ -17,37 +17,37 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) { UNIT_ASSERT_VALUES_EQUAL(ToLowerUTF8(TStringBuf("xyz")), "xyz"); - { - TString s = "привет!"; - TString q = "ПРИВЕТ!"; - TString tmp; + { + TString s = "привет!"; + TString q = "ПРИВЕТ!"; + TString tmp; UNIT_ASSERT(ToLowerUTF8Impl(s.data(), s.size(), tmp) == false); UNIT_ASSERT(ToLowerUTF8Impl(q.data(), q.size(), tmp) == true); - } - - { - const char* weird = "\xC8\xBE"; // 'Ⱦ', U+023E. strlen(weird)==2, strlen(tolower_utf8(weird)) is 3 - const char* turkI = "İ"; //strlen("İ") == 2, strlen(tolower_utf8("İ") == 1 - TStringBuf chars[] = {"f", "F", "Б", "б", weird, turkI}; - const int N = Y_ARRAY_SIZE(chars); - //try all combinations of these letters. - int numberOfVariants = 1; - for (int len = 0; len <= 4; ++len) { - for (int i = 0; i < numberOfVariants; ++i) { - TString s; - int k = i; - for (int j = 0; j < len; ++j) { - //Treat 'i' like number in base-N system with digits from 'chars'-array - s += chars[k % N]; - k /= N; - } - - TUtf16String tmp = UTF8ToWide(s); - tmp.to_lower(); - - UNIT_ASSERT_VALUES_EQUAL(ToLowerUTF8(s), WideToUTF8(tmp)); - } - numberOfVariants *= N; + } + + { + const char* weird = "\xC8\xBE"; // 'Ⱦ', U+023E. strlen(weird)==2, strlen(tolower_utf8(weird)) is 3 + const char* turkI = "İ"; //strlen("İ") == 2, strlen(tolower_utf8("İ") == 1 + TStringBuf chars[] = {"f", "F", "Б", "б", weird, turkI}; + const int N = Y_ARRAY_SIZE(chars); + //try all combinations of these letters. + int numberOfVariants = 1; + for (int len = 0; len <= 4; ++len) { + for (int i = 0; i < numberOfVariants; ++i) { + TString s; + int k = i; + for (int j = 0; j < len; ++j) { + //Treat 'i' like number in base-N system with digits from 'chars'-array + s += chars[k % N]; + k /= N; + } + + TUtf16String tmp = UTF8ToWide(s); + tmp.to_lower(); + + UNIT_ASSERT_VALUES_EQUAL(ToLowerUTF8(s), WideToUTF8(tmp)); + } + numberOfVariants *= N; } } } @@ -95,12 +95,12 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) { Y_UNIT_TEST(TestUTF8ToWide) { TFileInput in(ArcadiaSourceRoot() + TStringBuf("/util/charset/ut/utf8/test1.txt")); - TString text = in.ReadAll(); + TString text = in.ReadAll(); UNIT_ASSERT(WideToUTF8(UTF8ToWide(text)) == text); } Y_UNIT_TEST(TestInvalidUTF8) { - TVector<TString> testData; + TVector<TString> testData; TFileInput input(ArcadiaSourceRoot() + TStringBuf("/util/charset/ut/utf8/invalid_UTF8.bin")); Load(&input, testData); @@ -112,9 +112,9 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) { Y_UNIT_TEST(TestUTF8ToWideScalar) { TFileInput in(ArcadiaSourceRoot() + TStringBuf("/util/charset/ut/utf8/test1.txt")); - TString text = in.ReadAll(); - TUtf16String wtextSSE = UTF8ToWide(text); - TUtf16String wtextScalar = TUtf16String::Uninitialized(text.size()); + TString text = in.ReadAll(); + TUtf16String wtextSSE = UTF8ToWide(text); + TUtf16String wtextScalar = TUtf16String::Uninitialized(text.size()); const unsigned char* textBegin = reinterpret_cast<const unsigned char*>(text.c_str()); wchar16* wtextBegin = wtextScalar.begin(); ::NDetail::UTF8ToWideImplScalar<false>(textBegin, textBegin + text.size(), wtextBegin); |