about | summary | refs | log | tree | commit | diff | stats
path: root/util/charset/utf8_ut.cpp
diff options
context:
space:
mode:
author Vlad Yaroslavlev <vladon@vladon.com> 2022-02-10 16:46:25 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:25 +0300
commit 344ea37b4a345701ab0e67de2266a1c1bd7baf2d (patch)
tree 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /util/charset/utf8_ut.cpp
parent 706b83ed7de5a473436620367af31fc0ceecde07 (diff)
download ydb-344ea37b4a345701ab0e67de2266a1c1bd7baf2d.tar.gz
Restoring authorship annotation for Vlad Yaroslavlev <vladon@vladon.com>. Commit 2 of 2.
Diffstat (limited to 'util/charset/utf8_ut.cpp')
-rw-r--r-- util/charset/utf8_ut.cpp 68
1 files changed, 34 insertions, 34 deletions
diff --git a/util/charset/utf8_ut.cpp b/util/charset/utf8_ut.cpp
index fc379ff694..9e68881cca 100644
--- a/util/charset/utf8_ut.cpp
+++ b/util/charset/utf8_ut.cpp
@@ -17,37 +17,37 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) {
UNIT_ASSERT_VALUES_EQUAL(ToLowerUTF8(TStringBuf("xyz")), "xyz");
- {
- TString s = "привет!";
- TString q = "ПРИВЕТ!";
- TString tmp;
+ {
+ TString s = "привет!";
+ TString q = "ПРИВЕТ!";
+ TString tmp;
UNIT_ASSERT(ToLowerUTF8Impl(s.data(), s.size(), tmp) == false);
UNIT_ASSERT(ToLowerUTF8Impl(q.data(), q.size(), tmp) == true);
- }
-
- {
- const char* weird = "\xC8\xBE"; // 'Ⱦ', U+023E. strlen(weird)==2, strlen(tolower_utf8(weird)) is 3
- const char* turkI = "İ"; //strlen("İ") == 2, strlen(tolower_utf8("İ") == 1
- TStringBuf chars[] = {"f", "F", "Б", "б", weird, turkI};
- const int N = Y_ARRAY_SIZE(chars);
- //try all combinations of these letters.
- int numberOfVariants = 1;
- for (int len = 0; len <= 4; ++len) {
- for (int i = 0; i < numberOfVariants; ++i) {
- TString s;
- int k = i;
- for (int j = 0; j < len; ++j) {
- //Treat 'i' like number in base-N system with digits from 'chars'-array
- s += chars[k % N];
- k /= N;
- }
-
- TUtf16String tmp = UTF8ToWide(s);
- tmp.to_lower();
-
- UNIT_ASSERT_VALUES_EQUAL(ToLowerUTF8(s), WideToUTF8(tmp));
- }
- numberOfVariants *= N;
+ }
+
+ {
+ const char* weird = "\xC8\xBE"; // 'Ⱦ', U+023E. strlen(weird)==2, strlen(tolower_utf8(weird)) is 3
+ const char* turkI = "İ"; //strlen("İ") == 2, strlen(tolower_utf8("İ") == 1
+ TStringBuf chars[] = {"f", "F", "Б", "б", weird, turkI};
+ const int N = Y_ARRAY_SIZE(chars);
+ //try all combinations of these letters.
+ int numberOfVariants = 1;
+ for (int len = 0; len <= 4; ++len) {
+ for (int i = 0; i < numberOfVariants; ++i) {
+ TString s;
+ int k = i;
+ for (int j = 0; j < len; ++j) {
+ //Treat 'i' like number in base-N system with digits from 'chars'-array
+ s += chars[k % N];
+ k /= N;
+ }
+
+ TUtf16String tmp = UTF8ToWide(s);
+ tmp.to_lower();
+
+ UNIT_ASSERT_VALUES_EQUAL(ToLowerUTF8(s), WideToUTF8(tmp));
+ }
+ numberOfVariants *= N;
}
}
}
@@ -95,12 +95,12 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) {
Y_UNIT_TEST(TestUTF8ToWide) {
TFileInput in(ArcadiaSourceRoot() + TStringBuf("/util/charset/ut/utf8/test1.txt"));
- TString text = in.ReadAll();
+ TString text = in.ReadAll();
UNIT_ASSERT(WideToUTF8(UTF8ToWide(text)) == text);
}
Y_UNIT_TEST(TestInvalidUTF8) {
- TVector<TString> testData;
+ TVector<TString> testData;
TFileInput input(ArcadiaSourceRoot() + TStringBuf("/util/charset/ut/utf8/invalid_UTF8.bin"));
Load(&input, testData);
@@ -112,9 +112,9 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) {
Y_UNIT_TEST(TestUTF8ToWideScalar) {
TFileInput in(ArcadiaSourceRoot() + TStringBuf("/util/charset/ut/utf8/test1.txt"));
- TString text = in.ReadAll();
- TUtf16String wtextSSE = UTF8ToWide(text);
- TUtf16String wtextScalar = TUtf16String::Uninitialized(text.size());
+ TString text = in.ReadAll();
+ TUtf16String wtextSSE = UTF8ToWide(text);
+ TUtf16String wtextScalar = TUtf16String::Uninitialized(text.size());
const unsigned char* textBegin = reinterpret_cast<const unsigned char*>(text.c_str());
wchar16* wtextBegin = wtextScalar.begin();
::NDetail::UTF8ToWideImplScalar<false>(textBegin, textBegin + text.size(), wtextBegin);