aboutsummaryrefslogtreecommitdiffstats
path: root/util/charset
diff options
context:
space:
mode:
authorVlad Yaroslavlev <vladon@vladon.com>2022-02-10 16:46:25 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:46:25 +0300
commit344ea37b4a345701ab0e67de2266a1c1bd7baf2d (patch)
tree1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /util/charset
parent706b83ed7de5a473436620367af31fc0ceecde07 (diff)
downloadydb-344ea37b4a345701ab0e67de2266a1c1bd7baf2d.tar.gz
Restoring authorship annotation for Vlad Yaroslavlev <vladon@vladon.com>. Commit 2 of 2.
Diffstat (limited to 'util/charset')
-rw-r--r--util/charset/benchmark/utf8_to_wide/main.cpp6
-rw-r--r--util/charset/utf8.cpp12
-rw-r--r--util/charset/utf8.h12
-rw-r--r--util/charset/utf8_ut.cpp68
-rw-r--r--util/charset/wide.cpp36
-rw-r--r--util/charset/wide.h56
-rw-r--r--util/charset/wide_ut.cpp70
7 files changed, 130 insertions, 130 deletions
diff --git a/util/charset/benchmark/utf8_to_wide/main.cpp b/util/charset/benchmark/utf8_to_wide/main.cpp
index da4782c3bd..09fa567fe5 100644
--- a/util/charset/benchmark/utf8_to_wide/main.cpp
+++ b/util/charset/benchmark/utf8_to_wide/main.cpp
@@ -10,7 +10,7 @@
namespace {
template <size_t N>
- struct TRandomAsciiString: public TVector<char> {
+ struct TRandomAsciiString: public TVector<char> {
inline TRandomAsciiString() {
reserve(N);
for (size_t i = 0; i < N; ++i) {
@@ -20,9 +20,9 @@ namespace {
};
template <size_t N>
- struct TRandomRuString: public TVector<char> {
+ struct TRandomRuString: public TVector<char> {
inline TRandomRuString() {
- TVector<unsigned char> data(N * 2);
+ TVector<unsigned char> data(N * 2);
unsigned char* textEnd = data.begin();
for (size_t i = 0; i < N; ++i) {
size_t runeLen;
diff --git a/util/charset/utf8.cpp b/util/charset/utf8.cpp
index 33eb298185..efe3a52f61 100644
--- a/util/charset/utf8.cpp
+++ b/util/charset/utf8.cpp
@@ -129,23 +129,23 @@ EUTF8Detect UTF8Detect(const char* s, size_t len) {
return res;
}
-bool ToLowerUTF8Impl(const char* beg, size_t n, TString& newString) {
+bool ToLowerUTF8Impl(const char* beg, size_t n, TString& newString) {
return ConvertCaseUTF8Impl(ECaseConversion::ToLower, beg, n, newString);
}
-TString ToLowerUTF8(const TString& s) {
- TString newString;
+TString ToLowerUTF8(const TString& s) {
+ TString newString;
bool changed = ToLowerUTF8Impl(s.data(), s.size(), newString);
return changed ? newString : s;
}
-TString ToLowerUTF8(TStringBuf s) {
- TString newString;
+TString ToLowerUTF8(TStringBuf s) {
+ TString newString;
bool changed = ToLowerUTF8Impl(s.data(), s.size(), newString);
return changed ? newString : TString(s.data(), s.size());
}
-TString ToLowerUTF8(const char* s) {
+TString ToLowerUTF8(const char* s) {
return ToLowerUTF8(TStringBuf(s));
}
diff --git a/util/charset/utf8.h b/util/charset/utf8.h
index 1997f73771..5039b46ae9 100644
--- a/util/charset/utf8.h
+++ b/util/charset/utf8.h
@@ -3,7 +3,7 @@
#include "recode_result.h"
#include <util/generic/strbuf.h>
-#include <util/generic/string.h>
+#include <util/generic/string.h>
#include <util/generic/yexception.h>
#include <util/system/defaults.h>
#include <util/system/yassert.h>
@@ -134,7 +134,7 @@ inline bool GetNumberOfUTF8Chars(const char* text, size_t len, size_t& number) {
inline size_t GetNumberOfUTF8Chars(TStringBuf text) {
size_t number;
if (!GetNumberOfUTF8Chars(text.data(), text.size(), number)) {
- ythrow yexception() << "GetNumberOfUTF8Chars failed on invalid utf-8 " << TString(text.substr(0, 50)).Quote();
+ ythrow yexception() << "GetNumberOfUTF8Chars failed on invalid utf-8 " << TString(text.substr(0, 50)).Quote();
}
return number;
}
@@ -369,11 +369,11 @@ inline bool IsUtf(const TStringBuf input) {
//! returns true, if result is not the same as input, and put it in newString
//! returns false, if result is unmodified
-bool ToLowerUTF8Impl(const char* beg, size_t n, TString& newString);
+bool ToLowerUTF8Impl(const char* beg, size_t n, TString& newString);
-TString ToLowerUTF8(const TString& s);
-TString ToLowerUTF8(TStringBuf s);
-TString ToLowerUTF8(const char* s);
+TString ToLowerUTF8(const TString& s);
+TString ToLowerUTF8(TStringBuf s);
+TString ToLowerUTF8(const char* s);
inline TString ToLowerUTF8(const std::string& s) {
return ToLowerUTF8(TStringBuf(s));
diff --git a/util/charset/utf8_ut.cpp b/util/charset/utf8_ut.cpp
index fc379ff694..9e68881cca 100644
--- a/util/charset/utf8_ut.cpp
+++ b/util/charset/utf8_ut.cpp
@@ -17,37 +17,37 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) {
UNIT_ASSERT_VALUES_EQUAL(ToLowerUTF8(TStringBuf("xyz")), "xyz");
- {
- TString s = "привет!";
- TString q = "ПРИВЕТ!";
- TString tmp;
+ {
+ TString s = "привет!";
+ TString q = "ПРИВЕТ!";
+ TString tmp;
UNIT_ASSERT(ToLowerUTF8Impl(s.data(), s.size(), tmp) == false);
UNIT_ASSERT(ToLowerUTF8Impl(q.data(), q.size(), tmp) == true);
- }
-
- {
- const char* weird = "\xC8\xBE"; // 'Ⱦ', U+023E. strlen(weird)==2, strlen(tolower_utf8(weird)) is 3
- const char* turkI = "İ"; //strlen("İ") == 2, strlen(tolower_utf8("İ") == 1
- TStringBuf chars[] = {"f", "F", "Б", "б", weird, turkI};
- const int N = Y_ARRAY_SIZE(chars);
- //try all combinations of these letters.
- int numberOfVariants = 1;
- for (int len = 0; len <= 4; ++len) {
- for (int i = 0; i < numberOfVariants; ++i) {
- TString s;
- int k = i;
- for (int j = 0; j < len; ++j) {
- //Treat 'i' like number in base-N system with digits from 'chars'-array
- s += chars[k % N];
- k /= N;
- }
-
- TUtf16String tmp = UTF8ToWide(s);
- tmp.to_lower();
-
- UNIT_ASSERT_VALUES_EQUAL(ToLowerUTF8(s), WideToUTF8(tmp));
- }
- numberOfVariants *= N;
+ }
+
+ {
+ const char* weird = "\xC8\xBE"; // 'Ⱦ', U+023E. strlen(weird)==2, strlen(tolower_utf8(weird)) is 3
+ const char* turkI = "İ"; //strlen("İ") == 2, strlen(tolower_utf8("İ") == 1
+ TStringBuf chars[] = {"f", "F", "Б", "б", weird, turkI};
+ const int N = Y_ARRAY_SIZE(chars);
+ //try all combinations of these letters.
+ int numberOfVariants = 1;
+ for (int len = 0; len <= 4; ++len) {
+ for (int i = 0; i < numberOfVariants; ++i) {
+ TString s;
+ int k = i;
+ for (int j = 0; j < len; ++j) {
+ //Treat 'i' like number in base-N system with digits from 'chars'-array
+ s += chars[k % N];
+ k /= N;
+ }
+
+ TUtf16String tmp = UTF8ToWide(s);
+ tmp.to_lower();
+
+ UNIT_ASSERT_VALUES_EQUAL(ToLowerUTF8(s), WideToUTF8(tmp));
+ }
+ numberOfVariants *= N;
}
}
}
@@ -95,12 +95,12 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) {
Y_UNIT_TEST(TestUTF8ToWide) {
TFileInput in(ArcadiaSourceRoot() + TStringBuf("/util/charset/ut/utf8/test1.txt"));
- TString text = in.ReadAll();
+ TString text = in.ReadAll();
UNIT_ASSERT(WideToUTF8(UTF8ToWide(text)) == text);
}
Y_UNIT_TEST(TestInvalidUTF8) {
- TVector<TString> testData;
+ TVector<TString> testData;
TFileInput input(ArcadiaSourceRoot() + TStringBuf("/util/charset/ut/utf8/invalid_UTF8.bin"));
Load(&input, testData);
@@ -112,9 +112,9 @@ Y_UNIT_TEST_SUITE(TUtfUtilTest) {
Y_UNIT_TEST(TestUTF8ToWideScalar) {
TFileInput in(ArcadiaSourceRoot() + TStringBuf("/util/charset/ut/utf8/test1.txt"));
- TString text = in.ReadAll();
- TUtf16String wtextSSE = UTF8ToWide(text);
- TUtf16String wtextScalar = TUtf16String::Uninitialized(text.size());
+ TString text = in.ReadAll();
+ TUtf16String wtextSSE = UTF8ToWide(text);
+ TUtf16String wtextScalar = TUtf16String::Uninitialized(text.size());
const unsigned char* textBegin = reinterpret_cast<const unsigned char*>(text.c_str());
wchar16* wtextBegin = wtextScalar.begin();
::NDetail::UTF8ToWideImplScalar<false>(textBegin, textBegin + text.size(), wtextBegin);
diff --git a/util/charset/wide.cpp b/util/charset/wide.cpp
index 49badb7f14..a287438ddd 100644
--- a/util/charset/wide.cpp
+++ b/util/charset/wide.cpp
@@ -31,7 +31,7 @@ namespace {
}
}
-void Collapse(TUtf16String& w) {
+void Collapse(TUtf16String& w) {
CollapseImpl(w, w, 0, IsWhitespace);
}
@@ -532,20 +532,20 @@ static TUtf32String ToSmthRet(const TUtf32StringBuf text, size_t pos, size_t cou
}
TUtf16String ToLowerRet(const TWtringBuf text, size_t pos, size_t count) {
- return ToSmthRet(text, pos, count, [](const wchar16* theText, size_t length, wchar16* out) {
- ToLower(theText, length, out);
+ return ToSmthRet(text, pos, count, [](const wchar16* theText, size_t length, wchar16* out) {
+ ToLower(theText, length, out);
});
}
TUtf16String ToUpperRet(const TWtringBuf text, size_t pos, size_t count) {
- return ToSmthRet(text, pos, count, [](const wchar16* theText, size_t length, wchar16* out) {
- ToUpper(theText, length, out);
+ return ToSmthRet(text, pos, count, [](const wchar16* theText, size_t length, wchar16* out) {
+ ToUpper(theText, length, out);
});
}
TUtf16String ToTitleRet(const TWtringBuf text, size_t pos, size_t count) {
- return ToSmthRet(text, pos, count, [](const wchar16* theText, size_t length, wchar16* out) {
- ToTitle(theText, length, out);
+ return ToSmthRet(text, pos, count, [](const wchar16* theText, size_t length, wchar16* out) {
+ ToTitle(theText, length, out);
});
}
@@ -568,16 +568,16 @@ TUtf32String ToTitleRet(const TUtf32StringBuf text, size_t pos, size_t count) {
}
template <bool insertBr>
-void EscapeHtmlChars(TUtf16String& str) {
- static const TUtf16String lt(LT, Y_ARRAY_SIZE(LT));
- static const TUtf16String gt(GT, Y_ARRAY_SIZE(GT));
- static const TUtf16String amp(AMP, Y_ARRAY_SIZE(AMP));
- static const TUtf16String br(BR, Y_ARRAY_SIZE(BR));
- static const TUtf16String quot(QUOT, Y_ARRAY_SIZE(QUOT));
+void EscapeHtmlChars(TUtf16String& str) {
+ static const TUtf16String lt(LT, Y_ARRAY_SIZE(LT));
+ static const TUtf16String gt(GT, Y_ARRAY_SIZE(GT));
+ static const TUtf16String amp(AMP, Y_ARRAY_SIZE(AMP));
+ static const TUtf16String br(BR, Y_ARRAY_SIZE(BR));
+ static const TUtf16String quot(QUOT, Y_ARRAY_SIZE(QUOT));
size_t escapedLen = 0;
- const TUtf16String& cs = str;
+ const TUtf16String& cs = str;
for (size_t i = 0; i < cs.size(); ++i)
escapedLen += EscapedLen<insertBr>(cs[i]);
@@ -585,13 +585,13 @@ void EscapeHtmlChars(TUtf16String& str) {
if (escapedLen == cs.size())
return;
- TUtf16String res;
+ TUtf16String res;
res.reserve(escapedLen);
size_t start = 0;
for (size_t i = 0; i < cs.size(); ++i) {
- const TUtf16String* ent = nullptr;
+ const TUtf16String* ent = nullptr;
switch (cs[i]) {
case '<':
ent = &lt;
@@ -622,5 +622,5 @@ void EscapeHtmlChars(TUtf16String& str) {
res.swap(str);
}
-template void EscapeHtmlChars<false>(TUtf16String& str);
-template void EscapeHtmlChars<true>(TUtf16String& str);
+template void EscapeHtmlChars<false>(TUtf16String& str);
+template void EscapeHtmlChars<true>(TUtf16String& str);
diff --git a/util/charset/wide.h b/util/charset/wide.h
index 22f922df5a..04e6928aab 100644
--- a/util/charset/wide.h
+++ b/util/charset/wide.h
@@ -6,7 +6,7 @@
#include "wide_specific.h"
#include <util/generic/algorithm.h>
-#include <util/generic/string.h>
+#include <util/generic/string.h>
#include <util/generic/yexception.h>
#include <util/memory/tempbuf.h>
#include <util/system/compiler.h>
@@ -24,8 +24,8 @@ class TTempArray;
using TCharTemp = TTempArray<wchar16>;
namespace NDetail {
- inline TString InStringMsg(const char* s, size_t len) {
- return (len <= 50) ? " in string " + TString(s, len).Quote() : TString();
+ inline TString InStringMsg(const char* s, size_t len) {
+ return (len <= 50) ? " in string " + TString(s, len).Quote() : TString();
}
template <bool isPointer>
@@ -321,8 +321,8 @@ inline size_t UTF8ToWideImpl(const char* text, size_t len, TCharType* dest, size
}
template <bool robust>
-inline TUtf16String UTF8ToWide(const char* text, size_t len) {
- TUtf16String w = TUtf16String::Uninitialized(len);
+inline TUtf16String UTF8ToWide(const char* text, size_t len) {
+ TUtf16String w = TUtf16String::Uninitialized(len);
size_t written;
size_t pos = UTF8ToWideImpl<robust>(text, len, w.begin(), written);
if (pos != len)
@@ -347,7 +347,7 @@ inline bool UTF8ToWide(const char* text, size_t len, TCharType* dest, size_t& wr
}
template <bool robust>
-inline TWtringBuf UTF8ToWide(const TStringBuf src, TUtf16String& dst) {
+inline TWtringBuf UTF8ToWide(const TStringBuf src, TUtf16String& dst) {
dst.ReserveAndResize(src.size());
size_t written = 0;
UTF8ToWideImpl<robust>(src.data(), src.size(), dst.begin(), written);
@@ -365,16 +365,16 @@ inline TUtf32StringBuf UTF8ToUTF32(const TStringBuf src, TUtf32String& dst) {
return dst;
}
-inline TWtringBuf UTF8ToWide(const TStringBuf src, TUtf16String& dst) {
+inline TWtringBuf UTF8ToWide(const TStringBuf src, TUtf16String& dst) {
return UTF8ToWide<false>(src, dst);
}
-inline TUtf16String UTF8ToWide(const char* text, size_t len) {
+inline TUtf16String UTF8ToWide(const char* text, size_t len) {
return UTF8ToWide<false>(text, len);
}
template <bool robust>
-inline TUtf16String UTF8ToWide(const TStringBuf s) {
+inline TUtf16String UTF8ToWide(const TStringBuf s) {
return UTF8ToWide<robust>(s.data(), s.size());
}
@@ -385,7 +385,7 @@ inline TUtf32String UTF8ToUTF32(const TStringBuf s) {
return r;
}
-inline TUtf16String UTF8ToWide(const TStringBuf s) {
+inline TUtf16String UTF8ToWide(const TStringBuf s) {
return UTF8ToWide<false>(s.data(), s.size());
}
@@ -410,7 +410,7 @@ constexpr size_t WideToUTF8BufferSize(const size_t inputStringSize) noexcept {
return inputStringSize * 4; // * 4 because the conversion functions can convert unicode character into maximum 4 bytes of UTF8
}
-inline TStringBuf WideToUTF8(const TWtringBuf src, TString& dst) {
+inline TStringBuf WideToUTF8(const TWtringBuf src, TString& dst) {
dst.ReserveAndResize(WideToUTF8BufferSize(src.size()));
size_t written = 0;
WideToUTF8(src.data(), src.size(), dst.begin(), written);
@@ -419,8 +419,8 @@ inline TStringBuf WideToUTF8(const TWtringBuf src, TString& dst) {
return dst;
}
-inline TString WideToUTF8(const wchar16* text, size_t len) {
- TString s = TString::Uninitialized(WideToUTF8BufferSize(len));
+inline TString WideToUTF8(const wchar16* text, size_t len) {
+ TString s = TString::Uninitialized(WideToUTF8BufferSize(len));
size_t written = 0;
WideToUTF8(text, len, s.begin(), written);
Y_ASSERT(s.size() >= written);
@@ -437,7 +437,7 @@ inline TString WideToUTF8(const wchar32* text, size_t len) {
return s;
}
-inline TString WideToUTF8(const TWtringBuf w) {
+inline TString WideToUTF8(const TWtringBuf w) {
return WideToUTF8(w.data(), w.size());
}
@@ -445,8 +445,8 @@ inline TString WideToUTF8(const TUtf32StringBuf w) {
return WideToUTF8(w.data(), w.size());
}
-inline TUtf16String UTF32ToWide(const wchar32* begin, size_t len) {
- TUtf16String res;
+inline TUtf16String UTF32ToWide(const wchar32* begin, size_t len) {
+ TUtf16String res;
res.reserve(len);
const wchar32* end = begin + len;
@@ -628,29 +628,29 @@ inline void Copy(const TChar1* first, size_t len, TChar2* result) {
//! template <typename InputIterator>
//! basic_string(InputIterator begin, InputIterator end, const Allocator& a = Allocator());
//! and the family of template member functions: append, assign, insert, replace.
-template <typename TStringType, typename TChar>
-inline TStringType CopyTo(const TChar* first, const TChar* last) {
+template <typename TStringType, typename TChar>
+inline TStringType CopyTo(const TChar* first, const TChar* last) {
Y_ASSERT(first <= last);
- TStringType str = TStringType::Uninitialized(last - first);
+ TStringType str = TStringType::Uninitialized(last - first);
Copy(first, last, str.begin());
return str;
}
-template <typename TStringType, typename TChar>
-inline TStringType CopyTo(const TChar* s, size_t n) {
- TStringType str = TStringType::Uninitialized(n);
+template <typename TStringType, typename TChar>
+inline TStringType CopyTo(const TChar* s, size_t n) {
+ TStringType str = TStringType::Uninitialized(n);
Copy(s, n, str.begin());
return str;
}
-inline TString WideToASCII(const TWtringBuf w) {
+inline TString WideToASCII(const TWtringBuf w) {
Y_ASSERT(IsStringASCII(w.begin(), w.end()));
- return CopyTo<TString>(w.begin(), w.end());
+ return CopyTo<TString>(w.begin(), w.end());
}
-inline TUtf16String ASCIIToWide(const TStringBuf s) {
+inline TUtf16String ASCIIToWide(const TStringBuf s) {
Y_ASSERT(IsStringASCII(s.begin(), s.end()));
- return CopyTo<TUtf16String>(s.begin(), s.end());
+ return CopyTo<TUtf16String>(s.begin(), s.end());
}
inline TUtf32String ASCIIToUTF32(const TStringBuf s) {
@@ -679,7 +679,7 @@ inline bool IsSpace(const TWtringBuf s) {
}
//! replaces multiple sequential whitespace characters with a single space character
-void Collapse(TUtf16String& w);
+void Collapse(TUtf16String& w);
//! @return new length
size_t Collapse(wchar16* s, size_t n);
@@ -810,7 +810,7 @@ TUtf32String ToTitleRet(const TUtf32StringBuf text, size_t pos = 0, size_t count
//! replaces the '<', '>' and '&' characters in string with '&lt;', '&gt;' and '&amp;' respectively
// insertBr=true - replace '\r' and '\n' with "<BR>"
template <bool insertBr>
-void EscapeHtmlChars(TUtf16String& str);
+void EscapeHtmlChars(TUtf16String& str);
//! returns number of characters in range. Handle surrogate pairs as one character.
inline size_t CountWideChars(const wchar16* b, const wchar16* e) {
diff --git a/util/charset/wide_ut.cpp b/util/charset/wide_ut.cpp
index 9eecf20ae0..d8f3233e73 100644
--- a/util/charset/wide_ut.cpp
+++ b/util/charset/wide_ut.cpp
@@ -53,7 +53,7 @@ namespace {
{0x01C5, 0x10428, 0x10429, 0x10447, 0x10441, 0x1C03, 0x00A0, 0x10428, 0x1043D, 0x10437}, // title
};
- TUtf16String CreateUnicodeText() {
+ TUtf16String CreateUnicodeText() {
const int len = 256;
wchar16 text[len] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x00 - 0x0F
@@ -77,10 +77,10 @@ namespace {
text[i] = static_cast<wchar16>(i + 0x0350); // 0x0410 - 0x044F
}
}
- return TUtf16String(text, len);
+ return TUtf16String(text, len);
}
- TString CreateUTF8Text() {
+ TString CreateUTF8Text() {
char text[] = {
'\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
'\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17', '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
@@ -107,12 +107,12 @@ namespace {
'\xd0', '\xbf', '\xd1', '\x80', '\xd1', '\x81', '\xd1', '\x82', '\xd1', '\x83', '\xd1', '\x84', '\xd1', '\x85', '\xd1', '\x86',
'\xd1', '\x87', '\xd1', '\x88', '\xd1', '\x89', '\xd1', '\x8a', '\xd1', '\x8b', '\xd1', '\x8c', '\xd1', '\x8d', '\xd1', '\x8e',
'\xd1', '\x8f'};
- return TString(text, Y_ARRAY_SIZE(text));
+ return TString(text, Y_ARRAY_SIZE(text));
}
//! use this function to dump UTF8 text into a file in case of any changes
// void DumpUTF8Text() {
- // TString s = WideToUTF8(UnicodeText);
+ // TString s = WideToUTF8(UnicodeText);
// std::ofstream f("utf8.txt");
// f << std::hex;
// for (int i = 0; i < (int)s.size(); ++i) {
@@ -455,12 +455,12 @@ void TConversionTest::TestWriteUTF8Char() {
}
static void TestSurrogates(const char* str, const wchar16* wide, size_t wideSize) {
- TUtf16String w = UTF8ToWide(str);
+ TUtf16String w = UTF8ToWide(str);
UNIT_ASSERT(w.size() == wideSize);
UNIT_ASSERT(!memcmp(w.c_str(), wide, wideSize));
- TString s = WideToUTF8(w);
+ TString s = WideToUTF8(w);
UNIT_ASSERT(s == str);
}
@@ -511,7 +511,7 @@ void TConversionTest::TestUTF8ToWide() {
UNIT_ASSERT_VALUES_EQUAL(WideToUTF8(UTF8ToWide(WideToUTF8(UTF8ToWide<true>(
"m\xFB\xB2\xA5\xAA\xAFyeuse.sexwebcamz.com")))),
- TString(
+ TString(
"m\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBD\xEF\xBF\xBDyeuse.sexwebcamz.com"));
}
@@ -571,7 +571,7 @@ void TConversionTest::TestUnicodeCase() {
}
void TConversionTest::TestUnicodeDetails() {
- TUtf16String temp;
+ TUtf16String temp;
for (wchar32 i = 0; i != NUnicode::UnicodeInstancesLimit(); ++i) {
temp.clear();
WriteSymbol(i, temp);
@@ -604,12 +604,12 @@ class TWideUtilTest: public TTestBase {
public:
void TestCollapse() {
- TUtf16String s;
+ TUtf16String s;
s.append(ws, Y_ARRAY_SIZE(ws)).append(3, 'a').append(ws, Y_ARRAY_SIZE(ws)).append(3, 'b').append(ws, Y_ARRAY_SIZE(ws));
Collapse(s);
UNIT_ASSERT(s == ASCIIToWide(" aaa bbb "));
{
- const TUtf16String w(ASCIIToWide(" a b c "));
+ const TUtf16String w(ASCIIToWide(" a b c "));
s = w;
Collapse(s);
UNIT_ASSERT(s == w);
@@ -637,7 +637,7 @@ public:
Collapse(s);
UNIT_ASSERT(s == ASCIIToWide("1 23 "));
{
- const TUtf16String w = ASCIIToWide(" ");
+ const TUtf16String w = ASCIIToWide(" ");
s = w;
Collapse(s);
UNIT_ASSERT(s == w);
@@ -659,11 +659,11 @@ public:
s.clear();
Collapse(s);
- UNIT_ASSERT(s == TUtf16String());
+ UNIT_ASSERT(s == TUtf16String());
}
void TestCollapseBuffer() {
- TUtf16String s;
+ TUtf16String s;
s.append(ws, Y_ARRAY_SIZE(ws)).append(3, 'a').append(ws, Y_ARRAY_SIZE(ws)).append(3, 'b').append(ws, Y_ARRAY_SIZE(ws));
size_t n = Collapse(s.begin(), s.size());
s.resize(n);
@@ -727,14 +727,14 @@ public:
s.clear();
n = Collapse(s.begin(), s.size());
UNIT_ASSERT(n == 0);
- UNIT_ASSERT(s == TUtf16String());
+ UNIT_ASSERT(s == TUtf16String());
}
void TestStrip() {
- TUtf16String s;
+ TUtf16String s;
Strip(s);
- UNIT_ASSERT(s == TUtf16String());
+ UNIT_ASSERT(s == TUtf16String());
StripLeft(s);
UNIT_ASSERT(s == TUtf16String());
StripRight(s);
@@ -742,7 +742,7 @@ public:
s = ASCIIToWide(" \t\r\n");
Strip(s);
- UNIT_ASSERT(s == TUtf16String());
+ UNIT_ASSERT(s == TUtf16String());
s = ASCIIToWide(" \t\r\n");
StripLeft(s);
UNIT_ASSERT(s == TUtf16String());
@@ -770,7 +770,7 @@ public:
StripRight(s);
UNIT_ASSERT(s == ASCIIToWide("\r\na\r\nb\t\tc"));
- const TUtf16String w(ASCIIToWide("a b"));
+ const TUtf16String w(ASCIIToWide("a b"));
s = w;
Strip(s);
UNIT_ASSERT(s == w);
@@ -792,25 +792,25 @@ public:
}
void TestIsSpace() {
- UNIT_ASSERT(!IsSpace(TUtf16String()));
+ UNIT_ASSERT(!IsSpace(TUtf16String()));
UNIT_ASSERT(IsSpace(ws, Y_ARRAY_SIZE(ws)));
- TUtf16String w;
- w.assign(ws, Y_ARRAY_SIZE(ws)).append(TUtf16String(1, '!'));
+ TUtf16String w;
+ w.assign(ws, Y_ARRAY_SIZE(ws)).append(TUtf16String(1, '!'));
UNIT_ASSERT(!IsSpace(w.c_str(), w.size()));
- w.assign(TUtf16String(1, '_')).append(ws, Y_ARRAY_SIZE(ws));
+ w.assign(TUtf16String(1, '_')).append(ws, Y_ARRAY_SIZE(ws));
UNIT_ASSERT(!IsSpace(w.c_str(), w.size()));
- w.assign(ws, Y_ARRAY_SIZE(ws)).append(TUtf16String(1, '$')).append(ws, Y_ARRAY_SIZE(ws));
+ w.assign(ws, Y_ARRAY_SIZE(ws)).append(TUtf16String(1, '$')).append(ws, Y_ARRAY_SIZE(ws));
UNIT_ASSERT(!IsSpace(w.c_str(), w.size()));
}
void TestEscapeHtmlChars() {
// characters from the first half of the ASCII table
for (wchar16 c = 1; c < 0x7F; ++c) {
- TUtf16String w(1, c);
+ TUtf16String w(1, c);
EscapeHtmlChars<false>(w);
switch (c) {
@@ -827,13 +827,13 @@ public:
UNIT_ASSERT(w == ASCIIToWide("&quot;"));
break;
default:
- UNIT_ASSERT(w == TUtf16String(1, c));
+ UNIT_ASSERT(w == TUtf16String(1, c));
break;
}
}
for (wchar16 c = 1; c < 0x7F; ++c) {
- TUtf16String w(1, c);
+ TUtf16String w(1, c);
EscapeHtmlChars<true>(w);
switch (c) {
@@ -854,7 +854,7 @@ public:
UNIT_ASSERT(w == ASCIIToWide("<BR>"));
break;
default:
- UNIT_ASSERT(w == TUtf16String(1, c));
+ UNIT_ASSERT(w == TUtf16String(1, c));
break;
}
}
@@ -877,11 +877,11 @@ public:
}
void TestWideString() {
- const TUtf16String original = UTF32ToWide(WideStringTestData[0], CaseTestDataSize);
- const TUtf16String lower = UTF32ToWide(WideStringTestData[1], CaseTestDataSize);
- const TUtf16String upper = UTF32ToWide(WideStringTestData[2], CaseTestDataSize);
- const TUtf16String title = UTF32ToWide(WideStringTestData[3], CaseTestDataSize);
- TUtf16String temp;
+ const TUtf16String original = UTF32ToWide(WideStringTestData[0], CaseTestDataSize);
+ const TUtf16String lower = UTF32ToWide(WideStringTestData[1], CaseTestDataSize);
+ const TUtf16String upper = UTF32ToWide(WideStringTestData[2], CaseTestDataSize);
+ const TUtf16String title = UTF32ToWide(WideStringTestData[3], CaseTestDataSize);
+ TUtf16String temp;
temp = original;
temp.to_lower();
@@ -907,7 +907,7 @@ public:
ToTitle(temp.begin(), temp.size());
UNIT_ASSERT(temp == title);
- TVector<wchar32> buffer(WideStringTestData[0], WideStringTestData[0] + CaseTestDataSize);
+ TVector<wchar32> buffer(WideStringTestData[0], WideStringTestData[0] + CaseTestDataSize);
std::reverse(buffer.begin(), buffer.end());
const TUtf16String reversed = UTF32ToWide(buffer.data(), buffer.size());
@@ -918,7 +918,7 @@ public:
void TestCountWideChars() {
UNIT_ASSERT_EQUAL(CountWideChars(UTF8ToWide("привет!")), 7);
- TUtf16String wideStr = UTF8ToWide("\xf0\x9f\x92\xb8привет!");
+ TUtf16String wideStr = UTF8ToWide("\xf0\x9f\x92\xb8привет!");
UNIT_ASSERT_EQUAL(wideStr.size(), 9);
UNIT_ASSERT_EQUAL(CountWideChars(wideStr), 8);
}