diff options
author | mowgli <mowgli@yandex-team.ru> | 2022-02-10 16:49:25 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:49:25 +0300 |
commit | 89afbbe4ca0e02e386dd4df08f7945f190dc1b84 (patch) | |
tree | c4772201af6215d48734691b8796e4cfc77c2ac8 /library/cpp/charset | |
parent | 7510cec1516d17cbc8d7749974e36aa45f547a26 (diff) | |
download | ydb-89afbbe4ca0e02e386dd4df08f7945f190dc1b84.tar.gz |
Restoring authorship annotation for <mowgli@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/charset')
-rw-r--r-- | library/cpp/charset/codepage.h | 4 | ||||
-rw-r--r-- | library/cpp/charset/codepage_ut.cpp | 134 | ||||
-rw-r--r-- | library/cpp/charset/wide.cpp | 32 | ||||
-rw-r--r-- | library/cpp/charset/wide.h | 186 | ||||
-rw-r--r-- | library/cpp/charset/wide_ut.cpp | 138 |
5 files changed, 247 insertions, 247 deletions
diff --git a/library/cpp/charset/codepage.h b/library/cpp/charset/codepage.h index 30a02a4610..419f5746bc 100644 --- a/library/cpp/charset/codepage.h +++ b/library/cpp/charset/codepage.h @@ -199,7 +199,7 @@ struct Encoder { return 0; return (unsigned char)Table[(ch >> 8) & 255][ch & 255]; } - + inline char Tr(wchar32 ch) const { char code = Code(ch); if (code == 0 && ch != 0) @@ -211,7 +211,7 @@ struct Encoder { inline unsigned char operator[](wchar32 ch) const { return Tr(ch); } - + void Tr(const wchar32* in, char* out, size_t len) const; void Tr(const wchar32* in, char* out) const; char* DefaultPlane; diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp index c3ac3ac478..7df4d27196 100644 --- a/library/cpp/charset/codepage_ut.cpp +++ b/library/cpp/charset/codepage_ut.cpp @@ -53,8 +53,8 @@ public: void TestToLower(); void TestToUpper(); - void TestCanEncode(); - + void TestCanEncode(); + inline void TestUpperLower() { const CodePage* cp = CodePageByCharset(CODES_ASCII); char tmp[100]; @@ -343,82 +343,82 @@ void TCodepageTest::TestToUpper() { ToUpper(data, n - 1); UNIT_ASSERT(strcmp(data, yandexUpperCase) == 0); } - -static void TestCanEncodeEmpty() { - TWtringBuf empty; - UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN)); - UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX)); - UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8)); -} - -static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) { - // char by char - for (size_t i = 0; i < text.size(); ++i) { - if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult) - ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding) + +static void TestCanEncodeEmpty() { + TWtringBuf empty; + UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN)); + UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX)); + UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8)); +} + +static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) { + // char by char + for (size_t i = 0; i < text.size(); ++i) { + if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult) + ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding) << " on '" << text.SubStr(i, 1) << "' (expected " << expectedResult << ")"; - } - // whole text - UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult); -} - -void TCodepageTest::TestCanEncode() { - TestCanEncodeEmpty(); - + } + // whole text + UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult); +} + +void TCodepageTest::TestCanEncode() { + TestCanEncodeEmpty(); + const TUtf16String lat = u"AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz"; - TestCanEncodeEach(lat, CODES_WIN, true); - TestCanEncodeEach(lat, CODES_YANDEX, true); - TestCanEncodeEach(lat, CODES_UTF8, true); - + TestCanEncodeEach(lat, CODES_WIN, true); + TestCanEncodeEach(lat, CODES_YANDEX, true); + TestCanEncodeEach(lat, CODES_UTF8, true); + const TUtf16String rus = u"АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя"; - TestCanEncodeEach(rus, CODES_WIN, true); - TestCanEncodeEach(rus, CODES_YANDEX, true); - TestCanEncodeEach(rus, CODES_UTF8, true); - + TestCanEncodeEach(rus, CODES_WIN, true); + TestCanEncodeEach(rus, CODES_YANDEX, true); + TestCanEncodeEach(rus, CODES_UTF8, true); + const TUtf16String ukr = u"ҐґЄєІіЇї"; - TestCanEncodeEach(ukr, CODES_WIN, true); - TestCanEncodeEach(ukr, CODES_YANDEX, true); - TestCanEncodeEach(ukr, CODES_UTF8, true); - + TestCanEncodeEach(ukr, CODES_WIN, true); + TestCanEncodeEach(ukr, CODES_YANDEX, true); + TestCanEncodeEach(ukr, CODES_UTF8, true); + const TUtf16String pol = u"ĄĆĘŁŃÓŚŹŻąćęłńóśźż"; - TestCanEncodeEach(pol, CODES_WIN, false); - TestCanEncodeEach(pol, CODES_YANDEX, true); - TestCanEncodeEach(pol, CODES_UTF_16BE, true); - + TestCanEncodeEach(pol, CODES_WIN, false); + TestCanEncodeEach(pol, CODES_YANDEX, true); + TestCanEncodeEach(pol, CODES_UTF_16BE, true); + const TUtf16String ger = u"ÄäÖöÜüß"; - TestCanEncodeEach(ger, CODES_WIN, false); - TestCanEncodeEach(ger, CODES_YANDEX, true); - TestCanEncodeEach(ger, CODES_UTF_16LE, true); - + TestCanEncodeEach(ger, CODES_WIN, false); + TestCanEncodeEach(ger, CODES_YANDEX, true); + TestCanEncodeEach(ger, CODES_UTF_16LE, true); + const TUtf16String fra1 = u"éàèùâêîôûëïç"; // supported in yandex cp const TUtf16String fra2 = u"ÉÀÈÙÂÊÎÔÛËÏŸÿÇ"; const TUtf16String fra3 = u"Æ挜"; - TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false); - TestCanEncodeEach(fra1, CODES_YANDEX, true); - TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false); - TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true); - + TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false); + TestCanEncodeEach(fra1, CODES_YANDEX, true); + TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false); + TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true); + const TUtf16String kaz = u"ӘәҒғҚқҢңӨөҰұҮүҺһ"; - TestCanEncodeEach(kaz, CODES_WIN, false); - TestCanEncodeEach(kaz, CODES_YANDEX, false); - TestCanEncodeEach(kaz, CODES_UTF8, true); - TestCanEncodeEach(kaz, CODES_KAZWIN, true); - + TestCanEncodeEach(kaz, CODES_WIN, false); + TestCanEncodeEach(kaz, CODES_YANDEX, false); + TestCanEncodeEach(kaz, CODES_UTF8, true); + TestCanEncodeEach(kaz, CODES_KAZWIN, true); + const TUtf16String tur1 = u"ĞİŞğş"; const TUtf16String tur = tur1 + u"ı"; - TestCanEncodeEach(tur, CODES_WIN, false); - TestCanEncodeEach(tur, CODES_YANDEX, false); - TestCanEncodeEach(tur, CODES_UTF8, true); - + TestCanEncodeEach(tur, CODES_WIN, false); + TestCanEncodeEach(tur, CODES_YANDEX, false); + TestCanEncodeEach(tur, CODES_UTF8, true); + const TUtf16String chi = u"新隶体新隸體"; - TestCanEncodeEach(chi, CODES_WIN, false); - TestCanEncodeEach(chi, CODES_YANDEX, false); - TestCanEncodeEach(chi, CODES_UTF8, true); - TestCanEncodeEach(chi, CODES_UTF_16LE, true); - + TestCanEncodeEach(chi, CODES_WIN, false); + TestCanEncodeEach(chi, CODES_YANDEX, false); + TestCanEncodeEach(chi, CODES_UTF8, true); + TestCanEncodeEach(chi, CODES_UTF_16LE, true); + const TUtf16String jap = u"漢字仮字交じり文"; - TestCanEncodeEach(jap, CODES_WIN, false); - TestCanEncodeEach(jap, CODES_YANDEX, false); - TestCanEncodeEach(jap, CODES_UTF8, true); - TestCanEncodeEach(jap, CODES_UTF_16BE, true); -} + TestCanEncodeEach(jap, CODES_WIN, false); + TestCanEncodeEach(jap, CODES_YANDEX, false); + TestCanEncodeEach(jap, CODES_UTF8, true); + TestCanEncodeEach(jap, CODES_UTF_16BE, true); +} diff --git a/library/cpp/charset/wide.cpp b/library/cpp/charset/wide.cpp index d12b293817..ae75f45355 100644 --- a/library/cpp/charset/wide.cpp +++ b/library/cpp/charset/wide.cpp @@ -1,18 +1,18 @@ #include "wide.h" -bool CanBeEncoded(TWtringBuf text, ECharset encoding) { - const size_t LEN = 16; - const size_t BUFSIZE = LEN * 4; - char encodeBuf[BUFSIZE]; - wchar16 decodeBuf[BUFSIZE]; - - while (!text.empty()) { - TWtringBuf src = text.NextTokAt(LEN); - TStringBuf encoded = NDetail::NBaseOps::Recode(src, encodeBuf, encoding); - TWtringBuf decoded = NDetail::NBaseOps::Recode(encoded, decodeBuf, encoding); - if (decoded != src) - return false; - } - - return true; -} +bool CanBeEncoded(TWtringBuf text, ECharset encoding) { + const size_t LEN = 16; + const size_t BUFSIZE = LEN * 4; + char encodeBuf[BUFSIZE]; + wchar16 decodeBuf[BUFSIZE]; + + while (!text.empty()) { + TWtringBuf src = text.NextTokAt(LEN); + TStringBuf encoded = NDetail::NBaseOps::Recode(src, encodeBuf, encoding); + TWtringBuf decoded = NDetail::NBaseOps::Recode(encoded, decodeBuf, encoding); + if (decoded != src) + return false; + } + + return true; +} diff --git a/library/cpp/charset/wide.h b/library/cpp/charset/wide.h index 32d30e849e..c8f78a9eb4 100644 --- a/library/cpp/charset/wide.h +++ b/library/cpp/charset/wide.h @@ -47,61 +47,61 @@ inline void CharToWide(const char* text, size_t len, TCharType* dest, const Code } } -namespace NDetail { - namespace NBaseOps { - // Template interface base recoding drivers, do not perform any memory management, - // do not care about buffer size, so supplied @dst - // should have enough room for the result (with proper reserve for the worst case) - - // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string. - +namespace NDetail { + namespace NBaseOps { + // Template interface base recoding drivers, do not perform any memory management, + // do not care about buffer size, so supplied @dst + // should have enough room for the result (with proper reserve for the worst case) + + // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string. + template <typename TCharType> inline TBasicStringBuf<TCharType> RecodeSingleByteChar(const TStringBuf src, TCharType* dst, const CodePage& cp) { Y_ASSERT(cp.SingleByteCodepage()); ::CharToWide(src.data(), src.size(), dst, cp); return TBasicStringBuf<TCharType>(dst, src.size()); - } - + } + template <typename TCharType> inline TStringBuf RecodeSingleByteChar(const TBasicStringBuf<TCharType> src, char* dst, const CodePage& cp) { Y_ASSERT(cp.SingleByteCodepage()); ::WideToChar(src.data(), src.size(), dst, cp.CPEnum); return TStringBuf(dst, src.size()); - } - + } + template <typename TCharType> inline TBasicStringBuf<TCharType> RecodeMultiByteChar(const TStringBuf src, TCharType* dst, ECharset encoding) { Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding)); - size_t read = 0; - size_t written = 0; + size_t read = 0; + size_t written = 0; ::NICONVPrivate::RecodeToUnicode(encoding, src.data(), dst, src.size(), src.size(), read, written); return TBasicStringBuf<TCharType>(dst, written); - } - + } + template <typename TCharType> inline TStringBuf RecodeMultiByteChar(const TBasicStringBuf<TCharType> src, char* dst, ECharset encoding) { Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding)); - size_t read = 0; - size_t written = 0; + size_t read = 0; + size_t written = 0; ::NICONVPrivate::RecodeFromUnicode(encoding, src.data(), dst, src.size(), src.size() * 3, read, written); - return TStringBuf(dst, written); - } - + return TStringBuf(dst, written); + } + template <typename TCharType> inline TBasicStringBuf<TCharType> RecodeUtf8(const TStringBuf src, TCharType* dst) { - size_t len = 0; + size_t len = 0; if (!::UTF8ToWide(src.data(), src.size(), dst, len)) - ythrow yexception() << "Invalid UTF8: \"" << src.SubStr(0, 50) << (src.size() > 50 ? "...\"" : "\""); + ythrow yexception() << "Invalid UTF8: \"" << src.SubStr(0, 50) << (src.size() > 50 ? "...\"" : "\""); return TBasicStringBuf<TCharType>(dst, len); - } - + } + template <typename TCharType> inline TStringBuf RecodeUtf8(const TBasicStringBuf<TCharType> src, char* dst) { - size_t len = 0; + size_t len = 0; ::WideToUTF8(src.data(), src.size(), dst, len); - return TStringBuf(dst, len); - } - + return TStringBuf(dst, len); + } + // Select one of re-coding methods from above, based on provided @encoding template <typename TCharFrom, typename TCharTo> @@ -115,73 +115,73 @@ namespace NDetail { } } - - template <typename TCharFrom> - struct TRecodeTraits; - - template <> - struct TRecodeTraits<char> { + + template <typename TCharFrom> + struct TRecodeTraits; + + template <> + struct TRecodeTraits<char> { using TCharTo = wchar16; using TStringBufTo = TWtringBuf; using TStringTo = TUtf16String; enum { ReserveSize = 4 }; // How many TCharFrom characters we should reserve for one TCharTo character in worst case // Here an unicode character can be converted up to 4 bytes of UTF8 - }; - - template <> - struct TRecodeTraits<wchar16> { + }; + + template <> + struct TRecodeTraits<wchar16> { using TCharTo = char; using TStringBufTo = TStringBuf; using TStringTo = TString; enum { ReserveSize = 2 }; // possible surrogate pairs ? - }; - - // Operations with destination buffer where recoded string will be written - template <typename TResult> - struct TRecodeResultOps { + }; + + // Operations with destination buffer where recoded string will be written + template <typename TResult> + struct TRecodeResultOps { // default implementation will work with TString and TUtf16String - 99% of usage using TResultChar = typename TResult::char_type; - - static inline size_t Size(const TResult& dst) { - return dst.size(); - } - - static inline TResultChar* Reserve(TResult& dst, size_t len) { - dst.ReserveAndResize(len); - return dst.begin(); - } - - static inline void Truncate(TResult& dst, size_t len) { - dst.resize(len); - } - }; - - // Main template interface for recoding in both directions - - template <typename TCharFrom, typename TResult> + + static inline size_t Size(const TResult& dst) { + return dst.size(); + } + + static inline TResultChar* Reserve(TResult& dst, size_t len) { + dst.ReserveAndResize(len); + return dst.begin(); + } + + static inline void Truncate(TResult& dst, size_t len) { + dst.resize(len); + } + }; + + // Main template interface for recoding in both directions + + template <typename TCharFrom, typename TResult> typename TRecodeTraits<TCharFrom>::TStringBufTo Recode(const TBasicStringBuf<TCharFrom> src, TResult& dst, ECharset encoding) { using TCharTo = typename TRecodeTraits<TCharFrom>::TCharTo; - // make enough room for re-coded string - TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, src.size() * TRecodeTraits<TCharTo>::ReserveSize); - // do re-coding + // make enough room for re-coded string + TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, src.size() * TRecodeTraits<TCharTo>::ReserveSize); + // do re-coding TBasicStringBuf<TCharTo> res = NBaseOps::Recode(src, dstbuf, encoding); - // truncate result back to proper size - TRecodeResultOps<TResult>::Truncate(dst, res.size()); - return res; - } - - // appending version of Recode() - template <typename TCharFrom, typename TResult> + // truncate result back to proper size + TRecodeResultOps<TResult>::Truncate(dst, res.size()); + return res; + } + + // appending version of Recode() + template <typename TCharFrom, typename TResult> typename TRecodeTraits<TCharFrom>::TStringBufTo RecodeAppend(const TBasicStringBuf<TCharFrom> src, TResult& dst, ECharset encoding) { using TCharTo = typename TRecodeTraits<TCharFrom>::TCharTo; - size_t dstOrigSize = TRecodeResultOps<TResult>::Size(dst); - TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, dstOrigSize + src.size() * TRecodeTraits<TCharTo>::ReserveSize); + size_t dstOrigSize = TRecodeResultOps<TResult>::Size(dst); + TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, dstOrigSize + src.size() * TRecodeTraits<TCharTo>::ReserveSize); TBasicStringBuf<TCharTo> appended = NBaseOps::Recode(src, dstbuf + dstOrigSize, encoding); - size_t dstFinalSize = dstOrigSize + appended.size(); - TRecodeResultOps<TResult>::Truncate(dst, dstFinalSize); + size_t dstFinalSize = dstOrigSize + appended.size(); + TRecodeResultOps<TResult>::Truncate(dst, dstFinalSize); return TBasicStringBuf<TCharTo>(dstbuf, dstFinalSize); - } - + } + // special implementation for robust utf8 functions template <typename TResult> TWtringBuf RecodeUTF8Robust(const TStringBuf src, TResult& dst) { @@ -197,31 +197,31 @@ namespace NDetail { return TWtringBuf(dstbuf, written); } - template <typename TCharFrom> + template <typename TCharFrom> inline typename TRecodeTraits<TCharFrom>::TStringTo Recode(const TBasicStringBuf<TCharFrom> src, ECharset encoding) { - typename TRecodeTraits<TCharFrom>::TStringTo res; - Recode<TCharFrom>(src, res, encoding); - return res; - } + typename TRecodeTraits<TCharFrom>::TStringTo res; + Recode<TCharFrom>(src, res, encoding); + return res; + } } - -// Write result into @dst. Return string-buffer pointing to re-coded content of @dst. - + +// Write result into @dst. Return string-buffer pointing to re-coded content of @dst. + template <bool robust> inline TWtringBuf CharToWide(const TStringBuf src, TUtf16String& dst, ECharset encoding) { if (robust && CODES_UTF8 == encoding) return ::NDetail::RecodeUTF8Robust(src, dst); return ::NDetail::Recode<char>(src, dst, encoding); -} - +} + inline TWtringBuf CharToWide(const TStringBuf src, TUtf16String& dst, ECharset encoding) { return ::NDetail::Recode<char>(src, dst, encoding); } inline TStringBuf WideToChar(const TWtringBuf src, TString& dst, ECharset encoding) { return ::NDetail::Recode<wchar16>(src, dst, encoding); -} - +} + //! calls either to @c WideToUTF8 or @c WideToChar depending on the encoding type inline TString WideToChar(const wchar16* text, size_t len, ECharset enc) { if (NCodepagePrivate::NativeCodepage(enc)) { @@ -301,6 +301,6 @@ inline TUtf16String CharToWide(const TStringBuf s, const CodePage& cp) { return CharToWide(s.data(), s.size(), cp); } -// true if @text can be fully encoded to specified @encoding, -// with possibility to recover exact original text after decoding -bool CanBeEncoded(TWtringBuf text, ECharset encoding); +// true if @text can be fully encoded to specified @encoding, +// with possibility to recover exact original text after decoding +bool CanBeEncoded(TWtringBuf text, ECharset encoding); diff --git a/library/cpp/charset/wide_ut.cpp b/library/cpp/charset/wide_ut.cpp index 78947d51ba..63112f432c 100644 --- a/library/cpp/charset/wide_ut.cpp +++ b/library/cpp/charset/wide_ut.cpp @@ -9,7 +9,7 @@ #include <util/generic/hash_set.h> #include <algorithm> - + namespace { //! three UTF8 encoded russian letters (A, B, V) const char yandexCyrillicAlphabet[] = @@ -143,8 +143,8 @@ public: void TestCharToWide(); void TestWideToChar(); void TestYandexEncoding(); - void TestRecodeIntoString(); - void TestRecodeAppend(); + void TestRecodeIntoString(); + void TestRecodeAppend(); void TestRecode(); void TestUnicodeLimit(); }; @@ -228,114 +228,114 @@ void TConversionTest::TestYandexEncoding() { } } -void TConversionTest::TestRecodeIntoString() { +void TConversionTest::TestRecodeIntoString() { TString sYandex(UnicodeText.size() * 4, 'x'); const char* sdata = sYandex.data(); - TStringBuf sres = NDetail::Recode<wchar16>(UnicodeText, sYandex, CODES_YANDEX); + TStringBuf sres = NDetail::Recode<wchar16>(UnicodeText, sYandex, CODES_YANDEX); UNIT_ASSERT(sYandex == YandexText); // same content UNIT_ASSERT(sYandex.data() == sdata); // reserved buffer reused UNIT_ASSERT(sYandex.data() == sres.data()); // same buffer UNIT_ASSERT(sYandex.size() == sres.size()); // same size TEST_WCHAR32(sYandex, UnicodeText, CODES_YANDEX); - + TUtf16String sUnicode; - sUnicode.reserve(YandexText.size() * 4); + sUnicode.reserve(YandexText.size() * 4); const wchar16* wdata = sUnicode.data(); - TWtringBuf wres = NDetail::Recode<char>(YandexText, sUnicode, CODES_YANDEX); + TWtringBuf wres = NDetail::Recode<char>(YandexText, sUnicode, CODES_YANDEX); UNIT_ASSERT(sUnicode == UnicodeText); // same content UNIT_ASSERT(sUnicode.data() == wdata); // reserved buffer reused UNIT_ASSERT(sUnicode.data() == wres.data()); // same buffer UNIT_ASSERT(sUnicode.size() == wres.size()); // same size - + TString sUtf8 = " "; - size_t scap = sUtf8.capacity(); - sres = NDetail::Recode<wchar16>(UnicodeText, sUtf8, CODES_UTF8); + size_t scap = sUtf8.capacity(); + sres = NDetail::Recode<wchar16>(UnicodeText, sUtf8, CODES_UTF8); UNIT_ASSERT(sUtf8 == UTF8Text); // same content UNIT_ASSERT(sUtf8.capacity() > scap); // increased buffer capacity (supplied was too small) UNIT_ASSERT(sUtf8.data() == sres.data()); // same buffer UNIT_ASSERT(sUtf8.size() == sres.size()); // same size TEST_WCHAR32(sUtf8, UnicodeText, CODES_UTF8); - - sUnicode.clear(); + + sUnicode.clear(); wdata = sUnicode.data(); TUtf16String copy = sUnicode; // increase ref-counter - wres = NDetail::Recode<char>(UTF8Text, sUnicode, CODES_UTF8); + wres = NDetail::Recode<char>(UTF8Text, sUnicode, CODES_UTF8); UNIT_ASSERT(sUnicode == UnicodeText); // same content #ifndef TSTRING_IS_STD_STRING UNIT_ASSERT(sUnicode.data() != wdata); // re-allocated (shared buffer supplied) UNIT_ASSERT(sUnicode.data() == wres.data()); // same buffer #endif UNIT_ASSERT(sUnicode.size() == wres.size()); // same content -} - +} + static TString GenerateJunk(size_t seed) { TString res; - size_t hash = NumericHash(seed); - size_t size = hash % 1024; - res.reserve(size); - for (size_t i = 0; i < size; ++i) - res += static_cast<char>(NumericHash(hash + i) % 256); - return res; -} - -void TConversionTest::TestRecodeAppend() { - { + size_t hash = NumericHash(seed); + size_t size = hash % 1024; + res.reserve(size); + for (size_t i = 0; i < size; ++i) + res += static_cast<char>(NumericHash(hash + i) % 256); + return res; +} + +void TConversionTest::TestRecodeAppend() { + { TString s1, s2; NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX); - UNIT_ASSERT(s1.empty()); - - NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_WIN); - s2 += WideToChar(UnicodeText, CODES_WIN); - UNIT_ASSERT_EQUAL(s1, s2); - - NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_YANDEX); - s2 += WideToChar(UnicodeText, CODES_YANDEX); - UNIT_ASSERT_EQUAL(s1, s2); - + UNIT_ASSERT(s1.empty()); + + NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_WIN); + s2 += WideToChar(UnicodeText, CODES_WIN); + UNIT_ASSERT_EQUAL(s1, s2); + + NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_YANDEX); + s2 += WideToChar(UnicodeText, CODES_YANDEX); + UNIT_ASSERT_EQUAL(s1, s2); + NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX); - UNIT_ASSERT_EQUAL(s1, s2); - - NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_UTF8); + UNIT_ASSERT_EQUAL(s1, s2); + + NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_UTF8); s2 += WideToUTF8(UnicodeText); - UNIT_ASSERT_EQUAL(s1, s2); + UNIT_ASSERT_EQUAL(s1, s2); - for (size_t i = 0; i < 100; ++i) { + for (size_t i = 0; i < 100; ++i) { TUtf16String junk = CharToWide(GenerateJunk(i), CODES_YANDEX); - NDetail::RecodeAppend<wchar16>(junk, s1, CODES_UTF8); + NDetail::RecodeAppend<wchar16>(junk, s1, CODES_UTF8); s2 += WideToUTF8(junk); - UNIT_ASSERT_EQUAL(s1, s2); - } - } - - { + UNIT_ASSERT_EQUAL(s1, s2); + } + } + + { TUtf16String s1, s2; NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX); - UNIT_ASSERT(s1.empty()); - - NDetail::RecodeAppend<char>(YandexText, s1, CODES_WIN); - s2 += CharToWide(YandexText, CODES_WIN); - UNIT_ASSERT_EQUAL(s1, s2); - - NDetail::RecodeAppend<char>(YandexText, s1, CODES_YANDEX); - s2 += CharToWide(YandexText, CODES_YANDEX); - UNIT_ASSERT_EQUAL(s1, s2); - + UNIT_ASSERT(s1.empty()); + + NDetail::RecodeAppend<char>(YandexText, s1, CODES_WIN); + s2 += CharToWide(YandexText, CODES_WIN); + UNIT_ASSERT_EQUAL(s1, s2); + + NDetail::RecodeAppend<char>(YandexText, s1, CODES_YANDEX); + s2 += CharToWide(YandexText, CODES_YANDEX); + UNIT_ASSERT_EQUAL(s1, s2); + NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX); - UNIT_ASSERT_EQUAL(s1, s2); + UNIT_ASSERT_EQUAL(s1, s2); - NDetail::RecodeAppend<char>(UTF8Text, s1, CODES_UTF8); + NDetail::RecodeAppend<char>(UTF8Text, s1, CODES_UTF8); s2 += UTF8ToWide(UTF8Text); - UNIT_ASSERT_EQUAL(s1, s2); - - for (size_t i = 0; i < 100; ++i) { + UNIT_ASSERT_EQUAL(s1, s2); + + for (size_t i = 0; i < 100; ++i) { TString junk = GenerateJunk(i); - NDetail::RecodeAppend<char>(junk, s1, CODES_YANDEX); - s2 += CharToWide(junk, CODES_YANDEX); - UNIT_ASSERT_EQUAL(s1, s2); - } - } -} - + NDetail::RecodeAppend<char>(junk, s1, CODES_YANDEX); + s2 += CharToWide(junk, CODES_YANDEX); + UNIT_ASSERT_EQUAL(s1, s2); + } + } +} + template <> void Out<RECODE_RESULT>(IOutputStream& out, RECODE_RESULT val) { out << int(val); |