about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/charset
diff options
context:
space:
mode:
author: mowgli <mowgli@yandex-team.ru> 2022-02-10 16:49:25 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:49:25 +0300
commit: 89afbbe4ca0e02e386dd4df08f7945f190dc1b84 (patch)
tree: c4772201af6215d48734691b8796e4cfc77c2ac8 /library/cpp/charset
parent: 7510cec1516d17cbc8d7749974e36aa45f547a26 (diff)
download: ydb-89afbbe4ca0e02e386dd4df08f7945f190dc1b84.tar.gz
Restoring authorship annotation for <mowgli@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/charset')
-rw-r--r-- library/cpp/charset/codepage.h 4
-rw-r--r-- library/cpp/charset/codepage_ut.cpp 134
-rw-r--r-- library/cpp/charset/wide.cpp 32
-rw-r--r-- library/cpp/charset/wide.h 186
-rw-r--r-- library/cpp/charset/wide_ut.cpp 138
5 files changed, 247 insertions, 247 deletions
diff --git a/library/cpp/charset/codepage.h b/library/cpp/charset/codepage.h
index 30a02a4610..419f5746bc 100644
--- a/library/cpp/charset/codepage.h
+++ b/library/cpp/charset/codepage.h
@@ -199,7 +199,7 @@ struct Encoder {
return 0;
return (unsigned char)Table[(ch >> 8) & 255][ch & 255];
}
-
+
inline char Tr(wchar32 ch) const {
char code = Code(ch);
if (code == 0 && ch != 0)
@@ -211,7 +211,7 @@ struct Encoder {
inline unsigned char operator[](wchar32 ch) const {
return Tr(ch);
}
-
+
void Tr(const wchar32* in, char* out, size_t len) const;
void Tr(const wchar32* in, char* out) const;
char* DefaultPlane;
diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp
index c3ac3ac478..7df4d27196 100644
--- a/library/cpp/charset/codepage_ut.cpp
+++ b/library/cpp/charset/codepage_ut.cpp
@@ -53,8 +53,8 @@ public:
void TestToLower();
void TestToUpper();
- void TestCanEncode();
-
+ void TestCanEncode();
+
inline void TestUpperLower() {
const CodePage* cp = CodePageByCharset(CODES_ASCII);
char tmp[100];
@@ -343,82 +343,82 @@ void TCodepageTest::TestToUpper() {
ToUpper(data, n - 1);
UNIT_ASSERT(strcmp(data, yandexUpperCase) == 0);
}
-
-static void TestCanEncodeEmpty() {
- TWtringBuf empty;
- UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN));
- UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX));
- UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8));
-}
-
-static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) {
- // char by char
- for (size_t i = 0; i < text.size(); ++i) {
- if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult)
- ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding)
+
+static void TestCanEncodeEmpty() {
+ TWtringBuf empty;
+ UNIT_ASSERT(CanBeEncoded(empty, CODES_WIN));
+ UNIT_ASSERT(CanBeEncoded(empty, CODES_YANDEX));
+ UNIT_ASSERT(CanBeEncoded(empty, CODES_UTF8));
+}
+
+static void TestCanEncodeEach(const TWtringBuf& text, ECharset encoding, bool expectedResult) {
+ // char by char
+ for (size_t i = 0; i < text.size(); ++i) {
+ if (CanBeEncoded(text.SubStr(i, 1), encoding) != expectedResult)
+ ythrow yexception() << "assertion failed: encoding " << NameByCharset(encoding)
<< " on '" << text.SubStr(i, 1) << "' (expected " << expectedResult << ")";
- }
- // whole text
- UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult);
-}
-
-void TCodepageTest::TestCanEncode() {
- TestCanEncodeEmpty();
-
+ }
+ // whole text
+ UNIT_ASSERT_EQUAL(CanBeEncoded(text, encoding), expectedResult);
+}
+
+void TCodepageTest::TestCanEncode() {
+ TestCanEncodeEmpty();
+
const TUtf16String lat = u"AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";
- TestCanEncodeEach(lat, CODES_WIN, true);
- TestCanEncodeEach(lat, CODES_YANDEX, true);
- TestCanEncodeEach(lat, CODES_UTF8, true);
-
+ TestCanEncodeEach(lat, CODES_WIN, true);
+ TestCanEncodeEach(lat, CODES_YANDEX, true);
+ TestCanEncodeEach(lat, CODES_UTF8, true);
+
const TUtf16String rus = u"АаБбВвГгДдЕеЁёЖжЗзИиЙйКкЛлМмНнОоПпРрСсТтУуФфХхЦцЧчШшЩщЪъЫыЬьЭэЮюЯя";
- TestCanEncodeEach(rus, CODES_WIN, true);
- TestCanEncodeEach(rus, CODES_YANDEX, true);
- TestCanEncodeEach(rus, CODES_UTF8, true);
-
+ TestCanEncodeEach(rus, CODES_WIN, true);
+ TestCanEncodeEach(rus, CODES_YANDEX, true);
+ TestCanEncodeEach(rus, CODES_UTF8, true);
+
const TUtf16String ukr = u"ҐґЄєІіЇї";
- TestCanEncodeEach(ukr, CODES_WIN, true);
- TestCanEncodeEach(ukr, CODES_YANDEX, true);
- TestCanEncodeEach(ukr, CODES_UTF8, true);
-
+ TestCanEncodeEach(ukr, CODES_WIN, true);
+ TestCanEncodeEach(ukr, CODES_YANDEX, true);
+ TestCanEncodeEach(ukr, CODES_UTF8, true);
+
const TUtf16String pol = u"ĄĆĘŁŃÓŚŹŻąćęłńóśźż";
- TestCanEncodeEach(pol, CODES_WIN, false);
- TestCanEncodeEach(pol, CODES_YANDEX, true);
- TestCanEncodeEach(pol, CODES_UTF_16BE, true);
-
+ TestCanEncodeEach(pol, CODES_WIN, false);
+ TestCanEncodeEach(pol, CODES_YANDEX, true);
+ TestCanEncodeEach(pol, CODES_UTF_16BE, true);
+
const TUtf16String ger = u"ÄäÖöÜüß";
- TestCanEncodeEach(ger, CODES_WIN, false);
- TestCanEncodeEach(ger, CODES_YANDEX, true);
- TestCanEncodeEach(ger, CODES_UTF_16LE, true);
-
+ TestCanEncodeEach(ger, CODES_WIN, false);
+ TestCanEncodeEach(ger, CODES_YANDEX, true);
+ TestCanEncodeEach(ger, CODES_UTF_16LE, true);
+
const TUtf16String fra1 = u"éàèùâêîôûëïç"; // supported in yandex cp
const TUtf16String fra2 = u"ÉÀÈÙÂÊÎÔÛËÏŸÿÇ";
const TUtf16String fra3 = u"Æ挜";
- TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false);
- TestCanEncodeEach(fra1, CODES_YANDEX, true);
- TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false);
- TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true);
-
+ TestCanEncodeEach(fra1 + fra2 + fra3, CODES_WIN, false);
+ TestCanEncodeEach(fra1, CODES_YANDEX, true);
+ TestCanEncodeEach(fra2 + fra3, CODES_YANDEX, false);
+ TestCanEncodeEach(fra1 + fra2 + fra3, CODES_UTF8, true);
+
const TUtf16String kaz = u"ӘәҒғҚқҢңӨөҰұҮүҺһ";
- TestCanEncodeEach(kaz, CODES_WIN, false);
- TestCanEncodeEach(kaz, CODES_YANDEX, false);
- TestCanEncodeEach(kaz, CODES_UTF8, true);
- TestCanEncodeEach(kaz, CODES_KAZWIN, true);
-
+ TestCanEncodeEach(kaz, CODES_WIN, false);
+ TestCanEncodeEach(kaz, CODES_YANDEX, false);
+ TestCanEncodeEach(kaz, CODES_UTF8, true);
+ TestCanEncodeEach(kaz, CODES_KAZWIN, true);
+
const TUtf16String tur1 = u"ĞİŞğş";
const TUtf16String tur = tur1 + u"ı";
- TestCanEncodeEach(tur, CODES_WIN, false);
- TestCanEncodeEach(tur, CODES_YANDEX, false);
- TestCanEncodeEach(tur, CODES_UTF8, true);
-
+ TestCanEncodeEach(tur, CODES_WIN, false);
+ TestCanEncodeEach(tur, CODES_YANDEX, false);
+ TestCanEncodeEach(tur, CODES_UTF8, true);
+
const TUtf16String chi = u"新隶体新隸體";
- TestCanEncodeEach(chi, CODES_WIN, false);
- TestCanEncodeEach(chi, CODES_YANDEX, false);
- TestCanEncodeEach(chi, CODES_UTF8, true);
- TestCanEncodeEach(chi, CODES_UTF_16LE, true);
-
+ TestCanEncodeEach(chi, CODES_WIN, false);
+ TestCanEncodeEach(chi, CODES_YANDEX, false);
+ TestCanEncodeEach(chi, CODES_UTF8, true);
+ TestCanEncodeEach(chi, CODES_UTF_16LE, true);
+
const TUtf16String jap = u"漢字仮字交じり文";
- TestCanEncodeEach(jap, CODES_WIN, false);
- TestCanEncodeEach(jap, CODES_YANDEX, false);
- TestCanEncodeEach(jap, CODES_UTF8, true);
- TestCanEncodeEach(jap, CODES_UTF_16BE, true);
-}
+ TestCanEncodeEach(jap, CODES_WIN, false);
+ TestCanEncodeEach(jap, CODES_YANDEX, false);
+ TestCanEncodeEach(jap, CODES_UTF8, true);
+ TestCanEncodeEach(jap, CODES_UTF_16BE, true);
+}
diff --git a/library/cpp/charset/wide.cpp b/library/cpp/charset/wide.cpp
index d12b293817..ae75f45355 100644
--- a/library/cpp/charset/wide.cpp
+++ b/library/cpp/charset/wide.cpp
@@ -1,18 +1,18 @@
#include "wide.h"
-bool CanBeEncoded(TWtringBuf text, ECharset encoding) {
- const size_t LEN = 16;
- const size_t BUFSIZE = LEN * 4;
- char encodeBuf[BUFSIZE];
- wchar16 decodeBuf[BUFSIZE];
-
- while (!text.empty()) {
- TWtringBuf src = text.NextTokAt(LEN);
- TStringBuf encoded = NDetail::NBaseOps::Recode(src, encodeBuf, encoding);
- TWtringBuf decoded = NDetail::NBaseOps::Recode(encoded, decodeBuf, encoding);
- if (decoded != src)
- return false;
- }
-
- return true;
-}
+bool CanBeEncoded(TWtringBuf text, ECharset encoding) {
+ const size_t LEN = 16;
+ const size_t BUFSIZE = LEN * 4;
+ char encodeBuf[BUFSIZE];
+ wchar16 decodeBuf[BUFSIZE];
+
+ while (!text.empty()) {
+ TWtringBuf src = text.NextTokAt(LEN);
+ TStringBuf encoded = NDetail::NBaseOps::Recode(src, encodeBuf, encoding);
+ TWtringBuf decoded = NDetail::NBaseOps::Recode(encoded, decodeBuf, encoding);
+ if (decoded != src)
+ return false;
+ }
+
+ return true;
+}
diff --git a/library/cpp/charset/wide.h b/library/cpp/charset/wide.h
index 32d30e849e..c8f78a9eb4 100644
--- a/library/cpp/charset/wide.h
+++ b/library/cpp/charset/wide.h
@@ -47,61 +47,61 @@ inline void CharToWide(const char* text, size_t len, TCharType* dest, const Code
}
}
-namespace NDetail {
- namespace NBaseOps {
- // Template interface base recoding drivers, do not perform any memory management,
- // do not care about buffer size, so supplied @dst
- // should have enough room for the result (with proper reserve for the worst case)
-
- // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string.
-
+namespace NDetail {
+ namespace NBaseOps {
+ // Template interface base recoding drivers, do not perform any memory management,
+ // do not care about buffer size, so supplied @dst
+ // should have enough room for the result (with proper reserve for the worst case)
+
+ // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string.
+
template <typename TCharType>
inline TBasicStringBuf<TCharType> RecodeSingleByteChar(const TStringBuf src, TCharType* dst, const CodePage& cp) {
Y_ASSERT(cp.SingleByteCodepage());
::CharToWide(src.data(), src.size(), dst, cp);
return TBasicStringBuf<TCharType>(dst, src.size());
- }
-
+ }
+
template <typename TCharType>
inline TStringBuf RecodeSingleByteChar(const TBasicStringBuf<TCharType> src, char* dst, const CodePage& cp) {
Y_ASSERT(cp.SingleByteCodepage());
::WideToChar(src.data(), src.size(), dst, cp.CPEnum);
return TStringBuf(dst, src.size());
- }
-
+ }
+
template <typename TCharType>
inline TBasicStringBuf<TCharType> RecodeMultiByteChar(const TStringBuf src, TCharType* dst, ECharset encoding) {
Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding));
- size_t read = 0;
- size_t written = 0;
+ size_t read = 0;
+ size_t written = 0;
::NICONVPrivate::RecodeToUnicode(encoding, src.data(), dst, src.size(), src.size(), read, written);
return TBasicStringBuf<TCharType>(dst, written);
- }
-
+ }
+
template <typename TCharType>
inline TStringBuf RecodeMultiByteChar(const TBasicStringBuf<TCharType> src, char* dst, ECharset encoding) {
Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding));
- size_t read = 0;
- size_t written = 0;
+ size_t read = 0;
+ size_t written = 0;
::NICONVPrivate::RecodeFromUnicode(encoding, src.data(), dst, src.size(), src.size() * 3, read, written);
- return TStringBuf(dst, written);
- }
-
+ return TStringBuf(dst, written);
+ }
+
template <typename TCharType>
inline TBasicStringBuf<TCharType> RecodeUtf8(const TStringBuf src, TCharType* dst) {
- size_t len = 0;
+ size_t len = 0;
if (!::UTF8ToWide(src.data(), src.size(), dst, len))
- ythrow yexception() << "Invalid UTF8: \"" << src.SubStr(0, 50) << (src.size() > 50 ? "...\"" : "\"");
+ ythrow yexception() << "Invalid UTF8: \"" << src.SubStr(0, 50) << (src.size() > 50 ? "...\"" : "\"");
return TBasicStringBuf<TCharType>(dst, len);
- }
-
+ }
+
template <typename TCharType>
inline TStringBuf RecodeUtf8(const TBasicStringBuf<TCharType> src, char* dst) {
- size_t len = 0;
+ size_t len = 0;
::WideToUTF8(src.data(), src.size(), dst, len);
- return TStringBuf(dst, len);
- }
-
+ return TStringBuf(dst, len);
+ }
+
// Select one of re-coding methods from above, based on provided @encoding
template <typename TCharFrom, typename TCharTo>
@@ -115,73 +115,73 @@ namespace NDetail {
}
}
-
- template <typename TCharFrom>
- struct TRecodeTraits;
-
- template <>
- struct TRecodeTraits<char> {
+
+ template <typename TCharFrom>
+ struct TRecodeTraits;
+
+ template <>
+ struct TRecodeTraits<char> {
using TCharTo = wchar16;
using TStringBufTo = TWtringBuf;
using TStringTo = TUtf16String;
enum { ReserveSize = 4 }; // How many TCharFrom characters we should reserve for one TCharTo character in worst case
// Here an unicode character can be converted up to 4 bytes of UTF8
- };
-
- template <>
- struct TRecodeTraits<wchar16> {
+ };
+
+ template <>
+ struct TRecodeTraits<wchar16> {
using TCharTo = char;
using TStringBufTo = TStringBuf;
using TStringTo = TString;
enum { ReserveSize = 2 }; // possible surrogate pairs ?
- };
-
- // Operations with destination buffer where recoded string will be written
- template <typename TResult>
- struct TRecodeResultOps {
+ };
+
+ // Operations with destination buffer where recoded string will be written
+ template <typename TResult>
+ struct TRecodeResultOps {
// default implementation will work with TString and TUtf16String - 99% of usage
using TResultChar = typename TResult::char_type;
-
- static inline size_t Size(const TResult& dst) {
- return dst.size();
- }
-
- static inline TResultChar* Reserve(TResult& dst, size_t len) {
- dst.ReserveAndResize(len);
- return dst.begin();
- }
-
- static inline void Truncate(TResult& dst, size_t len) {
- dst.resize(len);
- }
- };
-
- // Main template interface for recoding in both directions
-
- template <typename TCharFrom, typename TResult>
+
+ static inline size_t Size(const TResult& dst) {
+ return dst.size();
+ }
+
+ static inline TResultChar* Reserve(TResult& dst, size_t len) {
+ dst.ReserveAndResize(len);
+ return dst.begin();
+ }
+
+ static inline void Truncate(TResult& dst, size_t len) {
+ dst.resize(len);
+ }
+ };
+
+ // Main template interface for recoding in both directions
+
+ template <typename TCharFrom, typename TResult>
typename TRecodeTraits<TCharFrom>::TStringBufTo Recode(const TBasicStringBuf<TCharFrom> src, TResult& dst, ECharset encoding) {
using TCharTo = typename TRecodeTraits<TCharFrom>::TCharTo;
- // make enough room for re-coded string
- TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, src.size() * TRecodeTraits<TCharTo>::ReserveSize);
- // do re-coding
+ // make enough room for re-coded string
+ TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, src.size() * TRecodeTraits<TCharTo>::ReserveSize);
+ // do re-coding
TBasicStringBuf<TCharTo> res = NBaseOps::Recode(src, dstbuf, encoding);
- // truncate result back to proper size
- TRecodeResultOps<TResult>::Truncate(dst, res.size());
- return res;
- }
-
- // appending version of Recode()
- template <typename TCharFrom, typename TResult>
+ // truncate result back to proper size
+ TRecodeResultOps<TResult>::Truncate(dst, res.size());
+ return res;
+ }
+
+ // appending version of Recode()
+ template <typename TCharFrom, typename TResult>
typename TRecodeTraits<TCharFrom>::TStringBufTo RecodeAppend(const TBasicStringBuf<TCharFrom> src, TResult& dst, ECharset encoding) {
using TCharTo = typename TRecodeTraits<TCharFrom>::TCharTo;
- size_t dstOrigSize = TRecodeResultOps<TResult>::Size(dst);
- TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, dstOrigSize + src.size() * TRecodeTraits<TCharTo>::ReserveSize);
+ size_t dstOrigSize = TRecodeResultOps<TResult>::Size(dst);
+ TCharTo* dstbuf = TRecodeResultOps<TResult>::Reserve(dst, dstOrigSize + src.size() * TRecodeTraits<TCharTo>::ReserveSize);
TBasicStringBuf<TCharTo> appended = NBaseOps::Recode(src, dstbuf + dstOrigSize, encoding);
- size_t dstFinalSize = dstOrigSize + appended.size();
- TRecodeResultOps<TResult>::Truncate(dst, dstFinalSize);
+ size_t dstFinalSize = dstOrigSize + appended.size();
+ TRecodeResultOps<TResult>::Truncate(dst, dstFinalSize);
return TBasicStringBuf<TCharTo>(dstbuf, dstFinalSize);
- }
-
+ }
+
// special implementation for robust utf8 functions
template <typename TResult>
TWtringBuf RecodeUTF8Robust(const TStringBuf src, TResult& dst) {
@@ -197,31 +197,31 @@ namespace NDetail {
return TWtringBuf(dstbuf, written);
}
- template <typename TCharFrom>
+ template <typename TCharFrom>
inline typename TRecodeTraits<TCharFrom>::TStringTo Recode(const TBasicStringBuf<TCharFrom> src, ECharset encoding) {
- typename TRecodeTraits<TCharFrom>::TStringTo res;
- Recode<TCharFrom>(src, res, encoding);
- return res;
- }
+ typename TRecodeTraits<TCharFrom>::TStringTo res;
+ Recode<TCharFrom>(src, res, encoding);
+ return res;
+ }
}
-
-// Write result into @dst. Return string-buffer pointing to re-coded content of @dst.
-
+
+// Write result into @dst. Return string-buffer pointing to re-coded content of @dst.
+
template <bool robust>
inline TWtringBuf CharToWide(const TStringBuf src, TUtf16String& dst, ECharset encoding) {
if (robust && CODES_UTF8 == encoding)
return ::NDetail::RecodeUTF8Robust(src, dst);
return ::NDetail::Recode<char>(src, dst, encoding);
-}
-
+}
+
inline TWtringBuf CharToWide(const TStringBuf src, TUtf16String& dst, ECharset encoding) {
return ::NDetail::Recode<char>(src, dst, encoding);
}
inline TStringBuf WideToChar(const TWtringBuf src, TString& dst, ECharset encoding) {
return ::NDetail::Recode<wchar16>(src, dst, encoding);
-}
-
+}
+
//! calls either to @c WideToUTF8 or @c WideToChar depending on the encoding type
inline TString WideToChar(const wchar16* text, size_t len, ECharset enc) {
if (NCodepagePrivate::NativeCodepage(enc)) {
@@ -301,6 +301,6 @@ inline TUtf16String CharToWide(const TStringBuf s, const CodePage& cp) {
return CharToWide(s.data(), s.size(), cp);
}
-// true if @text can be fully encoded to specified @encoding,
-// with possibility to recover exact original text after decoding
-bool CanBeEncoded(TWtringBuf text, ECharset encoding);
+// true if @text can be fully encoded to specified @encoding,
+// with possibility to recover exact original text after decoding
+bool CanBeEncoded(TWtringBuf text, ECharset encoding);
diff --git a/library/cpp/charset/wide_ut.cpp b/library/cpp/charset/wide_ut.cpp
index 78947d51ba..63112f432c 100644
--- a/library/cpp/charset/wide_ut.cpp
+++ b/library/cpp/charset/wide_ut.cpp
@@ -9,7 +9,7 @@
#include <util/generic/hash_set.h>
#include <algorithm>
-
+
namespace {
//! three UTF8 encoded russian letters (A, B, V)
const char yandexCyrillicAlphabet[] =
@@ -143,8 +143,8 @@ public:
void TestCharToWide();
void TestWideToChar();
void TestYandexEncoding();
- void TestRecodeIntoString();
- void TestRecodeAppend();
+ void TestRecodeIntoString();
+ void TestRecodeAppend();
void TestRecode();
void TestUnicodeLimit();
};
@@ -228,114 +228,114 @@ void TConversionTest::TestYandexEncoding() {
}
}
-void TConversionTest::TestRecodeIntoString() {
+void TConversionTest::TestRecodeIntoString() {
TString sYandex(UnicodeText.size() * 4, 'x');
const char* sdata = sYandex.data();
- TStringBuf sres = NDetail::Recode<wchar16>(UnicodeText, sYandex, CODES_YANDEX);
+ TStringBuf sres = NDetail::Recode<wchar16>(UnicodeText, sYandex, CODES_YANDEX);
UNIT_ASSERT(sYandex == YandexText); // same content
UNIT_ASSERT(sYandex.data() == sdata); // reserved buffer reused
UNIT_ASSERT(sYandex.data() == sres.data()); // same buffer
UNIT_ASSERT(sYandex.size() == sres.size()); // same size
TEST_WCHAR32(sYandex, UnicodeText, CODES_YANDEX);
-
+
TUtf16String sUnicode;
- sUnicode.reserve(YandexText.size() * 4);
+ sUnicode.reserve(YandexText.size() * 4);
const wchar16* wdata = sUnicode.data();
- TWtringBuf wres = NDetail::Recode<char>(YandexText, sUnicode, CODES_YANDEX);
+ TWtringBuf wres = NDetail::Recode<char>(YandexText, sUnicode, CODES_YANDEX);
UNIT_ASSERT(sUnicode == UnicodeText); // same content
UNIT_ASSERT(sUnicode.data() == wdata); // reserved buffer reused
UNIT_ASSERT(sUnicode.data() == wres.data()); // same buffer
UNIT_ASSERT(sUnicode.size() == wres.size()); // same size
-
+
TString sUtf8 = " ";
- size_t scap = sUtf8.capacity();
- sres = NDetail::Recode<wchar16>(UnicodeText, sUtf8, CODES_UTF8);
+ size_t scap = sUtf8.capacity();
+ sres = NDetail::Recode<wchar16>(UnicodeText, sUtf8, CODES_UTF8);
UNIT_ASSERT(sUtf8 == UTF8Text); // same content
UNIT_ASSERT(sUtf8.capacity() > scap); // increased buffer capacity (supplied was too small)
UNIT_ASSERT(sUtf8.data() == sres.data()); // same buffer
UNIT_ASSERT(sUtf8.size() == sres.size()); // same size
TEST_WCHAR32(sUtf8, UnicodeText, CODES_UTF8);
-
- sUnicode.clear();
+
+ sUnicode.clear();
wdata = sUnicode.data();
TUtf16String copy = sUnicode; // increase ref-counter
- wres = NDetail::Recode<char>(UTF8Text, sUnicode, CODES_UTF8);
+ wres = NDetail::Recode<char>(UTF8Text, sUnicode, CODES_UTF8);
UNIT_ASSERT(sUnicode == UnicodeText); // same content
#ifndef TSTRING_IS_STD_STRING
UNIT_ASSERT(sUnicode.data() != wdata); // re-allocated (shared buffer supplied)
UNIT_ASSERT(sUnicode.data() == wres.data()); // same buffer
#endif
UNIT_ASSERT(sUnicode.size() == wres.size()); // same content
-}
-
+}
+
static TString GenerateJunk(size_t seed) {
TString res;
- size_t hash = NumericHash(seed);
- size_t size = hash % 1024;
- res.reserve(size);
- for (size_t i = 0; i < size; ++i)
- res += static_cast<char>(NumericHash(hash + i) % 256);
- return res;
-}
-
-void TConversionTest::TestRecodeAppend() {
- {
+ size_t hash = NumericHash(seed);
+ size_t size = hash % 1024;
+ res.reserve(size);
+ for (size_t i = 0; i < size; ++i)
+ res += static_cast<char>(NumericHash(hash + i) % 256);
+ return res;
+}
+
+void TConversionTest::TestRecodeAppend() {
+ {
TString s1, s2;
NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX);
- UNIT_ASSERT(s1.empty());
-
- NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_WIN);
- s2 += WideToChar(UnicodeText, CODES_WIN);
- UNIT_ASSERT_EQUAL(s1, s2);
-
- NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_YANDEX);
- s2 += WideToChar(UnicodeText, CODES_YANDEX);
- UNIT_ASSERT_EQUAL(s1, s2);
-
+ UNIT_ASSERT(s1.empty());
+
+ NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_WIN);
+ s2 += WideToChar(UnicodeText, CODES_WIN);
+ UNIT_ASSERT_EQUAL(s1, s2);
+
+ NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_YANDEX);
+ s2 += WideToChar(UnicodeText, CODES_YANDEX);
+ UNIT_ASSERT_EQUAL(s1, s2);
+
NDetail::RecodeAppend<wchar16>(TUtf16String(), s1, CODES_YANDEX);
- UNIT_ASSERT_EQUAL(s1, s2);
-
- NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_UTF8);
+ UNIT_ASSERT_EQUAL(s1, s2);
+
+ NDetail::RecodeAppend<wchar16>(UnicodeText, s1, CODES_UTF8);
s2 += WideToUTF8(UnicodeText);
- UNIT_ASSERT_EQUAL(s1, s2);
+ UNIT_ASSERT_EQUAL(s1, s2);
- for (size_t i = 0; i < 100; ++i) {
+ for (size_t i = 0; i < 100; ++i) {
TUtf16String junk = CharToWide(GenerateJunk(i), CODES_YANDEX);
- NDetail::RecodeAppend<wchar16>(junk, s1, CODES_UTF8);
+ NDetail::RecodeAppend<wchar16>(junk, s1, CODES_UTF8);
s2 += WideToUTF8(junk);
- UNIT_ASSERT_EQUAL(s1, s2);
- }
- }
-
- {
+ UNIT_ASSERT_EQUAL(s1, s2);
+ }
+ }
+
+ {
TUtf16String s1, s2;
NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX);
- UNIT_ASSERT(s1.empty());
-
- NDetail::RecodeAppend<char>(YandexText, s1, CODES_WIN);
- s2 += CharToWide(YandexText, CODES_WIN);
- UNIT_ASSERT_EQUAL(s1, s2);
-
- NDetail::RecodeAppend<char>(YandexText, s1, CODES_YANDEX);
- s2 += CharToWide(YandexText, CODES_YANDEX);
- UNIT_ASSERT_EQUAL(s1, s2);
-
+ UNIT_ASSERT(s1.empty());
+
+ NDetail::RecodeAppend<char>(YandexText, s1, CODES_WIN);
+ s2 += CharToWide(YandexText, CODES_WIN);
+ UNIT_ASSERT_EQUAL(s1, s2);
+
+ NDetail::RecodeAppend<char>(YandexText, s1, CODES_YANDEX);
+ s2 += CharToWide(YandexText, CODES_YANDEX);
+ UNIT_ASSERT_EQUAL(s1, s2);
+
NDetail::RecodeAppend<char>(TString(), s1, CODES_YANDEX);
- UNIT_ASSERT_EQUAL(s1, s2);
+ UNIT_ASSERT_EQUAL(s1, s2);
- NDetail::RecodeAppend<char>(UTF8Text, s1, CODES_UTF8);
+ NDetail::RecodeAppend<char>(UTF8Text, s1, CODES_UTF8);
s2 += UTF8ToWide(UTF8Text);
- UNIT_ASSERT_EQUAL(s1, s2);
-
- for (size_t i = 0; i < 100; ++i) {
+ UNIT_ASSERT_EQUAL(s1, s2);
+
+ for (size_t i = 0; i < 100; ++i) {
TString junk = GenerateJunk(i);
- NDetail::RecodeAppend<char>(junk, s1, CODES_YANDEX);
- s2 += CharToWide(junk, CODES_YANDEX);
- UNIT_ASSERT_EQUAL(s1, s2);
- }
- }
-}
-
+ NDetail::RecodeAppend<char>(junk, s1, CODES_YANDEX);
+ s2 += CharToWide(junk, CODES_YANDEX);
+ UNIT_ASSERT_EQUAL(s1, s2);
+ }
+ }
+}
+
template <>
void Out<RECODE_RESULT>(IOutputStream& out, RECODE_RESULT val) {
out << int(val);