summaryrefslogtreecommitdiffstats
path: root/library/cpp/charset
diff options
context:
space:
mode:
authoralbert <[email protected]>2022-02-10 16:48:15 +0300
committerDaniil Cherednik <[email protected]>2022-02-10 16:48:15 +0300
commita817f5de12611ec73085eba17f8ec7740a46bdb7 (patch)
treeb222e5ac2e2e98872661c51ccceee5da0d291e13 /library/cpp/charset
parent9f25ef3232c288ca664ceee6c376cf64e4349a2e (diff)
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/charset')
-rw-r--r--library/cpp/charset/codepage_ut.cpp22
-rw-r--r--library/cpp/charset/recyr.hh36
-rw-r--r--library/cpp/charset/recyr_int.hh12
-rw-r--r--library/cpp/charset/wide.h44
-rw-r--r--library/cpp/charset/wide_ut.cpp26
5 files changed, 70 insertions, 70 deletions
diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp
index 47ec1fb2c50..c3ac3ac478e 100644
--- a/library/cpp/charset/codepage_ut.cpp
+++ b/library/cpp/charset/codepage_ut.cpp
@@ -69,7 +69,7 @@ public:
}
void TestBrokenRune() {
- UNIT_ASSERT_VALUES_EQUAL(BROKEN_RUNE, 0xFFFDu);
+ UNIT_ASSERT_VALUES_EQUAL(BROKEN_RUNE, 0xFFFDu);
}
};
@@ -198,17 +198,17 @@ void TCodepageTest::TestUTFFromUnknownPlane() {
UNIT_ASSERT(res == RECODE_OK);
UNIT_ASSERT(samplelen == readchars);
- size_t writtenbytes2 = 0;
- char bytebuffer2[BUFFER_SIZE];
- for (size_t i = 0; i != samplelen; ++i) {
- size_t nwr = 0;
+ size_t writtenbytes2 = 0;
+ char bytebuffer2[BUFFER_SIZE];
+ for (size_t i = 0; i != samplelen; ++i) {
+ size_t nwr = 0;
const int res = RecodeFromUnicode(CODES_UTF8, sampletext[i], bytebuffer2 + writtenbytes2, BUFFER_SIZE - writtenbytes2, nwr);
- UNIT_ASSERT_VALUES_EQUAL(res, int(RECODE_OK));
- writtenbytes2 += nwr;
- UNIT_ASSERT(BUFFER_SIZE > writtenbytes2);
- }
- UNIT_ASSERT_VALUES_EQUAL(TStringBuf(bytebuffer, writtenbytes), TStringBuf(bytebuffer2, writtenbytes2));
-
+ UNIT_ASSERT_VALUES_EQUAL(res, int(RECODE_OK));
+ writtenbytes2 += nwr;
+ UNIT_ASSERT(BUFFER_SIZE > writtenbytes2);
+ }
+ UNIT_ASSERT_VALUES_EQUAL(TStringBuf(bytebuffer, writtenbytes), TStringBuf(bytebuffer2, writtenbytes2));
+
wchar32 charbuffer[BUFFER_SIZE];
size_t readbytes = 0;
size_t writtenchars = 0;
diff --git a/library/cpp/charset/recyr.hh b/library/cpp/charset/recyr.hh
index 9fcac303929..5ec8734bcfb 100644
--- a/library/cpp/charset/recyr.hh
+++ b/library/cpp/charset/recyr.hh
@@ -30,8 +30,8 @@ inline RECODE_RESULT RecodeFromUnicode(ECharset to, const TCharType* in, char* o
inline RECODE_RESULT RecodeFromUnicode(ECharset to, wchar32 rune, char* out, size_t outSize, size_t& outWritten) {
return NCodepagePrivate::_recodeFromUnicode(to, rune, out, outSize, outWritten);
-}
-
+}
+
template <class TCharType>
inline RECODE_RESULT RecodeToUnicode(ECharset from, const char* in, TCharType* out, size_t inSize, size_t outSize) {
size_t inRead = 0;
@@ -101,36 +101,36 @@ inline RECODE_RESULT Recode(ECharset from, ECharset to, const char* in, char* ou
return Recode(from, to, in, out, inSize, outSize, inRead, outWritten);
}
-/**
- * Recode from one charset to another; throw an exception if conversion failed
+/**
+ * Recode from one charset to another; throw an exception if conversion failed
* @param[in] from the source character set
* @param[in] to the target character set
- * @param[in] in the input string buffer
- * @param[out] out the output string object if conversion was successful
- * @return false if conversion was not attempted (charsets were the same),
- * true if successful
- */
+ * @param[in] in the input string buffer
+ * @param[out] out the output string object if conversion was successful
+ * @return false if conversion was not attempted (charsets were the same),
+ * true if successful
+ */
inline bool Recode(ECharset from, ECharset to, const TStringBuf& in, TString& out) {
if (to == from)
- return false;
-
+ return false;
+
const size_t inSize = in.length();
const size_t outSize = SingleByteCodepage(to) ? inSize : 3 * inSize;
- out.clear(); // so we don't copy stuff around when resizing
+ out.clear(); // so we don't copy stuff around when resizing
out.ReserveAndResize(outSize);
-
+
size_t inRead = 0;
size_t outWritten = 0;
const RECODE_RESULT res = Recode(from, to, in.data(), out.begin(), inSize, outSize, inRead, outWritten);
Y_ENSURE(RECODE_OK == res, "Recode failed. ");
if (outWritten > outSize)
- ythrow yexception() << "Recode overrun the buffer: size="
+ ythrow yexception() << "Recode overrun the buffer: size="
<< outSize << " need=" << outWritten;
-
+
out.remove(outWritten);
- return true;
-}
-
+ return true;
+}
+
///////////////////////////////////////////////////////////////////////////////////////
// TString -> TString //
///////////////////////////////////////////////////////////////////////////////////////
diff --git a/library/cpp/charset/recyr_int.hh b/library/cpp/charset/recyr_int.hh
index dcaecfc5e95..353af53305e 100644
--- a/library/cpp/charset/recyr_int.hh
+++ b/library/cpp/charset/recyr_int.hh
@@ -172,7 +172,7 @@ namespace NCodepagePrivate {
inline RECODE_RESULT _recodeUnicodeToUTF8(wchar32 rune, char* out, size_t out_size, size_t& nwritten) {
return SafeWriteUTF8Char(rune, nwritten, (unsigned char*)out, out_size);
}
-
+
template <class TCharType, int Size = sizeof(TCharType)>
struct TCharTypeSwitch;
@@ -223,7 +223,7 @@ namespace NCodepagePrivate {
nwritten = 1;
return RECODE_OK;
}
-
+
inline RECODE_RESULT _rune2hex(wchar32 in, char* out, size_t out_size, size_t& out_writed) {
static const char hex_digs[] = "0123456789ABCDEF";
out_writed = 0;
@@ -301,17 +301,17 @@ namespace NCodepagePrivate {
return NCodepagePrivate::_recodeUnicodeToUTF8(in, out, in_size, out_size, in_readed, out_writed);
return NCodepagePrivate::_recodeUnicodeToSB(To, in, out, in_size, out_size, in_readed, out_writed);
- }
-
+ }
+
inline RECODE_RESULT _recodeFromUnicode(ECharset To, wchar32 rune, char* out, size_t out_size, size_t& nwritten) {
if (!ValidCodepage(To))
return RECODE_ERROR;
-
+
if (!NCodepagePrivate::NativeCodepage(To)) {
size_t nread = 0;
return NICONVPrivate::RecodeFromUnicodeNoThrow(To, &rune, out, 1, out_size, nread, nwritten);
}
-
+
if (To == CODES_UTF8)
return NCodepagePrivate::_recodeUnicodeToUTF8(rune, out, out_size, nwritten);
diff --git a/library/cpp/charset/wide.h b/library/cpp/charset/wide.h
index 22707738646..32d30e849e9 100644
--- a/library/cpp/charset/wide.h
+++ b/library/cpp/charset/wide.h
@@ -16,15 +16,15 @@
//! converts text from unicode to yandex codepage
//! @attention destination buffer must be long enough to fit all characters of the text
//! @note @c dest buffer must fit at least @c len number of characters
-template <typename TCharType>
+template <typename TCharType>
inline size_t WideToChar(const TCharType* text, size_t len, char* dest, ECharset enc) {
Y_ASSERT(SingleByteCodepage(enc));
const char* start = dest;
const Encoder* const encoder = &EncoderByCharset(enc);
- const TCharType* const last = text + len;
- for (const TCharType* cur = text; cur != last; ++dest) {
+ const TCharType* const last = text + len;
+ for (const TCharType* cur = text; cur != last; ++dest) {
*dest = encoder->Tr(ReadSymbolAndAdvance(cur, last));
}
@@ -38,12 +38,12 @@ inline size_t WideToChar(const TCharType* text, size_t len, char* dest, ECharset
//! string using the @c strlen function and pass as the @c len parameter;
//! it does not make sense to create an additional version of this function because
//! it will call to @c strlen anyway in order to allocate destination buffer
-template <typename TCharType>
+template <typename TCharType>
inline void CharToWide(const char* text, size_t len, TCharType* dest, const CodePage& cp) {
const unsigned char* cur = reinterpret_cast<const unsigned char*>(text);
const unsigned char* const last = cur + len;
for (; cur != last; ++cur, ++dest) {
- *dest = static_cast<TCharType>(cp.unicode[*cur]); // static_cast is safe as no 1char codepage contains non-BMP symbols
+ *dest = static_cast<TCharType>(cp.unicode[*cur]); // static_cast is safe as no 1char codepage contains non-BMP symbols
}
}
@@ -55,21 +55,21 @@ namespace NDetail {
// Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string.
- template <typename TCharType>
+ template <typename TCharType>
inline TBasicStringBuf<TCharType> RecodeSingleByteChar(const TStringBuf src, TCharType* dst, const CodePage& cp) {
Y_ASSERT(cp.SingleByteCodepage());
::CharToWide(src.data(), src.size(), dst, cp);
return TBasicStringBuf<TCharType>(dst, src.size());
}
- template <typename TCharType>
+ template <typename TCharType>
inline TStringBuf RecodeSingleByteChar(const TBasicStringBuf<TCharType> src, char* dst, const CodePage& cp) {
Y_ASSERT(cp.SingleByteCodepage());
::WideToChar(src.data(), src.size(), dst, cp.CPEnum);
return TStringBuf(dst, src.size());
}
- template <typename TCharType>
+ template <typename TCharType>
inline TBasicStringBuf<TCharType> RecodeMultiByteChar(const TStringBuf src, TCharType* dst, ECharset encoding) {
Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding));
size_t read = 0;
@@ -78,7 +78,7 @@ namespace NDetail {
return TBasicStringBuf<TCharType>(dst, written);
}
- template <typename TCharType>
+ template <typename TCharType>
inline TStringBuf RecodeMultiByteChar(const TBasicStringBuf<TCharType> src, char* dst, ECharset encoding) {
Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding));
size_t read = 0;
@@ -87,7 +87,7 @@ namespace NDetail {
return TStringBuf(dst, written);
}
- template <typename TCharType>
+ template <typename TCharType>
inline TBasicStringBuf<TCharType> RecodeUtf8(const TStringBuf src, TCharType* dst) {
size_t len = 0;
if (!::UTF8ToWide(src.data(), src.size(), dst, len))
@@ -95,25 +95,25 @@ namespace NDetail {
return TBasicStringBuf<TCharType>(dst, len);
}
- template <typename TCharType>
+ template <typename TCharType>
inline TStringBuf RecodeUtf8(const TBasicStringBuf<TCharType> src, char* dst) {
size_t len = 0;
::WideToUTF8(src.data(), src.size(), dst, len);
return TStringBuf(dst, len);
}
- // Select one of re-coding methods from above, based on provided @encoding
-
- template <typename TCharFrom, typename TCharTo>
+ // Select one of re-coding methods from above, based on provided @encoding
+
+ template <typename TCharFrom, typename TCharTo>
TBasicStringBuf<TCharTo> Recode(const TBasicStringBuf<TCharFrom> src, TCharTo* dst, ECharset encoding) {
- if (encoding == CODES_UTF8)
- return RecodeUtf8(src, dst);
- else if (SingleByteCodepage(encoding))
- return RecodeSingleByteChar(src, dst, *CodePageByCharset(encoding));
- else
- return RecodeMultiByteChar(src, dst, encoding);
- }
-
+ if (encoding == CODES_UTF8)
+ return RecodeUtf8(src, dst);
+ else if (SingleByteCodepage(encoding))
+ return RecodeSingleByteChar(src, dst, *CodePageByCharset(encoding));
+ else
+ return RecodeMultiByteChar(src, dst, encoding);
+ }
+
}
template <typename TCharFrom>
diff --git a/library/cpp/charset/wide_ut.cpp b/library/cpp/charset/wide_ut.cpp
index 6bd754e0dab..78947d51bad 100644
--- a/library/cpp/charset/wide_ut.cpp
+++ b/library/cpp/charset/wide_ut.cpp
@@ -151,7 +151,7 @@ public:
UNIT_TEST_SUITE_REGISTRATION(TConversionTest);
-// test conversions (char -> wchar32), (wchar32 -> char) and (wchar32 -> wchar16)
+// test conversions (char -> wchar32), (wchar32 -> char) and (wchar32 -> wchar16)
#define TEST_WCHAR32(sbuf, wbuf, enc) \
do { \
/* convert char to wchar32 */ \
@@ -169,7 +169,7 @@ UNIT_TEST_SUITE_REGISTRATION(TConversionTest);
UNIT_ASSERT_VALUES_EQUAL(sbuf, s1buf); \
UNIT_ASSERT_VALUES_EQUAL(wbuf, wstr2); \
} while (false)
-
+
void TConversionTest::TestCharToWide() {
TUtf16String w = CharToWide(YandexText, CODES_YANDEX);
@@ -236,7 +236,7 @@ void TConversionTest::TestRecodeIntoString() {
UNIT_ASSERT(sYandex.data() == sdata); // reserved buffer reused
UNIT_ASSERT(sYandex.data() == sres.data()); // same buffer
UNIT_ASSERT(sYandex.size() == sres.size()); // same size
- TEST_WCHAR32(sYandex, UnicodeText, CODES_YANDEX);
+ TEST_WCHAR32(sYandex, UnicodeText, CODES_YANDEX);
TUtf16String sUnicode;
sUnicode.reserve(YandexText.size() * 4);
@@ -254,7 +254,7 @@ void TConversionTest::TestRecodeIntoString() {
UNIT_ASSERT(sUtf8.capacity() > scap); // increased buffer capacity (supplied was too small)
UNIT_ASSERT(sUtf8.data() == sres.data()); // same buffer
UNIT_ASSERT(sUtf8.size() == sres.size()); // same size
- TEST_WCHAR32(sUtf8, UnicodeText, CODES_UTF8);
+ TEST_WCHAR32(sUtf8, UnicodeText, CODES_UTF8);
sUnicode.clear();
wdata = sUnicode.data();
@@ -336,11 +336,11 @@ void TConversionTest::TestRecodeAppend() {
}
}
-template <>
+template <>
void Out<RECODE_RESULT>(IOutputStream& out, RECODE_RESULT val) {
- out << int(val);
-}
-
+ out << int(val);
+}
+
void TConversionTest::TestRecode() {
for (int c = 0; c != CODES_MAX; ++c) {
ECharset enc = static_cast<ECharset>(c);
@@ -367,11 +367,11 @@ void TConversionTest::TestRecode() {
res = RecodeFromUnicode(enc, &wch, &rch, 1, 1, read, written);
UNIT_ASSERT(res == RECODE_OK);
- char rch2 = 0;
- UNIT_ASSERT_VALUES_EQUAL(RECODE_OK, RecodeFromUnicode(enc, wch, &rch2, 1, written));
- UNIT_ASSERT_VALUES_EQUAL(size_t(1), written);
- UNIT_ASSERT_VALUES_EQUAL(rch2, rch);
-
+ char rch2 = 0;
+ UNIT_ASSERT_VALUES_EQUAL(RECODE_OK, RecodeFromUnicode(enc, wch, &rch2, 1, written));
+ UNIT_ASSERT_VALUES_EQUAL(size_t(1), written);
+ UNIT_ASSERT_VALUES_EQUAL(rch2, rch);
+
if (hash.contains(rch)) { // there are some stupid encodings with duplicate characters
continue;
} else {