diff options
author | albert <[email protected]> | 2022-02-10 16:48:14 +0300 |
---|---|---|
committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:48:14 +0300 |
commit | 9f25ef3232c288ca664ceee6c376cf64e4349a2e (patch) | |
tree | b192eaf3150845f7302fafd460a972b0439d6fe5 /library/cpp/charset | |
parent | 6a1e535429145ec1ecfbc5f1efd3c95323261fb5 (diff) |
Restoring authorship annotation for <[email protected]>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/charset')
-rw-r--r-- | library/cpp/charset/codepage_ut.cpp | 22 | ||||
-rw-r--r-- | library/cpp/charset/recyr.hh | 36 | ||||
-rw-r--r-- | library/cpp/charset/recyr_int.hh | 12 | ||||
-rw-r--r-- | library/cpp/charset/wide.h | 44 | ||||
-rw-r--r-- | library/cpp/charset/wide_ut.cpp | 26 |
5 files changed, 70 insertions, 70 deletions
diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp index c3ac3ac478e..47ec1fb2c50 100644 --- a/library/cpp/charset/codepage_ut.cpp +++ b/library/cpp/charset/codepage_ut.cpp @@ -69,7 +69,7 @@ public: } void TestBrokenRune() { - UNIT_ASSERT_VALUES_EQUAL(BROKEN_RUNE, 0xFFFDu); + UNIT_ASSERT_VALUES_EQUAL(BROKEN_RUNE, 0xFFFDu); } }; @@ -198,17 +198,17 @@ void TCodepageTest::TestUTFFromUnknownPlane() { UNIT_ASSERT(res == RECODE_OK); UNIT_ASSERT(samplelen == readchars); - size_t writtenbytes2 = 0; - char bytebuffer2[BUFFER_SIZE]; - for (size_t i = 0; i != samplelen; ++i) { - size_t nwr = 0; + size_t writtenbytes2 = 0; + char bytebuffer2[BUFFER_SIZE]; + for (size_t i = 0; i != samplelen; ++i) { + size_t nwr = 0; const int res = RecodeFromUnicode(CODES_UTF8, sampletext[i], bytebuffer2 + writtenbytes2, BUFFER_SIZE - writtenbytes2, nwr); - UNIT_ASSERT_VALUES_EQUAL(res, int(RECODE_OK)); - writtenbytes2 += nwr; - UNIT_ASSERT(BUFFER_SIZE > writtenbytes2); - } - UNIT_ASSERT_VALUES_EQUAL(TStringBuf(bytebuffer, writtenbytes), TStringBuf(bytebuffer2, writtenbytes2)); - + UNIT_ASSERT_VALUES_EQUAL(res, int(RECODE_OK)); + writtenbytes2 += nwr; + UNIT_ASSERT(BUFFER_SIZE > writtenbytes2); + } + UNIT_ASSERT_VALUES_EQUAL(TStringBuf(bytebuffer, writtenbytes), TStringBuf(bytebuffer2, writtenbytes2)); + wchar32 charbuffer[BUFFER_SIZE]; size_t readbytes = 0; size_t writtenchars = 0; diff --git a/library/cpp/charset/recyr.hh b/library/cpp/charset/recyr.hh index 5ec8734bcfb..9fcac303929 100644 --- a/library/cpp/charset/recyr.hh +++ b/library/cpp/charset/recyr.hh @@ -30,8 +30,8 @@ inline RECODE_RESULT RecodeFromUnicode(ECharset to, const TCharType* in, char* o inline RECODE_RESULT RecodeFromUnicode(ECharset to, wchar32 rune, char* out, size_t outSize, size_t& outWritten) { return NCodepagePrivate::_recodeFromUnicode(to, rune, out, outSize, outWritten); -} - +} + template <class TCharType> inline RECODE_RESULT RecodeToUnicode(ECharset from, const char* in, TCharType* out, size_t inSize, size_t outSize) { size_t inRead = 0; @@ -101,36 +101,36 @@ inline RECODE_RESULT Recode(ECharset from, ECharset to, const char* in, char* ou return Recode(from, to, in, out, inSize, outSize, inRead, outWritten); } -/** - * Recode from one charset to another; throw an exception if conversion failed +/** + * Recode from one charset to another; throw an exception if conversion failed * @param[in] from the source character set * @param[in] to the target character set - * @param[in] in the input string buffer - * @param[out] out the output string object if conversion was successful - * @return false if conversion was not attempted (charsets were the same), - * true if successful - */ + * @param[in] in the input string buffer + * @param[out] out the output string object if conversion was successful + * @return false if conversion was not attempted (charsets were the same), + * true if successful + */ inline bool Recode(ECharset from, ECharset to, const TStringBuf& in, TString& out) { if (to == from) - return false; - + return false; + const size_t inSize = in.length(); const size_t outSize = SingleByteCodepage(to) ? inSize : 3 * inSize; - out.clear(); // so we don't copy stuff around when resizing + out.clear(); // so we don't copy stuff around when resizing out.ReserveAndResize(outSize); - + size_t inRead = 0; size_t outWritten = 0; const RECODE_RESULT res = Recode(from, to, in.data(), out.begin(), inSize, outSize, inRead, outWritten); Y_ENSURE(RECODE_OK == res, "Recode failed. "); if (outWritten > outSize) - ythrow yexception() << "Recode overrun the buffer: size=" + ythrow yexception() << "Recode overrun the buffer: size=" << outSize << " need=" << outWritten; - + out.remove(outWritten); - return true; -} - + return true; +} + /////////////////////////////////////////////////////////////////////////////////////// // TString -> TString // /////////////////////////////////////////////////////////////////////////////////////// diff --git a/library/cpp/charset/recyr_int.hh b/library/cpp/charset/recyr_int.hh index 353af53305e..dcaecfc5e95 100644 --- a/library/cpp/charset/recyr_int.hh +++ b/library/cpp/charset/recyr_int.hh @@ -172,7 +172,7 @@ namespace NCodepagePrivate { inline RECODE_RESULT _recodeUnicodeToUTF8(wchar32 rune, char* out, size_t out_size, size_t& nwritten) { return SafeWriteUTF8Char(rune, nwritten, (unsigned char*)out, out_size); } - + template <class TCharType, int Size = sizeof(TCharType)> struct TCharTypeSwitch; @@ -223,7 +223,7 @@ namespace NCodepagePrivate { nwritten = 1; return RECODE_OK; } - + inline RECODE_RESULT _rune2hex(wchar32 in, char* out, size_t out_size, size_t& out_writed) { static const char hex_digs[] = "0123456789ABCDEF"; out_writed = 0; @@ -301,17 +301,17 @@ namespace NCodepagePrivate { return NCodepagePrivate::_recodeUnicodeToUTF8(in, out, in_size, out_size, in_readed, out_writed); return NCodepagePrivate::_recodeUnicodeToSB(To, in, out, in_size, out_size, in_readed, out_writed); - } - + } + inline RECODE_RESULT _recodeFromUnicode(ECharset To, wchar32 rune, char* out, size_t out_size, size_t& nwritten) { if (!ValidCodepage(To)) return RECODE_ERROR; - + if (!NCodepagePrivate::NativeCodepage(To)) { size_t nread = 0; return NICONVPrivate::RecodeFromUnicodeNoThrow(To, &rune, out, 1, out_size, nread, nwritten); } - + if (To == CODES_UTF8) return NCodepagePrivate::_recodeUnicodeToUTF8(rune, out, out_size, nwritten); diff --git a/library/cpp/charset/wide.h b/library/cpp/charset/wide.h index 32d30e849e9..22707738646 100644 --- a/library/cpp/charset/wide.h +++ b/library/cpp/charset/wide.h @@ -16,15 +16,15 @@ //! converts text from unicode to yandex codepage //! @attention destination buffer must be long enough to fit all characters of the text //! @note @c dest buffer must fit at least @c len number of characters -template <typename TCharType> +template <typename TCharType> inline size_t WideToChar(const TCharType* text, size_t len, char* dest, ECharset enc) { Y_ASSERT(SingleByteCodepage(enc)); const char* start = dest; const Encoder* const encoder = &EncoderByCharset(enc); - const TCharType* const last = text + len; - for (const TCharType* cur = text; cur != last; ++dest) { + const TCharType* const last = text + len; + for (const TCharType* cur = text; cur != last; ++dest) { *dest = encoder->Tr(ReadSymbolAndAdvance(cur, last)); } @@ -38,12 +38,12 @@ inline size_t WideToChar(const TCharType* text, size_t len, char* dest, ECharset //! string using the @c strlen function and pass as the @c len parameter; //! it does not make sense to create an additional version of this function because //! it will call to @c strlen anyway in order to allocate destination buffer -template <typename TCharType> +template <typename TCharType> inline void CharToWide(const char* text, size_t len, TCharType* dest, const CodePage& cp) { const unsigned char* cur = reinterpret_cast<const unsigned char*>(text); const unsigned char* const last = cur + len; for (; cur != last; ++cur, ++dest) { - *dest = static_cast<TCharType>(cp.unicode[*cur]); // static_cast is safe as no 1char codepage contains non-BMP symbols + *dest = static_cast<TCharType>(cp.unicode[*cur]); // static_cast is safe as no 1char codepage contains non-BMP symbols } } @@ -55,21 +55,21 @@ namespace NDetail { // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string. - template <typename TCharType> + template <typename TCharType> inline TBasicStringBuf<TCharType> RecodeSingleByteChar(const TStringBuf src, TCharType* dst, const CodePage& cp) { Y_ASSERT(cp.SingleByteCodepage()); ::CharToWide(src.data(), src.size(), dst, cp); return TBasicStringBuf<TCharType>(dst, src.size()); } - template <typename TCharType> + template <typename TCharType> inline TStringBuf RecodeSingleByteChar(const TBasicStringBuf<TCharType> src, char* dst, const CodePage& cp) { Y_ASSERT(cp.SingleByteCodepage()); ::WideToChar(src.data(), src.size(), dst, cp.CPEnum); return TStringBuf(dst, src.size()); } - template <typename TCharType> + template <typename TCharType> inline TBasicStringBuf<TCharType> RecodeMultiByteChar(const TStringBuf src, TCharType* dst, ECharset encoding) { Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding)); size_t read = 0; @@ -78,7 +78,7 @@ namespace NDetail { return TBasicStringBuf<TCharType>(dst, written); } - template <typename TCharType> + template <typename TCharType> inline TStringBuf RecodeMultiByteChar(const TBasicStringBuf<TCharType> src, char* dst, ECharset encoding) { Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding)); size_t read = 0; @@ -87,7 +87,7 @@ namespace NDetail { return TStringBuf(dst, written); } - template <typename TCharType> + template <typename TCharType> inline TBasicStringBuf<TCharType> RecodeUtf8(const TStringBuf src, TCharType* dst) { size_t len = 0; if (!::UTF8ToWide(src.data(), src.size(), dst, len)) @@ -95,25 +95,25 @@ namespace NDetail { return TBasicStringBuf<TCharType>(dst, len); } - template <typename TCharType> + template <typename TCharType> inline TStringBuf RecodeUtf8(const TBasicStringBuf<TCharType> src, char* dst) { size_t len = 0; ::WideToUTF8(src.data(), src.size(), dst, len); return TStringBuf(dst, len); } - // Select one of re-coding methods from above, based on provided @encoding - - template <typename TCharFrom, typename TCharTo> + // Select one of re-coding methods from above, based on provided @encoding + + template <typename TCharFrom, typename TCharTo> TBasicStringBuf<TCharTo> Recode(const TBasicStringBuf<TCharFrom> src, TCharTo* dst, ECharset encoding) { - if (encoding == CODES_UTF8) - return RecodeUtf8(src, dst); - else if (SingleByteCodepage(encoding)) - return RecodeSingleByteChar(src, dst, *CodePageByCharset(encoding)); - else - return RecodeMultiByteChar(src, dst, encoding); - } - + if (encoding == CODES_UTF8) + return RecodeUtf8(src, dst); + else if (SingleByteCodepage(encoding)) + return RecodeSingleByteChar(src, dst, *CodePageByCharset(encoding)); + else + return RecodeMultiByteChar(src, dst, encoding); + } + } template <typename TCharFrom> diff --git a/library/cpp/charset/wide_ut.cpp b/library/cpp/charset/wide_ut.cpp index 78947d51bad..6bd754e0dab 100644 --- a/library/cpp/charset/wide_ut.cpp +++ b/library/cpp/charset/wide_ut.cpp @@ -151,7 +151,7 @@ public: UNIT_TEST_SUITE_REGISTRATION(TConversionTest); -// test conversions (char -> wchar32), (wchar32 -> char) and (wchar32 -> wchar16) +// test conversions (char -> wchar32), (wchar32 -> char) and (wchar32 -> wchar16) #define TEST_WCHAR32(sbuf, wbuf, enc) \ do { \ /* convert char to wchar32 */ \ @@ -169,7 +169,7 @@ UNIT_TEST_SUITE_REGISTRATION(TConversionTest); UNIT_ASSERT_VALUES_EQUAL(sbuf, s1buf); \ UNIT_ASSERT_VALUES_EQUAL(wbuf, wstr2); \ } while (false) - + void TConversionTest::TestCharToWide() { TUtf16String w = CharToWide(YandexText, CODES_YANDEX); @@ -236,7 +236,7 @@ void TConversionTest::TestRecodeIntoString() { UNIT_ASSERT(sYandex.data() == sdata); // reserved buffer reused UNIT_ASSERT(sYandex.data() == sres.data()); // same buffer UNIT_ASSERT(sYandex.size() == sres.size()); // same size - TEST_WCHAR32(sYandex, UnicodeText, CODES_YANDEX); + TEST_WCHAR32(sYandex, UnicodeText, CODES_YANDEX); TUtf16String sUnicode; sUnicode.reserve(YandexText.size() * 4); @@ -254,7 +254,7 @@ void TConversionTest::TestRecodeIntoString() { UNIT_ASSERT(sUtf8.capacity() > scap); // increased buffer capacity (supplied was too small) UNIT_ASSERT(sUtf8.data() == sres.data()); // same buffer UNIT_ASSERT(sUtf8.size() == sres.size()); // same size - TEST_WCHAR32(sUtf8, UnicodeText, CODES_UTF8); + TEST_WCHAR32(sUtf8, UnicodeText, CODES_UTF8); sUnicode.clear(); wdata = sUnicode.data(); @@ -336,11 +336,11 @@ void TConversionTest::TestRecodeAppend() { } } -template <> +template <> void Out<RECODE_RESULT>(IOutputStream& out, RECODE_RESULT val) { - out << int(val); -} - + out << int(val); +} + void TConversionTest::TestRecode() { for (int c = 0; c != CODES_MAX; ++c) { ECharset enc = static_cast<ECharset>(c); @@ -367,11 +367,11 @@ void TConversionTest::TestRecode() { res = RecodeFromUnicode(enc, &wch, &rch, 1, 1, read, written); UNIT_ASSERT(res == RECODE_OK); - char rch2 = 0; - UNIT_ASSERT_VALUES_EQUAL(RECODE_OK, RecodeFromUnicode(enc, wch, &rch2, 1, written)); - UNIT_ASSERT_VALUES_EQUAL(size_t(1), written); - UNIT_ASSERT_VALUES_EQUAL(rch2, rch); - + char rch2 = 0; + UNIT_ASSERT_VALUES_EQUAL(RECODE_OK, RecodeFromUnicode(enc, wch, &rch2, 1, written)); + UNIT_ASSERT_VALUES_EQUAL(size_t(1), written); + UNIT_ASSERT_VALUES_EQUAL(rch2, rch); + if (hash.contains(rch)) { // there are some stupid encodings with duplicate characters continue; } else { |