diff options
| author | albert <[email protected]> | 2022-02-10 16:48:14 +0300 | 
|---|---|---|
| committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:48:14 +0300 | 
| commit | 9f25ef3232c288ca664ceee6c376cf64e4349a2e (patch) | |
| tree | b192eaf3150845f7302fafd460a972b0439d6fe5 /library/cpp/charset | |
| parent | 6a1e535429145ec1ecfbc5f1efd3c95323261fb5 (diff) | |
Restoring authorship annotation for <[email protected]>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/charset')
| -rw-r--r-- | library/cpp/charset/codepage_ut.cpp | 22 | ||||
| -rw-r--r-- | library/cpp/charset/recyr.hh | 36 | ||||
| -rw-r--r-- | library/cpp/charset/recyr_int.hh | 12 | ||||
| -rw-r--r-- | library/cpp/charset/wide.h | 44 | ||||
| -rw-r--r-- | library/cpp/charset/wide_ut.cpp | 26 | 
5 files changed, 70 insertions, 70 deletions
diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp index c3ac3ac478e..47ec1fb2c50 100644 --- a/library/cpp/charset/codepage_ut.cpp +++ b/library/cpp/charset/codepage_ut.cpp @@ -69,7 +69,7 @@ public:      }      void TestBrokenRune() { -        UNIT_ASSERT_VALUES_EQUAL(BROKEN_RUNE, 0xFFFDu); +        UNIT_ASSERT_VALUES_EQUAL(BROKEN_RUNE, 0xFFFDu);       }  }; @@ -198,17 +198,17 @@ void TCodepageTest::TestUTFFromUnknownPlane() {      UNIT_ASSERT(res == RECODE_OK);      UNIT_ASSERT(samplelen == readchars); -    size_t writtenbytes2 = 0; -    char bytebuffer2[BUFFER_SIZE]; -    for (size_t i = 0; i != samplelen; ++i) { -        size_t nwr = 0; +    size_t writtenbytes2 = 0;  +    char bytebuffer2[BUFFER_SIZE];  +    for (size_t i = 0; i != samplelen; ++i) {  +        size_t nwr = 0;           const int res = RecodeFromUnicode(CODES_UTF8, sampletext[i], bytebuffer2 + writtenbytes2, BUFFER_SIZE - writtenbytes2, nwr); -        UNIT_ASSERT_VALUES_EQUAL(res, int(RECODE_OK)); -        writtenbytes2 += nwr; -        UNIT_ASSERT(BUFFER_SIZE > writtenbytes2); -    } -    UNIT_ASSERT_VALUES_EQUAL(TStringBuf(bytebuffer, writtenbytes), TStringBuf(bytebuffer2, writtenbytes2)); - +        UNIT_ASSERT_VALUES_EQUAL(res, int(RECODE_OK));  +        writtenbytes2 += nwr;  +        UNIT_ASSERT(BUFFER_SIZE > writtenbytes2);  +    }  +    UNIT_ASSERT_VALUES_EQUAL(TStringBuf(bytebuffer, writtenbytes), TStringBuf(bytebuffer2, writtenbytes2));  +       wchar32 charbuffer[BUFFER_SIZE];      size_t readbytes = 0;      size_t writtenchars = 0; diff --git a/library/cpp/charset/recyr.hh b/library/cpp/charset/recyr.hh index 5ec8734bcfb..9fcac303929 100644 --- a/library/cpp/charset/recyr.hh +++ b/library/cpp/charset/recyr.hh @@ -30,8 +30,8 @@ inline RECODE_RESULT RecodeFromUnicode(ECharset to, const TCharType* in, char* o  inline RECODE_RESULT RecodeFromUnicode(ECharset to, wchar32 rune, char* out, size_t outSize, size_t& outWritten) {      return NCodepagePrivate::_recodeFromUnicode(to, rune, out, outSize, outWritten); -} - +}  +   template <class TCharType>  inline RECODE_RESULT RecodeToUnicode(ECharset from, const char* in, TCharType* out, size_t inSize, size_t outSize) {      size_t inRead = 0; @@ -101,36 +101,36 @@ inline RECODE_RESULT Recode(ECharset from, ECharset to, const char* in, char* ou      return Recode(from, to, in, out, inSize, outSize, inRead, outWritten);  } -/** - * Recode from one charset to another; throw an exception if conversion failed +/**  + * Recode from one charset to another; throw an exception if conversion failed    * @param[in] from the source character set   * @param[in] to the target character set - * @param[in] in    the input string buffer - * @param[out] out  the output string object if conversion was successful - * @return false if conversion was not attempted (charsets were the same), - *         true if successful - */ + * @param[in] in    the input string buffer  + * @param[out] out  the output string object if conversion was successful  + * @return false if conversion was not attempted (charsets were the same),  + *         true if successful  + */   inline bool Recode(ECharset from, ECharset to, const TStringBuf& in, TString& out) {      if (to == from) -        return false; - +        return false;  +       const size_t inSize = in.length();      const size_t outSize = SingleByteCodepage(to) ? inSize : 3 * inSize; -    out.clear(); // so we don't copy stuff around when resizing +    out.clear(); // so we don't copy stuff around when resizing       out.ReserveAndResize(outSize); - +       size_t inRead = 0;      size_t outWritten = 0;      const RECODE_RESULT res = Recode(from, to, in.data(), out.begin(), inSize, outSize, inRead, outWritten);      Y_ENSURE(RECODE_OK == res, "Recode failed. ");      if (outWritten > outSize) -        ythrow yexception() << "Recode overrun the buffer: size=" +        ythrow yexception() << "Recode overrun the buffer: size="                               << outSize << " need=" << outWritten; - +       out.remove(outWritten); -    return true; -} - +    return true;  +}  +   ///////////////////////////////////////////////////////////////////////////////////////  //     TString -> TString                                                              //  /////////////////////////////////////////////////////////////////////////////////////// diff --git a/library/cpp/charset/recyr_int.hh b/library/cpp/charset/recyr_int.hh index 353af53305e..dcaecfc5e95 100644 --- a/library/cpp/charset/recyr_int.hh +++ b/library/cpp/charset/recyr_int.hh @@ -172,7 +172,7 @@ namespace NCodepagePrivate {      inline RECODE_RESULT _recodeUnicodeToUTF8(wchar32 rune, char* out, size_t out_size, size_t& nwritten) {          return SafeWriteUTF8Char(rune, nwritten, (unsigned char*)out, out_size);      } - +       template <class TCharType, int Size = sizeof(TCharType)>      struct TCharTypeSwitch; @@ -223,7 +223,7 @@ namespace NCodepagePrivate {          nwritten = 1;          return RECODE_OK;      } - +       inline RECODE_RESULT _rune2hex(wchar32 in, char* out, size_t out_size, size_t& out_writed) {          static const char hex_digs[] = "0123456789ABCDEF";          out_writed = 0; @@ -301,17 +301,17 @@ namespace NCodepagePrivate {              return NCodepagePrivate::_recodeUnicodeToUTF8(in, out, in_size, out_size, in_readed, out_writed);          return NCodepagePrivate::_recodeUnicodeToSB(To, in, out, in_size, out_size, in_readed, out_writed); -    } - +    }  +       inline RECODE_RESULT _recodeFromUnicode(ECharset To, wchar32 rune, char* out, size_t out_size, size_t& nwritten) {          if (!ValidCodepage(To))              return RECODE_ERROR; - +           if (!NCodepagePrivate::NativeCodepage(To)) {              size_t nread = 0;              return NICONVPrivate::RecodeFromUnicodeNoThrow(To, &rune, out, 1, out_size, nread, nwritten);          } - +           if (To == CODES_UTF8)              return NCodepagePrivate::_recodeUnicodeToUTF8(rune, out, out_size, nwritten); diff --git a/library/cpp/charset/wide.h b/library/cpp/charset/wide.h index 32d30e849e9..22707738646 100644 --- a/library/cpp/charset/wide.h +++ b/library/cpp/charset/wide.h @@ -16,15 +16,15 @@  //! converts text from unicode to yandex codepage  //! @attention destination buffer must be long enough to fit all characters of the text  //! @note @c dest buffer must fit at least @c len number of characters -template <typename TCharType> +template <typename TCharType>   inline size_t WideToChar(const TCharType* text, size_t len, char* dest, ECharset enc) {      Y_ASSERT(SingleByteCodepage(enc));      const char* start = dest;      const Encoder* const encoder = &EncoderByCharset(enc); -    const TCharType* const last = text + len; -    for (const TCharType* cur = text; cur != last; ++dest) { +    const TCharType* const last = text + len;  +    for (const TCharType* cur = text; cur != last; ++dest) {           *dest = encoder->Tr(ReadSymbolAndAdvance(cur, last));      } @@ -38,12 +38,12 @@ inline size_t WideToChar(const TCharType* text, size_t len, char* dest, ECharset  //!       string using the @c strlen function and pass as the @c len parameter;  //!       it does not make sense to create an additional version of this function because  //!       it will call to @c strlen anyway in order to allocate destination buffer -template <typename TCharType> +template <typename TCharType>   inline void CharToWide(const char* text, size_t len, TCharType* dest, const CodePage& cp) {      const unsigned char* cur = reinterpret_cast<const unsigned char*>(text);      const unsigned char* const last = cur + len;      for (; cur != last; ++cur, ++dest) { -        *dest = static_cast<TCharType>(cp.unicode[*cur]); // static_cast is safe as no 1char codepage contains non-BMP symbols +        *dest = static_cast<TCharType>(cp.unicode[*cur]); // static_cast is safe as no 1char codepage contains non-BMP symbols       }  } @@ -55,21 +55,21 @@ namespace NDetail {          // Depending on template params, perform conversion of single-byte/multi-byte/utf8 string to/from wide string. -        template <typename TCharType> +        template <typename TCharType>           inline TBasicStringBuf<TCharType> RecodeSingleByteChar(const TStringBuf src, TCharType* dst, const CodePage& cp) {              Y_ASSERT(cp.SingleByteCodepage());              ::CharToWide(src.data(), src.size(), dst, cp);              return TBasicStringBuf<TCharType>(dst, src.size());          } -        template <typename TCharType> +        template <typename TCharType>           inline TStringBuf RecodeSingleByteChar(const TBasicStringBuf<TCharType> src, char* dst, const CodePage& cp) {              Y_ASSERT(cp.SingleByteCodepage());              ::WideToChar(src.data(), src.size(), dst, cp.CPEnum);              return TStringBuf(dst, src.size());          } -        template <typename TCharType> +        template <typename TCharType>           inline TBasicStringBuf<TCharType> RecodeMultiByteChar(const TStringBuf src, TCharType* dst, ECharset encoding) {              Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding));              size_t read = 0; @@ -78,7 +78,7 @@ namespace NDetail {              return TBasicStringBuf<TCharType>(dst, written);          } -        template <typename TCharType> +        template <typename TCharType>           inline TStringBuf RecodeMultiByteChar(const TBasicStringBuf<TCharType> src, char* dst, ECharset encoding) {              Y_ASSERT(!NCodepagePrivate::NativeCodepage(encoding));              size_t read = 0; @@ -87,7 +87,7 @@ namespace NDetail {              return TStringBuf(dst, written);          } -        template <typename TCharType> +        template <typename TCharType>           inline TBasicStringBuf<TCharType> RecodeUtf8(const TStringBuf src, TCharType* dst) {              size_t len = 0;              if (!::UTF8ToWide(src.data(), src.size(), dst, len)) @@ -95,25 +95,25 @@ namespace NDetail {              return TBasicStringBuf<TCharType>(dst, len);          } -        template <typename TCharType> +        template <typename TCharType>           inline TStringBuf RecodeUtf8(const TBasicStringBuf<TCharType> src, char* dst) {              size_t len = 0;              ::WideToUTF8(src.data(), src.size(), dst, len);              return TStringBuf(dst, len);          } -        // Select one of re-coding methods from above, based on provided @encoding - -        template <typename TCharFrom, typename TCharTo> +        // Select one of re-coding methods from above, based on provided @encoding  +  +        template <typename TCharFrom, typename TCharTo>           TBasicStringBuf<TCharTo> Recode(const TBasicStringBuf<TCharFrom> src, TCharTo* dst, ECharset encoding) { -            if (encoding == CODES_UTF8) -                return RecodeUtf8(src, dst); -            else if (SingleByteCodepage(encoding)) -                return RecodeSingleByteChar(src, dst, *CodePageByCharset(encoding)); -            else -                return RecodeMultiByteChar(src, dst, encoding); -        } - +            if (encoding == CODES_UTF8)  +                return RecodeUtf8(src, dst);  +            else if (SingleByteCodepage(encoding))  +                return RecodeSingleByteChar(src, dst, *CodePageByCharset(encoding));  +            else  +                return RecodeMultiByteChar(src, dst, encoding);  +        }  +       }      template <typename TCharFrom> diff --git a/library/cpp/charset/wide_ut.cpp b/library/cpp/charset/wide_ut.cpp index 78947d51bad..6bd754e0dab 100644 --- a/library/cpp/charset/wide_ut.cpp +++ b/library/cpp/charset/wide_ut.cpp @@ -151,7 +151,7 @@ public:  UNIT_TEST_SUITE_REGISTRATION(TConversionTest); -// test conversions (char -> wchar32), (wchar32 -> char) and (wchar32 -> wchar16) +// test conversions (char -> wchar32), (wchar32 -> char) and (wchar32 -> wchar16)   #define TEST_WCHAR32(sbuf, wbuf, enc)                                                                                                 \      do {                                                                                                                              \          /* convert char to wchar32 */                                                                                                 \ @@ -169,7 +169,7 @@ UNIT_TEST_SUITE_REGISTRATION(TConversionTest);          UNIT_ASSERT_VALUES_EQUAL(sbuf, s1buf);                                                                                        \          UNIT_ASSERT_VALUES_EQUAL(wbuf, wstr2);                                                                                        \      } while (false) - +   void TConversionTest::TestCharToWide() {      TUtf16String w = CharToWide(YandexText, CODES_YANDEX); @@ -236,7 +236,7 @@ void TConversionTest::TestRecodeIntoString() {      UNIT_ASSERT(sYandex.data() == sdata);     // reserved buffer reused      UNIT_ASSERT(sYandex.data() == sres.data());     // same buffer      UNIT_ASSERT(sYandex.size() == sres.size());     // same size -    TEST_WCHAR32(sYandex, UnicodeText, CODES_YANDEX); +    TEST_WCHAR32(sYandex, UnicodeText, CODES_YANDEX);       TUtf16String sUnicode;      sUnicode.reserve(YandexText.size() * 4); @@ -254,7 +254,7 @@ void TConversionTest::TestRecodeIntoString() {      UNIT_ASSERT(sUtf8.capacity() > scap); // increased buffer capacity (supplied was too small)      UNIT_ASSERT(sUtf8.data() == sres.data());         // same buffer      UNIT_ASSERT(sUtf8.size() == sres.size());         // same size -    TEST_WCHAR32(sUtf8, UnicodeText, CODES_UTF8); +    TEST_WCHAR32(sUtf8, UnicodeText, CODES_UTF8);       sUnicode.clear();      wdata = sUnicode.data(); @@ -336,11 +336,11 @@ void TConversionTest::TestRecodeAppend() {      }  } -template <> +template <>   void Out<RECODE_RESULT>(IOutputStream& out, RECODE_RESULT val) { -    out << int(val); -} - +    out << int(val);  +}  +   void TConversionTest::TestRecode() {      for (int c = 0; c != CODES_MAX; ++c) {          ECharset enc = static_cast<ECharset>(c); @@ -367,11 +367,11 @@ void TConversionTest::TestRecode() {              res = RecodeFromUnicode(enc, &wch, &rch, 1, 1, read, written);              UNIT_ASSERT(res == RECODE_OK); -            char rch2 = 0; -            UNIT_ASSERT_VALUES_EQUAL(RECODE_OK, RecodeFromUnicode(enc, wch, &rch2, 1, written)); -            UNIT_ASSERT_VALUES_EQUAL(size_t(1), written); -            UNIT_ASSERT_VALUES_EQUAL(rch2, rch); - +            char rch2 = 0;  +            UNIT_ASSERT_VALUES_EQUAL(RECODE_OK, RecodeFromUnicode(enc, wch, &rch2, 1, written));  +            UNIT_ASSERT_VALUES_EQUAL(size_t(1), written);  +            UNIT_ASSERT_VALUES_EQUAL(rch2, rch);  +               if (hash.contains(rch)) { // there are some stupid encodings with duplicate characters                  continue;              } else {  | 
