diff options
author | smalov <smalov@yandex-team.ru> | 2022-02-10 16:47:36 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:36 +0300 |
commit | f70d9720e13aef3a935e3f405b0eac554529e76e (patch) | |
tree | 5519c392aebdb16153197de07e4774c0a2be261a /library | |
parent | 7b659037613268d5eac4a1b6a7c5eff3cd36d4bf (diff) | |
download | ydb-f70d9720e13aef3a935e3f405b0eac554529e76e.tar.gz |
Restoring authorship annotation for <smalov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library')
-rw-r--r-- | library/cpp/charset/codepage.h | 18 | ||||
-rw-r--r-- | library/cpp/charset/codepage_ut.cpp | 40 | ||||
-rw-r--r-- | library/cpp/charset/recyr_int.hh | 4 | ||||
-rw-r--r-- | library/cpp/charset/wide.h | 90 | ||||
-rw-r--r-- | library/cpp/charset/wide_ut.cpp | 198 | ||||
-rw-r--r-- | library/cpp/on_disk/chunks/chunked_helpers.h | 2 | ||||
-rw-r--r-- | library/cpp/string_utils/url/url.cpp | 2 | ||||
-rw-r--r-- | library/cpp/uri/assign.cpp | 2 | ||||
-rw-r--r-- | library/cpp/uri/parse.h | 2 |
9 files changed, 179 insertions, 179 deletions
diff --git a/library/cpp/charset/codepage.h b/library/cpp/charset/codepage.h index 30a02a4610..bc50d5890f 100644 --- a/library/cpp/charset/codepage.h +++ b/library/cpp/charset/codepage.h @@ -291,17 +291,17 @@ extern const CodePage& csYandex; void DecodeUnknownPlane(wchar16* start, wchar16*& end, const ECharset enc4unk); void DecodeUnknownPlane(wchar32* start, wchar32*& end, const ECharset enc4unk); -inline void ToLower(char* s, size_t n, const CodePage& cp = csYandex) { - char* const e = s + n; - for (; s != e; ++s) +inline void ToLower(char* s, size_t n, const CodePage& cp = csYandex) { + char* const e = s + n; + for (; s != e; ++s) *s = cp.ToLower(*s); -} - -inline void ToUpper(char* s, size_t n, const CodePage& cp = csYandex) { - char* const e = s + n; - for (; s != e; ++s) +} + +inline void ToUpper(char* s, size_t n, const CodePage& cp = csYandex) { + char* const e = s + n; + for (; s != e; ++s) *s = cp.ToUpper(*s); -} +} inline TString ToLower(TString s, const CodePage& cp, size_t pos = 0, size_t n = TString::npos) { s.Transform([&cp](size_t, char c) { return cp.ToLower(c); }, pos, n); diff --git a/library/cpp/charset/codepage_ut.cpp b/library/cpp/charset/codepage_ut.cpp index c3ac3ac478..9d4b83ba80 100644 --- a/library/cpp/charset/codepage_ut.cpp +++ b/library/cpp/charset/codepage_ut.cpp @@ -11,7 +11,7 @@ #pragma warning(disable : 4309) /*truncation of constant value*/ #endif -namespace { +namespace { const char yandexUpperCase[] = "\x81\x82\x83\x84\x85\x86\x87" "\x8E" @@ -19,7 +19,7 @@ namespace { "\xA8\xA9\xAA\xAB\xAC\xAD\xAE\xAF" "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF" "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF"; - + const char yandexLowerCase[] = "\x91\x92\x93\x94\x95\x96\x97" "\x9E" @@ -27,8 +27,8 @@ namespace { "\xB8\xB9\xBA\xBB\xBC\xBD\xBE\xBF" "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF" "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"; -} - +} + class TCodepageTest: public TTestBase { private: UNIT_TEST_SUITE(TCodepageTest); @@ -50,8 +50,8 @@ public: void TestBrokenMultibyte(); void TestSurrogatePairs(); void TestEncodingHints(); - void TestToLower(); - void TestToUpper(); + void TestToLower(); + void TestToUpper(); void TestCanEncode(); @@ -325,24 +325,24 @@ void TCodepageTest::TestEncodingHints() { UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("ISO-2022-KR")); UNIT_ASSERT(CODES_UNSUPPORTED != EncodingHintByName("ISO-2022-jp")); } - -void TCodepageTest::TestToLower() { - TTempBuf buf; - char* data = buf.Data(); + +void TCodepageTest::TestToLower() { + TTempBuf buf; + char* data = buf.Data(); const size_t n = Y_ARRAY_SIZE(yandexUpperCase); // including NTS - memcpy(data, yandexUpperCase, n); - ToLower(data, n - 1); + memcpy(data, yandexUpperCase, n); + ToLower(data, n - 1); UNIT_ASSERT(strcmp(data, yandexLowerCase) == 0); -} - -void TCodepageTest::TestToUpper() { - TTempBuf buf; - char* data = buf.Data(); +} + +void TCodepageTest::TestToUpper() { + TTempBuf buf; + char* data = buf.Data(); const size_t n = Y_ARRAY_SIZE(yandexLowerCase); // including NTS - memcpy(data, yandexLowerCase, n); - ToUpper(data, n - 1); + memcpy(data, yandexLowerCase, n); + ToUpper(data, n - 1); UNIT_ASSERT(strcmp(data, yandexUpperCase) == 0); -} +} static void TestCanEncodeEmpty() { TWtringBuf empty; diff --git a/library/cpp/charset/recyr_int.hh b/library/cpp/charset/recyr_int.hh index 353af53305..c02f863b0d 100644 --- a/library/cpp/charset/recyr_int.hh +++ b/library/cpp/charset/recyr_int.hh @@ -45,13 +45,13 @@ namespace NCodepagePrivate { out_writed = (unsigned char*)out - out_start; return res; } - + inline RECODE_RESULT _recodeFromUTF8(ECharset to, const char* in, char* out, size_t in_size, size_t out_size, size_t& in_readed, size_t& out_writed) { if (to == CODES_UTF8) return _recodeCopy(in, out, in_size, out_size, in_readed, out_writed); Y_ASSERT(CODES_UNKNOWN < to && to < CODES_MAX); const Encoder* enc = &EncoderByCharset(to); - + const unsigned char* in_start = (const unsigned char*)in; const unsigned char* in_end = in_start + in_size; const unsigned char* out_start = (unsigned char*)out; diff --git a/library/cpp/charset/wide.h b/library/cpp/charset/wide.h index 32d30e849e..02250c42be 100644 --- a/library/cpp/charset/wide.h +++ b/library/cpp/charset/wide.h @@ -13,9 +13,9 @@ #include <util/memory/tempbuf.h> #include <util/system/yassert.h> -//! converts text from unicode to yandex codepage -//! @attention destination buffer must be long enough to fit all characters of the text -//! @note @c dest buffer must fit at least @c len number of characters +//! converts text from unicode to yandex codepage +//! @attention destination buffer must be long enough to fit all characters of the text +//! @note @c dest buffer must fit at least @c len number of characters template <typename TCharType> inline size_t WideToChar(const TCharType* text, size_t len, char* dest, ECharset enc) { Y_ASSERT(SingleByteCodepage(enc)); @@ -26,27 +26,27 @@ inline size_t WideToChar(const TCharType* text, size_t len, char* dest, ECharset const TCharType* const last = text + len; for (const TCharType* cur = text; cur != last; ++dest) { *dest = encoder->Tr(ReadSymbolAndAdvance(cur, last)); - } + } return dest - start; -} - -//! converts text to unicode using a codepage object -//! @attention destination buffer must be long enough to fit all characters of the text -//! @note @c dest buffer must fit at least @c len number of characters; -//! if you need convert zero terminated string you should determine length of the -//! string using the @c strlen function and pass as the @c len parameter; -//! it does not make sense to create an additional version of this function because -//! it will call to @c strlen anyway in order to allocate destination buffer +} + +//! converts text to unicode using a codepage object +//! @attention destination buffer must be long enough to fit all characters of the text +//! @note @c dest buffer must fit at least @c len number of characters; +//! if you need convert zero terminated string you should determine length of the +//! string using the @c strlen function and pass as the @c len parameter; +//! it does not make sense to create an additional version of this function because +//! it will call to @c strlen anyway in order to allocate destination buffer template <typename TCharType> inline void CharToWide(const char* text, size_t len, TCharType* dest, const CodePage& cp) { - const unsigned char* cur = reinterpret_cast<const unsigned char*>(text); - const unsigned char* const last = cur + len; - for (; cur != last; ++cur, ++dest) { + const unsigned char* cur = reinterpret_cast<const unsigned char*>(text); + const unsigned char* const last = cur + len; + for (; cur != last; ++cur, ++dest) { *dest = static_cast<TCharType>(cp.unicode[*cur]); // static_cast is safe as no 1char codepage contains non-BMP symbols - } -} - + } +} + namespace NDetail { namespace NBaseOps { // Template interface base recoding drivers, do not perform any memory management, @@ -222,7 +222,7 @@ inline TStringBuf WideToChar(const TWtringBuf src, TString& dst, ECharset encodi return ::NDetail::Recode<wchar16>(src, dst, encoding); } -//! calls either to @c WideToUTF8 or @c WideToChar depending on the encoding type +//! calls either to @c WideToUTF8 or @c WideToChar depending on the encoding type inline TString WideToChar(const wchar16* text, size_t len, ECharset enc) { if (NCodepagePrivate::NativeCodepage(enc)) { if (enc == CODES_UTF8) @@ -231,8 +231,8 @@ inline TString WideToChar(const wchar16* text, size_t len, ECharset enc) { TString s = TString::Uninitialized(len); s.remove(WideToChar(text, len, s.begin(), enc)); - return s; - } + return s; + } TString s = TString::Uninitialized(len * 3); @@ -242,15 +242,15 @@ inline TString WideToChar(const wchar16* text, size_t len, ECharset enc) { s.remove(written); return s; -} - +} + inline TUtf16String CharToWide(const char* text, size_t len, const CodePage& cp) { TUtf16String w = TUtf16String::Uninitialized(len); - CharToWide(text, len, w.begin(), cp); - return w; -} - -//! calls either to @c UTF8ToWide or @c CharToWide depending on the encoding type + CharToWide(text, len, w.begin(), cp); + return w; +} + +//! calls either to @c UTF8ToWide or @c CharToWide depending on the encoding type template <bool robust> inline TUtf16String CharToWide(const char* text, size_t len, ECharset enc) { if (NCodepagePrivate::NativeCodepage(enc)) { @@ -268,29 +268,29 @@ inline TUtf16String CharToWide(const char* text, size_t len, ECharset enc) { w.remove(written); return w; -} - -//! converts text from UTF8 to unicode, if conversion fails it uses codepage to convert the text -//! @param text text to be converted -//! @param len length of the text in characters -//! @param cp a codepage that is used in case of failed conversion from UTF8 +} + +//! converts text from UTF8 to unicode, if conversion fails it uses codepage to convert the text +//! @param text text to be converted +//! @param len length of the text in characters +//! @param cp a codepage that is used in case of failed conversion from UTF8 inline TUtf16String UTF8ToWide(const char* text, size_t len, const CodePage& cp) { TUtf16String w = TUtf16String::Uninitialized(len); - size_t written = 0; - if (UTF8ToWide(text, len, w.begin(), written)) - w.remove(written); - else - CharToWide(text, len, w.begin(), cp); - return w; -} - + size_t written = 0; + if (UTF8ToWide(text, len, w.begin(), written)) + w.remove(written); + else + CharToWide(text, len, w.begin(), cp); + return w; +} + inline TString WideToChar(const TWtringBuf w, ECharset enc) { return WideToChar(w.data(), w.size(), enc); } inline TUtf16String CharToWide(const TStringBuf s, ECharset enc) { return CharToWide<false>(s.data(), s.size(), enc); -} +} template <bool robust> inline TUtf16String CharToWide(const TStringBuf s, ECharset enc) { @@ -299,7 +299,7 @@ inline TUtf16String CharToWide(const TStringBuf s, ECharset enc) { inline TUtf16String CharToWide(const TStringBuf s, const CodePage& cp) { return CharToWide(s.data(), s.size(), cp); -} +} // true if @text can be fully encoded to specified @encoding, // with possibility to recover exact original text after decoding diff --git a/library/cpp/charset/wide_ut.cpp b/library/cpp/charset/wide_ut.cpp index 78947d51ba..8d00c5ec07 100644 --- a/library/cpp/charset/wide_ut.cpp +++ b/library/cpp/charset/wide_ut.cpp @@ -7,20 +7,20 @@ #include <util/charset/utf8.h> #include <util/digest/numeric.h> #include <util/generic/hash_set.h> - + #include <algorithm> -namespace { - //! three UTF8 encoded russian letters (A, B, V) +namespace { + //! three UTF8 encoded russian letters (A, B, V) const char yandexCyrillicAlphabet[] = "\xC0\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9\xCA\xCB\xCC\xCD\xCE\xCF" // A - P "\xD0\xD1\xD2\xD3\xD4\xD5\xD6\xD7\xD8\xD9\xDA\xDB\xDC\xDD\xDE\xDF" // R - YA "\xE0\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9\xEA\xEB\xEC\xED\xEE\xEF" // a - p "\xF0\xF1\xF2\xF3\xF4\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF"; // r - ya - const wchar16 wideCyrillicAlphabet[] = { - 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, - 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, - 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, + const wchar16 wideCyrillicAlphabet[] = { + 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, + 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, + 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, 0x00}; const char utf8CyrillicAlphabet[] = "\xd0\x90\xd0\x91\xd0\x92\xd0\x93\xd0\x94\xd0\x95\xd0\x96\xd0\x97" @@ -31,43 +31,43 @@ namespace { "\xd0\xb8\xd0\xb9\xd0\xba\xd0\xbb\xd0\xbc\xd0\xbd\xd0\xbe\xd0\xbf" "\xd1\x80\xd1\x81\xd1\x82\xd1\x83\xd1\x84\xd1\x85\xd1\x86\xd1\x87" "\xd1\x88\xd1\x89\xd1\x8a\xd1\x8b\xd1\x8c\xd1\x8d\xd1\x8e\xd1\x8f"; - + TString CreateYandexText() { - const int len = 256; + const int len = 256; char text[len] = {0}; - for (int i = 0; i < len; ++i) { - text[i] = static_cast<char>(i); - } + for (int i = 0; i < len; ++i) { + text[i] = static_cast<char>(i); + } return TString(text, len); - } - + } + TUtf16String CreateUnicodeText() { - const int len = 256; - wchar16 text[len] = { - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x00 - 0x0F - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x10 - 0x1F - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x20 - 0x2F - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x30 - 0x3F - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x40 - 0x4F - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x50 - 0x5F - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x60 - 0x6F - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x70 - 0x7F - - 0x0301, 0x00C4, 0x00D6, 0x00DC, 0x0104, 0x0106, 0x0118, 0x0141, 0x00E0, 0x00E2, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x0490, 0x00AD, // 0x80 - 0x8F - 0x00DF, 0x00E4, 0x00F6, 0x00FC, 0x0105, 0x0107, 0x0119, 0x0142, 0x00EB, 0x00EE, 0x00EF, 0x00F4, 0x00F9, 0x00FB, 0x0491, 0x92CF, // 0x90 - 0x9F - 0x00A0, 0x0143, 0x00D3, 0x015A, 0x017B, 0x0179, 0x046C, 0x00A7, 0x0401, 0x0462, 0x0472, 0x0474, 0x040E, 0x0406, 0x0404, 0x0407, // 0xA0 - 0xAF - 0x00B0, 0x0144, 0x00F3, 0x015B, 0x017C, 0x017A, 0x046D, 0x2116, 0x0451, 0x0463, 0x0473, 0x0475, 0x045E, 0x0456, 0x0454, 0x0457 // 0xB0 - 0xBF - }; - for (int i = 0; i < len; ++i) { - if (i <= 0x7F) { // ASCII characters without 0x7 and 0x1B - text[i] = static_cast<wchar16>(i); + const int len = 256; + wchar16 text[len] = { + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x00 - 0x0F + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x10 - 0x1F + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x20 - 0x2F + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x30 - 0x3F + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x40 - 0x4F + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x50 - 0x5F + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x60 - 0x6F + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, // 0x70 - 0x7F + + 0x0301, 0x00C4, 0x00D6, 0x00DC, 0x0104, 0x0106, 0x0118, 0x0141, 0x00E0, 0x00E2, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x0490, 0x00AD, // 0x80 - 0x8F + 0x00DF, 0x00E4, 0x00F6, 0x00FC, 0x0105, 0x0107, 0x0119, 0x0142, 0x00EB, 0x00EE, 0x00EF, 0x00F4, 0x00F9, 0x00FB, 0x0491, 0x92CF, // 0x90 - 0x9F + 0x00A0, 0x0143, 0x00D3, 0x015A, 0x017B, 0x0179, 0x046C, 0x00A7, 0x0401, 0x0462, 0x0472, 0x0474, 0x040E, 0x0406, 0x0404, 0x0407, // 0xA0 - 0xAF + 0x00B0, 0x0144, 0x00F3, 0x015B, 0x017C, 0x017A, 0x046D, 0x2116, 0x0451, 0x0463, 0x0473, 0x0475, 0x045E, 0x0456, 0x0454, 0x0457 // 0xB0 - 0xBF + }; + for (int i = 0; i < len; ++i) { + if (i <= 0x7F) { // ASCII characters without 0x7 and 0x1B + text[i] = static_cast<wchar16>(i); } else if (i >= 0xC0 && i <= 0xFF) { // russian characters (without YO and yo) - text[i] = static_cast<wchar16>(i + 0x0350); // 0x0410 - 0x044F - } - } + text[i] = static_cast<wchar16>(i + 0x0350); // 0x0410 - 0x044F + } + } return TUtf16String(text, len); - } - + } + TString CreateUTF8Text() { char text[] = { '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07', '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f', @@ -96,9 +96,9 @@ namespace { '\xd1', '\x87', '\xd1', '\x88', '\xd1', '\x89', '\xd1', '\x8a', '\xd1', '\x8b', '\xd1', '\x8c', '\xd1', '\x8d', '\xd1', '\x8e', '\xd1', '\x8f'}; return TString(text, Y_ARRAY_SIZE(text)); - } - - //! use this function to dump UTF8 text into a file in case of any changes + } + + //! use this function to dump UTF8 text into a file in case of any changes // void DumpUTF8Text() { // TString s = WideToUTF8(UnicodeText); // std::ofstream f("utf8.txt"); @@ -109,20 +109,20 @@ namespace { // f << std::endl; // } // } - -} - -//! this unit tests ensure validity of Yandex-Unicode and UTF8-Unicode conversions -//! @note only those conversions are verified because they are used in index + +} + +//! this unit tests ensure validity of Yandex-Unicode and UTF8-Unicode conversions +//! @note only those conversions are verified because they are used in index class TConversionTest: public TTestBase { -private: - //! @note every of the text can have zeros in the middle +private: + //! @note every of the text can have zeros in the middle const TString YandexText; const TUtf16String UnicodeText; const TString UTF8Text; - -private: - UNIT_TEST_SUITE(TConversionTest); + +private: + UNIT_TEST_SUITE(TConversionTest); UNIT_TEST(TestCharToWide); UNIT_TEST(TestWideToChar); UNIT_TEST(TestYandexEncoding); @@ -130,27 +130,27 @@ private: UNIT_TEST(TestRecodeAppend); UNIT_TEST(TestRecode); UNIT_TEST(TestUnicodeLimit); - UNIT_TEST_SUITE_END(); - -public: - TConversionTest() - : YandexText(CreateYandexText()) - , UnicodeText(CreateUnicodeText()) - , UTF8Text(CreateUTF8Text()) - { - } - - void TestCharToWide(); - void TestWideToChar(); + UNIT_TEST_SUITE_END(); + +public: + TConversionTest() + : YandexText(CreateYandexText()) + , UnicodeText(CreateUnicodeText()) + , UTF8Text(CreateUTF8Text()) + { + } + + void TestCharToWide(); + void TestWideToChar(); void TestYandexEncoding(); void TestRecodeIntoString(); void TestRecodeAppend(); - void TestRecode(); + void TestRecode(); void TestUnicodeLimit(); -}; - -UNIT_TEST_SUITE_REGISTRATION(TConversionTest); - +}; + +UNIT_TEST_SUITE_REGISTRATION(TConversionTest); + // test conversions (char -> wchar32), (wchar32 -> char) and (wchar32 -> wchar16) #define TEST_WCHAR32(sbuf, wbuf, enc) \ do { \ @@ -170,28 +170,28 @@ UNIT_TEST_SUITE_REGISTRATION(TConversionTest); UNIT_ASSERT_VALUES_EQUAL(wbuf, wstr2); \ } while (false) -void TConversionTest::TestCharToWide() { +void TConversionTest::TestCharToWide() { TUtf16String w = CharToWide(YandexText, CODES_YANDEX); - - UNIT_ASSERT(w.size() == 256); - UNIT_ASSERT(w.size() == UnicodeText.size()); - - for (int i = 0; i < 256; ++i) { - UNIT_ASSERT_VALUES_EQUAL(w[i], UnicodeText[i]); - } -} - -void TConversionTest::TestWideToChar() { + + UNIT_ASSERT(w.size() == 256); + UNIT_ASSERT(w.size() == UnicodeText.size()); + + for (int i = 0; i < 256; ++i) { + UNIT_ASSERT_VALUES_EQUAL(w[i], UnicodeText[i]); + } +} + +void TConversionTest::TestWideToChar() { TString s = WideToChar(UnicodeText, CODES_YANDEX); - - UNIT_ASSERT(s.size() == 256); - UNIT_ASSERT(s.size() == YandexText.size()); - - for (int i = 0; i < 256; ++i) { - UNIT_ASSERT_VALUES_EQUAL(s[i], YandexText[i]); - } -} - + + UNIT_ASSERT(s.size() == 256); + UNIT_ASSERT(s.size() == YandexText.size()); + + for (int i = 0; i < 256; ++i) { + UNIT_ASSERT_VALUES_EQUAL(s[i], YandexText[i]); + } +} + static void TestSurrogates(const char* str, const wchar16* wide, size_t wideSize, ECharset enc) { TUtf16String w = UTF8ToWide(str); @@ -205,9 +205,9 @@ static void TestSurrogates(const char* str, const wchar16* wide, size_t wideSize void TConversionTest::TestYandexEncoding() { TUtf16String w = UTF8ToWide(utf8CyrillicAlphabet, strlen(utf8CyrillicAlphabet), csYandex); - UNIT_ASSERT(w == wideCyrillicAlphabet); + UNIT_ASSERT(w == wideCyrillicAlphabet); w = UTF8ToWide(yandexCyrillicAlphabet, strlen(yandexCyrillicAlphabet), csYandex); - UNIT_ASSERT(w == wideCyrillicAlphabet); + UNIT_ASSERT(w == wideCyrillicAlphabet); const char* utf8NonBMP2 = "ab\xf4\x80\x89\x87n"; wchar16 wNonBMPDummy2[] = {'a', 'b', 0xDBC0, 0xDE47, 'n'}; @@ -226,8 +226,8 @@ void TConversionTest::TestYandexEncoding() { UNIT_ASSERT(yandexNonBMP2 == temp); } -} - +} + void TConversionTest::TestRecodeIntoString() { TString sYandex(UnicodeText.size() * 4, 'x'); const char* sdata = sYandex.data(); @@ -341,15 +341,15 @@ void Out<RECODE_RESULT>(IOutputStream& out, RECODE_RESULT val) { out << int(val); } -void TConversionTest::TestRecode() { +void TConversionTest::TestRecode() { for (int c = 0; c != CODES_MAX; ++c) { ECharset enc = static_cast<ECharset>(c); if (!SingleByteCodepage(enc)) continue; - + using THash = THashSet<char>; THash hash; - + for (int i = 0; i != 256; ++i) { char ch = static_cast<char>(i); @@ -357,7 +357,7 @@ void TConversionTest::TestRecode() { size_t read = 0; size_t written = 0; RECODE_RESULT res = RECODE_ERROR; - + res = RecodeToUnicode(enc, &ch, &wch, 1, 1, read, written); UNIT_ASSERT(res == RECODE_OK); if (wch == BROKEN_RUNE) @@ -380,9 +380,9 @@ void TConversionTest::TestRecode() { UNIT_ASSERT(ch == rch); } - } -} - + } +} + void TConversionTest::TestUnicodeLimit() { for (int i = 0; i != CODES_MAX; ++i) { ECharset code = static_cast<ECharset>(i); diff --git a/library/cpp/on_disk/chunks/chunked_helpers.h b/library/cpp/on_disk/chunks/chunked_helpers.h index 5fa96afdca..36eb2920fe 100644 --- a/library/cpp/on_disk/chunks/chunked_helpers.h +++ b/library/cpp/on_disk/chunks/chunked_helpers.h @@ -520,7 +520,7 @@ public: blob = TBlob::NoCopy(GetBlock(it->second), GetBlockLen(it->second)); return true; } - + private: TVector<TString> Names; THashMap<TString, size_t> NameToIndex; diff --git a/library/cpp/string_utils/url/url.cpp b/library/cpp/string_utils/url/url.cpp index 85f4ac5d69..036cd6c77d 100644 --- a/library/cpp/string_utils/url/url.cpp +++ b/library/cpp/string_utils/url/url.cpp @@ -15,7 +15,7 @@ #include <cstdlib> -namespace { +namespace { struct TUncheckedSize { static bool Has(size_t) { return true; diff --git a/library/cpp/uri/assign.cpp b/library/cpp/uri/assign.cpp index ae9125c727..015bf5a0f2 100644 --- a/library/cpp/uri/assign.cpp +++ b/library/cpp/uri/assign.cpp @@ -20,7 +20,7 @@ namespace NUri { buf = nullptr; return buf; } - + TMallocPtr<char> TUri::IDNToAscii(const TStringBuf& host, ECharset enc) { TTempBuf buf(sizeof(wchar32) * (1 + host.length())); wchar32* wbuf = reinterpret_cast<wchar32*>(buf.Data()); diff --git a/library/cpp/uri/parse.h b/library/cpp/uri/parse.h index ca2358e572..e3266d84d6 100644 --- a/library/cpp/uri/parse.h +++ b/library/cpp/uri/parse.h @@ -3,7 +3,7 @@ // #define DO_PRN #include <cstddef> - + #include "common.h" #include <library/cpp/charset/doccodes.h> |