diff options
author | Anton Samokhvalov <pg83@yandex.ru> | 2022-02-10 16:45:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:15 +0300 |
commit | 72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch) | |
tree | da2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/unicode | |
parent | 778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff) | |
download | ydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz |
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/unicode')
-rw-r--r-- | library/cpp/unicode/normalization/custom_encoder.cpp | 30 | ||||
-rw-r--r-- | library/cpp/unicode/normalization/custom_encoder.h | 4 | ||||
-rw-r--r-- | library/cpp/unicode/normalization/decomposition_table.h | 36 | ||||
-rw-r--r-- | library/cpp/unicode/normalization/normalization.h | 40 | ||||
-rw-r--r-- | library/cpp/unicode/normalization/ut/normalization_ut.cpp | 2 | ||||
-rw-r--r-- | library/cpp/unicode/punycode/punycode.h | 2 | ||||
-rw-r--r-- | library/cpp/unicode/punycode/punycode_ut.cpp | 24 | ||||
-rw-r--r-- | library/cpp/unicode/ya.make | 4 |
8 files changed, 71 insertions, 71 deletions
diff --git a/library/cpp/unicode/normalization/custom_encoder.cpp b/library/cpp/unicode/normalization/custom_encoder.cpp index c6f186405f..897667861c 100644 --- a/library/cpp/unicode/normalization/custom_encoder.cpp +++ b/library/cpp/unicode/normalization/custom_encoder.cpp @@ -9,7 +9,7 @@ void TCustomEncoder::addToTable(wchar32 ucode, unsigned char code, const CodePag unsigned char pos = (unsigned char)(ucode & 255); if (Table[plane] == DefaultPlane) { Table[plane] = new char[256]; - memset(Table[plane], 0, 256 * sizeof(char)); + memset(Table[plane], 0, 256 * sizeof(char)); } if (Table[plane][pos] == 0) { @@ -18,18 +18,18 @@ void TCustomEncoder::addToTable(wchar32 ucode, unsigned char code, const CodePag Y_ASSERT(target && *target->Names); if (static_cast<unsigned char>(Table[plane][pos]) > 127 && code) { Cerr << "WARNING: Only lower part of ASCII should have duplicate encodings " - << target->Names[0] - << " " << IntToString<16>(ucode) - << " " << IntToString<16>(code) - << " " << IntToString<16>(static_cast<unsigned char>(Table[plane][pos])) - << Endl; + << target->Names[0] + << " " << IntToString<16>(ucode) + << " " << IntToString<16>(code) + << " " << IntToString<16>(static_cast<unsigned char>(Table[plane][pos])) + << Endl; } } } -bool isGoodDecomp(wchar32 rune, wchar32 decomp) { +bool isGoodDecomp(wchar32 rune, wchar32 decomp) { if ( - (NUnicode::NPrivate::CharInfo(rune) == NUnicode::NPrivate::CharInfo(decomp)) || (IsAlpha(rune) && IsAlpha(decomp)) || (IsNumeric(rune) && IsNumeric(decomp)) || (IsQuotation(rune) && IsQuotation(decomp))) + (NUnicode::NPrivate::CharInfo(rune) == NUnicode::NPrivate::CharInfo(decomp)) || (IsAlpha(rune) && IsAlpha(decomp)) || (IsNumeric(rune) && IsNumeric(decomp)) || (IsQuotation(rune) && IsQuotation(decomp))) { return true; } @@ -43,23 +43,23 @@ void TCustomEncoder::Create(const CodePage* target, bool extended) { DefaultPlane = new char[256]; - memset(DefaultPlane, 0, 256 * sizeof(char)); + memset(DefaultPlane, 0, 256 * sizeof(char)); for (size_t i = 0; i != 256; ++i) Table[i] = DefaultPlane; for (size_t i = 0; i != 256; ++i) { wchar32 ucode = target->unicode[i]; if (ucode != BROKEN_RUNE) // always UNASSIGNED - addToTable(ucode, (unsigned char)i, target); + addToTable(ucode, (unsigned char)i, target); } if (!extended) return; - for (wchar32 w = 1; w < 65535; w++) { - if (Code(w) == 0) { + for (wchar32 w = 1; w < 65535; w++) { + if (Code(w) == 0) { wchar32 dw = w; - while (IsComposed(dw) && Code(dw) == 0) { + while (IsComposed(dw) && Code(dw) == 0) { const wchar32* decomp_p = NUnicode::Decomposition<true>(dw); Y_ASSERT(decomp_p != nullptr); @@ -76,8 +76,8 @@ void TCustomEncoder::Create(const CodePage* target, bool extended) { TCustomEncoder::~TCustomEncoder() { for (size_t i = 0; i != 256; ++i) { if (Table[i] != DefaultPlane) { - delete[] Table[i]; + delete[] Table[i]; } } - delete[] DefaultPlane; + delete[] DefaultPlane; } diff --git a/library/cpp/unicode/normalization/custom_encoder.h b/library/cpp/unicode/normalization/custom_encoder.h index ef4d5b7f65..28644f37fa 100644 --- a/library/cpp/unicode/normalization/custom_encoder.h +++ b/library/cpp/unicode/normalization/custom_encoder.h @@ -2,8 +2,8 @@ #include <library/cpp/charset/codepage.h> -struct TCustomEncoder: public Encoder { - void Create(const CodePage* target, bool extended = false); +struct TCustomEncoder: public Encoder { + void Create(const CodePage* target, bool extended = false); ~TCustomEncoder(); private: diff --git a/library/cpp/unicode/normalization/decomposition_table.h b/library/cpp/unicode/normalization/decomposition_table.h index 23f3da334f..5a0b30d078 100644 --- a/library/cpp/unicode/normalization/decomposition_table.h +++ b/library/cpp/unicode/normalization/decomposition_table.h @@ -3,26 +3,26 @@ #include <util/charset/unicode_table.h> namespace NUnicode { - namespace NPrivate { - typedef NUnicodeTable::TTable<NUnicodeTable::TSubtable< - NUnicodeTable::UNICODE_TABLE_SHIFT, NUnicodeTable::TValues<const wchar32*>>> - TDecompositionTable; + namespace NPrivate { + typedef NUnicodeTable::TTable<NUnicodeTable::TSubtable< + NUnicodeTable::UNICODE_TABLE_SHIFT, NUnicodeTable::TValues<const wchar32*>>> + TDecompositionTable; - const TDecompositionTable& CannonDecompositionTable(); - const TDecompositionTable& CompatDecompositionTable(); + const TDecompositionTable& CannonDecompositionTable(); + const TDecompositionTable& CompatDecompositionTable(); - template <bool compat> - inline const TDecompositionTable& DecompositionTable(); + template <bool compat> + inline const TDecompositionTable& DecompositionTable(); - template <> - inline const TDecompositionTable& DecompositionTable<false>() { - return CannonDecompositionTable(); - } + template <> + inline const TDecompositionTable& DecompositionTable<false>() { + return CannonDecompositionTable(); + } - template <> - inline const TDecompositionTable& DecompositionTable<true>() { - return CompatDecompositionTable(); - } + template <> + inline const TDecompositionTable& DecompositionTable<true>() { + return CompatDecompositionTable(); + } - } -}; // namespace NUnicode + } +}; // namespace NUnicode diff --git a/library/cpp/unicode/normalization/normalization.h b/library/cpp/unicode/normalization/normalization.h index 4f5f57881c..9fd813b761 100644 --- a/library/cpp/unicode/normalization/normalization.h +++ b/library/cpp/unicode/normalization/normalization.h @@ -40,20 +40,20 @@ namespace NUnicode { public: inline TDecompositor(const TDecompositionTable& table) : Table(table) - { - } + { + } inline const wchar32* Decomposition(wchar32 ch) const { return NPrivate::Decomposition(Table, ch); } }; - template <bool IsCompat> + template <bool IsCompat> struct TStandartDecompositor: public TDecompositor { TStandartDecompositor() - : TDecompositor(NPrivate::DecompositionTable<IsCompat>()) - { - } + : TDecompositor(NPrivate::DecompositionTable<IsCompat>()) + { + } }; template <ENormalization N> @@ -92,12 +92,12 @@ namespace NUnicode { static const TRawData RawData[]; static const size_t RawDataSize; - class TKey: public std::pair<wchar32, wchar32> { + class TKey: public std::pair<wchar32, wchar32> { public: inline TKey(wchar32 a, wchar32 b) : std::pair<wchar32, wchar32>(a, b) - { - } + { + } inline size_t Hash() const { return CombineHashes(first, second); @@ -111,7 +111,7 @@ namespace NUnicode { } }; - typedef THashMap<TKey, wchar32, THash<TKey>> TData; + typedef THashMap<TKey, wchar32, THash<TKey>> TData; TData Data; public: @@ -149,8 +149,8 @@ namespace NUnicode { public: inline TCompositor() : Composition(Singleton<TComposition>()) - { - } + { + } inline void DoComposition(TBuffer& buffer) { if (buffer.size() < 2) @@ -204,7 +204,7 @@ namespace NUnicode { } return true; } - } + } template <bool compat> inline const wchar32* Decomposition(wchar32 ch) { @@ -232,7 +232,7 @@ namespace NUnicode { } struct TComparer { - inline bool operator()(const TSymbol& a, const TSymbol& b) { + inline bool operator()(const TSymbol& a, const TSymbol& b) { return Compare(a, b); } }; @@ -304,14 +304,14 @@ namespace NUnicode { public: TNormalizer() - : Decompositor(*Singleton<NPrivate::TStandartDecompositor<IsCompat>>()) - { - } + : Decompositor(*Singleton<NPrivate::TStandartDecompositor<IsCompat>>()) + { + } TNormalizer(const TDecompositor& decompositor) : Decompositor(decompositor) - { - } + { + } template <class T, typename TCharType> inline void Normalize(const TCharType* begin, const TCharType* end, T& out) { @@ -353,7 +353,7 @@ namespace NUnicode { } } }; -} +} //! decompose utf16 or utf32 string to any container supporting push_back or to T* template <NUnicode::ENormalization Norm, class T, typename TCharType> diff --git a/library/cpp/unicode/normalization/ut/normalization_ut.cpp b/library/cpp/unicode/normalization/ut/normalization_ut.cpp index 54d4940a26..27c6f9f5ea 100644 --- a/library/cpp/unicode/normalization/ut/normalization_ut.cpp +++ b/library/cpp/unicode/normalization/ut/normalization_ut.cpp @@ -5,7 +5,7 @@ #include <library/cpp/unicode/normalization/normalization.h> Y_UNIT_TEST_SUITE(TUnicodeNormalizationTest) { - template <NUnicode::ENormalization NormType> + template <NUnicode::ENormalization NormType> void TestInit() { NUnicode::TNormalizer<NormType> normalizer; TString s("упячка detected"); diff --git a/library/cpp/unicode/punycode/punycode.h b/library/cpp/unicode/punycode/punycode.h index af4acc25c1..7d14d2a794 100644 --- a/library/cpp/unicode/punycode/punycode.h +++ b/library/cpp/unicode/punycode/punycode.h @@ -1,5 +1,5 @@ #pragma once - + #include <util/generic/string.h> #include <util/generic/strbuf.h> #include <util/generic/yexception.h> diff --git a/library/cpp/unicode/punycode/punycode_ut.cpp b/library/cpp/unicode/punycode/punycode_ut.cpp index 97271cf0d8..b0208ad7bf 100644 --- a/library/cpp/unicode/punycode/punycode_ut.cpp +++ b/library/cpp/unicode/punycode/punycode_ut.cpp @@ -39,13 +39,13 @@ Y_UNIT_TEST_SUITE(TPunycodeTest) { UNIT_ASSERT(TestRaw("пример", "e1afmkfd")); { - const wchar16 tmp[] = {0x82, 0x81, 0x80, 0}; - UNIT_ASSERT(PunycodeToWide("abc") == tmp); // "abc" is still valid punycode + const wchar16 tmp[] = {0x82, 0x81, 0x80, 0}; + UNIT_ASSERT(PunycodeToWide("abc") == tmp); // "abc" is still valid punycode } UNIT_ASSERT_EXCEPTION(PunycodeToWide(" "), TPunycodeError); UNIT_ASSERT_EXCEPTION(PunycodeToWide("абвгд"), TPunycodeError); - UNIT_ASSERT_EXCEPTION(PunycodeToWide("-"), TPunycodeError); + UNIT_ASSERT_EXCEPTION(PunycodeToWide("-"), TPunycodeError); { TString longIn; @@ -62,23 +62,23 @@ Y_UNIT_TEST_SUITE(TPunycodeTest) { TString buf1; TUtf16String buf2; //Cerr << "Testing " << utf8 << Endl; - return HostNameToPunycode(unicode) == punycode && HostNameToPunycode(UTF8ToWide(punycode)) == punycode // repeated encoding should give same result - && PunycodeToHostName(punycode) == unicode && CanBePunycodeHostName(punycode) == canBePunycode; + return HostNameToPunycode(unicode) == punycode && HostNameToPunycode(UTF8ToWide(punycode)) == punycode // repeated encoding should give same result + && PunycodeToHostName(punycode) == unicode && CanBePunycodeHostName(punycode) == canBePunycode; } static bool TestForced(const TString& bad) { - return ForceHostNameToPunycode(UTF8ToWide(bad)) == bad && ForcePunycodeToHostName(bad) == UTF8ToWide(bad); + return ForceHostNameToPunycode(UTF8ToWide(bad)) == bad && ForcePunycodeToHostName(bad) == UTF8ToWide(bad); } Y_UNIT_TEST(HostNameEncodeDecode) { UNIT_ASSERT(TestHostName("президент.рф", "xn--d1abbgf6aiiy.xn--p1ai", true)); - UNIT_ASSERT(TestHostName("яндекс.ru", "xn--d1acpjx3f.ru", true)); - UNIT_ASSERT(TestHostName("пример", "xn--e1afmkfd", true)); - UNIT_ASSERT(TestHostName("ascii.test", "ascii.test")); + UNIT_ASSERT(TestHostName("яндекс.ru", "xn--d1acpjx3f.ru", true)); + UNIT_ASSERT(TestHostName("пример", "xn--e1afmkfd", true)); + UNIT_ASSERT(TestHostName("ascii.test", "ascii.test")); UNIT_ASSERT(TestHostName("", "")); UNIT_ASSERT(TestHostName(".", ".")); - UNIT_ASSERT(TestHostName("a.", "a.")); // empty root domain is ok + UNIT_ASSERT(TestHostName("a.", "a.")); // empty root domain is ok UNIT_ASSERT(TestHostName("a.b.c.д.e.f", "a.b.c.xn--d1a.e.f", true)); UNIT_ASSERT(TestHostName("а.б.в.г.д", "xn--80a.xn--90a.xn--b1a.xn--c1a.xn--d1a", true)); @@ -97,7 +97,7 @@ Y_UNIT_TEST_SUITE(TPunycodeTest) { // too long domain label TString bad(500, 'a'); UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError); - UNIT_ASSERT(TestForced(bad)); // but can decode it + UNIT_ASSERT(TestForced(bad)); // but can decode it } { // already has ACE prefix @@ -121,6 +121,6 @@ Y_UNIT_TEST_SUITE(TPunycodeTest) { UNIT_ASSERT(!CanBePunycodeHostName("яндекс.рф")); // non-ascii UNIT_ASSERT(!CanBePunycodeHostName("яндекс.xn--p1ai")); // non-ascii UNIT_ASSERT(!CanBePunycodeHostName("")); - UNIT_ASSERT(!CanBePunycodeHostName("http://xn--a.b")); // scheme prefix is not detected here + UNIT_ASSERT(!CanBePunycodeHostName("http://xn--a.b")); // scheme prefix is not detected here } } diff --git a/library/cpp/unicode/ya.make b/library/cpp/unicode/ya.make index 4fcd9caacc..8410ab63fd 100644 --- a/library/cpp/unicode/ya.make +++ b/library/cpp/unicode/ya.make @@ -1,4 +1,4 @@ -RECURSE( +RECURSE( folding folding/ut normalization @@ -11,4 +11,4 @@ RECURSE( utf8_char/ut utf8_iter utf8_iter/ut -) +) |