aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/unicode
diff options
context:
space:
mode:
authorAnton Samokhvalov <pg83@yandex.ru>2022-02-10 16:45:15 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:15 +0300
commit72cb13b4aff9bc9cf22e49251bc8fd143f82538f (patch)
treeda2c34829458c7d4e74bdfbdf85dff449e9e7fb8 /library/cpp/unicode
parent778e51ba091dc39e7b7fcab2b9cf4dbedfb6f2b5 (diff)
downloadydb-72cb13b4aff9bc9cf22e49251bc8fd143f82538f.tar.gz
Restoring authorship annotation for Anton Samokhvalov <pg83@yandex.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/unicode')
-rw-r--r--library/cpp/unicode/normalization/custom_encoder.cpp30
-rw-r--r--library/cpp/unicode/normalization/custom_encoder.h4
-rw-r--r--library/cpp/unicode/normalization/decomposition_table.h36
-rw-r--r--library/cpp/unicode/normalization/normalization.h40
-rw-r--r--library/cpp/unicode/normalization/ut/normalization_ut.cpp2
-rw-r--r--library/cpp/unicode/punycode/punycode.h2
-rw-r--r--library/cpp/unicode/punycode/punycode_ut.cpp24
-rw-r--r--library/cpp/unicode/ya.make4
8 files changed, 71 insertions, 71 deletions
diff --git a/library/cpp/unicode/normalization/custom_encoder.cpp b/library/cpp/unicode/normalization/custom_encoder.cpp
index c6f186405f..897667861c 100644
--- a/library/cpp/unicode/normalization/custom_encoder.cpp
+++ b/library/cpp/unicode/normalization/custom_encoder.cpp
@@ -9,7 +9,7 @@ void TCustomEncoder::addToTable(wchar32 ucode, unsigned char code, const CodePag
unsigned char pos = (unsigned char)(ucode & 255);
if (Table[plane] == DefaultPlane) {
Table[plane] = new char[256];
- memset(Table[plane], 0, 256 * sizeof(char));
+ memset(Table[plane], 0, 256 * sizeof(char));
}
if (Table[plane][pos] == 0) {
@@ -18,18 +18,18 @@ void TCustomEncoder::addToTable(wchar32 ucode, unsigned char code, const CodePag
Y_ASSERT(target && *target->Names);
if (static_cast<unsigned char>(Table[plane][pos]) > 127 && code) {
Cerr << "WARNING: Only lower part of ASCII should have duplicate encodings "
- << target->Names[0]
- << " " << IntToString<16>(ucode)
- << " " << IntToString<16>(code)
- << " " << IntToString<16>(static_cast<unsigned char>(Table[plane][pos]))
- << Endl;
+ << target->Names[0]
+ << " " << IntToString<16>(ucode)
+ << " " << IntToString<16>(code)
+ << " " << IntToString<16>(static_cast<unsigned char>(Table[plane][pos]))
+ << Endl;
}
}
}
-bool isGoodDecomp(wchar32 rune, wchar32 decomp) {
+bool isGoodDecomp(wchar32 rune, wchar32 decomp) {
if (
- (NUnicode::NPrivate::CharInfo(rune) == NUnicode::NPrivate::CharInfo(decomp)) || (IsAlpha(rune) && IsAlpha(decomp)) || (IsNumeric(rune) && IsNumeric(decomp)) || (IsQuotation(rune) && IsQuotation(decomp)))
+ (NUnicode::NPrivate::CharInfo(rune) == NUnicode::NPrivate::CharInfo(decomp)) || (IsAlpha(rune) && IsAlpha(decomp)) || (IsNumeric(rune) && IsNumeric(decomp)) || (IsQuotation(rune) && IsQuotation(decomp)))
{
return true;
}
@@ -43,23 +43,23 @@ void TCustomEncoder::Create(const CodePage* target, bool extended) {
DefaultPlane = new char[256];
- memset(DefaultPlane, 0, 256 * sizeof(char));
+ memset(DefaultPlane, 0, 256 * sizeof(char));
for (size_t i = 0; i != 256; ++i)
Table[i] = DefaultPlane;
for (size_t i = 0; i != 256; ++i) {
wchar32 ucode = target->unicode[i];
if (ucode != BROKEN_RUNE) // always UNASSIGNED
- addToTable(ucode, (unsigned char)i, target);
+ addToTable(ucode, (unsigned char)i, target);
}
if (!extended)
return;
- for (wchar32 w = 1; w < 65535; w++) {
- if (Code(w) == 0) {
+ for (wchar32 w = 1; w < 65535; w++) {
+ if (Code(w) == 0) {
wchar32 dw = w;
- while (IsComposed(dw) && Code(dw) == 0) {
+ while (IsComposed(dw) && Code(dw) == 0) {
const wchar32* decomp_p = NUnicode::Decomposition<true>(dw);
Y_ASSERT(decomp_p != nullptr);
@@ -76,8 +76,8 @@ void TCustomEncoder::Create(const CodePage* target, bool extended) {
TCustomEncoder::~TCustomEncoder() {
for (size_t i = 0; i != 256; ++i) {
if (Table[i] != DefaultPlane) {
- delete[] Table[i];
+ delete[] Table[i];
}
}
- delete[] DefaultPlane;
+ delete[] DefaultPlane;
}
diff --git a/library/cpp/unicode/normalization/custom_encoder.h b/library/cpp/unicode/normalization/custom_encoder.h
index ef4d5b7f65..28644f37fa 100644
--- a/library/cpp/unicode/normalization/custom_encoder.h
+++ b/library/cpp/unicode/normalization/custom_encoder.h
@@ -2,8 +2,8 @@
#include <library/cpp/charset/codepage.h>
-struct TCustomEncoder: public Encoder {
- void Create(const CodePage* target, bool extended = false);
+struct TCustomEncoder: public Encoder {
+ void Create(const CodePage* target, bool extended = false);
~TCustomEncoder();
private:
diff --git a/library/cpp/unicode/normalization/decomposition_table.h b/library/cpp/unicode/normalization/decomposition_table.h
index 23f3da334f..5a0b30d078 100644
--- a/library/cpp/unicode/normalization/decomposition_table.h
+++ b/library/cpp/unicode/normalization/decomposition_table.h
@@ -3,26 +3,26 @@
#include <util/charset/unicode_table.h>
namespace NUnicode {
- namespace NPrivate {
- typedef NUnicodeTable::TTable<NUnicodeTable::TSubtable<
- NUnicodeTable::UNICODE_TABLE_SHIFT, NUnicodeTable::TValues<const wchar32*>>>
- TDecompositionTable;
+ namespace NPrivate {
+ typedef NUnicodeTable::TTable<NUnicodeTable::TSubtable<
+ NUnicodeTable::UNICODE_TABLE_SHIFT, NUnicodeTable::TValues<const wchar32*>>>
+ TDecompositionTable;
- const TDecompositionTable& CannonDecompositionTable();
- const TDecompositionTable& CompatDecompositionTable();
+ const TDecompositionTable& CannonDecompositionTable();
+ const TDecompositionTable& CompatDecompositionTable();
- template <bool compat>
- inline const TDecompositionTable& DecompositionTable();
+ template <bool compat>
+ inline const TDecompositionTable& DecompositionTable();
- template <>
- inline const TDecompositionTable& DecompositionTable<false>() {
- return CannonDecompositionTable();
- }
+ template <>
+ inline const TDecompositionTable& DecompositionTable<false>() {
+ return CannonDecompositionTable();
+ }
- template <>
- inline const TDecompositionTable& DecompositionTable<true>() {
- return CompatDecompositionTable();
- }
+ template <>
+ inline const TDecompositionTable& DecompositionTable<true>() {
+ return CompatDecompositionTable();
+ }
- }
-}; // namespace NUnicode
+ }
+}; // namespace NUnicode
diff --git a/library/cpp/unicode/normalization/normalization.h b/library/cpp/unicode/normalization/normalization.h
index 4f5f57881c..9fd813b761 100644
--- a/library/cpp/unicode/normalization/normalization.h
+++ b/library/cpp/unicode/normalization/normalization.h
@@ -40,20 +40,20 @@ namespace NUnicode {
public:
inline TDecompositor(const TDecompositionTable& table)
: Table(table)
- {
- }
+ {
+ }
inline const wchar32* Decomposition(wchar32 ch) const {
return NPrivate::Decomposition(Table, ch);
}
};
- template <bool IsCompat>
+ template <bool IsCompat>
struct TStandartDecompositor: public TDecompositor {
TStandartDecompositor()
- : TDecompositor(NPrivate::DecompositionTable<IsCompat>())
- {
- }
+ : TDecompositor(NPrivate::DecompositionTable<IsCompat>())
+ {
+ }
};
template <ENormalization N>
@@ -92,12 +92,12 @@ namespace NUnicode {
static const TRawData RawData[];
static const size_t RawDataSize;
- class TKey: public std::pair<wchar32, wchar32> {
+ class TKey: public std::pair<wchar32, wchar32> {
public:
inline TKey(wchar32 a, wchar32 b)
: std::pair<wchar32, wchar32>(a, b)
- {
- }
+ {
+ }
inline size_t Hash() const {
return CombineHashes(first, second);
@@ -111,7 +111,7 @@ namespace NUnicode {
}
};
- typedef THashMap<TKey, wchar32, THash<TKey>> TData;
+ typedef THashMap<TKey, wchar32, THash<TKey>> TData;
TData Data;
public:
@@ -149,8 +149,8 @@ namespace NUnicode {
public:
inline TCompositor()
: Composition(Singleton<TComposition>())
- {
- }
+ {
+ }
inline void DoComposition(TBuffer& buffer) {
if (buffer.size() < 2)
@@ -204,7 +204,7 @@ namespace NUnicode {
}
return true;
}
- }
+ }
template <bool compat>
inline const wchar32* Decomposition(wchar32 ch) {
@@ -232,7 +232,7 @@ namespace NUnicode {
}
struct TComparer {
- inline bool operator()(const TSymbol& a, const TSymbol& b) {
+ inline bool operator()(const TSymbol& a, const TSymbol& b) {
return Compare(a, b);
}
};
@@ -304,14 +304,14 @@ namespace NUnicode {
public:
TNormalizer()
- : Decompositor(*Singleton<NPrivate::TStandartDecompositor<IsCompat>>())
- {
- }
+ : Decompositor(*Singleton<NPrivate::TStandartDecompositor<IsCompat>>())
+ {
+ }
TNormalizer(const TDecompositor& decompositor)
: Decompositor(decompositor)
- {
- }
+ {
+ }
template <class T, typename TCharType>
inline void Normalize(const TCharType* begin, const TCharType* end, T& out) {
@@ -353,7 +353,7 @@ namespace NUnicode {
}
}
};
-}
+}
//! decompose utf16 or utf32 string to any container supporting push_back or to T*
template <NUnicode::ENormalization Norm, class T, typename TCharType>
diff --git a/library/cpp/unicode/normalization/ut/normalization_ut.cpp b/library/cpp/unicode/normalization/ut/normalization_ut.cpp
index 54d4940a26..27c6f9f5ea 100644
--- a/library/cpp/unicode/normalization/ut/normalization_ut.cpp
+++ b/library/cpp/unicode/normalization/ut/normalization_ut.cpp
@@ -5,7 +5,7 @@
#include <library/cpp/unicode/normalization/normalization.h>
Y_UNIT_TEST_SUITE(TUnicodeNormalizationTest) {
- template <NUnicode::ENormalization NormType>
+ template <NUnicode::ENormalization NormType>
void TestInit() {
NUnicode::TNormalizer<NormType> normalizer;
TString s("упячка detected");
diff --git a/library/cpp/unicode/punycode/punycode.h b/library/cpp/unicode/punycode/punycode.h
index af4acc25c1..7d14d2a794 100644
--- a/library/cpp/unicode/punycode/punycode.h
+++ b/library/cpp/unicode/punycode/punycode.h
@@ -1,5 +1,5 @@
#pragma once
-
+
#include <util/generic/string.h>
#include <util/generic/strbuf.h>
#include <util/generic/yexception.h>
diff --git a/library/cpp/unicode/punycode/punycode_ut.cpp b/library/cpp/unicode/punycode/punycode_ut.cpp
index 97271cf0d8..b0208ad7bf 100644
--- a/library/cpp/unicode/punycode/punycode_ut.cpp
+++ b/library/cpp/unicode/punycode/punycode_ut.cpp
@@ -39,13 +39,13 @@ Y_UNIT_TEST_SUITE(TPunycodeTest) {
UNIT_ASSERT(TestRaw("пример", "e1afmkfd"));
{
- const wchar16 tmp[] = {0x82, 0x81, 0x80, 0};
- UNIT_ASSERT(PunycodeToWide("abc") == tmp); // "abc" is still valid punycode
+ const wchar16 tmp[] = {0x82, 0x81, 0x80, 0};
+ UNIT_ASSERT(PunycodeToWide("abc") == tmp); // "abc" is still valid punycode
}
UNIT_ASSERT_EXCEPTION(PunycodeToWide(" "), TPunycodeError);
UNIT_ASSERT_EXCEPTION(PunycodeToWide("абвгд"), TPunycodeError);
- UNIT_ASSERT_EXCEPTION(PunycodeToWide("-"), TPunycodeError);
+ UNIT_ASSERT_EXCEPTION(PunycodeToWide("-"), TPunycodeError);
{
TString longIn;
@@ -62,23 +62,23 @@ Y_UNIT_TEST_SUITE(TPunycodeTest) {
TString buf1;
TUtf16String buf2;
//Cerr << "Testing " << utf8 << Endl;
- return HostNameToPunycode(unicode) == punycode && HostNameToPunycode(UTF8ToWide(punycode)) == punycode // repeated encoding should give same result
- && PunycodeToHostName(punycode) == unicode && CanBePunycodeHostName(punycode) == canBePunycode;
+ return HostNameToPunycode(unicode) == punycode && HostNameToPunycode(UTF8ToWide(punycode)) == punycode // repeated encoding should give same result
+ && PunycodeToHostName(punycode) == unicode && CanBePunycodeHostName(punycode) == canBePunycode;
}
static bool TestForced(const TString& bad) {
- return ForceHostNameToPunycode(UTF8ToWide(bad)) == bad && ForcePunycodeToHostName(bad) == UTF8ToWide(bad);
+ return ForceHostNameToPunycode(UTF8ToWide(bad)) == bad && ForcePunycodeToHostName(bad) == UTF8ToWide(bad);
}
Y_UNIT_TEST(HostNameEncodeDecode) {
UNIT_ASSERT(TestHostName("президент.рф", "xn--d1abbgf6aiiy.xn--p1ai", true));
- UNIT_ASSERT(TestHostName("яндекс.ru", "xn--d1acpjx3f.ru", true));
- UNIT_ASSERT(TestHostName("пример", "xn--e1afmkfd", true));
- UNIT_ASSERT(TestHostName("ascii.test", "ascii.test"));
+ UNIT_ASSERT(TestHostName("яндекс.ru", "xn--d1acpjx3f.ru", true));
+ UNIT_ASSERT(TestHostName("пример", "xn--e1afmkfd", true));
+ UNIT_ASSERT(TestHostName("ascii.test", "ascii.test"));
UNIT_ASSERT(TestHostName("", ""));
UNIT_ASSERT(TestHostName(".", "."));
- UNIT_ASSERT(TestHostName("a.", "a.")); // empty root domain is ok
+ UNIT_ASSERT(TestHostName("a.", "a.")); // empty root domain is ok
UNIT_ASSERT(TestHostName("a.b.c.д.e.f", "a.b.c.xn--d1a.e.f", true));
UNIT_ASSERT(TestHostName("а.б.в.г.д", "xn--80a.xn--90a.xn--b1a.xn--c1a.xn--d1a", true));
@@ -97,7 +97,7 @@ Y_UNIT_TEST_SUITE(TPunycodeTest) {
// too long domain label
TString bad(500, 'a');
UNIT_ASSERT_EXCEPTION(HostNameToPunycode(UTF8ToWide(bad)), TPunycodeError);
- UNIT_ASSERT(TestForced(bad)); // but can decode it
+ UNIT_ASSERT(TestForced(bad)); // but can decode it
}
{
// already has ACE prefix
@@ -121,6 +121,6 @@ Y_UNIT_TEST_SUITE(TPunycodeTest) {
UNIT_ASSERT(!CanBePunycodeHostName("яндекс.рф")); // non-ascii
UNIT_ASSERT(!CanBePunycodeHostName("яндекс.xn--p1ai")); // non-ascii
UNIT_ASSERT(!CanBePunycodeHostName(""));
- UNIT_ASSERT(!CanBePunycodeHostName("http://xn--a.b")); // scheme prefix is not detected here
+ UNIT_ASSERT(!CanBePunycodeHostName("http://xn--a.b")); // scheme prefix is not detected here
}
}
diff --git a/library/cpp/unicode/ya.make b/library/cpp/unicode/ya.make
index 4fcd9caacc..8410ab63fd 100644
--- a/library/cpp/unicode/ya.make
+++ b/library/cpp/unicode/ya.make
@@ -1,4 +1,4 @@
-RECURSE(
+RECURSE(
folding
folding/ut
normalization
@@ -11,4 +11,4 @@ RECURSE(
utf8_char/ut
utf8_iter
utf8_iter/ut
-)
+)