diff options
author | umnov <umnov@yandex-team.ru> | 2022-02-10 16:50:28 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:50:28 +0300 |
commit | 0bc655f0b88816a992ff638c25c09627d67e55d0 (patch) | |
tree | 24a7e41d3f11e3890654ee681a0a26a780170fa3 /library/cpp/unicode/normalization/normalization.cpp | |
parent | 9138262b9b527644a2423b034122d89ddbfb25d2 (diff) | |
download | ydb-0bc655f0b88816a992ff638c25c09627d67e55d0.tar.gz |
Restoring authorship annotation for <umnov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/unicode/normalization/normalization.cpp')
-rw-r--r-- | library/cpp/unicode/normalization/normalization.cpp | 128 |
1 files changed, 64 insertions, 64 deletions
diff --git a/library/cpp/unicode/normalization/normalization.cpp b/library/cpp/unicode/normalization/normalization.cpp index 7da7211514..f27840fe2d 100644 --- a/library/cpp/unicode/normalization/normalization.cpp +++ b/library/cpp/unicode/normalization/normalization.cpp @@ -1,66 +1,66 @@ -#include "normalization.h" - -static const wchar32 S_BASE = 0xAC00; -static const wchar32 L_BASE = 0x1100; -static const wchar32 V_BASE = 0x1161; -static const wchar32 T_BASE = 0x11A7; -static const int L_COUNT = 19; -static const int V_COUNT = 21; -static const int T_COUNT = 28; -static const int N_COUNT = V_COUNT * T_COUNT; // 588 -static const int S_COUNT = L_COUNT * N_COUNT; // 11172 - -static inline wchar32 ComposeHangul(wchar32 lead, wchar32 tail) { - // 1. check to see if two current characters are L and V - int lIndex = lead - L_BASE; - if (0 <= lIndex && lIndex < L_COUNT) { - int vIndex = tail - V_BASE; - if (0 <= vIndex && vIndex < V_COUNT) { - // make syllable of form LV - lead = (wchar32)(S_BASE + (lIndex * V_COUNT + vIndex) * T_COUNT); - return lead; - } - } - - // 2. check to see if two current characters are LV and T - int sIndex = lead - S_BASE; - if (0 <= sIndex && sIndex < S_COUNT && (sIndex % T_COUNT) == 0) { - int TIndex = tail - T_BASE; - if (0 < TIndex && TIndex < T_COUNT) { - // make syllable of form LVT - lead += TIndex; - return lead; - } - } - - return 0; -} - -NUnicode::NPrivate::TComposition::TComposition() { - for (size_t i = 0; i != RawDataSize; ++i) { - const TRawData& data = RawData[i]; - - if (DecompositionCombining(data.Lead) != 0) - continue; - - Data[TKey(data.Lead, data.Tail)] = data.Comp; - } - - for (wchar32 s = 0xAC00; s != 0xD7A4; ++s) { - const wchar32* decompBegin = NUnicode::Decomposition<true>(s); - +#include "normalization.h" + +static const wchar32 S_BASE = 0xAC00; +static const wchar32 L_BASE = 0x1100; +static const wchar32 V_BASE = 0x1161; +static const wchar32 T_BASE = 0x11A7; +static const int L_COUNT = 19; +static const int V_COUNT = 21; +static const int T_COUNT = 28; +static const int N_COUNT = V_COUNT * T_COUNT; // 588 +static const int S_COUNT = L_COUNT * N_COUNT; // 11172 + +static inline wchar32 ComposeHangul(wchar32 lead, wchar32 tail) { + // 1. check to see if two current characters are L and V + int lIndex = lead - L_BASE; + if (0 <= lIndex && lIndex < L_COUNT) { + int vIndex = tail - V_BASE; + if (0 <= vIndex && vIndex < V_COUNT) { + // make syllable of form LV + lead = (wchar32)(S_BASE + (lIndex * V_COUNT + vIndex) * T_COUNT); + return lead; + } + } + + // 2. check to see if two current characters are LV and T + int sIndex = lead - S_BASE; + if (0 <= sIndex && sIndex < S_COUNT && (sIndex % T_COUNT) == 0) { + int TIndex = tail - T_BASE; + if (0 < TIndex && TIndex < T_COUNT) { + // make syllable of form LVT + lead += TIndex; + return lead; + } + } + + return 0; +} + +NUnicode::NPrivate::TComposition::TComposition() { + for (size_t i = 0; i != RawDataSize; ++i) { + const TRawData& data = RawData[i]; + + if (DecompositionCombining(data.Lead) != 0) + continue; + + Data[TKey(data.Lead, data.Tail)] = data.Comp; + } + + for (wchar32 s = 0xAC00; s != 0xD7A4; ++s) { + const wchar32* decompBegin = NUnicode::Decomposition<true>(s); + if (decompBegin == nullptr) - continue; - - wchar32 lead = *(decompBegin++); - while (*decompBegin) { - wchar32 tail = *(decompBegin++); - wchar32 comp = ComposeHangul(lead, tail); + continue; + + wchar32 lead = *(decompBegin++); + while (*decompBegin) { + wchar32 tail = *(decompBegin++); + wchar32 comp = ComposeHangul(lead, tail); Y_ASSERT(comp != 0); - - Data[TKey(lead, tail)] = comp; - - lead = comp; - } - } -} + + Data[TKey(lead, tail)] = comp; + + lead = comp; + } + } +} |