diff options
author | umnov <umnov@yandex-team.ru> | 2022-02-10 16:50:28 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:50:28 +0300 |
commit | 0bc655f0b88816a992ff638c25c09627d67e55d0 (patch) | |
tree | 24a7e41d3f11e3890654ee681a0a26a780170fa3 /library/cpp/unicode/normalization/custom_encoder.cpp | |
parent | 9138262b9b527644a2423b034122d89ddbfb25d2 (diff) | |
download | ydb-0bc655f0b88816a992ff638c25c09627d67e55d0.tar.gz |
Restoring authorship annotation for <umnov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/unicode/normalization/custom_encoder.cpp')
-rw-r--r-- | library/cpp/unicode/normalization/custom_encoder.cpp | 128 |
1 files changed, 64 insertions, 64 deletions
diff --git a/library/cpp/unicode/normalization/custom_encoder.cpp b/library/cpp/unicode/normalization/custom_encoder.cpp index c6f186405f..f164a53f3b 100644 --- a/library/cpp/unicode/normalization/custom_encoder.cpp +++ b/library/cpp/unicode/normalization/custom_encoder.cpp @@ -1,83 +1,83 @@ -#include "custom_encoder.h" -#include "normalization.h" - -#include <util/string/cast.h> -#include <util/stream/output.h> - -void TCustomEncoder::addToTable(wchar32 ucode, unsigned char code, const CodePage* target) { - unsigned char plane = (unsigned char)(ucode >> 8); - unsigned char pos = (unsigned char)(ucode & 255); - if (Table[plane] == DefaultPlane) { - Table[plane] = new char[256]; +#include "custom_encoder.h" +#include "normalization.h" + +#include <util/string/cast.h> +#include <util/stream/output.h> + +void TCustomEncoder::addToTable(wchar32 ucode, unsigned char code, const CodePage* target) { + unsigned char plane = (unsigned char)(ucode >> 8); + unsigned char pos = (unsigned char)(ucode & 255); + if (Table[plane] == DefaultPlane) { + Table[plane] = new char[256]; memset(Table[plane], 0, 256 * sizeof(char)); - } - - if (Table[plane][pos] == 0) { - Table[plane][pos] = code; - } else { + } + + if (Table[plane][pos] == 0) { + Table[plane][pos] = code; + } else { Y_ASSERT(target && *target->Names); - if (static_cast<unsigned char>(Table[plane][pos]) > 127 && code) { - Cerr << "WARNING: Only lower part of ASCII should have duplicate encodings " + if (static_cast<unsigned char>(Table[plane][pos]) > 127 && code) { + Cerr << "WARNING: Only lower part of ASCII should have duplicate encodings " << target->Names[0] << " " << IntToString<16>(ucode) << " " << IntToString<16>(code) << " " << IntToString<16>(static_cast<unsigned char>(Table[plane][pos])) << Endl; - } - } -} - + } + } +} + bool isGoodDecomp(wchar32 rune, wchar32 decomp) { - if ( + if ( (NUnicode::NPrivate::CharInfo(rune) == NUnicode::NPrivate::CharInfo(decomp)) || (IsAlpha(rune) && IsAlpha(decomp)) || (IsNumeric(rune) && IsNumeric(decomp)) || (IsQuotation(rune) && IsQuotation(decomp))) - { - return true; - } - return false; -} - -void TCustomEncoder::Create(const CodePage* target, bool extended) { + { + return true; + } + return false; +} + +void TCustomEncoder::Create(const CodePage* target, bool extended) { Y_ASSERT(target); - - DefaultChar = (const char*)target->DefaultChar; - - DefaultPlane = new char[256]; - + + DefaultChar = (const char*)target->DefaultChar; + + DefaultPlane = new char[256]; + memset(DefaultPlane, 0, 256 * sizeof(char)); - for (size_t i = 0; i != 256; ++i) - Table[i] = DefaultPlane; - - for (size_t i = 0; i != 256; ++i) { - wchar32 ucode = target->unicode[i]; - if (ucode != BROKEN_RUNE) // always UNASSIGNED + for (size_t i = 0; i != 256; ++i) + Table[i] = DefaultPlane; + + for (size_t i = 0; i != 256; ++i) { + wchar32 ucode = target->unicode[i]; + if (ucode != BROKEN_RUNE) // always UNASSIGNED addToTable(ucode, (unsigned char)i, target); - } - - if (!extended) - return; - + } + + if (!extended) + return; + for (wchar32 w = 1; w < 65535; w++) { if (Code(w) == 0) { - wchar32 dw = w; + wchar32 dw = w; while (IsComposed(dw) && Code(dw) == 0) { - const wchar32* decomp_p = NUnicode::Decomposition<true>(dw); + const wchar32* decomp_p = NUnicode::Decomposition<true>(dw); Y_ASSERT(decomp_p != nullptr); - - dw = decomp_p[0]; + + dw = decomp_p[0]; if (std::char_traits<wchar32>::length(decomp_p) > 1 && (dw == (wchar32)' ' || dw == (wchar32)'(')) - dw = decomp_p[1]; - } - if (Code(dw) != 0 && isGoodDecomp(w, dw)) - addToTable(w, Code(dw), target); - } - } -} - -TCustomEncoder::~TCustomEncoder() { - for (size_t i = 0; i != 256; ++i) { - if (Table[i] != DefaultPlane) { + dw = decomp_p[1]; + } + if (Code(dw) != 0 && isGoodDecomp(w, dw)) + addToTable(w, Code(dw), target); + } + } +} + +TCustomEncoder::~TCustomEncoder() { + for (size_t i = 0; i != 256; ++i) { + if (Table[i] != DefaultPlane) { delete[] Table[i]; - } - } + } + } delete[] DefaultPlane; -} +} |