aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/unicode
diff options
context:
space:
mode:
authormonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
committermonster <monster@ydb.tech>2022-07-07 14:41:37 +0300
commit06e5c21a835c0e923506c4ff27929f34e00761c2 (patch)
tree75efcbc6854ef9bd476eb8bf00cc5c900da436a2 /library/cpp/unicode
parent03f024c4412e3aa613bb543cf1660176320ba8f4 (diff)
downloadydb-06e5c21a835c0e923506c4ff27929f34e00761c2.tar.gz
fix ya.make
Diffstat (limited to 'library/cpp/unicode')
-rw-r--r--library/cpp/unicode/normalization/custom_encoder.cpp83
-rw-r--r--library/cpp/unicode/normalization/custom_encoder.h11
2 files changed, 0 insertions, 94 deletions
diff --git a/library/cpp/unicode/normalization/custom_encoder.cpp b/library/cpp/unicode/normalization/custom_encoder.cpp
deleted file mode 100644
index c6f186405f..0000000000
--- a/library/cpp/unicode/normalization/custom_encoder.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-#include "custom_encoder.h"
-#include "normalization.h"
-
-#include <util/string/cast.h>
-#include <util/stream/output.h>
-
-void TCustomEncoder::addToTable(wchar32 ucode, unsigned char code, const CodePage* target) {
- unsigned char plane = (unsigned char)(ucode >> 8);
- unsigned char pos = (unsigned char)(ucode & 255);
- if (Table[plane] == DefaultPlane) {
- Table[plane] = new char[256];
- memset(Table[plane], 0, 256 * sizeof(char));
- }
-
- if (Table[plane][pos] == 0) {
- Table[plane][pos] = code;
- } else {
- Y_ASSERT(target && *target->Names);
- if (static_cast<unsigned char>(Table[plane][pos]) > 127 && code) {
- Cerr << "WARNING: Only lower part of ASCII should have duplicate encodings "
- << target->Names[0]
- << " " << IntToString<16>(ucode)
- << " " << IntToString<16>(code)
- << " " << IntToString<16>(static_cast<unsigned char>(Table[plane][pos]))
- << Endl;
- }
- }
-}
-
-bool isGoodDecomp(wchar32 rune, wchar32 decomp) {
- if (
- (NUnicode::NPrivate::CharInfo(rune) == NUnicode::NPrivate::CharInfo(decomp)) || (IsAlpha(rune) && IsAlpha(decomp)) || (IsNumeric(rune) && IsNumeric(decomp)) || (IsQuotation(rune) && IsQuotation(decomp)))
- {
- return true;
- }
- return false;
-}
-
-void TCustomEncoder::Create(const CodePage* target, bool extended) {
- Y_ASSERT(target);
-
- DefaultChar = (const char*)target->DefaultChar;
-
- DefaultPlane = new char[256];
-
- memset(DefaultPlane, 0, 256 * sizeof(char));
- for (size_t i = 0; i != 256; ++i)
- Table[i] = DefaultPlane;
-
- for (size_t i = 0; i != 256; ++i) {
- wchar32 ucode = target->unicode[i];
- if (ucode != BROKEN_RUNE) // always UNASSIGNED
- addToTable(ucode, (unsigned char)i, target);
- }
-
- if (!extended)
- return;
-
- for (wchar32 w = 1; w < 65535; w++) {
- if (Code(w) == 0) {
- wchar32 dw = w;
- while (IsComposed(dw) && Code(dw) == 0) {
- const wchar32* decomp_p = NUnicode::Decomposition<true>(dw);
- Y_ASSERT(decomp_p != nullptr);
-
- dw = decomp_p[0];
- if (std::char_traits<wchar32>::length(decomp_p) > 1 && (dw == (wchar32)' ' || dw == (wchar32)'('))
- dw = decomp_p[1];
- }
- if (Code(dw) != 0 && isGoodDecomp(w, dw))
- addToTable(w, Code(dw), target);
- }
- }
-}
-
-TCustomEncoder::~TCustomEncoder() {
- for (size_t i = 0; i != 256; ++i) {
- if (Table[i] != DefaultPlane) {
- delete[] Table[i];
- }
- }
- delete[] DefaultPlane;
-}
diff --git a/library/cpp/unicode/normalization/custom_encoder.h b/library/cpp/unicode/normalization/custom_encoder.h
deleted file mode 100644
index ef4d5b7f65..0000000000
--- a/library/cpp/unicode/normalization/custom_encoder.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#pragma once
-
-#include <library/cpp/charset/codepage.h>
-
-struct TCustomEncoder: public Encoder {
- void Create(const CodePage* target, bool extended = false);
- ~TCustomEncoder();
-
-private:
- void addToTable(wchar32 ucode, unsigned char code, const CodePage* target);
-};