aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/unicode/folding/fold.cpp
diff options
context:
space:
mode:
authorqrort <qrort@yandex-team.com>2022-11-30 23:47:12 +0300
committerqrort <qrort@yandex-team.com>2022-11-30 23:47:12 +0300
commit22f8ae0e3f5d68b92aecccdf96c1d841a0334311 (patch)
treebffa27765faf54126ad44bcafa89fadecb7a73d7 /library/cpp/unicode/folding/fold.cpp
parent332b99e2173f0425444abb759eebcb2fafaa9209 (diff)
downloadydb-22f8ae0e3f5d68b92aecccdf96c1d841a0334311.tar.gz
validate canons without yatest_common
Diffstat (limited to 'library/cpp/unicode/folding/fold.cpp')
-rw-r--r--library/cpp/unicode/folding/fold.cpp78
1 files changed, 78 insertions, 0 deletions
diff --git a/library/cpp/unicode/folding/fold.cpp b/library/cpp/unicode/folding/fold.cpp
new file mode 100644
index 0000000000..47a42a80b2
--- /dev/null
+++ b/library/cpp/unicode/folding/fold.cpp
@@ -0,0 +1,78 @@
+#include "fold.h"
+
+namespace NUF {
+ TNormalizer::TNormalizer(ELanguage lmain, ELanguage laux) // Builds a normalizer configured for the language pair lmain/laux.
+     : DoRenyxa(), // the four option members are value-initialized (false if they are plain bools -- declared in fold.h, confirm there)
+       DoLowerCase(),
+       DoSimpleCyr(),
+       FillOffsets()
+ {
+     Reset(); // start with empty buffers and null cursors
+     SetLanguages(lmain, laux); // then record the two languages and their scripts
+ }
+
+ TNormalizer::TNormalizer(const TLanguages& langs) // Builds a normalizer configured for every language selected in langs.
+     : DoRenyxa(), // the four option members are value-initialized (false if they are plain bools -- declared in fold.h, confirm there)
+       DoLowerCase(),
+       DoSimpleCyr(),
+       FillOffsets()
+ {
+     Reset(); // start with empty buffers and null cursors
+     SetLanguages(langs); // then copy the language set and derive its scripts
+ }
+
+ void TNormalizer::SetLanguages(ELanguage lmain, ELanguage laux) { // Replaces the current language selection with lmain and laux.
+ Languages.reset(); // NOTE(review): reset()/set() presumably behave like std::bitset's -- TLanguages is declared elsewhere, confirm
+ Scripts.reset(); // both sets are emptied before anything is added, so earlier selections do not survive
+ Languages.set(lmain);
+ Languages.set(laux);
+ Scripts.set(ScriptByLanguage(lmain)); // Scripts records whatever ScriptByLanguage returns for each of the two languages
+ Scripts.set(ScriptByLanguage(laux));
+ }
+
+ void TNormalizer::SetLanguages(const TLanguages& langs) { // Adopts langs as the language selection and rebuilds Scripts from it.
+     Languages = langs;
+     Scripts.reset(); // scripts are recomputed from scratch below
+
+     for (ui32 langIdx = 0; langIdx < langs.size(); ++langIdx) {
+         if (!langs.test(langIdx)) continue; // position not selected in langs -- contributes no script
+         Scripts.set(ScriptByLanguage(static_cast<ELanguage>(langIdx))); // the position index is reinterpreted as an ELanguage value
+     }
+ }
+
+ void TNormalizer::SetDoRenyxa(bool da) { // Stores the DoRenyxa option; no other state is touched here.
+     this->DoRenyxa = da;
+ }
+
+ void TNormalizer::SetDoLowerCase(bool da) { // Stores the DoLowerCase option; no other state is touched here.
+     this->DoLowerCase = da;
+ }
+
+ void TNormalizer::SetDoSimpleCyr(bool da) { // Stores the DoSimpleCyr option; no other state is touched here.
+     this->DoSimpleCyr = da;
+ }
+
+ void TNormalizer::SetFillOffsets(bool da) { // Stores the FillOffsets option; no other state is touched here.
+     this->FillOffsets = da;
+ }
+
+ void TNormalizer::Reset() { // Drops all per-input state; the option flags and language/script sets are left untouched.
+     cs = act = 0; // NOTE(review): cs/act/p/pe/eof/ts/te match Ragel scanner variable names -- confirm against the generated scanner
+     p = p0 = pe = eof = ts = te = ret = nullptr; // no cursor may keep referring to the buffers emptied below
+     TmpBuf.clear();
+     CDOffsets.clear();
+     OutBuf.clear();
+     CDBuf.clear();
+ }
+
+ void TNormalizer::SetInput(TWtringBuf b) { // Installs b as the new input, discarding whatever the previous input left behind.
+     Reset();
+     const auto capacityHint = 2 * b.size(); // both buffers are pre-sized to twice the input length
+     CDBuf.reserve(capacityHint);
+     OutBuf.reserve(capacityHint);
+     Decomposer.Normalize(b.data(), b.size(), CDBuf); // CDBuf is passed as the destination -- presumably receives the decomposed text, confirm in Decomposer
+     p = p0 = CDBuf.begin(); // current and start cursors sit at the front of CDBuf
+     pe = eof = CDBuf.end(); // end cursors sit one past its last element
+ }
+
+}