diff options
author | qrort <qrort@yandex-team.com> | 2022-11-30 23:47:12 +0300 |
---|---|---|
committer | qrort <qrort@yandex-team.com> | 2022-11-30 23:47:12 +0300 |
commit | 22f8ae0e3f5d68b92aecccdf96c1d841a0334311 (patch) | |
tree | bffa27765faf54126ad44bcafa89fadecb7a73d7 /library/cpp/unicode/folding/fold.cpp | |
parent | 332b99e2173f0425444abb759eebcb2fafaa9209 (diff) | |
download | ydb-22f8ae0e3f5d68b92aecccdf96c1d841a0334311.tar.gz |
validate canons without yatest_common
Diffstat (limited to 'library/cpp/unicode/folding/fold.cpp')
-rw-r--r-- | library/cpp/unicode/folding/fold.cpp | 78 |
1 files changed, 78 insertions, 0 deletions
diff --git a/library/cpp/unicode/folding/fold.cpp b/library/cpp/unicode/folding/fold.cpp new file mode 100644 index 0000000000..47a42a80b2 --- /dev/null +++ b/library/cpp/unicode/folding/fold.cpp @@ -0,0 +1,78 @@ +#include "fold.h" + +namespace NUF { + TNormalizer::TNormalizer(ELanguage lmain, ELanguage laux) + : DoRenyxa() + , DoLowerCase() + , DoSimpleCyr() + , FillOffsets() + { + Reset(); + SetLanguages(lmain, laux); + } + + TNormalizer::TNormalizer(const TLanguages& langs) + : DoRenyxa() + , DoLowerCase() + , DoSimpleCyr() + , FillOffsets() + { + Reset(); + SetLanguages(langs); + } + + void TNormalizer::SetLanguages(ELanguage lmain, ELanguage laux) { + Languages.reset(); + Scripts.reset(); + Languages.set(lmain); + Languages.set(laux); + Scripts.set(ScriptByLanguage(lmain)); + Scripts.set(ScriptByLanguage(laux)); + } + + void TNormalizer::SetLanguages(const TLanguages& langs) { + Languages = langs; + Scripts.reset(); + + for (ui32 i = 0; i < langs.size(); ++i) { + if (langs.test(i)) + Scripts.set(ScriptByLanguage(ELanguage(i))); + } + } + + void TNormalizer::SetDoRenyxa(bool da) { + DoRenyxa = da; + } + + void TNormalizer::SetDoLowerCase(bool da) { + DoLowerCase = da; + } + + void TNormalizer::SetDoSimpleCyr(bool da) { + DoSimpleCyr = da; + } + + void TNormalizer::SetFillOffsets(bool da) { + FillOffsets = da; + } + + void TNormalizer::Reset() { + CDBuf.clear(); + OutBuf.clear(); + CDOffsets.clear(); + TmpBuf.clear(); + p = p0 = pe = eof = ts = te = ret = nullptr; + cs = act = 0; + } + + void TNormalizer::SetInput(TWtringBuf b) { + Reset(); + CDBuf.reserve(2 * b.size()); + OutBuf.reserve(2 * b.size()); + + Decomposer.Normalize(b.data(), b.size(), CDBuf); + p = p0 = CDBuf.begin(); + pe = eof = CDBuf.end(); + } + +} |