blob: 47a42a80b2ea2cf785ed34bb1b65b99396c399f8 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
|
#include "fold.h"
namespace NUF {
TNormalizer::TNormalizer(ELanguage lmain, ELanguage laux)
: DoRenyxa()
, DoLowerCase()
, DoSimpleCyr()
, FillOffsets()
{
Reset();
SetLanguages(lmain, laux);
}
TNormalizer::TNormalizer(const TLanguages& langs)
: DoRenyxa()
, DoLowerCase()
, DoSimpleCyr()
, FillOffsets()
{
Reset();
SetLanguages(langs);
}
// Replaces the current language and script sets with exactly the two given
// languages and the scripts ScriptByLanguage() reports for them.
// If lmain == laux (or both map to the same script) the corresponding bit is
// simply set twice.
void TNormalizer::SetLanguages(ELanguage lmain, ELanguage laux) {
    // Drop whatever was configured before.
    Languages.reset();
    Scripts.reset();

    Languages.set(lmain);
    Languages.set(laux);

    const auto mainScript = ScriptByLanguage(lmain);
    Scripts.set(mainScript);
    const auto auxScript = ScriptByLanguage(laux);
    Scripts.set(auxScript);
}
void TNormalizer::SetLanguages(const TLanguages& langs) {
Languages = langs;
Scripts.reset();
for (ui32 i = 0; i < langs.size(); ++i) {
if (langs.test(i))
Scripts.set(ScriptByLanguage(ELanguage(i)));
}
}
void TNormalizer::SetDoRenyxa(bool da) {
DoRenyxa = da;
}
void TNormalizer::SetDoLowerCase(bool da) {
DoLowerCase = da;
}
void TNormalizer::SetDoSimpleCyr(bool da) {
DoSimpleCyr = da;
}
void TNormalizer::SetFillOffsets(bool da) {
FillOffsets = da;
}
// Returns the normalizer to its empty working state: all four buffers are
// cleared, every cursor pointer is nulled and the cs/act counters are zeroed.
// Language and option settings are not touched.
// NOTE(review): p/pe/eof/ts/te/cs/act look like scanner state variables of a
// generated state machine - confirm against the class declaration.
void TNormalizer::Reset() {
    CDBuf.clear();
    OutBuf.clear();
    CDOffsets.clear();
    TmpBuf.clear();

    // Same right-to-left order the original chained assignment produced.
    ret = nullptr;
    te = nullptr;
    ts = nullptr;
    eof = nullptr;
    pe = nullptr;
    p0 = nullptr;
    p = nullptr;

    act = 0;
    cs = 0;
}
// Loads a new input string.
// The previous working state is discarded with Reset(), CDBuf and OutBuf get
// capacity for twice the input length, and Decomposer.Normalize() writes its
// result for `b` into CDBuf. Afterwards p/p0 point at the start of CDBuf and
// pe/eof at its end.
void TNormalizer::SetInput(TWtringBuf b) {
    Reset();

    const auto capacity = 2 * b.size();
    CDBuf.reserve(capacity);
    OutBuf.reserve(capacity);

    Decomposer.Normalize(b.data(), b.size(), CDBuf);

    // Cursors span the whole decomposed buffer.
    p0 = CDBuf.begin();
    p = p0;
    eof = CDBuf.end();
    pe = eof;
}
}
|