blob: de7625abeaef2c202b08221334d94ae02e984553 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
#include "normalization.h"
// Parameters of the arithmetic Hangul syllable composition performed by
// ComposeHangul(). L, V and T denote the leading, vowel and trailing parts
// of a syllable. All of them are private to this translation unit.
namespace {
    // Code point of the first precomposed syllable.
    constexpr wchar32 S_BASE = 0xAC00;
    // Code point that maps to leading (L) index 0.
    constexpr wchar32 L_BASE = 0x1100;
    // Code point that maps to vowel (V) index 0.
    constexpr wchar32 V_BASE = 0x1161;
    // Code point that maps to trailing (T) index 0; index 0 itself is never
    // accepted as a trailing part, so valid T indices are 1 .. T_COUNT - 1.
    constexpr wchar32 T_BASE = 0x11A7;

    // Number of distinct L, V and T indices.
    constexpr int L_COUNT = 19;
    constexpr int V_COUNT = 21;
    constexpr int T_COUNT = 28;

    // Syllables sharing one leading part.
    constexpr int N_COUNT = V_COUNT * T_COUNT; // 588
    // Total number of precomposed syllables.
    constexpr int S_COUNT = L_COUNT * N_COUNT; // 11172
}
// Composes a pair of code points into a single Hangul syllable.
//
// Two kinds of pairs are recognised:
//   * L + V  -> the LV syllable built from a leading part and a vowel;
//   * LV + T -> the LVT syllable obtained by adding a trailing part to an
//               LV syllable.
//
// Returns the composed code point, or 0 when the pair does not compose.
static inline wchar32 ComposeHangul(wchar32 lead, wchar32 tail) {
    // Case 1: `lead` is a leading part and `tail` is a vowel.
    const int leadingIdx = lead - L_BASE;
    const bool leadIsL = leadingIdx >= 0 && leadingIdx < L_COUNT;
    if (leadIsL) {
        const int vowelIdx = tail - V_BASE;
        if (vowelIdx >= 0 && vowelIdx < V_COUNT) {
            // Each (L, V) pair owns a run of T_COUNT consecutive syllables;
            // the LV form is the first one in that run.
            const int lvOffset = (leadingIdx * V_COUNT + vowelIdx) * T_COUNT;
            return static_cast<wchar32>(S_BASE + lvOffset);
        }
    }

    // Case 2: `lead` is an LV syllable (no trailing part yet) and `tail`
    // is a trailing part.
    const int syllableIdx = lead - S_BASE;
    const bool leadIsLV = syllableIdx >= 0 && syllableIdx < S_COUNT && (syllableIdx % T_COUNT) == 0;
    if (!leadIsLV) {
        return 0;
    }

    const int trailingIdx = tail - T_BASE;
    // Index 0 is rejected: only indices 1 .. T_COUNT - 1 denote a trailing part.
    if (trailingIdx <= 0 || trailingIdx >= T_COUNT) {
        return 0;
    }

    return static_cast<wchar32>(lead + trailingIdx);
}
// Fills the pairwise composition table `Data`, keyed by (lead, tail).
//
// Step 1 copies the entries of the static `RawData` table, skipping those
// whose lead has a non-zero DecompositionCombining() value.
// Step 2 adds the Hangul syllables: each syllable's decomposition is walked
// left to right and every intermediate (composed-so-far, next) pair is
// registered together with the code point ComposeHangul() yields for it.
NUnicode::NPrivate::TComposition::TComposition() {
    for (size_t idx = 0; idx != RawDataSize; ++idx) {
        const TRawData& entry = RawData[idx];
        if (DecompositionCombining(entry.Lead) == 0) {
            Data[TKey(entry.Lead, entry.Tail)] = entry.Comp;
        }
    }

    // Precomposed syllables occupy [S_BASE, S_BASE + S_COUNT) == [0xAC00, 0xD7A4).
    const wchar32 syllablesEnd = static_cast<wchar32>(S_BASE + S_COUNT);
    for (wchar32 syllable = S_BASE; syllable != syllablesEnd; ++syllable) {
        const wchar32* cursor = NUnicode::Decomposition<true>(syllable);
        if (cursor == nullptr) {
            continue;
        }

        // The decomposition is read as a zero-terminated sequence; its first
        // element seeds the running composition.
        wchar32 composed = *cursor;
        for (++cursor; *cursor; ++cursor) {
            const wchar32 next = *cursor;
            const wchar32 merged = ComposeHangul(composed, next);
            Y_ASSERT(merged != 0);
            Data[TKey(composed, next)] = merged;
            composed = merged;
        }
    }
}
|