aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/unicode/normalization/normalization.cpp
blob: f27840fe2d9a7be2e92c308bdc24f2a41032a91a (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include "normalization.h" 
 
// Constants for arithmetic Hangul syllable composition (see ComposeHangul).
// Code point bases of the precomposed syllable block and of the conjoining
// jamo ranges used as composition inputs.
static constexpr wchar32 S_BASE = 0xAC00; // first precomposed syllable
static constexpr wchar32 L_BASE = 0x1100; // first leading jamo (L)
static constexpr wchar32 V_BASE = 0x1161; // first vowel jamo (V)
static constexpr wchar32 T_BASE = 0x11A7; // one below the first trailing jamo (T); index 0 means "no trailing"

// Sizes of the jamo ranges and the derived sizes of the syllable block.
static constexpr int L_COUNT = 19;
static constexpr int V_COUNT = 21;
static constexpr int T_COUNT = 28;
static constexpr int N_COUNT = V_COUNT * T_COUNT; // 588 syllables per leading jamo
static constexpr int S_COUNT = L_COUNT * N_COUNT; // 11172 syllables in total
 
// Composes a pair of code points into a single Hangul syllable.
//
// Two pair shapes are recognised:
//   * L + V  -> LV syllable  (lead in [L_BASE, L_BASE + L_COUNT),
//                             tail in [V_BASE, V_BASE + V_COUNT))
//   * LV + T -> LVT syllable (lead is a syllable whose offset from S_BASE is a
//                             multiple of T_COUNT, tail in (T_BASE, T_BASE + T_COUNT))
//
// Returns the composed syllable, or 0 when the pair matches neither shape.
static inline wchar32 ComposeHangul(wchar32 lead, wchar32 tail) {
    // Case 1: leading jamo followed by a vowel jamo.
    const int leadingIdx = lead - L_BASE;
    const int vowelIdx = tail - V_BASE;
    const bool isLeadingJamo = leadingIdx >= 0 && leadingIdx < L_COUNT;
    const bool isVowelJamo = vowelIdx >= 0 && vowelIdx < V_COUNT;
    if (isLeadingJamo && isVowelJamo) {
        return static_cast<wchar32>(S_BASE + (leadingIdx * V_COUNT + vowelIdx) * T_COUNT);
    }

    // Case 2: LV syllable (no trailing jamo yet) followed by a trailing jamo.
    const int syllableIdx = lead - S_BASE;
    const int trailingIdx = tail - T_BASE;
    const bool isLvSyllable = syllableIdx >= 0 && syllableIdx < S_COUNT && syllableIdx % T_COUNT == 0;
    // trailingIdx == 0 corresponds to T_BASE itself, which is not a trailing jamo.
    const bool isTrailingJamo = trailingIdx > 0 && trailingIdx < T_COUNT;
    if (isLvSyllable && isTrailingJamo) {
        return static_cast<wchar32>(lead + trailingIdx);
    }

    // Not a composable Hangul pair.
    return 0;
}
 
// Builds the pair-composition table: Data maps TKey(lead, tail) to the code
// point the pair composes into.
//
// The table is filled from two sources:
//   1. the static RawData list of (Lead, Tail, Comp) triples;
//   2. every Hangul syllable, whose pairs are derived from its decomposition.
NUnicode::NPrivate::TComposition::TComposition() {
    for (size_t idx = 0; idx < RawDataSize; ++idx) {
        const TRawData& entry = RawData[idx];

        // Only pairs whose lead reports a zero DecompositionCombining value are
        // registered (presumably the lead must be a starter -- confirm against
        // the definition of DecompositionCombining).
        if (DecompositionCombining(entry.Lead) == 0) {
            Data[TKey(entry.Lead, entry.Tail)] = entry.Comp;
        }
    }

    // Syllable block [S_BASE, S_BASE + S_COUNT) == [0xAC00, 0xD7A4).
    const wchar32 syllableEnd = S_BASE + S_COUNT;
    for (wchar32 syllable = S_BASE; syllable != syllableEnd; ++syllable) {
        const wchar32* cursor = NUnicode::Decomposition<true>(syllable);
        if (!cursor) {
            continue;
        }

        // Fold the zero-terminated decomposition left to right: each step
        // composes the running prefix with the next code point and records
        // that pair, so both the L+V and the LV+T pairs end up in the table.
        wchar32 prefix = *cursor;
        for (++cursor; *cursor; ++cursor) {
            const wchar32 next = *cursor;
            const wchar32 composed = ComposeHangul(prefix, next);
            Y_ASSERT(composed != 0);

            Data[TKey(prefix, next)] = composed;
            prefix = composed;
        }
    }
}