aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/unicode/normalization/normalization.cpp
blob: de7625abeaef2c202b08221334d94ae02e984553 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
#include "normalization.h"

// Parameters of the arithmetic Hangul syllable composition used by ComposeHangul().

// Code point of the first precomposed syllable; syllable indices are offsets from it.
static constexpr wchar32 S_BASE = 0xAC00;
// Code point that maps to leading-consonant (L) index 0.
static constexpr wchar32 L_BASE = 0x1100;
// Code point that maps to vowel (V) index 0.
static constexpr wchar32 V_BASE = 0x1161;
// Origin for trailing-consonant (T) indices; ComposeHangul() only accepts
// indices strictly greater than 0, i.e. code points above T_BASE.
static constexpr wchar32 T_BASE = 0x11A7;

// Sizes of the L, V and T index ranges.
static constexpr int L_COUNT = 19;
static constexpr int V_COUNT = 21;
static constexpr int T_COUNT = 28;

// Number of syllables that share one leading consonant.
static constexpr int N_COUNT = V_COUNT * T_COUNT; // 588
// Total number of precomposed syllables starting at S_BASE.
static constexpr int S_COUNT = L_COUNT * N_COUNT; // 11172

// Arithmetically composes a pair of Hangul code points.
//
// Two pairings are recognised:
//   * L + V  -> the LV syllable
//              S_BASE + (lIndex * V_COUNT + vIndex) * T_COUNT;
//   * LV + T -> the LVT syllable (the LV syllable advanced by the T index).
//
// Returns the composed code point, or 0 when the pair matches neither form.
static inline wchar32 ComposeHangul(wchar32 lead, wchar32 tail) {
    // Case 1: `lead` is a leading consonant and `tail` is a vowel.
    const int leadOffset = lead - L_BASE;
    const int vowelOffset = tail - V_BASE;
    const bool leadIsL = leadOffset >= 0 && leadOffset < L_COUNT;
    const bool tailIsV = vowelOffset >= 0 && vowelOffset < V_COUNT;
    if (leadIsL && tailIsV) {
        return static_cast<wchar32>(S_BASE + (leadOffset * V_COUNT + vowelOffset) * T_COUNT);
    }

    // Case 2: `lead` is an LV syllable (its syllable index is a multiple of
    // T_COUNT) and `tail` is a trailing consonant.
    const int syllableOffset = lead - S_BASE;
    const bool leadIsLV = syllableOffset >= 0 && syllableOffset < S_COUNT && syllableOffset % T_COUNT == 0;
    if (leadIsLV) {
        const int trailOffset = tail - T_BASE;
        // Offset 0 (T_BASE itself) is deliberately rejected: only 1..T_COUNT-1 compose.
        if (trailOffset > 0 && trailOffset < T_COUNT) {
            return static_cast<wchar32>(lead + trailOffset);
        }
    }

    // Not a composable Hangul pair.
    return 0;
}

// Fills the (lead, tail) -> composite lookup table `Data`.
//
// Step 1 copies the entries of RawData, skipping those whose lead has a
// non-zero DecompositionCombining() value.
// Step 2 adds the Hangul pairs: every syllable's decomposition is replayed
// through ComposeHangul(), recording each intermediate composition.
NUnicode::NPrivate::TComposition::TComposition() {
    for (size_t idx = 0; idx != RawDataSize; ++idx) {
        const TRawData& entry = RawData[idx];

        // Only leads for which DecompositionCombining() yields 0 are registered.
        if (DecompositionCombining(entry.Lead) == 0) {
            Data[TKey(entry.Lead, entry.Tail)] = entry.Comp;
        }
    }

    // 0xAC00 .. 0xD7A3 is the precomposed Hangul syllable range
    // (numerically S_BASE .. S_BASE + S_COUNT - 1).
    for (wchar32 syllable = 0xAC00; syllable != 0xD7A4; ++syllable) {
        const wchar32* cursor = NUnicode::Decomposition<true>(syllable);

        if (cursor != nullptr) {
            // The decomposition is read as a zero-terminated sequence: fold it
            // left to right so that (L, V) -> LV and then (LV, T) -> LVT are
            // both stored in the table.
            wchar32 composedSoFar = *cursor;
            for (++cursor; *cursor; ++cursor) {
                const wchar32 next = *cursor;
                const wchar32 composed = ComposeHangul(composedSoFar, next);
                Y_ASSERT(composed != 0);

                Data[TKey(composedSoFar, next)] = composed;

                composedSoFar = composed;
            }
        }
    }
}