summaryrefslogtreecommitdiffstats
path: root/yql/essentials/sql/v1/complete/name/static/frequency.cpp
blob: 62997ccff7abc250d402ef5d2e4d564a8b9d8cf3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#include "frequency.h"

#include <library/cpp/json/json_reader.h>
#include <library/cpp/resource/resource.h>

#include <util/charset/utf8.h>

namespace NSQLComplete {

    // String constants of the frequency JSON format used in this file.
    //   Key    - field names looked up in every JSON record.
    //   Parent - values of the "parent" field that are matched by name.
    constexpr struct {
        struct {
            const char* Parent{"parent"};
            const char* Rule{"rule"};
            const char* Sum{"sum"};
        } Key;

        struct {
            const char* Pragma{"PRAGMA"};
            const char* Type{"TYPE"};
            const char* Func{"FUNC"};
            const char* Keyword{"KEYWORD"};
            const char* Module{"MODULE"};
            const char* ModuleFunc{"MODULE_FUNC"};
            const char* ReadHint{"READ_HINT"};
            const char* InsertHint{"INSERT_HINT"};
        } Parent;
    } Json;

    struct TFrequencyItem {
        TString Parent;
        TString Rule;
        size_t Sum;

        static TFrequencyItem ParseJsonMap(NJson::TJsonValue::TMapType&& json) {
            return {
                .Parent = json.at(Json.Key.Parent).GetStringSafe(),
                .Rule = json.at(Json.Key.Rule).GetStringSafe(),
                .Sum = json.at(Json.Key.Sum).GetUIntegerSafe(),
            };
        }

        static TVector<TFrequencyItem> ParseListFromJsonArray(NJson::TJsonValue::TArray& json) {
            TVector<TFrequencyItem> items;
            items.reserve(json.size());
            for (auto& element : json) {
                auto item = TFrequencyItem::ParseJsonMap(std::move(element.GetMapSafe()));
                items.emplace_back(std::move(item));
            }
            return items;
        }

        static TVector<TFrequencyItem> ParseListFromJsonText(const TStringBuf text) {
            NJson::TJsonValue json = NJson::ReadJsonFastTree(text);
            return ParseListFromJsonArray(json.GetArraySafe());
        }
    };

    // Aggregates parsed items into per-category counters of TFrequencyData.
    // Rules of every recognized parent are lower-cased with ToLowerUTF8 before
    // being used as keys; sums of equal keys are added together.
    TFrequencyData Convert(TVector<TFrequencyItem> items) {
        TFrequencyData result;

        for (TFrequencyItem& entry : items) {
            const TString& parent = entry.Parent;

            const bool isPragma = parent == Json.Parent.Pragma;
            const bool isType = parent == Json.Parent.Type;
            const bool isKeyword = parent == Json.Parent.Keyword;
            const bool isModule = parent == Json.Parent.Module;
            const bool isFunction = parent == Json.Parent.Func ||
                                    parent == Json.Parent.ModuleFunc;
            const bool isHint = parent == Json.Parent.ReadHint ||
                                parent == Json.Parent.InsertHint;

            const bool isRecognized = isPragma || isType || isKeyword ||
                                      isModule || isFunction || isHint;
            if (!isRecognized) {
                // Ignore, unsupported: Parser Call Stacks
                continue;
            }

            entry.Rule = ToLowerUTF8(entry.Rule);

            const auto accumulate = [&entry](auto& counters) {
                counters[entry.Rule] += entry.Sum;
            };

            if (isPragma) {
                accumulate(result.Pragmas);
            } else if (isType) {
                accumulate(result.Types);
            } else if (isKeyword) {
                accumulate(result.Keywords);
            } else if (isFunction) {
                accumulate(result.Functions);
            } else if (isHint) {
                accumulate(result.Hints);
            } else {
                // Ignore, unsupported: Modules
            }
        }

        return result;
    }

    // Parses frequency statistics from JSON text: the text is first turned
    // into a list of TFrequencyItem and then aggregated by Convert.
    TFrequencyData ParseJsonFrequencyData(const TStringBuf text) {
        TVector<TFrequencyItem> parsedItems = TFrequencyItem::ParseListFromJsonText(text);
        return Convert(std::move(parsedItems));
    }

    // Loads the frequency statistics from the resource registered under the
    // key "rules_corr_basic.json" and parses it with ParseJsonFrequencyData.
    TFrequencyData LoadFrequencyData() {
        TString text;
        // NOTE(review): presumably Y_ENSURE raises when FindExact reports that
        // the resource is absent -- confirm against the util Y_ENSURE contract.
        Y_ENSURE(NResource::FindExact("rules_corr_basic.json", &text));
        return ParseJsonFrequencyData(text);
    }

} // namespace NSQLComplete