summary refs log tree commit diff stats
path: root/yql/essentials/sql/v1/ide/completion/name/service/ranking/frequency.cpp
blob: 6c7eda5b9b3c935cece48c7b97de2ef878785df1 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#include "frequency.h"

#include <yql/essentials/sql/v1/ide/completion/name/service/name_service.h>

#include <library/cpp/json/json_reader.h>
#include <library/cpp/resource/resource.h>

#include <util/charset/utf8.h>

namespace NSQLComplete {

// String constants used when reading the frequency JSON.
//   Json.Key    - names of the fields read from every JSON object.
//   Json.Parent - values of the "parent" field that Collect compares against.
constexpr struct {
    struct {
        const char* Parent;
        const char* Rule;
        const char* Sum;
    } Key;
    struct {
        const char* Pragma;
        const char* Type;
        const char* Func;
        const char* Keyword;
        const char* Module;
        const char* ModuleFunc;
        const char* ReadHint;
        const char* InsertHint;
    } Parent;
} Json = {
    .Key = {
        .Parent = "parent",
        .Rule = "rule",
        .Sum = "sum",
    },
    .Parent = {
        .Pragma = "PRAGMA",
        .Type = "TYPE",
        .Func = "FUNC",
        .Keyword = "KEYWORD",
        .Module = "MODULE",
        .ModuleFunc = "MODULE_FUNC",
        .ReadHint = "READ_HINT",
        .InsertHint = "INSERT_HINT",
    },
};

// One entry of the frequency JSON: a parent tag, a rule name and the value
// that Collect adds up per rule.
struct TFrequencyItem {
    TString Parent;
    TString Rule;
    // Zero-initialised so that a default-constructed item never carries an
    // indeterminate value; the struct remains an aggregate.
    size_t Sum = 0;

    // Builds an item from a JSON object by reading the fields named by
    // Json.Key (Parent, Rule, Sum).
    // NOTE(review): a missing key or a field of an unexpected JSON type is
    // presumably reported by an exception from `at` / the `*Safe` getters -
    // confirm against the map and TJsonValue documentation.
    static TFrequencyItem ParseJsonMap(NJson::TJsonValue::TMapType&& json) {
        return {
            .Parent = json.at(Json.Key.Parent).GetStringSafe(),
            .Rule = json.at(Json.Key.Rule).GetStringSafe(),
            .Sum = json.at(Json.Key.Sum).GetUIntegerSafe(),
        };
    }

    // Converts every element of a JSON array into an item, preserving the
    // order of the array. Each element is accessed as a map and handed to
    // ParseJsonMap as an rvalue.
    static TVector<TFrequencyItem> ParseListFromJsonArray(NJson::TJsonValue::TArray& json) {
        TVector<TFrequencyItem> items;
        items.reserve(json.size());
        for (auto& element : json) {
            items.emplace_back(ParseJsonMap(std::move(element.GetMapSafe())));
        }
        return items;
    }

    // Parses `text` with NJson::ReadJsonFastTree and converts the resulting
    // top-level value, accessed as an array, into a list of items.
    static TVector<TFrequencyItem> ParseListFromJsonText(TStringBuf text) {
        NJson::TJsonValue json = NJson::ReadJsonFastTree(text);
        return ParseListFromJsonArray(json.GetArraySafe());
    }
};

// Folds parsed items into the per-category tables of TFrequencyData. Items
// that land in the same table under the same rule have their Sum values added
// together.
TFrequencyData Collect(const TVector<TFrequencyItem>& items) {
    TFrequencyData data;
    using TCounters = decltype(data.Pragmas);

    // Resolves a parent tag to the table it feeds, or nullptr when items with
    // that tag are not collected.
    const auto countersFor = [&data](const TString& parent) -> TCounters* {
        if (parent == Json.Parent.Pragma) {
            return &data.Pragmas;
        }
        if (parent == Json.Parent.Type) {
            return &data.Types;
        }
        if (parent == Json.Parent.Keyword) {
            return &data.Keywords;
        }
        if (parent == Json.Parent.Func || parent == Json.Parent.ModuleFunc) {
            return &data.Functions;
        }
        if (parent == Json.Parent.ReadHint || parent == Json.Parent.InsertHint) {
            return &data.Hints;
        }
        // Ignored as unsupported: Modules (Json.Parent.Module) and Parser
        // Call Stacks (every remaining tag).
        return nullptr;
    };

    for (const TFrequencyItem& item : items) {
        if (TCounters* counters = countersFor(item.Parent)) {
            (*counters)[item.Rule] += item.Sum;
        }
    }
    return data;
}

// Re-keys a table through `normalize`: every entry is stored under
// normalize(name), and entries whose names normalize to the same key have
// their counts added together.
THashMap<TString, size_t> PrunedBy(const THashMap<TString, size_t>& data, auto normalize) {
    THashMap<TString, size_t> merged;
    for (const auto& entry : data) {
        size_t& total = merged[normalize(entry.first)];
        total += entry.second;
    }
    return merged;
}

// Applies the table-level PrunedBy to each of the Keywords, Pragmas, Types,
// Functions and Hints tables of `data`, using the same `normalize` callable
// for all of them, and returns the results as a new TFrequencyData.
TFrequencyData PrunedBy(const TFrequencyData& data, auto normalize) {
    TFrequencyData result;
    result.Keywords = PrunedBy(data.Keywords, normalize);
    result.Pragmas = PrunedBy(data.Pragmas, normalize);
    result.Types = PrunedBy(data.Types, normalize);
    result.Functions = PrunedBy(data.Functions, normalize);
    result.Hints = PrunedBy(data.Hints, normalize);
    return result;
}

// Returns a copy of `data` in which every table is re-keyed by
// NormalizeName, merging the counts of names that normalize to the same key.
TFrequencyData Pruned(const TFrequencyData& data) {
    const auto byNormalizedName = [](TStringBuf name) {
        return NormalizeName(name);
    };
    return PrunedBy(data, byNormalizedName);
}

// Parses `text` as a JSON list of frequency items and collects the items
// into a TFrequencyData.
TFrequencyData ParseJsonFrequencyData(TStringBuf text) {
    const TVector<TFrequencyItem> items = TFrequencyItem::ParseListFromJsonText(text);
    return Collect(items);
}

// Loads the frequency data from the resource named "rules_corr_basic.json":
// the resource text is fetched with NResource::FindExact and then parsed by
// ParseJsonFrequencyData.
TFrequencyData LoadFrequencyData() {
    TString text;
    // The lookup result is checked with Y_ENSURE.
    // NOTE(review): presumably this throws when the resource is not present
    // in the binary - confirm against the Y_ENSURE documentation.
    Y_ENSURE(NResource::FindExact("rules_corr_basic.json", &text));
    return ParseJsonFrequencyData(text);
}

} // namespace NSQLComplete