summaryrefslogtreecommitdiffstats
path: root/yql/essentials/utils/docs/link_page.cpp
blob: 985410fafd5d5f37cbf9dca9ac5df33ec09fe883 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include "link_page.h"

#include "name.h"

#include <util/generic/hash_set.h>
#include <util/string/split.h>

namespace NYql::NDocs {

// Interprets the whole header as a single function name.
// Returns whatever NormalizedName produces for the header text, i.e. an empty
// TMaybe when NormalizedName yields no value.
TMaybe<TString> MatchSingleFunctionHeader(TStringBuf header) {
    TString candidate(header);
    return NormalizedName(std::move(candidate));
}

// Splits every element of `strings` by the string delimiter `delim` and
// gathers the resulting pieces into a single vector, visiting the input
// elements in order.
TVector<TString> SplitBy(TStringBuf delim, const TVector<TString>& strings) {
    TVector<TString> pieces;
    for (const auto& source : strings) {
        // Each call adds the pieces of `source` to the shared output vector.
        StringSplitter(source).SplitByString(delim).AddTo(&pieces);
    }
    return pieces;
}

// Breaks a header into fragments by applying, one after another, the
// separators " и ", " / " and ", " to the fragments obtained so far.
TVector<TString> SplitByPunctuation(TStringBuf header) {
    // The order of separators is significant only in that it mirrors the
    // sequence of successive splits; each pass re-splits all current fragments.
    const TStringBuf separators[] = {" и ", " / ", ", "};

    TVector<TString> fragments = {TString(header)};
    for (const TStringBuf separator : separators) {
        fragments = SplitBy(separator, fragments);
    }
    return fragments;
}

// Interprets the header as a list of function names separated by punctuation.
// Returns the normalized form of every fragment, in order. If NormalizedName
// yields no value for any fragment, the whole header is rejected and an empty
// vector is returned.
TVector<TString> MatchMultiFunctionHeader(TStringBuf header) {
    TVector<TString> normalizedNames;

    for (TString& fragment : SplitByPunctuation(header)) {
        TMaybe<TString> normalized = NormalizedName(std::move(fragment));
        if (!normalized.Defined()) {
            // A single unrecognized fragment invalidates the entire header.
            return {};
        }
        normalizedNames.push_back(std::move(*normalized));
    }

    return normalizedNames;
}

// Extracts normalized function names from a header.
// The header is first tried as a single name; if that fails it is tried as a
// punctuation-separated list of names. An empty vector means neither
// interpretation matched.
TVector<TString> ExtractNormalized(TStringBuf header) {
    const TMaybe<TString> single = MatchSingleFunctionHeader(header);
    if (single.Defined()) {
        return {*single};
    }

    // The multi-name matcher already returns an empty vector on failure,
    // which is exactly the "nothing matched" result.
    return MatchMultiFunctionHeader(header);
}

// Registers a link for every normalized name extracted from the header's
// content. Each link points at `path` with the header's anchor; an existing
// entry for the same name is overwritten.
void EnrichFromMarkdown(TLinks& links, const TString& path, const TMarkdownHeader& header) {
    const TVector<TString> names = ExtractNormalized(header.Content);
    for (const TString& normalizedName : names) {
        links[normalizedName] = {
            .RelativePath = path,
            .Anchor = header.Anchor,
        };
    }
}

// Registers links for the header of every section of `page`, using `path`
// as the location of the page.
void EnrichFromMarkdown(TLinks& links, const TString& path, const TMarkdownPage& page) {
    for (const auto& [sectionAnchor, sectionBody] : page.SectionsByAnchor) {
        // Only the section header contributes links; the map key is not needed here.
        EnrichFromMarkdown(links, path, sectionBody.Header);
    }
}

// Registers links for every page in `pages`; the key of each entry is used
// as the page's path.
void EnrichFromMarkdown(TLinks& links, const TPages& pages) {
    for (const auto& [pagePath, pageContent] : pages) {
        EnrichFromMarkdown(links, pagePath, pageContent);
    }
}

// Builds a fresh link table from all section headers of all given pages.
TLinks GetLinksFromPages(const TPages& pages) {
    TLinks collected;
    for (const auto& [pagePath, pageContent] : pages) {
        EnrichFromMarkdown(collected, pagePath, pageContent);
    }
    return collected;
}

// Returns `pages` with everything that `links` does not reference removed:
//  - a page whose path is not the RelativePath of any link is erased entirely;
//  - inside a referenced page, every section whose anchor is not used by a
//    link to that page is erased.
// A link without an anchor is recorded under the empty-string anchor.
TPages Stripped(TPages&& pages, const TLinks& links) {
    // path -> set of anchors referenced by at least one link.
    THashMap<TString, THashSet<TString>> usedAnchors;
    for (const auto& [_, link] : links) {
        TString anchor = link.Anchor.GetOrElse("");
        usedAnchors[link.RelativePath].emplace(std::move(anchor));
    }

    // Removals are collected first and applied afterwards, so `pages` is not
    // modified while it is being iterated.
    THashSet<TString> unusedPaths;
    THashMap<TString, THashSet<TString>> unusedAnchors;
    for (const auto& [path, page] : pages) {
        // Look the page up once instead of twice per section.
        const auto used = usedAnchors.find(path);

        // NOTE(review): a page with no sections is never marked as unused,
        // because the decision is made once per section. This is kept as-is;
        // confirm whether such pages are expected to survive.
        for (const auto& [anchor, _] : page.SectionsByAnchor) {
            if (used == usedAnchors.end()) {
                unusedPaths.emplace(path);
            } else if (!used->second.contains(anchor)) {
                unusedAnchors[path].emplace(anchor);
            }
        }
    }

    for (const auto& [path, anchors] : unusedAnchors) {
        auto& sections = pages[path].SectionsByAnchor;
        for (const auto& anchor : anchors) {
            sections.erase(anchor);
        }
    }

    for (const auto& path : unusedPaths) {
        pages.erase(path);
    }

    // `pages` is a named rvalue reference: move explicitly so the result is
    // move-constructed regardless of the language mode's implicit-move rules.
    return std::move(pages);
}

} // namespace NYql::NDocs