summaryrefslogtreecommitdiffstats
path: root/yql/essentials/utils/docs/link_page.cpp
blob: eb71979462d6cd7683c3013581de92952df16621 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include "link_page.h"

#include "name.h"

#include <util/generic/hash_set.h>
#include <util/string/split.h>

namespace NYql::NDocs {

    // Treats the whole header text as one candidate name and passes it to
    // NormalizedName; the result of that call is returned unchanged.
    TMaybe<TString> MatchSingleFunctionHeader(TStringBuf header) {
        TString candidate(header);
        return NormalizedName(std::move(candidate));
    }

    // Splits every element of `strings` on the substring `delim` and returns
    // all resulting pieces in one flat vector, in input order.
    TVector<TString> SplitBy(TStringBuf delim, const TVector<TString>& strings) {
        TVector<TString> pieces;
        for (const TString& source : strings) {
            // AddTo appends, so pieces from earlier elements are preserved.
            StringSplitter(source)
                .SplitByString(delim)
                .AddTo(&pieces);
        }
        return pieces;
    }

    // Breaks a header into fragments by applying, one after another, the
    // separators " и ", " / " and ", " to the fragments produced so far.
    TVector<TString> SplitByPunctuation(TStringBuf header) {
        // Applied in this exact order; each pass re-splits the previous output.
        const TStringBuf separators[] = {" и ", " / ", ", "};

        TVector<TString> fragments = {TString(header)};
        for (const TStringBuf separator : separators) {
            fragments = SplitBy(separator, fragments);
        }
        return fragments;
    }

    // Splits the header with SplitByPunctuation and runs NormalizedName on
    // every fragment. Returns the normalized fragments in order, or an empty
    // vector as soon as any fragment yields no normalized value.
    TVector<TString> MatchMultiFunctionHeader(TStringBuf header) {
        TVector<TString> fragments = SplitByPunctuation(header);

        TVector<TString> normalizedNames;
        normalizedNames.reserve(fragments.size());

        for (TString& fragment : fragments) {
            TMaybe<TString> normalized = NormalizedName(std::move(fragment));
            if (!normalized.Defined()) {
                // All-or-nothing: one bad fragment rejects the whole header.
                return {};
            }
            normalizedNames.push_back(std::move(*normalized));
        }

        return normalizedNames;
    }

    // Returns the normalized names found in a header: first the header is
    // tried as a single name; if that yields nothing, it is tried as a list
    // of names. The result is empty when neither attempt succeeds.
    TVector<TString> ExtractNormalized(TStringBuf header) {
        const TMaybe<TString> single = MatchSingleFunctionHeader(header);
        if (single.Defined()) {
            return {*single};
        }

        // An unsuccessful multi-name match is already an empty vector.
        return MatchMultiFunctionHeader(header);
    }

    // Registers a link for every normalized name extracted from the header's
    // content. Each link points at `path` with the header's anchor; an entry
    // already stored under the same name is overwritten.
    void EnrichFromMarkdown(TLinks& links, const TString& path, const TMarkdownHeader& header) {
        const TVector<TString> names = ExtractNormalized(header.Content);
        for (const TString& functionName : names) {
            links[functionName] = {
                .RelativePath = path,
                .Anchor = header.Anchor,
            };
        }
    }

    // Adds links for the header of every section of `page`, all attributed to
    // the page located at `path`.
    void EnrichFromMarkdown(TLinks& links, const TString& path, const TMarkdownPage& page) {
        for (const auto& [_, section] : page.SectionsByAnchor) {
            // The map key is not needed: the header carries its own anchor.
            EnrichFromMarkdown(links, path, section.Header);
        }
    }

    // Adds links for every page in `pages`; the key of each entry is used as
    // the page's path.
    void EnrichFromMarkdown(TLinks& links, const TPages& pages) {
        for (const auto& entry : pages) {
            const auto& [pagePath, pageContent] = entry;
            EnrichFromMarkdown(links, pagePath, pageContent);
        }
    }

    // Builds a fresh link table from all headers found in `pages`.
    TLinks GetLinksFromPages(const TPages& pages) {
        TLinks collected;
        EnrichFromMarkdown(collected, pages);
        return collected;
    }

    // Returns `pages` reduced to the content that `links` refer to:
    //   * a page whose path is not the RelativePath of any link is removed;
    //   * in a remaining page, every section whose anchor is not the Anchor of
    //     some link to that page is removed.
    // `pages` is consumed: it is modified in place and moved into the result.
    TPages Stripped(TPages&& pages, const TLinks& links) {
        // Anchors referenced by at least one link, grouped by page path. A link
        // without an anchor is recorded as "" so its page still counts as used.
        THashMap<TString, THashSet<TString>> usedAnchors;
        for (const auto& [_, link] : links) {
            TString anchor = link.Anchor.GetOrElse("");
            usedAnchors[link.RelativePath].emplace(std::move(anchor));
        }

        // Pages are erased only after the loop, so the container being
        // iterated is never modified during iteration.
        TVector<TString> unusedPaths;
        for (auto& [path, page] : pages) {
            // Decided once per page rather than once per section, so a page
            // with no sections that no link references is dropped as well.
            const auto used = usedAnchors.find(path);
            if (used == usedAnchors.end()) {
                unusedPaths.emplace_back(path);
                continue;
            }

            // Collect first, erase afterwards: erasing from SectionsByAnchor
            // while iterating over it would not be safe.
            TVector<TString> staleAnchors;
            for (const auto& [anchor, _] : page.SectionsByAnchor) {
                if (!used->second.contains(anchor)) {
                    staleAnchors.emplace_back(anchor);
                }
            }
            for (const TString& anchor : staleAnchors) {
                page.SectionsByAnchor.erase(anchor);
            }
        }

        for (const TString& path : unusedPaths) {
            pages.erase(path);
        }

        // `pages` is an rvalue-reference parameter; the explicit move avoids a
        // deep copy regardless of the language standard in effect.
        return std::move(pages);
    }

} // namespace NYql::NDocs