1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
#include "link_page.h"
#include "name.h"
#include <util/generic/hash_set.h>
#include <util/string/split.h>
namespace NYql::NDocs {
// Treats the whole header text as a single candidate name and returns
// whatever NormalizedName yields for it (an empty TMaybe if it is rejected).
TMaybe<TString> MatchSingleFunctionHeader(TStringBuf header) {
    TString candidate(header);
    return NormalizedName(std::move(candidate));
}
// Splits every element of `strings` on the substring `delim` and returns all
// resulting pieces in one flat vector, preserving the input order.
TVector<TString> SplitBy(TStringBuf delim, const TVector<TString>& strings) {
    TVector<TString> pieces;
    for (const auto& source : strings) {
        // AddTo appends, so pieces from earlier elements are kept.
        StringSplitter(source)
            .SplitByString(delim)
            .AddTo(&pieces);
    }
    return pieces;
}
// Breaks a header into fragments on the separators " и " (Russian "and"),
// " / " and ", ", applied one after another in that order.
TVector<TString> SplitByPunctuation(TStringBuf header) {
    const TStringBuf separators[] = {" и ", " / ", ", "};

    TVector<TString> fragments;
    fragments.push_back(TString(header));

    // Each pass re-splits the fragments produced by the previous pass.
    for (const TStringBuf separator : separators) {
        fragments = SplitBy(separator, fragments);
    }
    return fragments;
}
// Interprets the header as a list of names separated by punctuation.
// Returns the normalized form of every fragment, or an empty vector as soon
// as any fragment fails to normalize.
TVector<TString> MatchMultiFunctionHeader(TStringBuf header) {
    TVector<TString> fragments = SplitByPunctuation(header);

    TVector<TString> normalizedNames;
    for (TString& fragment : fragments) {
        TMaybe<TString> candidate = NormalizedName(std::move(fragment));
        if (!candidate.Defined()) {
            // One bad fragment invalidates the whole header.
            return {};
        }
        normalizedNames.push_back(std::move(*candidate));
    }
    return normalizedNames;
}
// Extracts the normalized names a header refers to.
// The header is first tried as a single name; if that fails it is tried as a
// punctuation-separated list. The result is empty when neither form matches.
TVector<TString> ExtractNormalized(TStringBuf header) {
    TMaybe<TString> single = MatchSingleFunctionHeader(header);
    if (single.Defined()) {
        return {*single};
    }
    // MatchMultiFunctionHeader already yields an empty vector on failure.
    return MatchMultiFunctionHeader(header);
}
// Registers a link for every normalized name found in the header's content.
// Each link points at `path` with the header's anchor; an existing entry for
// the same name is overwritten.
void EnrichFromMarkdown(TLinks& links, const TString& path, const TMarkdownHeader& header) {
    const TVector<TString> names = ExtractNormalized(header.Content);
    for (const TString& name : names) {
        auto& target = links[name];
        target = {
            .RelativePath = path,
            .Anchor = header.Anchor,
        };
    }
}
// Registers links for the header of every section of `page`, all pointing
// at `path`.
void EnrichFromMarkdown(TLinks& links, const TString& path, const TMarkdownPage& page) {
    for (const auto& [_, section] : page.SectionsByAnchor) {
        EnrichFromMarkdown(links, path, section.Header);
    }
}
// Registers links for every page in `pages`, using each entry's key as the
// page path.
void EnrichFromMarkdown(TLinks& links, const TPages& pages) {
    for (const auto& entry : pages) {
        EnrichFromMarkdown(links, entry.first, entry.second);
    }
}
// Builds a fresh link table from all headers found in `pages`.
TLinks GetLinksFromPages(const TPages& pages) {
    TLinks collected;
    for (const auto& [path, page] : pages) {
        EnrichFromMarkdown(collected, path, page);
    }
    return collected;
}
// Returns `pages` reduced to the content that `links` actually points at.
//
// - A page that has at least one section, but whose path is not the
//   RelativePath of any link, is removed entirely.
// - In a page that is referenced, every section whose anchor is not the
//   Anchor of some link to that page is removed. A link without an anchor
//   counts as referencing the anchor "".
//
// NOTE(review): a page with no sections is never removed, even when no link
// references it — confirm this is intended.
TPages Stripped(TPages&& pages, const TLinks& links) {
    // path -> set of anchors referenced by at least one link.
    THashMap<TString, THashSet<TString>> usedAnchors;
    for (const auto& [_, link] : links) {
        TString anchor = link.Anchor.GetOrElse("");
        usedAnchors[link.RelativePath].emplace(std::move(anchor));
    }

    // Removals are collected first and applied afterwards, so that `pages`
    // is not modified while it is being iterated.
    THashSet<TString> unusedPaths;
    THashMap<TString, THashSet<TString>> unusedAnchors;
    for (const auto& [path, page] : pages) {
        // One lookup per page instead of one (or two) per section.
        const auto used = usedAnchors.find(path);
        if (used == usedAnchors.end()) {
            if (!page.SectionsByAnchor.empty()) {
                unusedPaths.emplace(path);
            }
            continue;
        }

        const THashSet<TString>& referenced = used->second;
        for (const auto& [anchor, _] : page.SectionsByAnchor) {
            if (!referenced.contains(anchor)) {
                unusedAnchors[path].emplace(anchor);
            }
        }
    }

    for (const auto& [path, anchors] : unusedAnchors) {
        auto& sections = pages[path].SectionsByAnchor;
        for (const auto& anchor : anchors) {
            sections.erase(anchor);
        }
    }

    for (const auto& path : unusedPaths) {
        pages.erase(path);
    }

    return pages;
}
} // namespace NYql::NDocs
|