diff options
author | vitya-smirnov <[email protected]> | 2025-07-30 11:26:26 +0300 |
---|---|---|
committer | vitya-smirnov <[email protected]> | 2025-07-30 11:38:37 +0300 |
commit | cf9f591e5c90bf964bb922c0f6c3716045972b02 (patch) | |
tree | 36d4eb0816606653836399ac32ea58d2eca08b53 /yql/essentials/utils/docs/link_page.cpp | |
parent | ada885655c2e21f6b55e2d3d724e57c9a1fdb843 (diff) |
YQL-20112: Improve dramatically yql/utils/docs
Introduced the `links.json` format to link names to
documentation sections. Implemented a general link
verification framework. Also fixed two small typos.
Extended Description: https://nda.ya.ru/t/zR4voivb7GzD9r.
commit_hash:e72db0e202b4ff612374c73fa384f70d029f0ef0
Diffstat (limited to 'yql/essentials/utils/docs/link_page.cpp')
-rw-r--r-- | yql/essentials/utils/docs/link_page.cpp | 116 |
1 files changed, 116 insertions, 0 deletions
diff --git a/yql/essentials/utils/docs/link_page.cpp b/yql/essentials/utils/docs/link_page.cpp new file mode 100644 index 00000000000..eb71979462d --- /dev/null +++ b/yql/essentials/utils/docs/link_page.cpp @@ -0,0 +1,116 @@ +#include "link_page.h" + +#include "name.h" + +#include <util/generic/hash_set.h> +#include <util/string/split.h> + +namespace NYql::NDocs { + + TMaybe<TString> MatchSingleFunctionHeader(TStringBuf header) { + return NormalizedName(TString(header)); + } + + TVector<TString> SplitBy(TStringBuf delim, const TVector<TString>& strings) { + TVector<TString> parts; + for (const TString& s : strings) { + StringSplitter(s).SplitByString(delim).AddTo(&parts); + } + return parts; + } + + TVector<TString> SplitByPunctuation(TStringBuf header) { + TVector<TString> parts = {TString(header)}; + parts = SplitBy(" и ", parts); + parts = SplitBy(" / ", parts); + parts = SplitBy(", ", parts); + return parts; + } + + TVector<TString> MatchMultiFunctionHeader(TStringBuf header) { + TVector<TString> names = SplitByPunctuation(header); + + for (TString& name : names) { + TMaybe<TString> normalized = NormalizedName(std::move(name)); + if (!normalized) { + return {}; + } + + name = std::move(*normalized); + } + + return names; + } + + TVector<TString> ExtractNormalized(TStringBuf header) { + if (auto single = MatchSingleFunctionHeader(header)) { + return {*single}; + } + if (auto multi = MatchMultiFunctionHeader(header)) { + return multi; + } + return {}; + } + + void EnrichFromMarkdown(TLinks& links, const TString& path, const TMarkdownHeader& header) { + for (const TString& name : ExtractNormalized(header.Content)) { + links[name] = { + .RelativePath = path, + .Anchor = header.Anchor, + }; + } + } + + void EnrichFromMarkdown(TLinks& links, const TString& path, const TMarkdownPage& page) { + for (const auto& [anchor, section] : page.SectionsByAnchor) { + const TMarkdownHeader& header = section.Header; + EnrichFromMarkdown(links, path, header); + } + } + + void 
EnrichFromMarkdown(TLinks& links, const TPages& pages) { + for (const auto& [path, page] : pages) { + EnrichFromMarkdown(links, path, page); + } + } + + TLinks GetLinksFromPages(const TPages& pages) { + TLinks links; + EnrichFromMarkdown(links, pages); + return links; + } + + TPages Stripped(TPages&& pages, const TLinks& links) { + THashSet<TString> usedPaths; + THashMap<TString, THashSet<TString>> usedAnchors; + for (const auto& [_, link] : links) { + TString anchor = link.Anchor.GetOrElse(""); + usedAnchors[link.RelativePath].emplace(std::move(anchor)); + } + + THashSet<TString> unusedPaths; + THashMap<TString, THashSet<TString>> unusedAnchors; + for (const auto& [path, page] : pages) { + for (const auto& [anchor, _] : page.SectionsByAnchor) { + if (!usedAnchors.contains(path)) { + unusedPaths.emplace(path); + } else if (!usedAnchors[path].contains(anchor)) { + unusedAnchors[path].emplace(anchor); + } + } + } + + for (const auto& [path, anchors] : unusedAnchors) { + for (const auto& anchor : anchors) { + pages[path].SectionsByAnchor.erase(anchor); + } + } + + for (const auto& path : unusedPaths) { + pages.erase(path); + } + + return pages; + } + +} // namespace NYql::NDocs |