summary | refs | log | tree | commit | diff | stats
path: root/yql/essentials/utils/docs/markdown.cpp
diff options
context:
space:
mode:
author: vitya-smirnov <[email protected]> 2025-07-30 11:26:26 +0300
committer: vitya-smirnov <[email protected]> 2025-07-30 11:38:37 +0300
commit: cf9f591e5c90bf964bb922c0f6c3716045972b02 (patch)
tree: 36d4eb0816606653836399ac32ea58d2eca08b53 /yql/essentials/utils/docs/markdown.cpp
parent: ada885655c2e21f6b55e2d3d724e57c9a1fdb843 (diff)
YQL-20112: Dramatically improve yql/utils/docs
Introduced the `links.json` format to link names to documentation sections. Implemented a general link verification framework. Also fixed two small typos. Extended description: https://nda.ya.ru/t/zR4voivb7GzD9r. commit_hash:e72db0e202b4ff612374c73fa384f70d029f0ef0
Diffstat (limited to 'yql/essentials/utils/docs/markdown.cpp')
-rw-r--r-- yql/essentials/utils/docs/markdown.cpp 80
1 files changed, 66 insertions, 14 deletions
diff --git a/yql/essentials/utils/docs/markdown.cpp b/yql/essentials/utils/docs/markdown.cpp
index eb584c6b47a..ca0f555a815 100644
--- a/yql/essentials/utils/docs/markdown.cpp
+++ b/yql/essentials/utils/docs/markdown.cpp
@@ -5,11 +5,22 @@
#include <contrib/libs/re2/re2/re2.h>
#include <util/generic/yexception.h>
+#include <util/charset/utf8.h>
-namespace NSQLComplete {
+namespace NYql::NDocs {
class TMarkdownParser {
+ private:
+ static constexpr TStringBuf HeaderRegex = R"re(([^#]+)(\s+{#([a-z0-9\-_]+)})?)re"; // Group 1: header text (no '#'); optional group 2: whitespace plus "{#anchor}"; group 3: the anchor id without '#'.
+
public:
+ // Builds a parser that treats headers with exactly `headerDepth` '#'
+ // characters as section boundaries. The section regex is: optional leading
+ // spaces, the '#' run, a single space, then HeaderRegex.
+ explicit TMarkdownParser(size_t headerDepth)
+ : HeaderDepth_(headerDepth)
+ , SectionHeaderRegex_(" *" + TString(headerDepth, '#') + " " + HeaderRegex)
+ , IsSkipping_(true) // Starts in skipping mode.
+ {
+ }
+
void Parse(IInputStream& markdown, TMarkdownCallback&& onSection) {
for (TString line; markdown.ReadLine(line) != 0;) {
if (IsSkipping_) {
@@ -42,10 +53,10 @@ namespace NSQLComplete {
TString content;
std::optional<TString> dummy;
std::optional<TString> anchor;
- YQL_ENSURE(
- RE2::FullMatch(line, SectionHeaderRegex_, &content, &dummy, &anchor),
- "line '" << line << "' does not match regex '"
- << SectionHeaderRegex_.pattern() << "'");
+ if (!RE2::FullMatch(line, SectionHeaderRegex_, &content, &dummy, &anchor)) {
+ Section_.Header.Content = std::move(line);
+ return;
+ }
Section_.Header.Content = std::move(content);
if (anchor) {
@@ -54,22 +65,63 @@ namespace NSQLComplete {
}
bool IsSectionHeader(TStringBuf line) const {
- return HeaderDepth(line) == 2;
+ return HeaderDepth(line) == HeaderDepth_; // True when the depth computed for the line equals the depth this parser was constructed with.
}
size_t HeaderDepth(TStringBuf line) const {
- size_t pos = line.find_first_not_of('#');
- return pos != TStringBuf::npos ? pos : 0;
+ // Returns the number of '#' characters that open a Markdown header line,
+ // or 0 when the line is not a header.
+ //
+ // Only spaces may precede the '#' run, mirroring the " *" prefix of the
+ // section header regex. Without this check a '#' run in the middle of
+ // ordinary text (e.g. "a ## b") would be reported as a header.
+ //
+ // A line that consists solely of '#' characters (no header text after the
+ // run) also yields 0.
+ size_t begin = line.find_first_not_of(' ');
+ if (begin == TStringBuf::npos || line[begin] != '#') {
+     return 0;
+ }
+ size_t end = line.find_first_not_of('#', begin);
+ return end != TStringBuf::npos ? (end - begin) : 0;
}
- RE2 SectionHeaderRegex_{R"re(## ([^#]+)(\s+{(#[a-z0-9\-_]+)})?)re"};
- bool IsSkipping_ = true;
+ size_t HeaderDepth_;
+ RE2 SectionHeaderRegex_;
+ bool IsSkipping_;
TMarkdownSection Section_;
};
- void ParseMarkdown(IInputStream& markdown, TMarkdownCallback&& onSection) {
- TMarkdownParser parser;
- parser.Parse(markdown, std::forward<TMarkdownCallback>(onSection));
+ // Resolves the anchor of a header.
+ //
+ // An explicit anchor stored in the header is returned as is. Otherwise the
+ // header content is lowercased (UTF-8 aware), spaces are replaced with '-',
+ // and the result is returned only if it consists entirely of [0-9a-z-_]
+ // characters. In every other case Nothing() is returned.
+ TMaybe<TString> Anchor(const TMarkdownHeader& header) {
+     if (header.Anchor) {
+         return header.Anchor;
+     }
+
+     static RE2 AnchorRegex(R"re([0-9a-z\-_]+)re");
+
+     TString slug = ToLowerUTF8(header.Content);
+     SubstGlobal(slug, ' ', '-');
+
+     if (!RE2::FullMatch(slug, AnchorRegex)) {
+         return Nothing();
+     }
+
+     return slug;
+ }
+
+ // Parses a Markdown document into a page: the original text plus its
+ // sections keyed by anchor. The text is scanned twice, first for depth-2
+ // ("##") and then for depth-3 ("###") section headers.
+ TMarkdownPage ParseMarkdownPage(TString markdown) {
+     TMarkdownPage page;
+
+     // Sections for which no anchor can be resolved are dropped; the rest
+     // get the resolved anchor written back and are stored under it.
+     const auto onSection = [&](TMarkdownSection&& section) {
+         TMaybe<TString> anchor = Anchor(section.Header);
+         if (!anchor) {
+             return;
+         }
+         section.Header.Anchor = anchor;
+         page.SectionsByAnchor[*anchor] = std::move(section);
+     };
+
+     // Each pass uses a fresh parser and a fresh stream over the same text.
+     const auto parseAtDepth = [&](size_t headerDepth) {
+         TMarkdownParser parser(headerDepth);
+         TStringStream stream(markdown);
+         parser.Parse(stream, onSection);
+     };
+
+     parseAtDepth(2);
+     parseAtDepth(3);
+
+     // Moved only after both passes, since the passes read `markdown`.
+     page.Text = std::move(markdown);
+
+     return page;
}
-} // namespace NSQLComplete
+} // namespace NYql::NDocs