path: root/contrib/python/markdown-it-py/markdown_it/parser_block.py
author    eivanov89 <[email protected]>  2025-08-29 10:12:02 +0300
committer eivanov89 <[email protected]>  2025-08-29 10:27:27 +0300
commit    140ced4d34c422c9f3cbe096f8dd35243b67d6e4 (patch)
tree      b7373341f64151c0ab9839ee692dc919366590d5 /contrib/python/markdown-it-py/markdown_it/parser_block.py
parent    136471c8b2f3ab8cd7993200c0de0456b7018118 (diff)
Add python/textual to YDB
commit_hash:eda16a869229724fec5479fa27fa5cdbccbe0395
Diffstat (limited to 'contrib/python/markdown-it-py/markdown_it/parser_block.py')
-rw-r--r--  contrib/python/markdown-it-py/markdown_it/parser_block.py  113
1 file changed, 113 insertions, 0 deletions
diff --git a/contrib/python/markdown-it-py/markdown_it/parser_block.py b/contrib/python/markdown-it-py/markdown_it/parser_block.py
new file mode 100644
index 00000000000..50a7184cf47
--- /dev/null
+++ b/contrib/python/markdown-it-py/markdown_it/parser_block.py
@@ -0,0 +1,113 @@
+"""Block-level tokenizer."""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+import logging
+from typing import TYPE_CHECKING
+
+from . import rules_block
+from .ruler import Ruler
+from .rules_block.state_block import StateBlock
+from .token import Token
+from .utils import EnvType
+
+if TYPE_CHECKING:
+ from markdown_it import MarkdownIt
+
+LOGGER = logging.getLogger(__name__)
+
+
+RuleFuncBlockType = Callable[[StateBlock, int, int, bool], bool]
+"""(state: StateBlock, startLine: int, endLine: int, silent: bool) -> matched: bool)
+
+`silent` disables token generation, useful for lookahead.
+"""
+
+_rules: list[tuple[str, RuleFuncBlockType, list[str]]] = [
+    # First 2 fields: rule name & rule function. The third field is a list
+    # of rules which can be terminated by this one.
+ ("table", rules_block.table, ["paragraph", "reference"]),
+ ("code", rules_block.code, []),
+ ("fence", rules_block.fence, ["paragraph", "reference", "blockquote", "list"]),
+ (
+ "blockquote",
+ rules_block.blockquote,
+ ["paragraph", "reference", "blockquote", "list"],
+ ),
+ ("hr", rules_block.hr, ["paragraph", "reference", "blockquote", "list"]),
+ ("list", rules_block.list_block, ["paragraph", "reference", "blockquote"]),
+ ("reference", rules_block.reference, []),
+ ("html_block", rules_block.html_block, ["paragraph", "reference", "blockquote"]),
+ ("heading", rules_block.heading, ["paragraph", "reference", "blockquote"]),
+ ("lheading", rules_block.lheading, []),
+ ("paragraph", rules_block.paragraph, []),
+]
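A sketch of how the third field is consumed: `Ruler.getRules(name)` returns every enabled rule whose `alt` option contains `name`, and rules such as `paragraph` run that list as terminators to decide whether they are interrupted:

```python
from markdown_it import MarkdownIt

md = MarkdownIt()
# With the default commonmark preset the table rule is disabled, so this
# should print the fence, blockquote, hr, list, html_block and heading
# handlers from the table above.
for fn in md.block.ruler.getRules("paragraph"):
    print(fn.__name__)
```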
+
+
+class ParserBlock:
+ """
+ ParserBlock#ruler -> Ruler
+
+    [[Ruler]] instance. Keeps the configuration of block rules.
+ """
+
+ def __init__(self) -> None:
+ self.ruler = Ruler[RuleFuncBlockType]()
+ for name, rule, alt in _rules:
+ self.ruler.push(name, rule, {"alt": alt})
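User code registers additional rules through the same `push` call; here is a usage sketch building on the hypothetical `percent_comment` rule above, where the `alt` list lets it interrupt a paragraph just like the built-in rules in the table:

```python
from markdown_it import MarkdownIt

md = MarkdownIt()
md.block.ruler.push("percent_comment", percent_comment, {"alt": ["paragraph"]})
tokens = md.parse("some text\n%% this line interrupts the paragraph\n")
# Expect a percent_comment token right after the paragraph tokens.
print([t.type for t in tokens])
```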
+
+ def tokenize(self, state: StateBlock, startLine: int, endLine: int) -> None:
+ """Generate tokens for input range."""
+ rules = self.ruler.getRules("")
+ line = startLine
+ maxNesting = state.md.options.maxNesting
+ hasEmptyLines = False
+
+ while line < endLine:
+ state.line = line = state.skipEmptyLines(line)
+ if line >= endLine:
+ break
+ if state.sCount[line] < state.blkIndent:
+ # Termination condition for nested calls.
+                # Nested calls are currently used for blockquotes & lists.
+ break
+ if state.level >= maxNesting:
+                # If the nesting level is exceeded, skip the tail to the end.
+                # That's not an ordinary situation; we need not care about the content.
+ state.line = endLine
+ break
+
+ # Try all possible rules.
+ # On success, rule should:
+ # - update `state.line`
+ # - update `state.tokens`
+ # - return True
+ for rule in rules:
+ if rule(state, line, endLine, False):
+ break
+
+            # state.tight is set when there was no empty line before the
+            # current tag, i.e. the latest empty line should not count
+ state.tight = not hasEmptyLines
+
+ line = state.line
+
+ # paragraph might "eat" one newline after it in nested lists
+ if (line - 1) < endLine and state.isEmpty(line - 1):
+ hasEmptyLines = True
+
+ if line < endLine and state.isEmpty(line):
+ hasEmptyLines = True
+ line += 1
+ state.line = line
+
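The `hasEmptyLines` bookkeeping above is what ultimately separates tight from loose lists: the list rule checks `state.tight` after tokenizing each item and, for tight lists, hides the inner paragraph tokens. A small check of that behaviour (sketch, token introspection only):

```python
from markdown_it import MarkdownIt

md = MarkdownIt()
tight = md.parse("- a\n- b\n")
loose = md.parse("- a\n\n- b\n")
# The blank line in the loose source sets hasEmptyLines, so state.tight
# is False for the second item and the paragraphs stay visible.
print(any(t.type == "paragraph_open" and t.hidden for t in tight))  # True
print(any(t.type == "paragraph_open" and t.hidden for t in loose))  # False
```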
+ def parse(
+ self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
+ ) -> list[Token] | None:
+ """Process input string and push block tokens into `outTokens`."""
+ if not src:
+ return None
+ state = StateBlock(src, md, env, outTokens)
+ self.tokenize(state, state.line, state.lineMax)
+ return state.tokens
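End-to-end, `MarkdownIt.parse` reaches this method through the core ruler's `block` rule; calling it directly is equivalent, as in this sketch. The `inline` tokens it leaves behind are placeholders whose children are filled in later by the inline parser:

```python
from markdown_it import MarkdownIt
from markdown_it.token import Token

md = MarkdownIt()
out: list[Token] = []
md.block.parse("# Title\n\nSome text.\n", md, {}, out)
print([t.type for t in out])
# ['heading_open', 'inline', 'heading_close',
#  'paragraph_open', 'inline', 'paragraph_close']
```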