diff options
| author | robot-piglet <[email protected]> | 2026-05-22 23:54:15 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2026-05-23 00:31:16 +0300 |
| commit | 88de1fde2ead7a8a664f403cf8c6a29cc3f718eb (patch) | |
| tree | a06fe19732b21ca5bf728b6e091f06a9aa8a59b0 /contrib/python/markdown-it-py | |
| parent | a4934b97e2fbc92ceda3743e051dff0e1d6e2708 (diff) | |
Intermediate changes
commit_hash:a0d2bdedf870db96a3096f257f9ad580475d6d26
Diffstat (limited to 'contrib/python/markdown-it-py')
18 files changed, 387 insertions, 81 deletions
diff --git a/contrib/python/markdown-it-py/.dist-info/METADATA b/contrib/python/markdown-it-py/.dist-info/METADATA index 0f2b466a638..488e089b08e 100644 --- a/contrib/python/markdown-it-py/.dist-info/METADATA +++ b/contrib/python/markdown-it-py/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: markdown-it-py -Version: 4.0.0 +Version: 4.1.0 Summary: Python port of markdown-it. Markdown parsing, done right! Keywords: markdown,lexer,parser,commonmark,markdown-it Author-email: Chris Sewell <[email protected]> @@ -46,6 +46,7 @@ Requires-Dist: coverage ; extra == "testing" Requires-Dist: pytest ; extra == "testing" Requires-Dist: pytest-cov ; extra == "testing" Requires-Dist: pytest-regressions ; extra == "testing" +Requires-Dist: pytest-timeout ; extra == "testing" Requires-Dist: requests ; extra == "testing" Project-URL: Documentation, https://markdown-it-py.readthedocs.io Project-URL: Homepage, https://github.com/executablebooks/markdown-it-py @@ -160,11 +161,12 @@ Render markdown to HTML with markdown-it-py from the command-line: ```console -usage: markdown-it [-h] [-v] [filenames [filenames ...]] +usage: markdown-it [-h] [-v] [--stdin|filenames [filenames ...]] Parse one or more markdown files, convert each to HTML, and print to stdout positional arguments: + --stdin read source Markdown file from standard input filenames specify an optional list of files to convert optional arguments: diff --git a/contrib/python/markdown-it-py/README.md b/contrib/python/markdown-it-py/README.md index b94729f8545..82d218b3ae6 100644 --- a/contrib/python/markdown-it-py/README.md +++ b/contrib/python/markdown-it-py/README.md @@ -101,11 +101,12 @@ Render markdown to HTML with markdown-it-py from the command-line: ```console -usage: markdown-it [-h] [-v] [filenames [filenames ...]] +usage: markdown-it [-h] [-v] [--stdin|filenames [filenames ...]] Parse one or more markdown files, convert each to HTML, and print to stdout positional arguments: + --stdin read source Markdown 
file from standard input filenames specify an optional list of files to convert optional arguments: diff --git a/contrib/python/markdown-it-py/markdown_it/__init__.py b/contrib/python/markdown-it-py/markdown_it/__init__.py index 9fac279576e..523d92ef4ba 100644 --- a/contrib/python/markdown-it-py/markdown_it/__init__.py +++ b/contrib/python/markdown-it-py/markdown_it/__init__.py @@ -1,6 +1,6 @@ """A Python port of Markdown-It""" __all__ = ("MarkdownIt",) -__version__ = "4.0.0" +__version__ = "4.1.0" from .main import MarkdownIt diff --git a/contrib/python/markdown-it-py/markdown_it/cli/parse.py b/contrib/python/markdown-it-py/markdown_it/cli/parse.py index fe346b2f51b..5de738b2da7 100644 --- a/contrib/python/markdown-it-py/markdown_it/cli/parse.py +++ b/contrib/python/markdown-it-py/markdown_it/cli/parse.py @@ -21,6 +21,8 @@ def main(args: Sequence[str] | None = None) -> int: namespace = parse_args(args) if namespace.filenames: convert(namespace.filenames) + elif namespace.stdin: + convert_stdin() else: interactive() return 0 @@ -31,6 +33,18 @@ def convert(filenames: Iterable[str]) -> None: convert_file(filename) +def convert_stdin() -> None: + """ + Parse a Markdown file and dump the output to stdout. + """ + try: + rendered = MarkdownIt().render(sys.stdin.read()) + print(rendered, end="") + except OSError: + sys.stderr.write("Cannot parse Markdown from the standard input.\n") + sys.exit(1) + + def convert_file(filename: str) -> None: """ Parse a Markdown file and dump the output to stdout. 
@@ -95,6 +109,9 @@ Batch: ) parser.add_argument("-v", "--version", action="version", version=version_str) parser.add_argument( + "--stdin", action="store_true", help="read Markdown from standard input" + ) + parser.add_argument( "filenames", nargs="*", help="specify an optional list of files to convert" ) return parser.parse_args(args) diff --git a/contrib/python/markdown-it-py/markdown_it/main.py b/contrib/python/markdown-it-py/markdown_it/main.py index bf9fd18f3f3..87835e541f1 100644 --- a/contrib/python/markdown-it-py/markdown_it/main.py +++ b/contrib/python/markdown-it-py/markdown_it/main.py @@ -26,6 +26,7 @@ _PRESETS: dict[str, PresetType] = { "zero": presets.zero.make(), "commonmark": presets.commonmark.make(), "gfm-like": presets.gfm_like.make(), + "gfm-like2": presets.gfm_like2.make(), } @@ -125,7 +126,7 @@ class MarkdownIt: if options_update: options = {**options, **options_update} # type: ignore - self.set(options) # type: ignore + self.set(options) if "components" in config: for name, component in config["components"].items(): diff --git a/contrib/python/markdown-it-py/markdown_it/parser_inline.py b/contrib/python/markdown-it-py/markdown_it/parser_inline.py index 26ec2e636d4..8fabb9884bb 100644 --- a/contrib/python/markdown-it-py/markdown_it/parser_inline.py +++ b/contrib/python/markdown-it-py/markdown_it/parser_inline.py @@ -3,6 +3,8 @@ from __future__ import annotations from collections.abc import Callable +import functools +import re from typing import TYPE_CHECKING from . import rules_inline @@ -15,6 +17,47 @@ if TYPE_CHECKING: from markdown_it import MarkdownIt +# Default set of characters that terminate a text token and allow inline rules to fire. +# '{}$%@~+=:' reserved for extensions. +# Note: Don't confuse with "Markdown ASCII Punctuation" chars. 
+# http://spec.commonmark.org/0.15/#ascii-punctuation-character +_DEFAULT_TERMINATORS: frozenset[str] = frozenset( + { + "\n", + "!", + "#", + "$", + "%", + "&", + "*", + "+", + "-", + ":", + "<", + "=", + ">", + "@", + "[", + "\\", + "]", + "^", + "_", + "`", + "{", + "}", + "~", + } +) + + +# Lazily compiled regex for the default terminator set. The @cache ensures it is +# compiled at most once (on first ParserInline instantiation) and shared across all +# instances that have not added extra chars, keeping __init__ cost near zero. +@functools.cache +def _default_terminator_re() -> re.Pattern[str]: + return re.compile("[" + re.escape("".join(_DEFAULT_TERMINATORS)) + "]") + + # Parser rules RuleFuncInlineType = Callable[[StateInline, bool], bool] """(state: StateInline, silent: bool) -> matched: bool) @@ -61,6 +104,30 @@ class ParserInline: self.ruler2 = Ruler[RuleFuncInline2Type]() for name, rule2 in _rules2: self.ruler2.push(name, rule2) + # Characters that stop the text rule, allowing other inline rules to fire. + # _extra_terminator_chars is only allocated when add_terminator_char() is called + # with a char outside the defaults, keeping __init__ allocation-free. + self._extra_terminator_chars: set[str] = set() + # Pre-compiled regex shared with all default instances (no copy in the common path). + self.terminator_re: re.Pattern[str] = _default_terminator_re() + + def add_terminator_char(self, ch: str) -> None: + """Register a character that stops the ``text`` rule, allowing inline rules to fire. + + This lets plugins declare which characters their inline rules react to, + mirroring the ``MARKER`` mechanism in the Rust markdown-it implementation. + + :param ch: A single character to add to the terminator set. 
+ """ + if ch not in _DEFAULT_TERMINATORS and ch not in self._extra_terminator_chars: + self._extra_terminator_chars.add(ch) + self.terminator_re = re.compile( + "[" + + re.escape( + "".join(_DEFAULT_TERMINATORS | self._extra_terminator_chars) + ) + + "]" + ) def skipToken(self, state: StateInline) -> None: """Skip single token by running all rules in validation mode; diff --git a/contrib/python/markdown-it-py/markdown_it/presets/__init__.py b/contrib/python/markdown-it-py/markdown_it/presets/__init__.py index e21c7806930..43578148cee 100644 --- a/contrib/python/markdown-it-py/markdown_it/presets/__init__.py +++ b/contrib/python/markdown-it-py/markdown_it/presets/__init__.py @@ -1,4 +1,4 @@ -__all__ = ("commonmark", "default", "gfm_like", "js_default", "zero") +__all__ = ("commonmark", "default", "gfm_like", "gfm_like2", "js_default", "zero") from ..utils import PresetType from . import commonmark, default, zero @@ -26,3 +26,23 @@ class gfm_like: # noqa: N801 config["options"]["linkify"] = True config["options"]["html"] = True return config + + +class gfm_like2: # noqa: N801 + """GitHub Flavoured Markdown (GFM) like, extended. 
+ + Builds on ``gfm-like`` and additionally enables: + + - Task lists (``- [x] done``) + - Alerts (``> [!NOTE]``) + - Single-tilde strikethrough (``~text~`` in addition to ``~~text~~``) + """ + + @staticmethod + def make() -> PresetType: + config = gfm_like.make() + config["options"]["tasklists"] = True + config["options"]["tasklists_editable"] = False + config["options"]["alerts"] = True + config["options"]["strikethrough_single_tilde"] = True + return config diff --git a/contrib/python/markdown-it-py/markdown_it/renderer.py b/contrib/python/markdown-it-py/markdown_it/renderer.py index 6d60589adaa..f690b091e2b 100644 --- a/contrib/python/markdown-it-py/markdown_it/renderer.py +++ b/contrib/python/markdown-it-py/markdown_it/renderer.py @@ -209,6 +209,26 @@ class RendererHTML(RendererProtocol): ################################################### + def list_item_open( + self, + tokens: Sequence[Token], + idx: int, + options: OptionsDict, + env: EnvType, + ) -> str: + token = tokens[idx] + result = self.renderToken(tokens, idx, options, env) + if token.meta and "checked" in token.meta: + checked_attr = ' checked=""' if token.meta["checked"] else "" + disabled_attr = ( + "" if options.get("tasklists_editable", False) else ' disabled=""' + ) + result += ( + '<input class="task-list-item-checkbox"' + f'{disabled_attr} type="checkbox"{checked_attr}> ' + ) + return result + def code_inline( self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType ) -> str: diff --git a/contrib/python/markdown-it-py/markdown_it/rules_block/blockquote.py b/contrib/python/markdown-it-py/markdown_it/rules_block/blockquote.py index 0c9081b9cbd..de2d4f2d2f1 100644 --- a/contrib/python/markdown-it-py/markdown_it/rules_block/blockquote.py +++ b/contrib/python/markdown-it-py/markdown_it/rules_block/blockquote.py @@ -273,17 +273,58 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> oldIndent = state.blkIndent state.blkIndent = 0 - token = 
state.push("blockquote_open", "blockquote", 1) - token.markup = ">" - token.map = lines = [startLine, 0] + # Detect GitHub-style alert marker on the first content line. + # Note: `startLine` here refers to the first content line of the + # blockquote, after the `>` prefix has already been stripped by the + # blockquote parser above (bMarks/tShift adjusted to skip `> `). + alert_kind = None + if state.md.options.get("alerts", False) and nextLine > startLine: + alert_kind = _detect_alert(state, startLine) - state.md.block.tokenize(state, startLine, nextLine) + lines = [startLine, 0] - token = state.push("blockquote_close", "blockquote", -1) - token.markup = ">" + if alert_kind is not None: + # Emit alert tokens instead of blockquote tokens + alert_lower = alert_kind.lower() + token = state.push("alert_open", "div", 1) + token.markup = ">" + token.attrSet("class", f"markdown-alert markdown-alert-{alert_lower}") + token.map = lines + token.info = alert_kind + token.meta = {"kind": alert_kind} + + # Emit a title paragraph: <p class="markdown-alert-title">Kind</p> + token = state.push("alert_title_open", "p", 1) + token.attrSet("class", "markdown-alert-title") + title_token = state.push("inline", "", 0) + title_token.content = alert_kind.capitalize() + title_token.children = [] + token = state.push("alert_title_close", "p", -1) + + # Skip the marker line (startLine) and tokenize from startLine + 1. + contentStart = startLine + 1 + if contentStart < nextLine: + # tokenize() updates state.line to nextLine as part of its + # contract, consistent with the blockquote code path below. 
+ state.md.block.tokenize(state, contentStart, nextLine) + else: + state.line = nextLine + + token = state.push("alert_close", "div", -1) + token.markup = ">" + else: + token = state.push("blockquote_open", "blockquote", 1) + token.markup = ">" + token.map = lines + + state.md.block.tokenize(state, startLine, nextLine) + + token = state.push("blockquote_close", "blockquote", -1) + token.markup = ">" state.lineMax = oldLineMax state.parentType = oldParentType + # Update the opening token map for both alert and blockquote containers. lines[1] = state.line # Restore original tShift; this might not be necessary since the parser @@ -297,3 +338,31 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.blkIndent = oldIndent return True + + +_ALERT_TYPES = {"NOTE", "TIP", "IMPORTANT", "WARNING", "CAUTION"} + + +def _detect_alert(state: StateBlock, startLine: int) -> str | None: + """Detect ``[!TYPE]`` on *startLine* (after ``>`` prefix has been stripped). + + Returns the alert type string (e.g. ``"NOTE"``) or ``None``. 
+ """ + pos = state.bMarks[startLine] + state.tShift[startLine] + maximum = state.eMarks[startLine] + src = state.src + + # Trim trailing whitespace + while maximum > pos and src[maximum - 1] in (" ", "\t"): + maximum -= 1 + + if maximum - pos < 4: + return None + if src[pos] != "[" or src[pos + 1] != "!": + return None + if src[maximum - 1] != "]": + return None + type_str = src[pos + 2 : maximum - 1].upper() + if type_str not in _ALERT_TYPES: + return None + return type_str diff --git a/contrib/python/markdown-it-py/markdown_it/rules_block/list.py b/contrib/python/markdown-it-py/markdown_it/rules_block/list.py index d8070d74703..c8fe7af5d5d 100644 --- a/contrib/python/markdown-it-py/markdown_it/rules_block/list.py +++ b/contrib/python/markdown-it-py/markdown_it/rules_block/list.py @@ -235,8 +235,20 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> if isOrdered: token.info = state.src[start : posAfterMarker - 1] + # Detect GFM task checkbox: `[ ] ` or `[x] `/`[X] ` at content start + checkboxLen = 0 + if state.md.options.get("tasklists", False) and contentStart < maximum: + checked = _detect_task_checkbox(state.src, contentStart, maximum) + if checked is not None: + token.meta = {"checked": checked} + # Advance content past the checkbox: `[x]` (3 chars) + whitespace. + # `_detect_task_checkbox` already guarantees a whitespace char at + # pos+3, so we always consume 4 characters. + checkboxLen = 4 + # change current state, then restore it after parser subcall oldTight = state.tight + oldBMark = state.bMarks[startLine] oldTShift = state.tShift[startLine] oldSCount = state.sCount[startLine] @@ -252,6 +264,12 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.tShift[startLine] = contentStart - state.bMarks[startLine] state.sCount[startLine] = offset + # If we detected a checkbox, advance bMarks past it so that + # getLines() doesn't include the checkbox text in the content. 
+ if checkboxLen: + state.bMarks[startLine] = contentStart + checkboxLen + state.tShift[startLine] = 0 + if contentStart >= maximum and state.isEmpty(startLine + 1): # workaround for this case # (list item is empty, list terminates before "foo"): @@ -277,6 +295,8 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> state.blkIndent = state.listIndent state.listIndent = oldListIndent + if checkboxLen: + state.bMarks[startLine] = oldBMark state.tShift[startLine] = oldTShift state.sCount[startLine] = oldSCount state.tight = oldTight @@ -326,6 +346,24 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> break # Finalize list + + # If any direct list item has a task checkbox, add class to the list + if state.md.options.get("tasklists", False): + containsTask = False + level = state.tokens[listTokIdx].level + for j in range(listTokIdx + 1, len(state.tokens)): + tok = state.tokens[j] + if ( + tok.level == level + 1 + and tok.type == "list_item_open" + and tok.meta + and "checked" in tok.meta + ): + tok.attrJoin("class", "task-list-item") + containsTask = True + if containsTask: + state.tokens[listTokIdx].attrJoin("class", "contains-task-list") + if isOrdered: token = state.push("ordered_list_close", "ol", -1) else: @@ -343,3 +381,28 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) -> markTightParagraphs(state, listTokIdx) return True + + +def _detect_task_checkbox(src: str, pos: int, maximum: int) -> bool | None: + """Detect ``[ ]``, ``[x]``, or ``[X]`` at *pos*, followed by whitespace. + + Returns ``True`` (checked), ``False`` (unchecked), or ``None`` (no match). 
+ """ + # Need at least 4 chars: `[`, char, `]`, whitespace + if pos + 4 > maximum: + return None + if src[pos] != "[": + return None + inner = src[pos + 1] + if src[pos + 2] != "]": + return None + if inner == " ": + checked = False + elif inner in ("x", "X"): + checked = True + else: + return None + # After `]`, must have whitespace + if src[pos + 3] not in (" ", "\t"): + return None + return checked diff --git a/contrib/python/markdown-it-py/markdown_it/rules_core/text_join.py b/contrib/python/markdown-it-py/markdown_it/rules_core/text_join.py index 5379f6d7a8e..939b83b29f7 100644 --- a/contrib/python/markdown-it-py/markdown_it/rules_core/text_join.py +++ b/contrib/python/markdown-it-py/markdown_it/rules_core/text_join.py @@ -21,7 +21,10 @@ def text_join(state: StateCore) -> None: # convert text_special to text and join all adjacent text nodes new_tokens: list[Token] = [] - for child_token in inline_token.children or []: + children = inline_token.children or [] + i = 0 + while i < len(children): + child_token = children[i] if child_token.type == "text_special": child_token.type = "text" if ( @@ -29,7 +32,22 @@ def text_join(state: StateCore) -> None: and new_tokens and new_tokens[-1].type == "text" ): - new_tokens[-1].content += child_token.content + # Collapse a run of adjacent text nodes in a single join, instead + # of pairwise `a + b` concatenation. The pairwise form is O(L*k) + # in the size of the run because each step rebuilds the growing + # prefix; "".join is O(L). 
+ parts = [new_tokens[-1].content, child_token.content] + i += 1 + while i < len(children): + next_token = children[i] + if next_token.type == "text_special": + next_token.type = "text" + if next_token.type != "text": + break + parts.append(next_token.content) + i += 1 + new_tokens[-1].content = "".join(parts) else: new_tokens.append(child_token) + i += 1 inline_token.children = new_tokens diff --git a/contrib/python/markdown-it-py/markdown_it/rules_inline/fragments_join.py b/contrib/python/markdown-it-py/markdown_it/rules_inline/fragments_join.py index f795c1364b8..5eb88a14025 100644 --- a/contrib/python/markdown-it-py/markdown_it/rules_inline/fragments_join.py +++ b/contrib/python/markdown-it-py/markdown_it/rules_inline/fragments_join.py @@ -29,14 +29,25 @@ def fragments_join(state: StateInline) -> None: and curr + 1 < maximum and state.tokens[curr + 1].type == "text" ): - # collapse two adjacent text nodes - state.tokens[curr + 1].content = ( - state.tokens[curr].content + state.tokens[curr + 1].content - ) - else: - if curr != last: - state.tokens[last] = state.tokens[curr] + # Collapse a run of adjacent text nodes in a single join, instead + # of pairwise `a + b` concatenation. The pairwise form is O(L*k) + # in the size of the run because each step rebuilds the growing + # prefix; "".join is O(L). 
+ parts = [state.tokens[curr].content] + curr += 1 + while curr < maximum and state.tokens[curr].type == "text": + parts.append(state.tokens[curr].content) + curr += 1 + merged = state.tokens[curr - 1] + merged.content = "".join(parts) + merged.level = level + state.tokens[last] = merged last += 1 + continue + + if curr != last: + state.tokens[last] = state.tokens[curr] + last += 1 curr += 1 if curr != last: diff --git a/contrib/python/markdown-it-py/markdown_it/rules_inline/state_inline.py b/contrib/python/markdown-it-py/markdown_it/rules_inline/state_inline.py index 50dc41294d6..de35287d427 100644 --- a/contrib/python/markdown-it-py/markdown_it/rules_inline/state_inline.py +++ b/contrib/python/markdown-it-py/markdown_it/rules_inline/state_inline.py @@ -1,8 +1,7 @@ from __future__ import annotations -from collections import namedtuple from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any, Literal, NamedTuple from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace from ..ruler import StateBase @@ -36,7 +35,10 @@ class Delimiter: level: bool | None = None -Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"]) +class Scanned(NamedTuple): + can_open: bool + can_close: bool + length: int class StateInline(StateBase): diff --git a/contrib/python/markdown-it-py/markdown_it/rules_inline/strikethrough.py b/contrib/python/markdown-it-py/markdown_it/rules_inline/strikethrough.py index ec816281d49..c9875e043ab 100644 --- a/contrib/python/markdown-it-py/markdown_it/rules_inline/strikethrough.py +++ b/contrib/python/markdown-it-py/markdown_it/rules_inline/strikethrough.py @@ -1,11 +1,16 @@ -# ~~strike through~~ +# ~~strike through~~ (and optionally ~single tilde~) from __future__ import annotations from .state_inline import Delimiter, StateInline def tokenize(state: StateInline, silent: bool) -> bool: - """Insert each marker as a separate text token, and add it to delimiter list""" + 
"""Insert each marker as a separate text token, and add it to delimiter list. + + When the ``strikethrough_single_tilde`` option is enabled on the + ``MarkdownIt`` instance, single ``~`` delimiters are also accepted and + runs of three or more tildes are rejected (matching GitHub's rendering behaviour). + """ start = state.pos ch = state.src[start] @@ -18,30 +23,59 @@ def tokenize(state: StateInline, silent: bool) -> bool: scanned = state.scanDelims(state.pos, True) length = scanned.length - if length < 2: - return False + single_tilde = state.md.options.get("strikethrough_single_tilde", False) - if length % 2: - token = state.push("text", "", 0) - token.content = ch - length -= 1 + if single_tilde: + # GitHub mode: only accept exactly 1 or 2 tildes. + if length < 1: + return False + if length > 2: + # Consume 3+ tildes as plain text so the parser doesn't + # re-enter and match a subset of them. This intentionally + # matches GitHub's rendering, where ≥3 tildes are literal text. + token = state.push("text", "", 0) + token.content = ch * length + state.pos += scanned.length + return True - i = 0 - while i < length: token = state.push("text", "", 0) - token.content = ch + ch + token.content = ch * length state.delimiters.append( Delimiter( marker=ord(ch), - length=0, # disable "rule of 3" length checks meant for emphasis + length=0, # disable "rule of 3" length checks token=len(state.tokens) - 1, end=-1, open=scanned.can_open, close=scanned.can_close, ) ) + else: + # Original markdown-it behaviour: minimum 2, split odd runs. 
+ if length < 2: + return False - i += 2 + if length % 2: + token = state.push("text", "", 0) + token.content = ch + length -= 1 + + i = 0 + while i < length: + token = state.push("text", "", 0) + token.content = ch + ch + state.delimiters.append( + Delimiter( + marker=ord(ch), + length=0, # disable "rule of 3" length checks + token=len(state.tokens) - 1, + end=-1, + open=scanned.can_open, + close=scanned.can_close, + ) + ) + + i += 2 state.pos += scanned.length @@ -51,6 +85,7 @@ def tokenize(state: StateInline, silent: bool) -> bool: def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None: loneMarkers = [] maximum = len(delimiters) + single_tilde = state.md.options.get("strikethrough_single_tilde", False) i = 0 while i < maximum: @@ -66,18 +101,29 @@ def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None: endDelim = delimiters[startDelim.end] + # In single-tilde mode, opener and closer must have the same width + # (both `~` or both `~~`). The width is stored in the text token. 
+ if single_tilde: + opener_content = state.tokens[startDelim.token].content + closer_content = state.tokens[endDelim.token].content + if opener_content != closer_content: + i += 1 + continue + + markup = state.tokens[startDelim.token].content + token = state.tokens[startDelim.token] token.type = "s_open" token.tag = "s" token.nesting = 1 - token.markup = "~~" + token.markup = markup token.content = "" token = state.tokens[endDelim.token] token.type = "s_close" token.tag = "s" token.nesting = -1 - token.markup = "~~" + token.markup = markup token.content = "" if ( diff --git a/contrib/python/markdown-it-py/markdown_it/rules_inline/text.py b/contrib/python/markdown-it-py/markdown_it/rules_inline/text.py index 18b2fcc7a8f..ef0cc9cec55 100644 --- a/contrib/python/markdown-it-py/markdown_it/rules_inline/text.py +++ b/contrib/python/markdown-it-py/markdown_it/rules_inline/text.py @@ -1,54 +1,15 @@ -import functools -import re - # Skip text characters for text token, place those to pending buffer # and increment current pos from .state_inline import StateInline # Rule to skip pure text -# '{}$%@~+=:' reserved for extensions - -# !!!! 
Don't confuse with "Markdown ASCII Punctuation" chars -# http://spec.commonmark.org/0.15/#ascii-punctuation-character - - -_TerminatorChars = { - "\n", - "!", - "#", - "$", - "%", - "&", - "*", - "+", - "-", - ":", - "<", - "=", - ">", - "@", - "[", - "\\", - "]", - "^", - "_", - "`", - "{", - "}", - "~", -} - - -@functools.cache -def _terminator_char_regex() -> re.Pattern[str]: - return re.compile("[" + re.escape("".join(_TerminatorChars)) + "]") def text(state: StateInline, silent: bool) -> bool: pos = state.pos posMax = state.posMax - terminator_char = _terminator_char_regex().search(state.src, pos) + terminator_char = state.md.inline.terminator_re.search(state.src, pos) pos = terminator_char.start() if terminator_char else posMax if pos == state.pos: diff --git a/contrib/python/markdown-it-py/markdown_it/tree.py b/contrib/python/markdown-it-py/markdown_it/tree.py index 5369157bc3c..24bc24663f9 100644 --- a/contrib/python/markdown-it-py/markdown_it/tree.py +++ b/contrib/python/markdown-it-py/markdown_it/tree.py @@ -274,7 +274,7 @@ class SyntaxTreeNode: @property def tag(self) -> str: - """html tag name, e.g. \"p\" """ + """html tag name, e.g. \"p\"""" return self._attribute_token().tag @property diff --git a/contrib/python/markdown-it-py/markdown_it/utils.py b/contrib/python/markdown-it-py/markdown_it/utils.py index 2571a158612..09e60163160 100644 --- a/contrib/python/markdown-it-py/markdown_it/utils.py +++ b/contrib/python/markdown-it-py/markdown_it/utils.py @@ -41,6 +41,14 @@ class OptionsType(TypedDict): This is a Python only option, and is intended for the use of round-trip parsing. 
""" + tasklists: NotRequired[bool] + """Enable GFM task list checkbox detection in list items.""" + alerts: NotRequired[bool] + """Enable GitHub-style alert detection in blockquotes.""" + tasklists_editable: NotRequired[bool] + """When True, rendered task list checkboxes are interactive (no disabled attribute).""" + strikethrough_single_tilde: NotRequired[bool] + """Allow single tilde ``~text~`` for strikethrough in addition to double.""" class PresetType(TypedDict): diff --git a/contrib/python/markdown-it-py/ya.make b/contrib/python/markdown-it-py/ya.make index 919a3099597..dd380e6d343 100644 --- a/contrib/python/markdown-it-py/ya.make +++ b/contrib/python/markdown-it-py/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(4.0.0) +VERSION(4.1.0) LICENSE(MIT) |
