summaryrefslogtreecommitdiffstats
path: root/contrib/python/markdown-it-py
diff options
context:
space:
mode:
authorrobot-piglet <[email protected]>2026-05-22 23:54:15 +0300
committerrobot-piglet <[email protected]>2026-05-23 00:31:16 +0300
commit88de1fde2ead7a8a664f403cf8c6a29cc3f718eb (patch)
treea06fe19732b21ca5bf728b6e091f06a9aa8a59b0 /contrib/python/markdown-it-py
parenta4934b97e2fbc92ceda3743e051dff0e1d6e2708 (diff)
Intermediate changes
commit_hash:a0d2bdedf870db96a3096f257f9ad580475d6d26
Diffstat (limited to 'contrib/python/markdown-it-py')
-rw-r--r--contrib/python/markdown-it-py/.dist-info/METADATA6
-rw-r--r--contrib/python/markdown-it-py/README.md3
-rw-r--r--contrib/python/markdown-it-py/markdown_it/__init__.py2
-rw-r--r--contrib/python/markdown-it-py/markdown_it/cli/parse.py17
-rw-r--r--contrib/python/markdown-it-py/markdown_it/main.py3
-rw-r--r--contrib/python/markdown-it-py/markdown_it/parser_inline.py67
-rw-r--r--contrib/python/markdown-it-py/markdown_it/presets/__init__.py22
-rw-r--r--contrib/python/markdown-it-py/markdown_it/renderer.py20
-rw-r--r--contrib/python/markdown-it-py/markdown_it/rules_block/blockquote.py81
-rw-r--r--contrib/python/markdown-it-py/markdown_it/rules_block/list.py63
-rw-r--r--contrib/python/markdown-it-py/markdown_it/rules_core/text_join.py22
-rw-r--r--contrib/python/markdown-it-py/markdown_it/rules_inline/fragments_join.py25
-rw-r--r--contrib/python/markdown-it-py/markdown_it/rules_inline/state_inline.py8
-rw-r--r--contrib/python/markdown-it-py/markdown_it/rules_inline/strikethrough.py76
-rw-r--r--contrib/python/markdown-it-py/markdown_it/rules_inline/text.py41
-rw-r--r--contrib/python/markdown-it-py/markdown_it/tree.py2
-rw-r--r--contrib/python/markdown-it-py/markdown_it/utils.py8
-rw-r--r--contrib/python/markdown-it-py/ya.make2
18 files changed, 387 insertions, 81 deletions
diff --git a/contrib/python/markdown-it-py/.dist-info/METADATA b/contrib/python/markdown-it-py/.dist-info/METADATA
index 0f2b466a638..488e089b08e 100644
--- a/contrib/python/markdown-it-py/.dist-info/METADATA
+++ b/contrib/python/markdown-it-py/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: markdown-it-py
-Version: 4.0.0
+Version: 4.1.0
Summary: Python port of markdown-it. Markdown parsing, done right!
Keywords: markdown,lexer,parser,commonmark,markdown-it
Author-email: Chris Sewell <[email protected]>
@@ -46,6 +46,7 @@ Requires-Dist: coverage ; extra == "testing"
Requires-Dist: pytest ; extra == "testing"
Requires-Dist: pytest-cov ; extra == "testing"
Requires-Dist: pytest-regressions ; extra == "testing"
+Requires-Dist: pytest-timeout ; extra == "testing"
Requires-Dist: requests ; extra == "testing"
Project-URL: Documentation, https://markdown-it-py.readthedocs.io
Project-URL: Homepage, https://github.com/executablebooks/markdown-it-py
@@ -160,11 +161,12 @@ Render markdown to HTML with markdown-it-py from the
command-line:
```console
-usage: markdown-it [-h] [-v] [filenames [filenames ...]]
+usage: markdown-it [-h] [-v] [--stdin|filenames [filenames ...]]
Parse one or more markdown files, convert each to HTML, and print to stdout
positional arguments:
+ --stdin read source Markdown file from standard input
filenames specify an optional list of files to convert
optional arguments:
diff --git a/contrib/python/markdown-it-py/README.md b/contrib/python/markdown-it-py/README.md
index b94729f8545..82d218b3ae6 100644
--- a/contrib/python/markdown-it-py/README.md
+++ b/contrib/python/markdown-it-py/README.md
@@ -101,11 +101,12 @@ Render markdown to HTML with markdown-it-py from the
command-line:
```console
-usage: markdown-it [-h] [-v] [filenames [filenames ...]]
+usage: markdown-it [-h] [-v] [--stdin|filenames [filenames ...]]
Parse one or more markdown files, convert each to HTML, and print to stdout
positional arguments:
+ --stdin read source Markdown file from standard input
filenames specify an optional list of files to convert
optional arguments:
diff --git a/contrib/python/markdown-it-py/markdown_it/__init__.py b/contrib/python/markdown-it-py/markdown_it/__init__.py
index 9fac279576e..523d92ef4ba 100644
--- a/contrib/python/markdown-it-py/markdown_it/__init__.py
+++ b/contrib/python/markdown-it-py/markdown_it/__init__.py
@@ -1,6 +1,6 @@
"""A Python port of Markdown-It"""
__all__ = ("MarkdownIt",)
-__version__ = "4.0.0"
+__version__ = "4.1.0"
from .main import MarkdownIt
diff --git a/contrib/python/markdown-it-py/markdown_it/cli/parse.py b/contrib/python/markdown-it-py/markdown_it/cli/parse.py
index fe346b2f51b..5de738b2da7 100644
--- a/contrib/python/markdown-it-py/markdown_it/cli/parse.py
+++ b/contrib/python/markdown-it-py/markdown_it/cli/parse.py
@@ -21,6 +21,8 @@ def main(args: Sequence[str] | None = None) -> int:
namespace = parse_args(args)
if namespace.filenames:
convert(namespace.filenames)
+ elif namespace.stdin:
+ convert_stdin()
else:
interactive()
return 0
@@ -31,6 +33,18 @@ def convert(filenames: Iterable[str]) -> None:
convert_file(filename)
+def convert_stdin() -> None:
+ """
+ Parse Markdown read from standard input and dump the rendered output to stdout.
+
+ Writes an error to stderr and exits with status 1 if an ``OSError`` is raised.
+ """
+ try:
+ rendered = MarkdownIt().render(sys.stdin.read())
+ print(rendered, end="")
+ except OSError:
+ sys.stderr.write("Cannot parse Markdown from the standard input.\n")
+ sys.exit(1)
+
+
def convert_file(filename: str) -> None:
"""
Parse a Markdown file and dump the output to stdout.
@@ -95,6 +109,9 @@ Batch:
)
parser.add_argument("-v", "--version", action="version", version=version_str)
parser.add_argument(
+ "--stdin", action="store_true", help="read Markdown from standard input"
+ )
+ parser.add_argument(
"filenames", nargs="*", help="specify an optional list of files to convert"
)
return parser.parse_args(args)
diff --git a/contrib/python/markdown-it-py/markdown_it/main.py b/contrib/python/markdown-it-py/markdown_it/main.py
index bf9fd18f3f3..87835e541f1 100644
--- a/contrib/python/markdown-it-py/markdown_it/main.py
+++ b/contrib/python/markdown-it-py/markdown_it/main.py
@@ -26,6 +26,7 @@ _PRESETS: dict[str, PresetType] = {
"zero": presets.zero.make(),
"commonmark": presets.commonmark.make(),
"gfm-like": presets.gfm_like.make(),
+ "gfm-like2": presets.gfm_like2.make(),
}
@@ -125,7 +126,7 @@ class MarkdownIt:
if options_update:
options = {**options, **options_update} # type: ignore
- self.set(options) # type: ignore
+ self.set(options)
if "components" in config:
for name, component in config["components"].items():
diff --git a/contrib/python/markdown-it-py/markdown_it/parser_inline.py b/contrib/python/markdown-it-py/markdown_it/parser_inline.py
index 26ec2e636d4..8fabb9884bb 100644
--- a/contrib/python/markdown-it-py/markdown_it/parser_inline.py
+++ b/contrib/python/markdown-it-py/markdown_it/parser_inline.py
@@ -3,6 +3,8 @@
from __future__ import annotations
from collections.abc import Callable
+import functools
+import re
from typing import TYPE_CHECKING
from . import rules_inline
@@ -15,6 +17,47 @@ if TYPE_CHECKING:
from markdown_it import MarkdownIt
+# Default set of characters that terminate a text token and allow inline rules to fire.
+# '{}$%@~+=:' reserved for extensions.
+# Note: Don't confuse with "Markdown ASCII Punctuation" chars.
+# http://spec.commonmark.org/0.15/#ascii-punctuation-character
+_DEFAULT_TERMINATORS: frozenset[str] = frozenset(
+ {
+ "\n",
+ "!",
+ "#",
+ "$",
+ "%",
+ "&",
+ "*",
+ "+",
+ "-",
+ ":",
+ "<",
+ "=",
+ ">",
+ "@",
+ "[",
+ "\\",
+ "]",
+ "^",
+ "_",
+ "`",
+ "{",
+ "}",
+ "~",
+ }
+)
+
+
+# Lazily compiled regex for the default terminator set. The @cache ensures it is
+# compiled at most once (on first ParserInline instantiation) and shared across all
+# instances that have not added extra chars, keeping __init__ cost near zero.
+@functools.cache
+def _default_terminator_re() -> re.Pattern[str]:
+    """Return the compiled character-class regex matching any default terminator."""
+    return re.compile("[" + re.escape("".join(_DEFAULT_TERMINATORS)) + "]")
+
+
# Parser rules
RuleFuncInlineType = Callable[[StateInline, bool], bool]
"""(state: StateInline, silent: bool) -> matched: bool)
@@ -61,6 +104,30 @@ class ParserInline:
self.ruler2 = Ruler[RuleFuncInline2Type]()
for name, rule2 in _rules2:
self.ruler2.push(name, rule2)
+ # Characters that stop the text rule, allowing other inline rules to fire.
+ # _extra_terminator_chars starts empty and only grows when add_terminator_char()
+ # is called with a char outside the defaults; until then the shared regex is reused.
+ self._extra_terminator_chars: set[str] = set()
+ # Pre-compiled regex shared with all default instances (no copy in the common path).
+ self.terminator_re: re.Pattern[str] = _default_terminator_re()
+
+ def add_terminator_char(self, ch: str) -> None:
+ """Register a character that stops the ``text`` rule, allowing inline rules to fire.
+
+ This lets plugins declare which characters their inline rules react to,
+ mirroring the ``MARKER`` mechanism in the Rust markdown-it implementation.
+
+ :param ch: A single character to add to the terminator set.
+ """
+ if ch not in _DEFAULT_TERMINATORS and ch not in self._extra_terminator_chars:
+ self._extra_terminator_chars.add(ch)
+ self.terminator_re = re.compile(
+ "["
+ + re.escape(
+ "".join(_DEFAULT_TERMINATORS | self._extra_terminator_chars)
+ )
+ + "]"
+ )
def skipToken(self, state: StateInline) -> None:
"""Skip single token by running all rules in validation mode;
diff --git a/contrib/python/markdown-it-py/markdown_it/presets/__init__.py b/contrib/python/markdown-it-py/markdown_it/presets/__init__.py
index e21c7806930..43578148cee 100644
--- a/contrib/python/markdown-it-py/markdown_it/presets/__init__.py
+++ b/contrib/python/markdown-it-py/markdown_it/presets/__init__.py
@@ -1,4 +1,4 @@
-__all__ = ("commonmark", "default", "gfm_like", "js_default", "zero")
+__all__ = ("commonmark", "default", "gfm_like", "gfm_like2", "js_default", "zero")
from ..utils import PresetType
from . import commonmark, default, zero
@@ -26,3 +26,23 @@ class gfm_like: # noqa: N801
config["options"]["linkify"] = True
config["options"]["html"] = True
return config
+
+
+class gfm_like2: # noqa: N801
+ """GitHub Flavoured Markdown (GFM) like, extended.
+
+ Builds on ``gfm-like`` and additionally enables:
+
+ - Task lists (``- [x] done``)
+ - Alerts (``> [!NOTE]``)
+ - Single-tilde strikethrough (``~text~`` in addition to ``~~text~~``)
+ """
+
+ @staticmethod
+ def make() -> PresetType:
+ config = gfm_like.make()
+ config["options"]["tasklists"] = True
+ config["options"]["tasklists_editable"] = False
+ config["options"]["alerts"] = True
+ config["options"]["strikethrough_single_tilde"] = True
+ return config
diff --git a/contrib/python/markdown-it-py/markdown_it/renderer.py b/contrib/python/markdown-it-py/markdown_it/renderer.py
index 6d60589adaa..f690b091e2b 100644
--- a/contrib/python/markdown-it-py/markdown_it/renderer.py
+++ b/contrib/python/markdown-it-py/markdown_it/renderer.py
@@ -209,6 +209,26 @@ class RendererHTML(RendererProtocol):
###################################################
+ def list_item_open(
+ self,
+ tokens: Sequence[Token],
+ idx: int,
+ options: OptionsDict,
+ env: EnvType,
+ ) -> str:
+ token = tokens[idx]
+ result = self.renderToken(tokens, idx, options, env)
+ if token.meta and "checked" in token.meta:
+ checked_attr = ' checked=""' if token.meta["checked"] else ""
+ disabled_attr = (
+ "" if options.get("tasklists_editable", False) else ' disabled=""'
+ )
+ result += (
+ '<input class="task-list-item-checkbox"'
+ f'{disabled_attr} type="checkbox"{checked_attr}> '
+ )
+ return result
+
def code_inline(
self, tokens: Sequence[Token], idx: int, options: OptionsDict, env: EnvType
) -> str:
diff --git a/contrib/python/markdown-it-py/markdown_it/rules_block/blockquote.py b/contrib/python/markdown-it-py/markdown_it/rules_block/blockquote.py
index 0c9081b9cbd..de2d4f2d2f1 100644
--- a/contrib/python/markdown-it-py/markdown_it/rules_block/blockquote.py
+++ b/contrib/python/markdown-it-py/markdown_it/rules_block/blockquote.py
@@ -273,17 +273,58 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
oldIndent = state.blkIndent
state.blkIndent = 0
- token = state.push("blockquote_open", "blockquote", 1)
- token.markup = ">"
- token.map = lines = [startLine, 0]
+ # Detect GitHub-style alert marker on the first content line.
+ # Note: `startLine` here refers to the first content line of the
+ # blockquote, after the `>` prefix has already been stripped by the
+ # blockquote parser above (bMarks/tShift adjusted to skip `> `).
+ alert_kind = None
+ if state.md.options.get("alerts", False) and nextLine > startLine:
+ alert_kind = _detect_alert(state, startLine)
- state.md.block.tokenize(state, startLine, nextLine)
+ lines = [startLine, 0]
- token = state.push("blockquote_close", "blockquote", -1)
- token.markup = ">"
+ if alert_kind is not None:
+ # Emit alert tokens instead of blockquote tokens
+ alert_lower = alert_kind.lower()
+ token = state.push("alert_open", "div", 1)
+ token.markup = ">"
+ token.attrSet("class", f"markdown-alert markdown-alert-{alert_lower}")
+ token.map = lines
+ token.info = alert_kind
+ token.meta = {"kind": alert_kind}
+
+ # Emit a title paragraph: <p class="markdown-alert-title">Kind</p>
+ token = state.push("alert_title_open", "p", 1)
+ token.attrSet("class", "markdown-alert-title")
+ title_token = state.push("inline", "", 0)
+ title_token.content = alert_kind.capitalize()
+ title_token.children = []
+ token = state.push("alert_title_close", "p", -1)
+
+ # Skip the marker line (startLine) and tokenize from startLine + 1.
+ contentStart = startLine + 1
+ if contentStart < nextLine:
+ # tokenize() updates state.line to nextLine as part of its
+ # contract, consistent with the blockquote code path below.
+ state.md.block.tokenize(state, contentStart, nextLine)
+ else:
+ state.line = nextLine
+
+ token = state.push("alert_close", "div", -1)
+ token.markup = ">"
+ else:
+ token = state.push("blockquote_open", "blockquote", 1)
+ token.markup = ">"
+ token.map = lines
+
+ state.md.block.tokenize(state, startLine, nextLine)
+
+ token = state.push("blockquote_close", "blockquote", -1)
+ token.markup = ">"
state.lineMax = oldLineMax
state.parentType = oldParentType
+ # Update the opening token map for both alert and blockquote containers.
lines[1] = state.line
# Restore original tShift; this might not be necessary since the parser
@@ -297,3 +338,31 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
state.blkIndent = oldIndent
return True
+
+
+_ALERT_TYPES = {"NOTE", "TIP", "IMPORTANT", "WARNING", "CAUTION"}
+
+
+def _detect_alert(state: StateBlock, startLine: int) -> str | None:
+ """Detect ``[!TYPE]`` on *startLine* (after ``>`` prefix has been stripped).
+
+ Returns the alert type string (e.g. ``"NOTE"``) or ``None``.
+ """
+ pos = state.bMarks[startLine] + state.tShift[startLine]
+ maximum = state.eMarks[startLine]
+ src = state.src
+
+ # Trim trailing whitespace
+ while maximum > pos and src[maximum - 1] in (" ", "\t"):
+ maximum -= 1
+
+ if maximum - pos < 4:
+ return None
+ if src[pos] != "[" or src[pos + 1] != "!":
+ return None
+ if src[maximum - 1] != "]":
+ return None
+ type_str = src[pos + 2 : maximum - 1].upper()
+ if type_str not in _ALERT_TYPES:
+ return None
+ return type_str
diff --git a/contrib/python/markdown-it-py/markdown_it/rules_block/list.py b/contrib/python/markdown-it-py/markdown_it/rules_block/list.py
index d8070d74703..c8fe7af5d5d 100644
--- a/contrib/python/markdown-it-py/markdown_it/rules_block/list.py
+++ b/contrib/python/markdown-it-py/markdown_it/rules_block/list.py
@@ -235,8 +235,20 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
if isOrdered:
token.info = state.src[start : posAfterMarker - 1]
+ # Detect GFM task checkbox: `[ ] ` or `[x] `/`[X] ` at content start
+ checkboxLen = 0
+ if state.md.options.get("tasklists", False) and contentStart < maximum:
+ checked = _detect_task_checkbox(state.src, contentStart, maximum)
+ if checked is not None:
+ token.meta = {"checked": checked}
+ # Advance content past the checkbox: `[x]` (3 chars) + whitespace.
+ # `_detect_task_checkbox` already guarantees a whitespace char at
+ # pos+3, so we always consume 4 characters.
+ checkboxLen = 4
+
# change current state, then restore it after parser subcall
oldTight = state.tight
+ oldBMark = state.bMarks[startLine]
oldTShift = state.tShift[startLine]
oldSCount = state.sCount[startLine]
@@ -252,6 +264,12 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
state.tShift[startLine] = contentStart - state.bMarks[startLine]
state.sCount[startLine] = offset
+ # If we detected a checkbox, advance bMarks past it so that
+ # getLines() doesn't include the checkbox text in the content.
+ if checkboxLen:
+ state.bMarks[startLine] = contentStart + checkboxLen
+ state.tShift[startLine] = 0
+
if contentStart >= maximum and state.isEmpty(startLine + 1):
# workaround for this case
# (list item is empty, list terminates before "foo"):
@@ -277,6 +295,8 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
state.blkIndent = state.listIndent
state.listIndent = oldListIndent
+ if checkboxLen:
+ state.bMarks[startLine] = oldBMark
state.tShift[startLine] = oldTShift
state.sCount[startLine] = oldSCount
state.tight = oldTight
@@ -326,6 +346,24 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
break
# Finalize list
+
+ # If any direct list item has a task checkbox, add class to the list
+ if state.md.options.get("tasklists", False):
+ containsTask = False
+ level = state.tokens[listTokIdx].level
+ for j in range(listTokIdx + 1, len(state.tokens)):
+ tok = state.tokens[j]
+ if (
+ tok.level == level + 1
+ and tok.type == "list_item_open"
+ and tok.meta
+ and "checked" in tok.meta
+ ):
+ tok.attrJoin("class", "task-list-item")
+ containsTask = True
+ if containsTask:
+ state.tokens[listTokIdx].attrJoin("class", "contains-task-list")
+
if isOrdered:
token = state.push("ordered_list_close", "ol", -1)
else:
@@ -343,3 +381,28 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool) ->
markTightParagraphs(state, listTokIdx)
return True
+
+
+def _detect_task_checkbox(src: str, pos: int, maximum: int) -> bool | None:
+ """Detect ``[ ]``, ``[x]``, or ``[X]`` at *pos*, followed by whitespace.
+
+ Returns ``True`` (checked), ``False`` (unchecked), or ``None`` (no match).
+ """
+ # Need at least 4 chars: `[`, char, `]`, whitespace
+ if pos + 4 > maximum:
+ return None
+ if src[pos] != "[":
+ return None
+ inner = src[pos + 1]
+ if src[pos + 2] != "]":
+ return None
+ if inner == " ":
+ checked = False
+ elif inner in ("x", "X"):
+ checked = True
+ else:
+ return None
+ # After `]`, must have whitespace
+ if src[pos + 3] not in (" ", "\t"):
+ return None
+ return checked
diff --git a/contrib/python/markdown-it-py/markdown_it/rules_core/text_join.py b/contrib/python/markdown-it-py/markdown_it/rules_core/text_join.py
index 5379f6d7a8e..939b83b29f7 100644
--- a/contrib/python/markdown-it-py/markdown_it/rules_core/text_join.py
+++ b/contrib/python/markdown-it-py/markdown_it/rules_core/text_join.py
@@ -21,7 +21,10 @@ def text_join(state: StateCore) -> None:
# convert text_special to text and join all adjacent text nodes
new_tokens: list[Token] = []
- for child_token in inline_token.children or []:
+ children = inline_token.children or []
+ i = 0
+ while i < len(children):
+ child_token = children[i]
if child_token.type == "text_special":
child_token.type = "text"
if (
@@ -29,7 +32,22 @@ def text_join(state: StateCore) -> None:
and new_tokens
and new_tokens[-1].type == "text"
):
- new_tokens[-1].content += child_token.content
+ # Collapse a run of adjacent text nodes in a single join, instead
+ # of pairwise `a + b` concatenation. The pairwise form is O(L*k)
+ # in the size of the run because each step rebuilds the growing
+ # prefix; "".join is O(L).
+ parts = [new_tokens[-1].content, child_token.content]
+ i += 1
+ while i < len(children):
+ next_token = children[i]
+ if next_token.type == "text_special":
+ next_token.type = "text"
+ if next_token.type != "text":
+ break
+ parts.append(next_token.content)
+ i += 1
+ new_tokens[-1].content = "".join(parts)
else:
new_tokens.append(child_token)
+ i += 1
inline_token.children = new_tokens
diff --git a/contrib/python/markdown-it-py/markdown_it/rules_inline/fragments_join.py b/contrib/python/markdown-it-py/markdown_it/rules_inline/fragments_join.py
index f795c1364b8..5eb88a14025 100644
--- a/contrib/python/markdown-it-py/markdown_it/rules_inline/fragments_join.py
+++ b/contrib/python/markdown-it-py/markdown_it/rules_inline/fragments_join.py
@@ -29,14 +29,25 @@ def fragments_join(state: StateInline) -> None:
and curr + 1 < maximum
and state.tokens[curr + 1].type == "text"
):
- # collapse two adjacent text nodes
- state.tokens[curr + 1].content = (
- state.tokens[curr].content + state.tokens[curr + 1].content
- )
- else:
- if curr != last:
- state.tokens[last] = state.tokens[curr]
+ # Collapse a run of adjacent text nodes in a single join, instead
+ # of pairwise `a + b` concatenation. The pairwise form is O(L*k)
+ # in the size of the run because each step rebuilds the growing
+ # prefix; "".join is O(L).
+ parts = [state.tokens[curr].content]
+ curr += 1
+ while curr < maximum and state.tokens[curr].type == "text":
+ parts.append(state.tokens[curr].content)
+ curr += 1
+ merged = state.tokens[curr - 1]
+ merged.content = "".join(parts)
+ merged.level = level
+ state.tokens[last] = merged
last += 1
+ continue
+
+ if curr != last:
+ state.tokens[last] = state.tokens[curr]
+ last += 1
curr += 1
if curr != last:
diff --git a/contrib/python/markdown-it-py/markdown_it/rules_inline/state_inline.py b/contrib/python/markdown-it-py/markdown_it/rules_inline/state_inline.py
index 50dc41294d6..de35287d427 100644
--- a/contrib/python/markdown-it-py/markdown_it/rules_inline/state_inline.py
+++ b/contrib/python/markdown-it-py/markdown_it/rules_inline/state_inline.py
@@ -1,8 +1,7 @@
from __future__ import annotations
-from collections import namedtuple
from dataclasses import dataclass
-from typing import TYPE_CHECKING, Any, Literal
+from typing import TYPE_CHECKING, Any, Literal, NamedTuple
from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
from ..ruler import StateBase
@@ -36,7 +35,10 @@ class Delimiter:
level: bool | None = None
-Scanned = namedtuple("Scanned", ["can_open", "can_close", "length"])
+class Scanned(NamedTuple):
+ can_open: bool
+ can_close: bool
+ length: int
class StateInline(StateBase):
diff --git a/contrib/python/markdown-it-py/markdown_it/rules_inline/strikethrough.py b/contrib/python/markdown-it-py/markdown_it/rules_inline/strikethrough.py
index ec816281d49..c9875e043ab 100644
--- a/contrib/python/markdown-it-py/markdown_it/rules_inline/strikethrough.py
+++ b/contrib/python/markdown-it-py/markdown_it/rules_inline/strikethrough.py
@@ -1,11 +1,16 @@
-# ~~strike through~~
+# ~~strike through~~ (and optionally ~single tilde~)
from __future__ import annotations
from .state_inline import Delimiter, StateInline
def tokenize(state: StateInline, silent: bool) -> bool:
- """Insert each marker as a separate text token, and add it to delimiter list"""
+ """Insert each marker as a separate text token, and add it to delimiter list.
+
+ When the ``strikethrough_single_tilde`` option is enabled on the
+ ``MarkdownIt`` instance, single ``~`` delimiters are also accepted and
+ runs of three or more tildes are rejected (matching GitHub's rendering behaviour).
+ """
start = state.pos
ch = state.src[start]
@@ -18,30 +23,59 @@ def tokenize(state: StateInline, silent: bool) -> bool:
scanned = state.scanDelims(state.pos, True)
length = scanned.length
- if length < 2:
- return False
+ single_tilde = state.md.options.get("strikethrough_single_tilde", False)
- if length % 2:
- token = state.push("text", "", 0)
- token.content = ch
- length -= 1
+ if single_tilde:
+ # GitHub mode: only accept exactly 1 or 2 tildes.
+ if length < 1:
+ return False
+ if length > 2:
+ # Consume 3+ tildes as plain text so the parser doesn't
+ # re-enter and match a subset of them. This intentionally
+ # matches GitHub's rendering, where ≥3 tildes are literal text.
+ token = state.push("text", "", 0)
+ token.content = ch * length
+ state.pos += scanned.length
+ return True
- i = 0
- while i < length:
token = state.push("text", "", 0)
- token.content = ch + ch
+ token.content = ch * length
state.delimiters.append(
Delimiter(
marker=ord(ch),
- length=0, # disable "rule of 3" length checks meant for emphasis
+ length=0, # disable "rule of 3" length checks
token=len(state.tokens) - 1,
end=-1,
open=scanned.can_open,
close=scanned.can_close,
)
)
+ else:
+ # Original markdown-it behaviour: minimum 2, split odd runs.
+ if length < 2:
+ return False
- i += 2
+ if length % 2:
+ token = state.push("text", "", 0)
+ token.content = ch
+ length -= 1
+
+ i = 0
+ while i < length:
+ token = state.push("text", "", 0)
+ token.content = ch + ch
+ state.delimiters.append(
+ Delimiter(
+ marker=ord(ch),
+ length=0, # disable "rule of 3" length checks
+ token=len(state.tokens) - 1,
+ end=-1,
+ open=scanned.can_open,
+ close=scanned.can_close,
+ )
+ )
+
+ i += 2
state.pos += scanned.length
@@ -51,6 +85,7 @@ def tokenize(state: StateInline, silent: bool) -> bool:
def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None:
loneMarkers = []
maximum = len(delimiters)
+ single_tilde = state.md.options.get("strikethrough_single_tilde", False)
i = 0
while i < maximum:
@@ -66,18 +101,29 @@ def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None:
endDelim = delimiters[startDelim.end]
+ # In single-tilde mode, opener and closer must have the same width
+ # (both `~` or both `~~`). The width is stored in the text token.
+ if single_tilde:
+ opener_content = state.tokens[startDelim.token].content
+ closer_content = state.tokens[endDelim.token].content
+ if opener_content != closer_content:
+ i += 1
+ continue
+
+ markup = state.tokens[startDelim.token].content
+
token = state.tokens[startDelim.token]
token.type = "s_open"
token.tag = "s"
token.nesting = 1
- token.markup = "~~"
+ token.markup = markup
token.content = ""
token = state.tokens[endDelim.token]
token.type = "s_close"
token.tag = "s"
token.nesting = -1
- token.markup = "~~"
+ token.markup = markup
token.content = ""
if (
diff --git a/contrib/python/markdown-it-py/markdown_it/rules_inline/text.py b/contrib/python/markdown-it-py/markdown_it/rules_inline/text.py
index 18b2fcc7a8f..ef0cc9cec55 100644
--- a/contrib/python/markdown-it-py/markdown_it/rules_inline/text.py
+++ b/contrib/python/markdown-it-py/markdown_it/rules_inline/text.py
@@ -1,54 +1,15 @@
-import functools
-import re
-
# Skip text characters for text token, place those to pending buffer
# and increment current pos
from .state_inline import StateInline
# Rule to skip pure text
-# '{}$%@~+=:' reserved for extensions
-
-# !!!! Don't confuse with "Markdown ASCII Punctuation" chars
-# http://spec.commonmark.org/0.15/#ascii-punctuation-character
-
-
-_TerminatorChars = {
- "\n",
- "!",
- "#",
- "$",
- "%",
- "&",
- "*",
- "+",
- "-",
- ":",
- "<",
- "=",
- ">",
- "@",
- "[",
- "\\",
- "]",
- "^",
- "_",
- "`",
- "{",
- "}",
- "~",
-}
-
-
-def _terminator_char_regex() -> re.Pattern[str]:
- return re.compile("[" + re.escape("".join(_TerminatorChars)) + "]")
def text(state: StateInline, silent: bool) -> bool:
pos = state.pos
posMax = state.posMax
- terminator_char = _terminator_char_regex().search(state.src, pos)
+ terminator_char = state.md.inline.terminator_re.search(state.src, pos)
pos = terminator_char.start() if terminator_char else posMax
if pos == state.pos:
diff --git a/contrib/python/markdown-it-py/markdown_it/tree.py b/contrib/python/markdown-it-py/markdown_it/tree.py
index 5369157bc3c..24bc24663f9 100644
--- a/contrib/python/markdown-it-py/markdown_it/tree.py
+++ b/contrib/python/markdown-it-py/markdown_it/tree.py
@@ -274,7 +274,7 @@ class SyntaxTreeNode:
@property
def tag(self) -> str:
- """html tag name, e.g. \"p\" """
+ """html tag name, e.g. \"p\""""
return self._attribute_token().tag
@property
diff --git a/contrib/python/markdown-it-py/markdown_it/utils.py b/contrib/python/markdown-it-py/markdown_it/utils.py
index 2571a158612..09e60163160 100644
--- a/contrib/python/markdown-it-py/markdown_it/utils.py
+++ b/contrib/python/markdown-it-py/markdown_it/utils.py
@@ -41,6 +41,14 @@ class OptionsType(TypedDict):
This is a Python only option, and is intended for the use of round-trip parsing.
"""
+ tasklists: NotRequired[bool]
+ """Enable GFM task list checkbox detection in list items."""
+ alerts: NotRequired[bool]
+ """Enable GitHub-style alert detection in blockquotes."""
+ tasklists_editable: NotRequired[bool]
+ """When True, rendered task list checkboxes are interactive (no disabled attribute)."""
+ strikethrough_single_tilde: NotRequired[bool]
+ """Allow single tilde ``~text~`` for strikethrough in addition to double."""
class PresetType(TypedDict):
diff --git a/contrib/python/markdown-it-py/ya.make b/contrib/python/markdown-it-py/ya.make
index 919a3099597..dd380e6d343 100644
--- a/contrib/python/markdown-it-py/ya.make
+++ b/contrib/python/markdown-it-py/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(4.0.0)
+VERSION(4.1.0)
LICENSE(MIT)