1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
|
from __future__ import annotations
from collections import namedtuple
from dataclasses import dataclass
from typing import TYPE_CHECKING, Any, Literal
from ..common.utils import isMdAsciiPunct, isPunctChar, isWhiteSpace
from ..ruler import StateBase
from ..token import Token
from ..utils import EnvType
if TYPE_CHECKING:
from markdown_it import MarkdownIt
@dataclass(slots=True)
class Delimiter:
    """One member of a run of emphasis-like delimiter characters.

    Instances are collected while scanning inline content and later paired
    (opener/closer) using the ``end`` back-reference.
    """

    # Char code of the starting marker (number).
    marker: int

    # Total length of these series of delimiters.
    length: int

    # A position of the token this delimiter corresponds to.
    token: int

    # If this delimiter is matched as a valid opener, `end` will be
    # equal to its position, otherwise it's `-1`.
    end: int

    # Boolean flags that determine if this delimiter could open or close
    # an emphasis.
    open: bool
    close: bool

    # NOTE(review): never assigned by the code visible here — presumably
    # retained for backward compatibility with older callers; confirm.
    level: bool | None = None
# Result of ``StateInline.scanDelims``: whether the delimiter run may open
# and/or close emphasis, and how many marker characters the run contains.
Scanned = namedtuple("Scanned", "can_open can_close length")
class StateInline(StateBase):
    """Mutable state threaded through the inline-level parsing rules."""

    def __init__(
        self, src: str, md: MarkdownIt, env: EnvType, outTokens: list[Token]
    ) -> None:
        self.src = src
        self.env = env
        self.md = md
        self.tokens = outTokens
        self.tokens_meta: list[dict[str, Any] | None] = [None] * len(outTokens)

        self.pos = 0
        self.posMax = len(self.src)
        self.level = 0
        self.pending = ""
        self.pendingLevel = 0

        # Stores { start: end } pairs. Useful for backtrack
        # optimization of pairs parse (emphasis, strikes).
        self.cache: dict[int, int] = {}

        # Emphasis-like delimiters collected for the currently open tag.
        self.delimiters: list[Delimiter] = []

        # Saved delimiter lists of the enclosing, not-yet-closed tags.
        self._prev_delimiters: list[list[Delimiter]] = []

        # Maps a backtick-run length to the last position it was seen at.
        self.backticks: dict[int, int] = {}
        self.backticksScanned = False

        # While > 0, inline linkify-it execution is disabled (we are
        # inside an <a> tag or a markdown link).
        self.linkLevel = 0

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        return f"{cls_name}(pos=[{self.pos} of {self.posMax}], token={len(self.tokens)})"

    def pushPending(self) -> Token:
        """Flush accumulated ``pending`` text as a new ``text`` token."""
        tok = Token("text", "", 0)
        tok.content = self.pending
        tok.level = self.pendingLevel
        self.tokens.append(tok)
        self.pending = ""
        return tok

    def push(self, ttype: str, tag: str, nesting: Literal[-1, 0, 1]) -> Token:
        """Push new token to "stream".

        If pending text exists - flush it as text token
        """
        if self.pending:
            self.pushPending()

        tok = Token(ttype, tag, nesting)
        meta = None

        if nesting < 0:
            # Closing tag: step out one level and restore the delimiter
            # list that belongs to the enclosing tag.
            self.level -= 1
            self.delimiters = self._prev_delimiters.pop()

        tok.level = self.level

        if nesting > 0:
            # Opening tag: step in one level, stash the current delimiter
            # list, and start a fresh one for this tag's contents.
            self.level += 1
            self._prev_delimiters.append(self.delimiters)
            self.delimiters = []
            meta = {"delimiters": self.delimiters}

        self.pendingLevel = self.level
        self.tokens.append(tok)
        self.tokens_meta.append(meta)
        return tok

    def scanDelims(self, start: int, canSplitWord: bool) -> Scanned:
        """
        Scan a sequence of emphasis-like markers, and determine whether
        it can start an emphasis sequence or end an emphasis sequence.

        - start - position to scan from (it should point at a valid marker);
        - canSplitWord - determine if these markers can be found inside a word
        """
        maximum = self.posMax
        marker = self.src[start]

        # Treat the beginning of the line as whitespace.
        before = self.src[start - 1] if start > 0 else " "

        scan = start
        while scan < maximum and self.src[scan] == marker:
            scan += 1
        run_length = scan - start

        # Treat the end of the line as whitespace.
        after = self.src[scan] if scan < maximum else " "

        before_is_punct = isMdAsciiPunct(ord(before)) or isPunctChar(before)
        after_is_punct = isMdAsciiPunct(ord(after)) or isPunctChar(after)
        before_is_space = isWhiteSpace(ord(before))
        after_is_space = isWhiteSpace(ord(after))

        # CommonMark "flanking" rules, written with De Morgan applied to
        # the usual negated form.
        left_flanking = not after_is_space and (
            not after_is_punct or before_is_space or before_is_punct
        )
        right_flanking = not before_is_space and (
            not before_is_punct or after_is_space or after_is_punct
        )

        can_open = left_flanking and (
            canSplitWord or not right_flanking or before_is_punct
        )
        can_close = right_flanking and (
            canSplitWord or not left_flanking or after_is_punct
        )

        return Scanned(can_open, can_close, run_length)
|