contrib/python/markdown-it-py/markdown_it/rules_core/linkify.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149

from __future__ import annotations

import re
from typing import Protocol

from ..common.utils import arrayReplaceAt, isLinkClose, isLinkOpen
from ..token import Token
from .state_core import StateCore

HTTP_RE = re.compile(r"^http://")
MAILTO_RE = re.compile(r"^mailto:")
TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE)


def linkify(state: StateCore) -> None:
    """Rule for identifying plain-text links."""
    if not state.md.options.linkify:
        return

    if not state.md.linkify:
        raise ModuleNotFoundError("Linkify enabled but not installed.")

    for inline_token in state.tokens:
        if inline_token.type != "inline" or not state.md.linkify.pretest(
            inline_token.content
        ):
            continue

        tokens = inline_token.children

        htmlLinkLevel = 0

        # We scan from the end, to keep position when new tags added.
        # Use reversed logic in links start/end match
        assert tokens is not None
        i = len(tokens)
        while i >= 1:
            i -= 1
            assert isinstance(tokens, list)
            currentToken = tokens[i]

            # Skip content of markdown links
            if currentToken.type == "link_close":
                i -= 1
                while (
                    tokens[i].level != currentToken.level
                    and tokens[i].type != "link_open"
                ):
                    i -= 1
                continue

            # Skip content of html tag links
            if currentToken.type == "html_inline":
                if isLinkOpen(currentToken.content) and htmlLinkLevel > 0:
                    htmlLinkLevel -= 1
                if isLinkClose(currentToken.content):
                    htmlLinkLevel += 1
            if htmlLinkLevel > 0:
                continue

            if currentToken.type == "text" and state.md.linkify.test(
                currentToken.content
            ):
                text = currentToken.content
                links: list[_LinkType] = state.md.linkify.match(text) or []

                # Now split string to nodes
                nodes = []
                level = currentToken.level
                lastPos = 0

                # forbid escape sequence at the start of the string,
                # this avoids http\://example.com/ from being linkified as
                # http:<a href="//example.com/">//example.com/</a>
                if (
                    links
                    and links[0].index == 0
                    and i > 0
                    and tokens[i - 1].type == "text_special"
                ):
                    links = links[1:]

                for link in links:
                    url = link.url
                    fullUrl = state.md.normalizeLink(url)
                    if not state.md.validateLink(fullUrl):
                        continue

                    urlText = link.text

                    # Linkifier might send raw hostnames like "example.com", where url
                    # starts with domain name. So we prepend http:// in those cases,
                    # and remove it afterwards.
                    if not link.schema:
                        urlText = HTTP_RE.sub(
                            "", state.md.normalizeLinkText("http://" + urlText)
                        )
                    elif link.schema == "mailto:" and TEST_MAILTO_RE.search(urlText):
                        urlText = MAILTO_RE.sub(
                            "", state.md.normalizeLinkText("mailto:" + urlText)
                        )
                    else:
                        urlText = state.md.normalizeLinkText(urlText)

                    pos = link.index

                    if pos > lastPos:
                        token = Token("text", "", 0)
                        token.content = text[lastPos:pos]
                        token.level = level
                        nodes.append(token)

                    token = Token("link_open", "a", 1)
                    token.attrs = {"href": fullUrl}
                    token.level = level
                    level += 1
                    token.markup = "linkify"
                    token.info = "auto"
                    nodes.append(token)

                    token = Token("text", "", 0)
                    token.content = urlText
                    token.level = level
                    nodes.append(token)

                    token = Token("link_close", "a", -1)
                    level -= 1
                    token.level = level
                    token.markup = "linkify"
                    token.info = "auto"
                    nodes.append(token)

                    lastPos = link.last_index

                if lastPos < len(text):
                    token = Token("text", "", 0)
                    token.content = text[lastPos:]
                    token.level = level
                    nodes.append(token)

                inline_token.children = tokens = arrayReplaceAt(tokens, i, nodes)


class _LinkType(Protocol):
    url: str
    text: str
    index: int
    last_index: int
    schema: str | None