1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
|
from __future__ import annotations
import re
from typing import Protocol
from ..common.utils import arrayReplaceAt, isLinkClose, isLinkOpen
from ..token import Token
from .state_core import StateCore
HTTP_RE = re.compile(r"^http://")
MAILTO_RE = re.compile(r"^mailto:")
TEST_MAILTO_RE = re.compile(r"^mailto:", flags=re.IGNORECASE)
def linkify(state: StateCore) -> None:
"""Rule for identifying plain-text links."""
if not state.md.options.linkify:
return
if not state.md.linkify:
raise ModuleNotFoundError("Linkify enabled but not installed.")
for inline_token in state.tokens:
if inline_token.type != "inline" or not state.md.linkify.pretest(
inline_token.content
):
continue
tokens = inline_token.children
htmlLinkLevel = 0
# We scan from the end, to keep position when new tags added.
# Use reversed logic in links start/end match
assert tokens is not None
i = len(tokens)
while i >= 1:
i -= 1
assert isinstance(tokens, list)
currentToken = tokens[i]
# Skip content of markdown links
if currentToken.type == "link_close":
i -= 1
while (
tokens[i].level != currentToken.level
and tokens[i].type != "link_open"
):
i -= 1
continue
# Skip content of html tag links
if currentToken.type == "html_inline":
if isLinkOpen(currentToken.content) and htmlLinkLevel > 0:
htmlLinkLevel -= 1
if isLinkClose(currentToken.content):
htmlLinkLevel += 1
if htmlLinkLevel > 0:
continue
if currentToken.type == "text" and state.md.linkify.test(
currentToken.content
):
text = currentToken.content
links: list[_LinkType] = state.md.linkify.match(text) or []
# Now split string to nodes
nodes = []
level = currentToken.level
lastPos = 0
# forbid escape sequence at the start of the string,
# this avoids http\://example.com/ from being linkified as
# http:<a href="//example.com/">//example.com/</a>
if (
links
and links[0].index == 0
and i > 0
and tokens[i - 1].type == "text_special"
):
links = links[1:]
for link in links:
url = link.url
fullUrl = state.md.normalizeLink(url)
if not state.md.validateLink(fullUrl):
continue
urlText = link.text
# Linkifier might send raw hostnames like "example.com", where url
# starts with domain name. So we prepend http:// in those cases,
# and remove it afterwards.
if not link.schema:
urlText = HTTP_RE.sub(
"", state.md.normalizeLinkText("http://" + urlText)
)
elif link.schema == "mailto:" and TEST_MAILTO_RE.search(urlText):
urlText = MAILTO_RE.sub(
"", state.md.normalizeLinkText("mailto:" + urlText)
)
else:
urlText = state.md.normalizeLinkText(urlText)
pos = link.index
if pos > lastPos:
token = Token("text", "", 0)
token.content = text[lastPos:pos]
token.level = level
nodes.append(token)
token = Token("link_open", "a", 1)
token.attrs = {"href": fullUrl}
token.level = level
level += 1
token.markup = "linkify"
token.info = "auto"
nodes.append(token)
token = Token("text", "", 0)
token.content = urlText
token.level = level
nodes.append(token)
token = Token("link_close", "a", -1)
level -= 1
token.level = level
token.markup = "linkify"
token.info = "auto"
nodes.append(token)
lastPos = link.last_index
if lastPos < len(text):
token = Token("text", "", 0)
token.content = text[lastPos:]
token.level = level
nodes.append(token)
inline_token.children = tokens = arrayReplaceAt(tokens, i, nodes)
class _LinkType(Protocol):
url: str
text: str
index: int
last_index: int
schema: str | None
|