diff options
author | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:24:06 +0300 |
---|---|---|
committer | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:41:34 +0300 |
commit | e0e3e1717e3d33762ce61950504f9637a6e669ed (patch) | |
tree | bca3ff6939b10ed60c3d5c12439963a1146b9711 /contrib/python/Pygments/py3/pygments/lexers/markup.py | |
parent | 38f2c5852db84c7b4d83adfcb009eb61541d1ccd (diff) | |
download | ydb-e0e3e1717e3d33762ce61950504f9637a6e669ed.tar.gz |
add ydb deps
Diffstat (limited to 'contrib/python/Pygments/py3/pygments/lexers/markup.py')
-rw-r--r-- | contrib/python/Pygments/py3/pygments/lexers/markup.py | 1521 |
1 files changed, 1521 insertions, 0 deletions
diff --git a/contrib/python/Pygments/py3/pygments/lexers/markup.py b/contrib/python/Pygments/py3/pygments/lexers/markup.py new file mode 100644 index 0000000000..8794037097 --- /dev/null +++ b/contrib/python/Pygments/py3/pygments/lexers/markup.py @@ -0,0 +1,1521 @@ +""" + pygments.lexers.markup + ~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for non-HTML markup languages. + + :copyright: Copyright 2006-2023 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexers.html import XmlLexer +from pygments.lexers.javascript import JavascriptLexer +from pygments.lexers.css import CssLexer +from pygments.lexers.lilypond import LilyPondLexer +from pygments.lexers.data import JsonLexer + +from pygments.lexer import RegexLexer, DelegatingLexer, include, bygroups, \ + using, this, do_insertions, default, words +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Generic, Other, Whitespace +from pygments.util import get_bool_opt, ClassNotFound + +__all__ = ['BBCodeLexer', 'MoinWikiLexer', 'RstLexer', 'TexLexer', 'GroffLexer', + 'MozPreprocHashLexer', 'MozPreprocPercentLexer', + 'MozPreprocXulLexer', 'MozPreprocJavascriptLexer', + 'MozPreprocCssLexer', 'MarkdownLexer', 'TiddlyWiki5Lexer', 'WikitextLexer'] + + +class BBCodeLexer(RegexLexer): + """ + A lexer that highlights BBCode(-like) syntax. + + .. 
versionadded:: 0.6 + """ + + name = 'BBCode' + aliases = ['bbcode'] + mimetypes = ['text/x-bbcode'] + + tokens = { + 'root': [ + (r'[^[]+', Text), + # tag/end tag begin + (r'\[/?\w+', Keyword, 'tag'), + # stray bracket + (r'\[', Text), + ], + 'tag': [ + (r'\s+', Text), + # attribute with value + (r'(\w+)(=)("?[^\s"\]]+"?)', + bygroups(Name.Attribute, Operator, String)), + # tag argument (a la [color=green]) + (r'(=)("?[^\s"\]]+"?)', + bygroups(Operator, String)), + # tag end + (r'\]', Keyword, '#pop'), + ], + } + + +class MoinWikiLexer(RegexLexer): + """ + For MoinMoin (and Trac) Wiki markup. + + .. versionadded:: 0.7 + """ + + name = 'MoinMoin/Trac Wiki markup' + aliases = ['trac-wiki', 'moin'] + filenames = [] + mimetypes = ['text/x-trac-wiki'] + flags = re.MULTILINE | re.IGNORECASE + + tokens = { + 'root': [ + (r'^#.*$', Comment), + (r'(!)(\S+)', bygroups(Keyword, Text)), # Ignore-next + # Titles + (r'^(=+)([^=]+)(=+)(\s*#.+)?$', + bygroups(Generic.Heading, using(this), Generic.Heading, String)), + # Literal code blocks, with optional shebang + (r'(\{\{\{)(\n#!.+)?', bygroups(Name.Builtin, Name.Namespace), 'codeblock'), + (r'(\'\'\'?|\|\||`|__|~~|\^|,,|::)', Comment), # Formatting + # Lists + (r'^( +)([.*-])( )', bygroups(Text, Name.Builtin, Text)), + (r'^( +)([a-z]{1,5}\.)( )', bygroups(Text, Name.Builtin, Text)), + # Other Formatting + (r'\[\[\w+.*?\]\]', Keyword), # Macro + (r'(\[[^\s\]]+)(\s+[^\]]+?)?(\])', + bygroups(Keyword, String, Keyword)), # Link + (r'^----+$', Keyword), # Horizontal rules + (r'[^\n\'\[{!_~^,|]+', Text), + (r'\n', Text), + (r'.', Text), + ], + 'codeblock': [ + (r'\}\}\}', Name.Builtin, '#pop'), + # these blocks are allowed to be nested in Trac, but not MoinMoin + (r'\{\{\{', Text, '#push'), + (r'[^{}]+', Comment.Preproc), # slurp boring text + (r'.', Comment.Preproc), # allow loose { or } + ], + } + + +class RstLexer(RegexLexer): + """ + For reStructuredText markup. + + .. 
def _handle_sourcecode(self, match):
    """
    Callback for the sourcecode directive rule.

    Yields ``(index, tokentype, value)`` tuples. Groups 1-7 of *match* are
    emitted as the directive header. Groups 8-11 (indentation, first code
    line, newline, remaining lines) form the code body, which is either
    emitted as one ``String`` token or handed to the lexer named by group 6
    when ``self.handlecodeblocks`` is set and such a lexer exists.
    """
    from pygments.lexers import get_lexer_by_name

    # section header
    yield match.start(1), Punctuation, match.group(1)
    yield match.start(2), Text, match.group(2)
    yield match.start(3), Operator.Word, match.group(3)
    yield match.start(4), Punctuation, match.group(4)
    yield match.start(5), Text, match.group(5)
    yield match.start(6), Keyword, match.group(6)
    yield match.start(7), Text, match.group(7)

    # lookup lexer if wanted and existing
    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name(match.group(6).strip())
        except ClassNotFound:
            pass
    indention = match.group(8)
    indention_size = len(indention)
    code = (indention + match.group(9) + match.group(10) + match.group(11))
    # Groups 8-11 are contiguous in the rule, so the body starts at group 8.
    code_start = match.start(8)

    # no lexer for this language. handle it like it was a code block
    if lexer is None:
        yield code_start, String, code
        return

    # highlight the lines with the lexer: strip the common indentation from
    # every line long enough to carry it and re-insert it as Text tokens.
    ins = []
    stripped_lines = []
    offset = 0  # length of the stripped code collected so far
    for line in code.splitlines(True):
        if len(line) > indention_size:
            ins.append((offset, [(0, Text, line[:indention_size])]))
            line = line[indention_size:]
        stripped_lines.append(line)
        offset += len(line)

    # The sub-lexer (and therefore do_insertions) counts from the start of
    # the code body; shift the indices so they are positions in the text
    # this match was taken from, like every other token yielded above.
    tokens = lexer.get_tokens_unprocessed(''.join(stripped_lines))
    for index, token, value in do_insertions(ins, tokens):
        yield code_start + index, token, value
+ unicode_delimiters = '\u2010\u2011\u2012\u2013\u2014\u00a0' + end_string_suffix = (r'((?=$)|(?=[-/:.,; \n\x00%s%s]))' + % (re.escape(unicode_delimiters), + re.escape(closers))) + + tokens = { + 'root': [ + # Heading with overline + (r'^(=+|-+|`+|:+|\.+|\'+|"+|~+|\^+|_+|\*+|\++|#+)([ \t]*\n)' + r'(.+)(\n)(\1)(\n)', + bygroups(Generic.Heading, Text, Generic.Heading, + Text, Generic.Heading, Text)), + # Plain heading + (r'^(\S.*)(\n)(={3,}|-{3,}|`{3,}|:{3,}|\.{3,}|\'{3,}|"{3,}|' + r'~{3,}|\^{3,}|_{3,}|\*{3,}|\+{3,}|#{3,})(\n)', + bygroups(Generic.Heading, Text, Generic.Heading, Text)), + # Bulleted lists + (r'^(\s*)([-*+])( .+\n(?:\1 .+\n)*)', + bygroups(Text, Number, using(this, state='inline'))), + # Numbered lists + (r'^(\s*)([0-9#ivxlcmIVXLCM]+\.)( .+\n(?:\1 .+\n)*)', + bygroups(Text, Number, using(this, state='inline'))), + (r'^(\s*)(\(?[0-9#ivxlcmIVXLCM]+\))( .+\n(?:\1 .+\n)*)', + bygroups(Text, Number, using(this, state='inline'))), + # Numbered, but keep words at BOL from becoming lists + (r'^(\s*)([A-Z]+\.)( .+\n(?:\1 .+\n)+)', + bygroups(Text, Number, using(this, state='inline'))), + (r'^(\s*)(\(?[A-Za-z]+\))( .+\n(?:\1 .+\n)+)', + bygroups(Text, Number, using(this, state='inline'))), + # Line blocks + (r'^(\s*)(\|)( .+\n(?:\| .+\n)*)', + bygroups(Text, Operator, using(this, state='inline'))), + # Sourcecode directives + (r'^( *\.\.)(\s*)((?:source)?code(?:-block)?)(::)([ \t]*)([^\n]+)' + r'(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\8.*)?\n)+)', + _handle_sourcecode), + # A directive + (r'^( *\.\.)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', + bygroups(Punctuation, Text, Operator.Word, Punctuation, Text, + using(this, state='inline'))), + # A reference target + (r'^( *\.\.)(\s*)(_(?:[^:\\]|\\.)+:)(.*?)$', + bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), + # A footnote/citation target + (r'^( *\.\.)(\s*)(\[.+\])(.*?)$', + bygroups(Punctuation, Text, Name.Tag, using(this, state='inline'))), + # A substitution def + (r'^( 
*\.\.)(\s*)(\|.+\|)(\s*)([\w:-]+?)(::)(?:([ \t]*)(.*))', + bygroups(Punctuation, Text, Name.Tag, Text, Operator.Word, + Punctuation, Text, using(this, state='inline'))), + # Comments + (r'^ *\.\..*(\n( +.*\n|\n)+)?', Comment.Preproc), + # Field list marker + (r'^( *)(:(?:\\\\|\\:|[^:\n])+:(?=\s))([ \t]*)', + bygroups(Text, Name.Class, Text)), + # Definition list + (r'^(\S.*(?<!::)\n)((?:(?: +.*)\n)+)', + bygroups(using(this, state='inline'), using(this, state='inline'))), + # Code blocks + (r'(::)(\n[ \t]*\n)([ \t]+)(.*)(\n)((?:(?:\3.*)?\n)+)', + bygroups(String.Escape, Text, String, String, Text, String)), + include('inline'), + ], + 'inline': [ + (r'\\.', Text), # escape + (r'``', String, 'literal'), # code + (r'(`.+?)(<.+?>)(`__?)', # reference with inline target + bygroups(String, String.Interpol, String)), + (r'`.+?`__?', String), # reference + (r'(`.+?`)(:[a-zA-Z0-9:-]+?:)?', + bygroups(Name.Variable, Name.Attribute)), # role + (r'(:[a-zA-Z0-9:-]+?:)(`.+?`)', + bygroups(Name.Attribute, Name.Variable)), # role (content first) + (r'\*\*.+?\*\*', Generic.Strong), # Strong emphasis + (r'\*.+?\*', Generic.Emph), # Emphasis + (r'\[.*?\]_', String), # Footnote or citation + (r'<.+?>', Name.Tag), # Hyperlink + (r'[^\\\n\[*`:]+', Text), + (r'.', Text), + ], + 'literal': [ + (r'[^`]+', String), + (r'``' + end_string_suffix, String, '#pop'), + (r'`', String), + ] + } + + def __init__(self, **options): + self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) + RegexLexer.__init__(self, **options) + + def analyse_text(text): + if text[:2] == '..' and text[2:3] != '.': + return 0.3 + p1 = text.find("\n") + p2 = text.find("\n", p1 + 1) + if (p2 > -1 and # has two lines + p1 * 2 + 1 == p2 and # they are the same length + text[p1+1] in '-=' and # the next line both starts and ends with + text[p1+1] == text[p2-1]): # ...a sufficiently high header + return 0.5 + + +class TexLexer(RegexLexer): + """ + Lexer for the TeX and LaTeX typesetting languages. 
def analyse_text(text):
    r"""
    Recognise TeX/LaTeX input by its opening control sequence.

    Returns ``True`` when *text* begins with ``\documentclass``, ``\input``,
    ``\documentstyle`` or ``\relax``; otherwise the function falls off the
    end and returns ``None``.
    """
    # str.startswith accepts a tuple of prefixes, which replaces the manual
    # slice-and-compare loop over the candidates.
    if text.startswith(("\\documentclass", "\\input", "\\documentstyle",
                        "\\relax")):
        return True
def analyse_text(text):
    """
    Guess whether *text* is (g)roff source from its first characters.

    Returns ``False`` when the text does not start with a dot, ``True`` when
    it starts with a ``.\\"`` comment or a ``.TH `` request, ``0.9`` when the
    dot is followed by two alphanumeric characters and then whitespace, and
    ``None`` (by falling off the end) otherwise.
    """
    if text[:1] != '.':
        return False
    if text[:3] == '.\\"':
        return True
    if text[:4] == '.TH ':
        return True
    # Slice rather than index: ``text[3]`` raised IndexError for inputs
    # shorter than four characters (e.g. '.ab'); an empty slice is simply
    # not whitespace.
    if text[1:3].isalnum() and text[3:4].isspace():
        return 0.9
class MozPreprocXulLexer(DelegatingLexer):
    """
    A `DelegatingLexer` built from the `XmlLexer` and the
    `MozPreprocHashLexer`: data left unlexed by the preprocessor lexer is
    highlighted with the `XmlLexer`.

    .. versionadded:: 2.0
    """
    name = "XUL+mozpreproc"
    aliases = ['xul+mozpreproc']
    filenames = ['*.xul.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(XmlLexer, MozPreprocHashLexer, **options)


class MozPreprocJavascriptLexer(DelegatingLexer):
    """
    A `DelegatingLexer` built from the `JavascriptLexer` and the
    `MozPreprocHashLexer`: data left unlexed by the preprocessor lexer is
    highlighted with the `JavascriptLexer`.

    .. versionadded:: 2.0
    """
    name = "Javascript+mozpreproc"
    aliases = ['javascript+mozpreproc']
    filenames = ['*.js.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(JavascriptLexer, MozPreprocHashLexer, **options)


class MozPreprocCssLexer(DelegatingLexer):
    """
    A `DelegatingLexer` built from the `CssLexer` and the
    `MozPreprocPercentLexer` (the '%'-marker variant, not the '#' one):
    data left unlexed by the preprocessor lexer is highlighted with the
    `CssLexer`.

    .. versionadded:: 2.0
    """
    name = "CSS+mozpreproc"
    aliases = ['css+mozpreproc']
    filenames = ['*.css.in']
    mimetypes = []

    def __init__(self, **options):
        super().__init__(CssLexer, MozPreprocPercentLexer, **options)
def _handle_codeblock(self, match):
    """
    Callback for a fenced code block that names a language.

    Yields ``(index, tokentype, value)`` tuples for the named groups of
    *match*: the opening fence (``initial``), the language name (``lang``),
    the optional ``whitespace``/``extra`` text after it, the ``newline``,
    the ``code`` body and the closing fence (``terminator``). The body is
    lexed with the lexer named by ``lang`` when ``self.handlecodeblocks``
    is set and such a lexer exists; otherwise it is one ``String`` token.
    """
    from pygments.lexers import get_lexer_by_name

    yield match.start('initial'), String.Backtick, match.group('initial')
    yield match.start('lang'), String.Backtick, match.group('lang')
    # 'whitespace' and 'extra' live inside the optional 'afterlang' group,
    # so they only exist when that group took part in the match.
    if match.group('afterlang') is not None:
        yield match.start('whitespace'), Whitespace, match.group('whitespace')
        yield match.start('extra'), Text, match.group('extra')
    yield match.start('newline'), Whitespace, match.group('newline')

    # lookup lexer if wanted and existing
    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name(match.group('lang').strip())
        except ClassNotFound:
            pass
    code = match.group('code')
    code_start = match.start('code')
    # no lexer for this language. handle it like it was a code block
    if lexer is None:
        yield code_start, String, code
    else:
        # The sub-lexer counts from the start of `code`; shift its indices
        # so they are positions in the text this match was taken from,
        # like every other token yielded here.
        for index, token, value in lexer.get_tokens_unprocessed(code):
            yield code_start + index, token, value

    yield match.start('terminator'), String.Backtick, match.group('terminator')
**foo _bar_ baz** => foo and baz are not recognized as bold + # bold fenced by '**' + (r'([^\*]?)(\*\*[^* \n][^*\n]*\*\*)', bygroups(Text, Generic.Strong)), + # bold fenced by '__' + (r'([^_]?)(__[^_ \n][^_\n]*__)', bygroups(Text, Generic.Strong)), + # italics fenced by '*' + (r'([^\*]?)(\*[^* \n][^*\n]*\*)', bygroups(Text, Generic.Emph)), + # italics fenced by '_' + (r'([^_]?)(_[^_ \n][^_\n]*_)', bygroups(Text, Generic.Emph)), + # strikethrough + (r'([^~]?)(~~[^~ \n][^~\n]*~~)', bygroups(Text, Generic.Deleted)), + # mentions and topics (twitter and github stuff) + (r'[@#][\w/:]+', Name.Entity), + # (image?) links eg:  + (r'(!?\[)([^]]+)(\])(\()([^)]+)(\))', + bygroups(Text, Name.Tag, Text, Text, Name.Attribute, Text)), + # reference-style links, e.g.: + # [an example][id] + # [id]: http://example.com/ + (r'(\[)([^]]+)(\])(\[)([^]]*)(\])', + bygroups(Text, Name.Tag, Text, Text, Name.Label, Text)), + (r'^(\s*\[)([^]]*)(\]:\s*)(.+)', + bygroups(Text, Name.Label, Text, Name.Attribute)), + + # general text, must come last! + (r'[^\\\s]+', Text), + (r'.', Text), + ], + } + + def __init__(self, **options): + self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) + RegexLexer.__init__(self, **options) + + +class TiddlyWiki5Lexer(RegexLexer): + """ + For TiddlyWiki5 markup. + + .. 
def _handle_codeblock(self, match):
    """
    Callback for a fenced code block that names a language.

    match args: 1:backticks, 2:lang_name, 3:newline, 4:code, 5:backticks

    Yields ``(index, tokentype, value)`` tuples covering all five groups.
    The code (group 4) is lexed with the lexer named by group 2 when
    ``self.handlecodeblocks`` is set and such a lexer exists; otherwise it
    is emitted as a single ``String`` token.
    """
    from pygments.lexers import get_lexer_by_name

    # section header
    yield match.start(1), String, match.group(1)
    yield match.start(2), String, match.group(2)
    yield match.start(3), Text, match.group(3)

    # lookup lexer if wanted and existing
    lexer = None
    if self.handlecodeblocks:
        try:
            lexer = get_lexer_by_name(match.group(2).strip())
        except ClassNotFound:
            pass
    code = match.group(4)
    code_start = match.start(4)

    if lexer is None:
        # no lexer for this language. handle it like it was a code block
        yield code_start, String, code
    else:
        # The sub-lexer counts from the start of `code`; shift its indices
        # so they are positions in the text this match was taken from.
        for index, token, value in lexer.get_tokens_unprocessed(code):
            yield code_start + index, token, value

    # Emit the closing fence on both paths: returning early in the
    # no-lexer case dropped group 5 from the token stream.
    yield match.start(5), String, match.group(5)
handle it like it was a code block + if lexer is None: + yield match.start(3), String, code + return + + yield from do_insertions([], lexer.get_tokens_unprocessed(code)) + + yield match.start(4), String, match.group(4) + + tokens = { + 'root': [ + # title in metadata section + (r'^(title)(:\s)(.+\n)', bygroups(Keyword, Text, Generic.Heading)), + # headings + (r'^(!)([^!].+\n)', bygroups(Generic.Heading, Text)), + (r'^(!{2,6})(.+\n)', bygroups(Generic.Subheading, Text)), + # bulleted or numbered lists or single-line block quotes + # (can be mixed) + (r'^(\s*)([*#>]+)(\s*)(.+\n)', + bygroups(Text, Keyword, Text, using(this, state='inline'))), + # multi-line block quotes + (r'^(<<<.*\n)([\w\W]*?)(^<<<.*$)', bygroups(String, Text, String)), + # table header + (r'^(\|.*?\|h)$', bygroups(Generic.Strong)), + # table footer or caption + (r'^(\|.*?\|[cf])$', bygroups(Generic.Emph)), + # table class + (r'^(\|.*?\|k)$', bygroups(Name.Tag)), + # definitions + (r'^(;.*)$', bygroups(Generic.Strong)), + # text block + (r'^(```\n)([\w\W]*?)(^```$)', bygroups(String, Text, String)), + # code block with language + (r'^(```)(\w+)(\n)([\w\W]*?)(^```$)', _handle_codeblock), + # CSS style block + (r'^(<style>)(\n)([\w\W]*?)(^</style>$)', _handle_cssblock), + + include('keywords'), + include('inline'), + ], + 'keywords': [ + (words(( + '\\define', '\\end', 'caption', 'created', 'modified', 'tags', + 'title', 'type'), prefix=r'^', suffix=r'\b'), + Keyword), + ], + 'inline': [ + # escape + (r'\\.', Text), + # created or modified date + (r'\d{17}', Number.Integer), + # italics + (r'(\s)(//[^/]+//)((?=\W|\n))', + bygroups(Text, Generic.Emph, Text)), + # superscript + (r'(\s)(\^\^[^\^]+\^\^)', bygroups(Text, Generic.Emph)), + # subscript + (r'(\s)(,,[^,]+,,)', bygroups(Text, Generic.Emph)), + # underscore + (r'(\s)(__[^_]+__)', bygroups(Text, Generic.Strong)), + # bold + (r"(\s)(''[^']+'')((?=\W|\n))", + bygroups(Text, Generic.Strong, Text)), + # strikethrough + 
(r'(\s)(~~[^~]+~~)((?=\W|\n))', + bygroups(Text, Generic.Deleted, Text)), + # TiddlyWiki variables + (r'<<[^>]+>>', Name.Tag), + (r'\$\$[^$]+\$\$', Name.Tag), + (r'\$\([^)]+\)\$', Name.Tag), + # TiddlyWiki style or class + (r'^@@.*$', Name.Tag), + # HTML tags + (r'</?[^>]+>', Name.Tag), + # inline code + (r'`[^`]+`', String.Backtick), + # HTML escaped symbols + (r'&\S*?;', String.Regex), + # Wiki links + (r'(\[{2})([^]\|]+)(\]{2})', bygroups(Text, Name.Tag, Text)), + # External links + (r'(\[{2})([^]\|]+)(\|)([^]\|]+)(\]{2})', + bygroups(Text, Name.Tag, Text, Name.Attribute, Text)), + # Transclusion + (r'(\{{2})([^}]+)(\}{2})', bygroups(Text, Name.Tag, Text)), + # URLs + (r'(\b.?.?tps?://[^\s"]+)', bygroups(Name.Attribute)), + + # general text, must come last! + (r'[\w]+', Text), + (r'.', Text) + ], + } + + def __init__(self, **options): + self.handlecodeblocks = get_bool_opt(options, 'handlecodeblocks', True) + RegexLexer.__init__(self, **options) + + +class WikitextLexer(RegexLexer): + """ + For MediaWiki Wikitext. + + Parsing Wikitext is tricky, and results vary between different MediaWiki + installations, so we only highlight common syntaxes (built-in or from + popular extensions), and also assume templates produce no unbalanced + syntaxes. + + .. 
def handle_syntaxhighlight(self, match, ctx):
    """
    Callback that splits the matched text into tag attributes and content.

    ``match.group()`` holds everything after the tag name: the attributes,
    the ``>`` that closes the opening tag, and the tag content. The
    attributes are lexed with this lexer's ``attr`` state. The content is
    lexed with the lexer named by the last ``lang=...`` attribute when one
    is found, and emitted as a single ``Text`` token otherwise. When no
    closing ``>`` exists, the whole text is lexed as attributes.

    Yields ``(index, tokentype, value)`` tuples; *ctx* is not used.
    """
    attr_content = match.group()
    # Every position below is computed inside match.group(), so it is
    # turned into a position in the lexed text by adding match.start().
    base = match.start()

    start = 0
    while True:
        index = attr_content.find('>', start)
        # Stop when there is no '>' left. Without this check a miss (-1)
        # made the slice below read the text's tail, and a tail like
        # 'x--y' restarted the scan from 0 forever.
        # Exclude comment end (-->)
        if index == -1 or attr_content[index-2:index] != '--':
            break
        start = index + 1

    if index == -1:
        # No tag end
        for pos, token, value in self.get_tokens_unprocessed(
                attr_content, stack=['root', 'attr']):
            yield base + pos, token, value
        return
    attr = attr_content[:index]
    for pos, token, value in self.get_tokens_unprocessed(
            attr, stack=['root', 'attr']):
        yield base + pos, token, value
    yield base + index, Punctuation, '>'

    lexer = None
    content = attr_content[index+1:]
    lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr)

    if len(lang_match) >= 1:
        # Imported lazily, only when a lang attribute has to be resolved.
        from pygments.lexers import get_lexer_by_name

        # Pick the last match in case of multiple matches
        lang = lang_match[-1][1]
        try:
            lexer = get_lexer_by_name(lang)
        except ClassNotFound:
            pass

    content_start = base + index + 1
    if lexer is None:
        yield content_start, Text, content
    else:
        # The sub-lexer counts from the start of `content`.
        for pos, token, value in lexer.get_tokens_unprocessed(content):
            yield content_start + pos, token, value
match.group() + start = 0 + index = 0 + while True: + index = attr_content.find('>', start) + # Exclude comment end (-->) + if attr_content[index-2:index] != '--': + break + start = index + 1 + + if index == -1: + # No tag end + yield from self.get_tokens_unprocessed(attr_content, stack=['root', 'attr']) + return + attr = attr_content[:index] + content = attr_content[index+1:] + yield from self.get_tokens_unprocessed(attr, stack=['root', 'attr']) + yield match.start(3) + index, Punctuation, '>' + + lang_match = re.findall(r'\blang=("|\'|)(\w+)(\1)', attr) + # Pick the last match in case of multiple matches + lang = lang_match[-1][1] if len(lang_match) >= 1 else 'lilypond' + + if lang == 'lilypond': # Case sensitive + yield from LilyPondLexer().get_tokens_unprocessed(content) + else: # ABC + # FIXME: Use ABC lexer in the future + yield match.start() + index + 1, Text, content + + # a-z removed to prevent linter from complaining, REMEMBER to use (?i) + title_char = r' %!"$&\'()*,\-./0-9:;=?@A-Z\\\^_`~+\u0080-\uFFFF' + nbsp_char = r'(?:\t| |&\#0*160;|&\#[Xx]0*[Aa]0;|[ \xA0\u1680\u2000-\u200A\u202F\u205F\u3000])' + link_address = r'(?:[0-9.]+|\[[0-9a-f:.]+\]|[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD])' + link_char_class = r'[^\x00-\x20"<>\[\]\x7F\xA0\u1680\u2000-\u200A\u202F\u205F\u3000\uFFFD]' + double_slashes_i = { + '__FORCETOC__', '__NOCONTENTCONVERT__', '__NOCC__', '__NOEDITSECTION__', '__NOGALLERY__', + '__NOTITLECONVERT__', '__NOTC__', '__NOTOC__', '__TOC__', + } + double_slashes = { + '__EXPECTUNUSEDCATEGORY__', '__HIDDENCAT__', '__INDEX__', '__NEWSECTIONLINK__', + '__NOINDEX__', '__NONEWSECTIONLINK__', '__STATICREDIRECT__', '__NOGLOBAL__', + '__DISAMBIG__', '__EXPECTED_UNCONNECTED_PAGE__', + } + protocols = { + 'bitcoin:', 'ftp://', 'ftps://', 'geo:', 'git://', 'gopher://', 'http://', 'https://', + 'irc://', 'ircs://', 'magnet:', 'mailto:', 'mms://', 'news:', 'nntp://', 'redis://', + 'sftp://', 'sip:', 'sips:', 'sms:', 'ssh://', 
'svn://', 'tel:', 'telnet://', 'urn:', + 'worldwind://', 'xmpp:', '//', + } + non_relative_protocols = protocols - {'//'} + html_tags = { + 'abbr', 'b', 'bdi', 'bdo', 'big', 'blockquote', 'br', 'caption', 'center', 'cite', 'code', + 'data', 'dd', 'del', 'dfn', 'div', 'dl', 'dt', 'em', 'font', 'h1', 'h2', 'h3', 'h4', 'h5', + 'h6', 'hr', 'i', 'ins', 'kbd', 'li', 'link', 'mark', 'meta', 'ol', 'p', 'q', 'rb', 'rp', + 'rt', 'rtc', 'ruby', 's', 'samp', 'small', 'span', 'strike', 'strong', 'sub', 'sup', + 'table', 'td', 'th', 'time', 'tr', 'tt', 'u', 'ul', 'var', 'wbr', + } + parser_tags = { + 'graph', 'charinsert', 'rss', 'chem', 'categorytree', 'nowiki', 'inputbox', 'math', + 'hiero', 'score', 'pre', 'ref', 'translate', 'imagemap', 'templatestyles', 'languages', + 'noinclude', 'mapframe', 'section', 'poem', 'syntaxhighlight', 'includeonly', 'tvar', + 'onlyinclude', 'templatedata', 'langconvert', 'timeline', 'dynamicpagelist', 'gallery', + 'maplink', 'ce', 'references', + } + variant_langs = { + # ZhConverter.php + 'zh', 'zh-hans', 'zh-hant', 'zh-cn', 'zh-hk', 'zh-mo', 'zh-my', 'zh-sg', 'zh-tw', + # UnConverter.php + 'uz', 'uz-latn', 'uz-cyrl', + # TlyConverter.php + 'tly', 'tly-cyrl', + # TgConverter.php + 'tg', 'tg-latn', + # SrConverter.php + 'sr', 'sr-ec', 'sr-el', + # ShiConverter.php + 'shi', 'shi-tfng', 'shi-latn', + # ShConverter.php + 'sh-latn', 'sh-cyrl', + # KuConverter.php + 'ku', 'ku-arab', 'ku-latn', + # KkConverter.php + 'kk', 'kk-cyrl', 'kk-latn', 'kk-arab', 'kk-kz', 'kk-tr', 'kk-cn', + # IuConverter.php + 'iu', 'ike-cans', 'ike-latn', + # GanConverter.php + 'gan', 'gan-hans', 'gan-hant', + # EnConverter.php + 'en', 'en-x-piglatin', + # CrhConverter.php + 'crh', 'crh-cyrl', 'crh-latn', + # BanConverter.php + 'ban', 'ban-bali', 'ban-x-dharma', 'ban-x-palmleaf', 'ban-x-pku', + } + magic_vars_i = { + 'ARTICLEPATH', 'INT', 'PAGEID', 'SCRIPTPATH', 'SERVER', 'SERVERNAME', 'STYLEPATH', + } + magic_vars = { + '!', '=', 'BASEPAGENAME', 'BASEPAGENAMEE', 
'CASCADINGSOURCES', 'CONTENTLANGUAGE', + 'CONTENTLANG', 'CURRENTDAY', 'CURRENTDAY2', 'CURRENTDAYNAME', 'CURRENTDOW', 'CURRENTHOUR', + 'CURRENTMONTH', 'CURRENTMONTH2', 'CURRENTMONTH1', 'CURRENTMONTHABBREV', 'CURRENTMONTHNAME', + 'CURRENTMONTHNAMEGEN', 'CURRENTTIME', 'CURRENTTIMESTAMP', 'CURRENTVERSION', 'CURRENTWEEK', + 'CURRENTYEAR', 'DIRECTIONMARK', 'DIRMARK', 'FULLPAGENAME', 'FULLPAGENAMEE', 'LOCALDAY', + 'LOCALDAY2', 'LOCALDAYNAME', 'LOCALDOW', 'LOCALHOUR', 'LOCALMONTH', 'LOCALMONTH2', + 'LOCALMONTH1', 'LOCALMONTHABBREV', 'LOCALMONTHNAME', 'LOCALMONTHNAMEGEN', 'LOCALTIME', + 'LOCALTIMESTAMP', 'LOCALWEEK', 'LOCALYEAR', 'NAMESPACE', 'NAMESPACEE', 'NAMESPACENUMBER', + 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', 'NUMBEROFARTICLES', 'NUMBEROFEDITS', + 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', 'PAGELANGUAGE', 'PAGENAME', 'PAGENAMEE', + 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', 'REVISIONMONTH1', + 'REVISIONSIZE', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME', + 'ROOTPAGENAMEE', 'SITENAME', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE', + 'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE', + 'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE', + } + parser_functions_i = { + 'ANCHORENCODE', 'BIDI', 'CANONICALURL', 'CANONICALURLE', 'FILEPATH', 'FORMATNUM', + 'FULLURL', 'FULLURLE', 'GENDER', 'GRAMMAR', 'INT', r'\#LANGUAGE', 'LC', 'LCFIRST', 'LOCALURL', + 'LOCALURLE', 'NS', 'NSE', 'PADLEFT', 'PADRIGHT', 'PAGEID', 'PLURAL', 'UC', 'UCFIRST', + 'URLENCODE', + } + parser_functions = { + 'BASEPAGENAME', 'BASEPAGENAMEE', 'CASCADINGSOURCES', 'DEFAULTSORT', 'DEFAULTSORTKEY', + 'DEFAULTCATEGORYSORT', 'FULLPAGENAME', 'FULLPAGENAMEE', 'NAMESPACE', 'NAMESPACEE', + 'NAMESPACENUMBER', 'NUMBERINGROUP', 'NUMINGROUP', 'NUMBEROFACTIVEUSERS', 'NUMBEROFADMINS', + 'NUMBEROFARTICLES', 'NUMBEROFEDITS', 'NUMBEROFFILES', 'NUMBEROFPAGES', 'NUMBEROFUSERS', + 'PAGENAME', 
'PAGENAMEE', 'PAGESINCATEGORY', 'PAGESINCAT', 'PAGESIZE', 'PROTECTIONEXPIRY', + 'PROTECTIONLEVEL', 'REVISIONDAY', 'REVISIONDAY2', 'REVISIONID', 'REVISIONMONTH', + 'REVISIONMONTH1', 'REVISIONTIMESTAMP', 'REVISIONUSER', 'REVISIONYEAR', 'ROOTPAGENAME', + 'ROOTPAGENAMEE', 'SUBJECTPAGENAME', 'ARTICLEPAGENAME', 'SUBJECTPAGENAMEE', + 'ARTICLEPAGENAMEE', 'SUBJECTSPACE', 'ARTICLESPACE', 'SUBJECTSPACEE', 'ARTICLESPACEE', + 'SUBPAGENAME', 'SUBPAGENAMEE', 'TALKPAGENAME', 'TALKPAGENAMEE', 'TALKSPACE', 'TALKSPACEE', + 'INT', 'DISPLAYTITLE', 'PAGESINNAMESPACE', 'PAGESINNS', + } + + tokens = { + 'root': [ + # Redirects + (r"""(?xi) + (\A\s*?)(\#REDIRECT:?) # may contain a colon + (\s+)(\[\[) (?=[^\]\n]* \]\]$) + """, + bygroups(Whitespace, Keyword, Whitespace, Punctuation), 'redirect-inner'), + # Subheadings + (r'^(={2,6})(.+?)(\1)(\s*$\n)', + bygroups(Generic.Subheading, Generic.Subheading, Generic.Subheading, Whitespace)), + # Headings + (r'^(=.+?=)(\s*$\n)', + bygroups(Generic.Heading, Whitespace)), + # Double-slashed magic words + (words(double_slashes_i, prefix=r'(?i)'), Name.Function.Magic), + (words(double_slashes), Name.Function.Magic), + # Raw URLs + (r'(?i)\b(?:{}){}{}*'.format('|'.join(protocols), + link_address, link_char_class), Name.Label), + # Magic links + (r'\b(?:RFC|PMID){}+[0-9]+\b'.format(nbsp_char), + Name.Function.Magic), + (r"""(?x) + \bISBN {nbsp_char} + (?: 97[89] {nbsp_dash}? )? + (?: [0-9] {nbsp_dash}? 
){{9}} # escape format() + [0-9Xx]\b + """.format(nbsp_char=nbsp_char, nbsp_dash=f'(?:-|{nbsp_char})'), Name.Function.Magic), + include('list'), + include('inline'), + include('text'), + ], + 'redirect-inner': [ + (r'(\]\])(\s*?\n)', bygroups(Punctuation, Whitespace), '#pop'), + (r'(\#)([^#]*?)', bygroups(Punctuation, Name.Label)), + (r'(?i)[{}]+'.format(title_char), Name.Tag), + ], + 'list': [ + # Description lists + (r'^;', Keyword, 'dt'), + # Ordered lists, unordered lists and indents + (r'^[#:*]+', Keyword), + # Horizontal rules + (r'^-{4,}', Keyword), + ], + 'inline': [ + # Signatures + (r'~{3,5}', Keyword), + # Entities + include('entity'), + # Bold & italic + (r"('')(''')(?!')", bygroups(Generic.Emph, + Generic.EmphStrong), 'inline-italic-bold'), + (r"'''(?!')", Generic.Strong, 'inline-bold'), + (r"''(?!')", Generic.Emph, 'inline-italic'), + # Comments & parameters & templates + include('replaceable'), + # Media links + ( + r"""(?xi) + (\[\[) + (File|Image) (:) + ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*) + (?: (\#) ([%s]*?) )? + """ % (title_char, f'{title_char}#'), + bygroups(Punctuation, Name.Namespace, Punctuation, + using(this, state=['wikilink-name']), Punctuation, Name.Label), + 'medialink-inner' + ), + # Wikilinks + ( + r"""(?xi) + (\[\[)(?!%s) # Should not contain URLs + (?: ([%s]*) (:))? + ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*?) + (?: (\#) ([%s]*?) )? + (\]\]) + """ % ('|'.join(protocols), title_char.replace('/', ''), + title_char, f'{title_char}#'), + bygroups(Punctuation, Name.Namespace, Punctuation, + using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation) + ), + ( + r"""(?xi) + (\[\[)(?!%s) + (?: ([%s]*) (:))? + ((?: [%s] | \{{2,3}[^{}]*?\}{2,3} | <!--[\s\S]*?--> )*?) + (?: (\#) ([%s]*?) )? 
+ (\|) + """ % ('|'.join(protocols), title_char.replace('/', ''), + title_char, f'{title_char}#'), + bygroups(Punctuation, Name.Namespace, Punctuation, + using(this, state=['wikilink-name']), Punctuation, Name.Label, Punctuation), + 'wikilink-inner' + ), + # External links + ( + r"""(?xi) + (\[) + ((?:{}) {} {}*) + (\s*) + """.format('|'.join(protocols), link_address, link_char_class), + bygroups(Punctuation, Name.Label, Whitespace), + 'extlink-inner' + ), + # Tables + (r'^(:*)(\s*?)(\{\|)([^\n]*)$', bygroups(Keyword, + Whitespace, Punctuation, using(this, state=['root', 'attr'])), 'table'), + # HTML tags + (r'(?i)(<)({})\b'.format('|'.join(html_tags)), + bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), + (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(html_tags)), + bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), + # <nowiki> + (r'(?i)(<)(nowiki)\b', bygroups(Punctuation, + Name.Tag), ('tag-nowiki', 'tag-inner')), + # <pre> + (r'(?i)(<)(pre)\b', bygroups(Punctuation, + Name.Tag), ('tag-pre', 'tag-inner')), + # <categorytree> + (r'(?i)(<)(categorytree)\b', bygroups( + Punctuation, Name.Tag), ('tag-categorytree', 'tag-inner')), + # <hiero> + (r'(?i)(<)(hiero)\b', bygroups(Punctuation, + Name.Tag), ('tag-hiero', 'tag-inner')), + # <math> + (r'(?i)(<)(math)\b', bygroups(Punctuation, + Name.Tag), ('tag-math', 'tag-inner')), + # <chem> + (r'(?i)(<)(chem)\b', bygroups(Punctuation, + Name.Tag), ('tag-chem', 'tag-inner')), + # <ce> + (r'(?i)(<)(ce)\b', bygroups(Punctuation, + Name.Tag), ('tag-ce', 'tag-inner')), + # <charinsert> + (r'(?i)(<)(charinsert)\b', bygroups( + Punctuation, Name.Tag), ('tag-charinsert', 'tag-inner')), + # <templatedata> + (r'(?i)(<)(templatedata)\b', bygroups( + Punctuation, Name.Tag), ('tag-templatedata', 'tag-inner')), + # <gallery> + (r'(?i)(<)(gallery)\b', bygroups( + Punctuation, Name.Tag), ('tag-gallery', 'tag-inner')), + # <graph> + (r'(?i)(<)(gallery)\b', bygroups( + Punctuation, Name.Tag), ('tag-graph', 'tag-inner')), + # 
<dynamicpagelist> + (r'(?i)(<)(dynamicpagelist)\b', bygroups( + Punctuation, Name.Tag), ('tag-dynamicpagelist', 'tag-inner')), + # <inputbox> + (r'(?i)(<)(inputbox)\b', bygroups( + Punctuation, Name.Tag), ('tag-inputbox', 'tag-inner')), + # <rss> + (r'(?i)(<)(rss)\b', bygroups( + Punctuation, Name.Tag), ('tag-rss', 'tag-inner')), + # <imagemap> + (r'(?i)(<)(imagemap)\b', bygroups( + Punctuation, Name.Tag), ('tag-imagemap', 'tag-inner')), + # <syntaxhighlight> + (r'(?i)(</)(syntaxhighlight)\b(\s*)(>)', + bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), + (r'(?si)(<)(syntaxhighlight)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', + bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)), + # <syntaxhighlight>: Fallback case for self-closing tags + (r'(?i)(<)(syntaxhighlight)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( + Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), + # <source> + (r'(?i)(</)(source)\b(\s*)(>)', + bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), + (r'(?si)(<)(source)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', + bygroups(Punctuation, Name.Tag, handle_syntaxhighlight)), + # <source>: Fallback case for self-closing tags + (r'(?i)(<)(source)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( + Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), + # <score> + (r'(?i)(</)(score)\b(\s*)(>)', + bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), + (r'(?si)(<)(score)\b([^>]*?(?<!/)>.*?)(?=</\2\s*>)', + bygroups(Punctuation, Name.Tag, handle_score)), + # <score>: Fallback case for self-closing tags + (r'(?i)(<)(score)\b(\s*?)((?:[^>]|-->)*?)(/\s*?(?<!--)>)', bygroups( + Punctuation, Name.Tag, Whitespace, using(this, state=['root', 'attr']), Punctuation)), + # Other parser tags + (r'(?i)(<)({})\b'.format('|'.join(parser_tags)), + bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), + (r'(?i)(</)({})\b(\s*)(>)'.format('|'.join(parser_tags)), + bygroups(Punctuation, 
Name.Tag, Whitespace, Punctuation)), + # LanguageConverter markups + ( + r"""(?xi) + (-\{{) # Escape format() + (?: ([^|]) (\|))? + (?: (\s* (?:{variants}) \s*) (=>))? + (\s* (?:{variants}) \s*) (:) + """.format(variants='|'.join(variant_langs)), + bygroups(Punctuation, Keyword, Punctuation, + Name.Label, Operator, Name.Label, Punctuation), + 'lc-inner' + ), + (r'-\{(?!\{)', Punctuation, 'lc-raw'), + ], + 'wikilink-name': [ + include('replaceable'), + (r'[^{<]+', Name.Tag), + (r'(?s).', Name.Tag), + ], + 'wikilink-inner': [ + # Quit in case of another wikilink + (r'(?=\[\[)', Punctuation, '#pop'), + (r'\]\]', Punctuation, '#pop'), + include('inline'), + include('text'), + ], + 'medialink-inner': [ + (r'\]\]', Punctuation, '#pop'), + (r'(\|)([^\n=|]*)(=)', + bygroups(Punctuation, Name.Attribute, Operator)), + (r'\|', Punctuation), + include('inline'), + include('text'), + ], + 'quote-common': [ + # Quit in case of link/template endings + (r'(?=\]\]|\{\{|\}\})', Punctuation, '#pop'), + (r'\n', Text, '#pop'), + ], + 'inline-italic': [ + include('quote-common'), + (r"('')(''')(?!')", bygroups(Generic.Emph, + Generic.Strong), ('#pop', 'inline-bold')), + (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic-bold')), + (r"''(?!')", Generic.Emph, '#pop'), + include('inline'), + include('text-italic'), + ], + 'inline-bold': [ + include('quote-common'), + (r"(''')('')(?!')", bygroups( + Generic.Strong, Generic.Emph), ('#pop', 'inline-italic')), + (r"'''(?!')", Generic.Strong, '#pop'), + (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold-italic')), + include('inline'), + include('text-bold'), + ], + 'inline-bold-italic': [ + include('quote-common'), + (r"('')(''')(?!')", bygroups(Generic.EmphStrong, + Generic.Strong), '#pop'), + (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')), + (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')), + include('inline'), + include('text-bold-italic'), + ], + 'inline-italic-bold': [ + include('quote-common'), + 
(r"(''')('')(?!')", bygroups( + Generic.EmphStrong, Generic.Emph), '#pop'), + (r"'''(?!')", Generic.EmphStrong, ('#pop', 'inline-italic')), + (r"''(?!')", Generic.EmphStrong, ('#pop', 'inline-bold')), + include('inline'), + include('text-bold-italic'), + ], + 'lc-inner': [ + ( + r"""(?xi) + (;) + (?: (\s* (?:{variants}) \s*) (=>))? + (\s* (?:{variants}) \s*) (:) + """.format(variants='|'.join(variant_langs)), + bygroups(Punctuation, Name.Label, + Operator, Name.Label, Punctuation) + ), + (r';?\s*?\}-', Punctuation, '#pop'), + include('inline'), + include('text'), + ], + 'lc-raw': [ + (r'\}-', Punctuation, '#pop'), + include('inline'), + include('text'), + ], + 'replaceable': [ + # Comments + (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline), + # Parameters + ( + r"""(?x) + (\{{3}) + ([^|]*?) + (?=\}{3}|\|) + """, + bygroups(Punctuation, Name.Variable), + 'parameter-inner', + ), + # Magic variables + (r'(?i)(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars_i), + bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)), + (r'(\{\{)(\s*)(%s)(\s*)(\}\})' % '|'.join(magic_vars), + bygroups(Punctuation, Whitespace, Name.Function, Whitespace, Punctuation)), + # Parser functions & templates + (r'\{\{', Punctuation, 'template-begin-space'), + # <tvar> legacy syntax + (r'(?i)(<)(tvar)\b(\|)([^>]*?)(>)', bygroups(Punctuation, + Name.Tag, Punctuation, String, Punctuation)), + (r'</>', Punctuation, '#pop'), + # <tvar> + (r'(?i)(<)(tvar)\b', bygroups(Punctuation, Name.Tag), 'tag-inner-ordinary'), + (r'(?i)(</)(tvar)\b(\s*)(>)', + bygroups(Punctuation, Name.Tag, Whitespace, Punctuation)), + ], + 'parameter-inner': [ + (r'\}{3}', Punctuation, '#pop'), + (r'\|', Punctuation), + include('inline'), + include('text'), + ], + 'template-begin-space': [ + # Templates allow line breaks at the beginning, and due to how MediaWiki handles + # comments, an extra state is required to handle things like {{\n<!---->\n name}} + (r'<!--[\s\S]*?(?:-->|\Z)', Comment.Multiline), + 
(r'\s+', Whitespace), + # Parser functions + ( + r'(?i)(\#[%s]*?|%s)(:)' % (title_char, + '|'.join(parser_functions_i)), + bygroups(Name.Function, Punctuation), ('#pop', 'template-inner') + ), + ( + r'(%s)(:)' % ('|'.join(parser_functions)), + bygroups(Name.Function, Punctuation), ('#pop', 'template-inner') + ), + # Templates + ( + r'(?i)([%s]*?)(:)' % title_char, + bygroups(Name.Namespace, Punctuation), ('#pop', 'template-name') + ), + default(('#pop', 'template-name'),), + ], + 'template-name': [ + (r'(\s*?)(\|)', bygroups(Text, Punctuation), ('#pop', 'template-inner')), + (r'\}\}', Punctuation, '#pop'), + (r'\n', Text, '#pop'), + include('replaceable'), + *text_rules(Name.Tag), + ], + 'template-inner': [ + (r'\}\}', Punctuation, '#pop'), + (r'\|', Punctuation), + ( + r"""(?x) + (?<=\|) + ( (?: (?! \{\{ | \}\} )[^=\|<])*? ) # Exclude templates and tags + (=) + """, + bygroups(Name.Label, Operator) + ), + include('inline'), + include('text'), + ], + 'table': [ + # Use [ \t\n\r\0\x0B] instead of \s to follow PHP trim() behavior + # Endings + (r'^([ \t\n\r\0\x0B]*?)(\|\})', + bygroups(Whitespace, Punctuation), '#pop'), + # Table rows + (r'^([ \t\n\r\0\x0B]*?)(\|-+)(.*)$', bygroups(Whitespace, Punctuation, + using(this, state=['root', 'attr']))), + # Captions + ( + r"""(?x) + ^([ \t\n\r\0\x0B]*?)(\|\+) + # Exclude links, template and tags + (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|) )? + (.*?)$ + """, + bygroups(Whitespace, Punctuation, using(this, state=[ + 'root', 'attr']), Punctuation, Generic.Heading), + ), + # Table data + ( + r"""(?x) + ( ^(?:[ \t\n\r\0\x0B]*?)\| | \|\| ) + (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )? + """, + bygroups(Punctuation, using(this, state=[ + 'root', 'attr']), Punctuation), + ), + # Table headers + ( + r"""(?x) + ( ^(?:[ \t\n\r\0\x0B]*?)! ) + (?: ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? )(\|)(?!\|) )? 
+ """, + bygroups(Punctuation, using(this, state=[ + 'root', 'attr']), Punctuation), + 'table-header', + ), + include('list'), + include('inline'), + include('text'), + ], + 'table-header': [ + # Requires another state for || handling inside headers + (r'\n', Text, '#pop'), + ( + r"""(?x) + (!!|\|\|) + (?: + ( (?: (?! \[\[ | \{\{ )[^|\n<] )*? ) + (\|)(?!\|) + )? + """, + bygroups(Punctuation, using(this, state=[ + 'root', 'attr']), Punctuation) + ), + *text_rules(Generic.Subheading), + ], + 'entity': [ + (r'&\S*?;', Name.Entity), + ], + 'dt': [ + (r'\n', Text, '#pop'), + include('inline'), + (r':', Keyword, '#pop'), + include('text'), + ], + 'extlink-inner': [ + (r'\]', Punctuation, '#pop'), + include('inline'), + include('text'), + ], + 'nowiki-ish': [ + include('entity'), + include('text'), + ], + 'attr': [ + include('replaceable'), + (r'\s+', Whitespace), + (r'(=)(\s*)(")', bygroups(Operator, Whitespace, String.Double), 'attr-val-2'), + (r"(=)(\s*)(')", bygroups(Operator, Whitespace, String.Single), 'attr-val-1'), + (r'(=)(\s*)', bygroups(Operator, Whitespace), 'attr-val-0'), + (r'[\w:-]+', Name.Attribute), + + ], + 'attr-val-0': [ + (r'\s', Whitespace, '#pop'), + include('replaceable'), + *text_rules(String), + ], + 'attr-val-1': [ + (r"'", String.Single, '#pop'), + include('replaceable'), + *text_rules(String.Single), + ], + 'attr-val-2': [ + (r'"', String.Double, '#pop'), + include('replaceable'), + *text_rules(String.Double), + ], + 'tag-inner-ordinary': [ + (r'/?\s*>', Punctuation, '#pop'), + include('tag-attr'), + ], + 'tag-inner': [ + # Return to root state for self-closing tags + (r'/\s*>', Punctuation, '#pop:2'), + (r'\s*>', Punctuation, '#pop'), + include('tag-attr'), + ], + # There states below are just like their non-tag variants, the key difference is + # they forcibly quit when encountering tag closing markup + 'tag-attr': [ + include('replaceable'), + (r'\s+', Whitespace), + (r'(=)(\s*)(")', bygroups(Operator, + Whitespace, String.Double), 
'tag-attr-val-2'), + (r"(=)(\s*)(')", bygroups(Operator, + Whitespace, String.Single), 'tag-attr-val-1'), + (r'(=)(\s*)', bygroups(Operator, Whitespace), 'tag-attr-val-0'), + (r'[\w:-]+', Name.Attribute), + + ], + 'tag-attr-val-0': [ + (r'\s', Whitespace, '#pop'), + (r'/?>', Punctuation, '#pop:2'), + include('replaceable'), + *text_rules(String), + ], + 'tag-attr-val-1': [ + (r"'", String.Single, '#pop'), + (r'/?>', Punctuation, '#pop:2'), + include('replaceable'), + *text_rules(String.Single), + ], + 'tag-attr-val-2': [ + (r'"', String.Double, '#pop'), + (r'/?>', Punctuation, '#pop:2'), + include('replaceable'), + *text_rules(String.Double), + ], + 'tag-nowiki': nowiki_tag_rules('nowiki'), + 'tag-pre': nowiki_tag_rules('pre'), + 'tag-categorytree': plaintext_tag_rules('categorytree'), + 'tag-dynamicpagelist': plaintext_tag_rules('dynamicpagelist'), + 'tag-hiero': plaintext_tag_rules('hiero'), + 'tag-inputbox': plaintext_tag_rules('inputbox'), + 'tag-imagemap': plaintext_tag_rules('imagemap'), + 'tag-charinsert': plaintext_tag_rules('charinsert'), + 'tag-timeline': plaintext_tag_rules('timeline'), + 'tag-gallery': plaintext_tag_rules('gallery'), + 'tag-graph': plaintext_tag_rules('graph'), + 'tag-rss': plaintext_tag_rules('rss'), + 'tag-math': delegate_tag_rules('math', TexLexer), + 'tag-chem': delegate_tag_rules('chem', TexLexer), + 'tag-ce': delegate_tag_rules('ce', TexLexer), + 'tag-templatedata': delegate_tag_rules('templatedata', JsonLexer), + 'text-italic': text_rules(Generic.Emph), + 'text-bold': text_rules(Generic.Strong), + 'text-bold-italic': text_rules(Generic.EmphStrong), + 'text': text_rules(Text), + } |