| field | value | date |
|---|---|---|
| author | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:24:06 +0300 |
| committer | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:41:34 +0300 |
| commit | e0e3e1717e3d33762ce61950504f9637a6e669ed (patch) | |
| tree | bca3ff6939b10ed60c3d5c12439963a1146b9711 /contrib/python/Pygments/py2/pygments/lexers/html.py | |
| parent | 38f2c5852db84c7b4d83adfcb009eb61541d1ccd (diff) | |
| download | ydb-e0e3e1717e3d33762ce61950504f9637a6e669ed.tar.gz | |
add ydb deps
Diffstat (limited to 'contrib/python/Pygments/py2/pygments/lexers/html.py')
-rw-r--r-- | contrib/python/Pygments/py2/pygments/lexers/html.py | 602 |
1 file changed, 602 insertions, 0 deletions
diff --git a/contrib/python/Pygments/py2/pygments/lexers/html.py b/contrib/python/Pygments/py2/pygments/lexers/html.py
new file mode 100644
index 0000000000..cbef4f7e98
--- /dev/null
+++ b/contrib/python/Pygments/py2/pygments/lexers/html.py
@@ -0,0 +1,602 @@
+# -*- coding: utf-8 -*-
+"""
+    pygments.lexers.html
+    ~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for HTML, XML and related markup.
+
+    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, ExtendedRegexLexer, include, bygroups, \
+    default, using
+from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
+    Punctuation
+from pygments.util import looks_like_xml, html_doctype_matches
+
+from pygments.lexers.javascript import JavascriptLexer
+from pygments.lexers.jvm import ScalaLexer
+from pygments.lexers.css import CssLexer, _indentation, _starts_block
+from pygments.lexers.ruby import RubyLexer
+
+__all__ = ['HtmlLexer', 'DtdLexer', 'XmlLexer', 'XsltLexer', 'HamlLexer',
+           'ScamlLexer', 'PugLexer']
+
+
+class HtmlLexer(RegexLexer):
+    """
+    For HTML 4 and XHTML 1 markup. Nested JavaScript and CSS is highlighted
+    by the appropriate lexer.
+    """
+
+    name = 'HTML'
+    aliases = ['html']
+    filenames = ['*.html', '*.htm', '*.xhtml', '*.xslt']
+    mimetypes = ['text/html', 'application/xhtml+xml']
+
+    flags = re.IGNORECASE | re.DOTALL
+    tokens = {
+        'root': [
+            ('[^<&]+', Text),
+            (r'&\S*?;', Name.Entity),
+            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
+            ('<!--', Comment, 'comment'),
+            (r'<\?.*?\?>', Comment.Preproc),
+            ('<![^>]*>', Comment.Preproc),
+            (r'(<)(\s*)(script)(\s*)',
+             bygroups(Punctuation, Text, Name.Tag, Text),
+             ('script-content', 'tag')),
+            (r'(<)(\s*)(style)(\s*)',
+             bygroups(Punctuation, Text, Name.Tag, Text),
+             ('style-content', 'tag')),
+            # note: this allows tag names not used in HTML like <x:with-dash>,
+            # this is to support yet-unknown template engines and the like
+            (r'(<)(\s*)([\w:.-]+)',
+             bygroups(Punctuation, Text, Name.Tag), 'tag'),
+            (r'(<)(\s*)(/)(\s*)([\w:.-]+)(\s*)(>)',
+             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
+                      Punctuation)),
+        ],
+        'comment': [
+            ('[^-]+', Comment),
+            ('-->', Comment, '#pop'),
+            ('-', Comment),
+        ],
+        'tag': [
+            (r'\s+', Text),
+            (r'([\w:-]+\s*)(=)(\s*)', bygroups(Name.Attribute, Operator, Text),
+             'attr'),
+            (r'[\w:-]+', Name.Attribute),
+            (r'(/?)(\s*)(>)', bygroups(Punctuation, Text, Punctuation), '#pop'),
+        ],
+        'script-content': [
+            (r'(<)(\s*)(/)(\s*)(script)(\s*)(>)',
+             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
+                      Punctuation), '#pop'),
+            (r'.+?(?=<\s*/\s*script\s*>)', using(JavascriptLexer)),
+        ],
+        'style-content': [
+            (r'(<)(\s*)(/)(\s*)(style)(\s*)(>)',
+             bygroups(Punctuation, Text, Punctuation, Text, Name.Tag, Text,
+                      Punctuation), '#pop'),
+            (r'.+?(?=<\s*/\s*style\s*>)', using(CssLexer)),
+        ],
+        'attr': [
+            ('".*?"', String, '#pop'),
+            ("'.*?'", String, '#pop'),
+            (r'[^\s>]+', String, '#pop'),
+        ],
+    }
+
+    def analyse_text(text):
+        if html_doctype_matches(text):
+            return 0.5
+
+
+class DtdLexer(RegexLexer):
+    """
+    A lexer for DTDs (Document Type Definitions).
+
+    .. versionadded:: 1.5
+    """
+
+    flags = re.MULTILINE | re.DOTALL
+
+    name = 'DTD'
+    aliases = ['dtd']
+    filenames = ['*.dtd']
+    mimetypes = ['application/xml-dtd']
+
+    tokens = {
+        'root': [
+            include('common'),
+
+            (r'(<!ELEMENT)(\s+)(\S+)',
+             bygroups(Keyword, Text, Name.Tag), 'element'),
+            (r'(<!ATTLIST)(\s+)(\S+)',
+             bygroups(Keyword, Text, Name.Tag), 'attlist'),
+            (r'(<!ENTITY)(\s+)(\S+)',
+             bygroups(Keyword, Text, Name.Entity), 'entity'),
+            (r'(<!NOTATION)(\s+)(\S+)',
+             bygroups(Keyword, Text, Name.Tag), 'notation'),
+            (r'(<!\[)([^\[\s]+)(\s*)(\[)',  # conditional sections
+             bygroups(Keyword, Name.Entity, Text, Keyword)),
+
+            (r'(<!DOCTYPE)(\s+)([^>\s]+)',
+             bygroups(Keyword, Text, Name.Tag)),
+            (r'PUBLIC|SYSTEM', Keyword.Constant),
+            (r'[\[\]>]', Keyword),
+        ],
+
+        'common': [
+            (r'\s+', Text),
+            (r'(%|&)[^;]*;', Name.Entity),
+            ('<!--', Comment, 'comment'),
+            (r'[(|)*,?+]', Operator),
+            (r'"[^"]*"', String.Double),
+            (r'\'[^\']*\'', String.Single),
+        ],
+
+        'comment': [
+            ('[^-]+', Comment),
+            ('-->', Comment, '#pop'),
+            ('-', Comment),
+        ],
+
+        'element': [
+            include('common'),
+            (r'EMPTY|ANY|#PCDATA', Keyword.Constant),
+            (r'[^>\s|()?+*,]+', Name.Tag),
+            (r'>', Keyword, '#pop'),
+        ],
+
+        'attlist': [
+            include('common'),
+            (r'CDATA|IDREFS|IDREF|ID|NMTOKENS|NMTOKEN|ENTITIES|ENTITY|NOTATION',
+             Keyword.Constant),
+            (r'#REQUIRED|#IMPLIED|#FIXED', Keyword.Constant),
+            (r'xml:space|xml:lang', Keyword.Reserved),
+            (r'[^>\s|()?+*,]+', Name.Attribute),
+            (r'>', Keyword, '#pop'),
+        ],
+
+        'entity': [
+            include('common'),
+            (r'SYSTEM|PUBLIC|NDATA', Keyword.Constant),
+            (r'[^>\s|()?+*,]+', Name.Entity),
+            (r'>', Keyword, '#pop'),
+        ],
+
+        'notation': [
+            include('common'),
+            (r'SYSTEM|PUBLIC', Keyword.Constant),
+            (r'[^>\s|()?+*,]+', Name.Attribute),
+            (r'>', Keyword, '#pop'),
+        ],
+    }
+
+    def analyse_text(text):
+        if not looks_like_xml(text) and \
+            ('<!ELEMENT' in text or '<!ATTLIST' in text or '<!ENTITY' in text):
+            return 0.8
+
+
+class XmlLexer(RegexLexer):
+    """
+    Generic lexer for XML (eXtensible Markup Language).
+    """
+
+    flags = re.MULTILINE | re.DOTALL | re.UNICODE
+
+    name = 'XML'
+    aliases = ['xml']
+    filenames = ['*.xml', '*.xsl', '*.rss', '*.xslt', '*.xsd',
+                 '*.wsdl', '*.wsf']
+    mimetypes = ['text/xml', 'application/xml', 'image/svg+xml',
+                 'application/rss+xml', 'application/atom+xml']
+
+    tokens = {
+        'root': [
+            ('[^<&]+', Text),
+            (r'&\S*?;', Name.Entity),
+            (r'\<\!\[CDATA\[.*?\]\]\>', Comment.Preproc),
+            ('<!--', Comment, 'comment'),
+            (r'<\?.*?\?>', Comment.Preproc),
+            ('<![^>]*>', Comment.Preproc),
+            (r'<\s*[\w:.-]+', Name.Tag, 'tag'),
+            (r'<\s*/\s*[\w:.-]+\s*>', Name.Tag),
+        ],
+        'comment': [
+            ('[^-]+', Comment),
+            ('-->', Comment, '#pop'),
+            ('-', Comment),
+        ],
+        'tag': [
+            (r'\s+', Text),
+            (r'[\w.:-]+\s*=', Name.Attribute, 'attr'),
+            (r'/?\s*>', Name.Tag, '#pop'),
+        ],
+        'attr': [
+            (r'\s+', Text),
+            ('".*?"', String, '#pop'),
+            ("'.*?'", String, '#pop'),
+            (r'[^\s>]+', String, '#pop'),
+        ],
+    }
+
+    def analyse_text(text):
+        if looks_like_xml(text):
+            return 0.45  # less than HTML
+
+
+class XsltLexer(XmlLexer):
+    """
+    A lexer for XSLT.
+
+    .. versionadded:: 0.10
+    """
+
+    name = 'XSLT'
+    aliases = ['xslt']
+    filenames = ['*.xsl', '*.xslt', '*.xpl']  # xpl is XProc
+    mimetypes = ['application/xsl+xml', 'application/xslt+xml']
+
+    EXTRA_KEYWORDS = {
+        'apply-imports', 'apply-templates', 'attribute',
+        'attribute-set', 'call-template', 'choose', 'comment',
+        'copy', 'copy-of', 'decimal-format', 'element', 'fallback',
+        'for-each', 'if', 'import', 'include', 'key', 'message',
+        'namespace-alias', 'number', 'otherwise', 'output', 'param',
+        'preserve-space', 'processing-instruction', 'sort',
+        'strip-space', 'stylesheet', 'template', 'text', 'transform',
+        'value-of', 'variable', 'when', 'with-param'
+    }
+
+    def get_tokens_unprocessed(self, text):
+        for index, token, value in XmlLexer.get_tokens_unprocessed(self, text):
+            m = re.match('</?xsl:([^>]*)/?>?', value)
+
+            if token is Name.Tag and m and m.group(1) in self.EXTRA_KEYWORDS:
+                yield index, Keyword, value
+            else:
+                yield index, token, value
+
+    def analyse_text(text):
+        if looks_like_xml(text) and '<xsl' in text:
+            return 0.8
+
+
+class HamlLexer(ExtendedRegexLexer):
+    """
+    For Haml markup.
+
+    .. versionadded:: 1.3
+    """
+
+    name = 'Haml'
+    aliases = ['haml']
+    filenames = ['*.haml']
+    mimetypes = ['text/x-haml']
+
+    flags = re.IGNORECASE
+    # Haml can include " |\n" anywhere,
+    # which is ignored and used to wrap long lines.
+    # To accomodate this, use this custom faux dot instead.
+    _dot = r'(?: \|\n(?=.* \|)|.)'
+
+    # In certain places, a comma at the end of the line
+    # allows line wrapping as well.
+    _comma_dot = r'(?:,\s*\n|' + _dot + ')'
+    tokens = {
+        'root': [
+            (r'[ \t]*\n', Text),
+            (r'[ \t]*', _indentation),
+        ],
+
+        'css': [
+            (r'\.[\w:-]+', Name.Class, 'tag'),
+            (r'\#[\w:-]+', Name.Function, 'tag'),
+        ],
+
+        'eval-or-plain': [
+            (r'[&!]?==', Punctuation, 'plain'),
+            (r'([&!]?[=~])(' + _comma_dot + r'*\n)',
+             bygroups(Punctuation, using(RubyLexer)),
+             'root'),
+            default('plain'),
+        ],
+
+        'content': [
+            include('css'),
+            (r'%[\w:-]+', Name.Tag, 'tag'),
+            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
+            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
+             bygroups(Comment, Comment.Special, Comment),
+             '#pop'),
+            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
+             '#pop'),
+            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
+                                                  'haml-comment-block'), '#pop'),
+            (r'(-)(' + _comma_dot + r'*\n)',
+             bygroups(Punctuation, using(RubyLexer)),
+             '#pop'),
+            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
+             '#pop'),
+            include('eval-or-plain'),
+        ],
+
+        'tag': [
+            include('css'),
+            (r'\{(,\n|' + _dot + r')*?\}', using(RubyLexer)),
+            (r'\[' + _dot + r'*?\]', using(RubyLexer)),
+            (r'\(', Text, 'html-attributes'),
+            (r'/[ \t]*\n', Punctuation, '#pop:2'),
+            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
+            include('eval-or-plain'),
+        ],
+
+        'plain': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+
+        'html-attributes': [
+            (r'\s+', Text),
+            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
+            (r'[\w:-]+', Name.Attribute),
+            (r'\)', Text, '#pop'),
+        ],
+
+        'html-attribute-value': [
+            (r'[ \t]+', Text),
+            (r'\w+', Name.Variable, '#pop'),
+            (r'@\w+', Name.Variable.Instance, '#pop'),
+            (r'\$\w+', Name.Variable.Global, '#pop'),
+            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
+            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
+        ],
+
+        'html-comment-block': [
+            (_dot + '+', Comment),
+            (r'\n', Text, 'root'),
+        ],
+
+        'haml-comment-block': [
+            (_dot + '+', Comment.Preproc),
+            (r'\n', Text, 'root'),
+        ],
+
+        'filter-block': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(RubyLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+    }
+
+
+class ScamlLexer(ExtendedRegexLexer):
+    """
+    For `Scaml markup <http://scalate.fusesource.org/>`_.  Scaml is Haml for Scala.
+
+    .. versionadded:: 1.4
+    """
+
+    name = 'Scaml'
+    aliases = ['scaml']
+    filenames = ['*.scaml']
+    mimetypes = ['text/x-scaml']
+
+    flags = re.IGNORECASE
+    # Scaml does not yet support the " |\n" notation to
+    # wrap long lines.  Once it does, use the custom faux
+    # dot instead.
+    # _dot = r'(?: \|\n(?=.* \|)|.)'
+    _dot = r'.'
+
+    tokens = {
+        'root': [
+            (r'[ \t]*\n', Text),
+            (r'[ \t]*', _indentation),
+        ],
+
+        'css': [
+            (r'\.[\w:-]+', Name.Class, 'tag'),
+            (r'\#[\w:-]+', Name.Function, 'tag'),
+        ],
+
+        'eval-or-plain': [
+            (r'[&!]?==', Punctuation, 'plain'),
+            (r'([&!]?[=~])(' + _dot + r'*\n)',
+             bygroups(Punctuation, using(ScalaLexer)),
+             'root'),
+            default('plain'),
+        ],
+
+        'content': [
+            include('css'),
+            (r'%[\w:-]+', Name.Tag, 'tag'),
+            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
+            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
+             bygroups(Comment, Comment.Special, Comment),
+             '#pop'),
+            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
+             '#pop'),
+            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
+                                                  'scaml-comment-block'), '#pop'),
+            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
+             bygroups(Punctuation, Keyword, using(ScalaLexer)),
+             '#pop'),
+            (r'(-)(' + _dot + r'*\n)',
+             bygroups(Punctuation, using(ScalaLexer)),
+             '#pop'),
+            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
+             '#pop'),
+            include('eval-or-plain'),
+        ],
+
+        'tag': [
+            include('css'),
+            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
+            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
+            (r'\(', Text, 'html-attributes'),
+            (r'/[ \t]*\n', Punctuation, '#pop:2'),
+            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
+            include('eval-or-plain'),
+        ],
+
+        'plain': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+
+        'html-attributes': [
+            (r'\s+', Text),
+            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
+            (r'[\w:-]+', Name.Attribute),
+            (r'\)', Text, '#pop'),
+        ],
+
+        'html-attribute-value': [
+            (r'[ \t]+', Text),
+            (r'\w+', Name.Variable, '#pop'),
+            (r'@\w+', Name.Variable.Instance, '#pop'),
+            (r'\$\w+', Name.Variable.Global, '#pop'),
+            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
+            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
+        ],
+
+        'html-comment-block': [
+            (_dot + '+', Comment),
+            (r'\n', Text, 'root'),
+        ],
+
+        'scaml-comment-block': [
+            (_dot + '+', Comment.Preproc),
+            (r'\n', Text, 'root'),
+        ],
+
+        'filter-block': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+    }
+
+
+class PugLexer(ExtendedRegexLexer):
+    """
+    For Pug markup.
+    Pug is a variant of Scaml, see:
+    http://scalate.fusesource.org/documentation/scaml-reference.html
+
+    .. versionadded:: 1.4
+    """
+
+    name = 'Pug'
+    aliases = ['pug', 'jade']
+    filenames = ['*.pug', '*.jade']
+    mimetypes = ['text/x-pug', 'text/x-jade']
+
+    flags = re.IGNORECASE
+    _dot = r'.'
+
+    tokens = {
+        'root': [
+            (r'[ \t]*\n', Text),
+            (r'[ \t]*', _indentation),
+        ],
+
+        'css': [
+            (r'\.[\w:-]+', Name.Class, 'tag'),
+            (r'\#[\w:-]+', Name.Function, 'tag'),
+        ],
+
+        'eval-or-plain': [
+            (r'[&!]?==', Punctuation, 'plain'),
+            (r'([&!]?[=~])(' + _dot + r'*\n)',
+             bygroups(Punctuation, using(ScalaLexer)), 'root'),
+            default('plain'),
+        ],
+
+        'content': [
+            include('css'),
+            (r'!!!' + _dot + r'*\n', Name.Namespace, '#pop'),
+            (r'(/)(\[' + _dot + r'*?\])(' + _dot + r'*\n)',
+             bygroups(Comment, Comment.Special, Comment),
+             '#pop'),
+            (r'/' + _dot + r'*\n', _starts_block(Comment, 'html-comment-block'),
+             '#pop'),
+            (r'-#' + _dot + r'*\n', _starts_block(Comment.Preproc,
+                                                  'scaml-comment-block'), '#pop'),
+            (r'(-@\s*)(import)?(' + _dot + r'*\n)',
+             bygroups(Punctuation, Keyword, using(ScalaLexer)),
+             '#pop'),
+            (r'(-)(' + _dot + r'*\n)',
+             bygroups(Punctuation, using(ScalaLexer)),
+             '#pop'),
+            (r':' + _dot + r'*\n', _starts_block(Name.Decorator, 'filter-block'),
+             '#pop'),
+            (r'[\w:-]+', Name.Tag, 'tag'),
+            (r'\|', Text, 'eval-or-plain'),
+        ],
+
+        'tag': [
+            include('css'),
+            (r'\{(,\n|' + _dot + r')*?\}', using(ScalaLexer)),
+            (r'\[' + _dot + r'*?\]', using(ScalaLexer)),
+            (r'\(', Text, 'html-attributes'),
+            (r'/[ \t]*\n', Punctuation, '#pop:2'),
+            (r'[<>]{1,2}(?=[ \t=])', Punctuation),
+            include('eval-or-plain'),
+        ],
+
+        'plain': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Text),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+
+        'html-attributes': [
+            (r'\s+', Text),
+            (r'[\w:-]+[ \t]*=', Name.Attribute, 'html-attribute-value'),
+            (r'[\w:-]+', Name.Attribute),
+            (r'\)', Text, '#pop'),
+        ],
+
+        'html-attribute-value': [
+            (r'[ \t]+', Text),
+            (r'\w+', Name.Variable, '#pop'),
+            (r'@\w+', Name.Variable.Instance, '#pop'),
+            (r'\$\w+', Name.Variable.Global, '#pop'),
+            (r"'(\\\\|\\'|[^'\n])*'", String, '#pop'),
+            (r'"(\\\\|\\"|[^"\n])*"', String, '#pop'),
+        ],
+
+        'html-comment-block': [
+            (_dot + '+', Comment),
+            (r'\n', Text, 'root'),
+        ],
+
+        'scaml-comment-block': [
+            (_dot + '+', Comment.Preproc),
+            (r'\n', Text, 'root'),
+        ],
+
+        'filter-block': [
+            (r'([^#\n]|#[^{\n]|(\\\\)*\\#\{)+', Name.Decorator),
+            (r'(#\{)(' + _dot + r'*?)(\})',
+             bygroups(String.Interpol, using(ScalaLexer), String.Interpol)),
+            (r'\n', Text, 'root'),
+        ],
+    }
+
+JadeLexer = PugLexer  # compat
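For reviewers unfamiliar with Pygments, here is a minimal sketch of how the lexers added in this file are typically exercised. It assumes the vendored package is importable under its usual name `pygments` (which is how the contrib/python copy is consumed); the sample input string is made up for illustration.

    # Usage sketch, not part of the diff. Assumes the vendored `pygments`
    # package (contrib/python/Pygments/py2) is on sys.path as `pygments`.
    from pygments import highlight
    from pygments.formatters import HtmlFormatter
    from pygments.lexers.html import HtmlLexer, XmlLexer

    html_source = '<p class="note">Hello &amp; <script>var x = 1;</script></p>'

    # HtmlLexer hands the <script> body to JavascriptLexer via the
    # 'script-content' state defined in the diff above.
    print(highlight(html_source, HtmlLexer(), HtmlFormatter()))

    # analyse_text drives lexer guessing: XmlLexer returns 0.45 for
    # XML-looking input, deliberately below HtmlLexer's 0.5 on a doctype match.
    print(XmlLexer.analyse_text('<?xml version="1.0"?><root/>'))

The split between `HtmlLexer`, `XmlLexer`, and the template-language lexers mirrors how Pygments resolves lexers by alias (`'html'`, `'xml'`, `'pug'`, etc.) and by filename pattern, so adding this module makes those aliases available to anything in the repository that depends on the vendored Pygments.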