diff options
author | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:24:06 +0300 |
---|---|---|
committer | nkozlovskiy <nmk@ydb.tech> | 2023-09-29 12:41:34 +0300 |
commit | e0e3e1717e3d33762ce61950504f9637a6e669ed (patch) | |
tree | bca3ff6939b10ed60c3d5c12439963a1146b9711 /contrib/python/Pygments/py2/pygments/lexers/data.py | |
parent | 38f2c5852db84c7b4d83adfcb009eb61541d1ccd (diff) | |
download | ydb-e0e3e1717e3d33762ce61950504f9637a6e669ed.tar.gz |
add ydb deps
Diffstat (limited to 'contrib/python/Pygments/py2/pygments/lexers/data.py')
-rw-r--r-- | contrib/python/Pygments/py2/pygments/lexers/data.py | 561 |
1 files changed, 561 insertions, 0 deletions
diff --git a/contrib/python/Pygments/py2/pygments/lexers/data.py b/contrib/python/Pygments/py2/pygments/lexers/data.py new file mode 100644 index 0000000000..46ca734006 --- /dev/null +++ b/contrib/python/Pygments/py2/pygments/lexers/data.py @@ -0,0 +1,561 @@ +# -*- coding: utf-8 -*- +""" + pygments.lexers.data + ~~~~~~~~~~~~~~~~~~~~ + + Lexers for data file format. + + :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, ExtendedRegexLexer, LexerContext, \ + include, bygroups, inherit +from pygments.token import Text, Comment, Keyword, Name, String, Number, \ + Punctuation, Literal, Error + +__all__ = ['YamlLexer', 'JsonLexer', 'JsonBareObjectLexer', 'JsonLdLexer'] + + +class YamlLexerContext(LexerContext): + """Indentation context for the YAML lexer.""" + + def __init__(self, *args, **kwds): + super(YamlLexerContext, self).__init__(*args, **kwds) + self.indent_stack = [] + self.indent = -1 + self.next_indent = 0 + self.block_scalar_indent = None + + +class YamlLexer(ExtendedRegexLexer): + """ + Lexer for `YAML <http://yaml.org/>`_, a human-friendly data serialization + language. + + .. versionadded:: 0.11 + """ + + name = 'YAML' + aliases = ['yaml'] + filenames = ['*.yaml', '*.yml'] + mimetypes = ['text/x-yaml'] + + def something(token_class): + """Do not produce empty tokens.""" + def callback(lexer, match, context): + text = match.group() + if not text: + return + yield match.start(), token_class, text + context.pos = match.end() + return callback + + def reset_indent(token_class): + """Reset the indentation levels.""" + def callback(lexer, match, context): + text = match.group() + context.indent_stack = [] + context.indent = -1 + context.next_indent = 0 + context.block_scalar_indent = None + yield match.start(), token_class, text + context.pos = match.end() + return callback + + def save_indent(token_class, start=False): + """Save a possible indentation level.""" + def callback(lexer, match, context): + text = match.group() + extra = '' + if start: + context.next_indent = len(text) + if context.next_indent < context.indent: + while context.next_indent < context.indent: + context.indent = context.indent_stack.pop() + if context.next_indent > context.indent: + extra = text[context.indent:] + text = text[:context.indent] + else: + context.next_indent += len(text) + if text: + yield match.start(), token_class, text + if extra: + yield match.start()+len(text), token_class.Error, extra + context.pos = match.end() + return callback + + def set_indent(token_class, implicit=False): + """Set the previously saved indentation level.""" + def callback(lexer, match, context): + text = match.group() + if context.indent < context.next_indent: + context.indent_stack.append(context.indent) + context.indent = context.next_indent + if not implicit: + context.next_indent += len(text) + yield match.start(), token_class, text + context.pos = match.end() + return callback + + def set_block_scalar_indent(token_class): + """Set an explicit indentation level for a block scalar.""" + def callback(lexer, match, context): + text = match.group() + context.block_scalar_indent = None + if not text: + return + increment = match.group(1) + if increment: + current_indent = max(context.indent, 0) + increment = int(increment) + context.block_scalar_indent = current_indent + increment + if text: + yield match.start(), token_class, text + context.pos = match.end() + return callback + + def parse_block_scalar_empty_line(indent_token_class, content_token_class): + """Process an empty line in a block scalar.""" + def callback(lexer, match, context): + text = match.group() + if (context.block_scalar_indent is None or + len(text) <= context.block_scalar_indent): + if text: + yield match.start(), indent_token_class, text + else: + indentation = text[:context.block_scalar_indent] + content = text[context.block_scalar_indent:] + yield match.start(), indent_token_class, indentation + yield (match.start()+context.block_scalar_indent, + content_token_class, content) + context.pos = match.end() + return callback + + def parse_block_scalar_indent(token_class): + """Process indentation spaces in a block scalar.""" + def callback(lexer, match, context): + text = match.group() + if context.block_scalar_indent is None: + if len(text) <= max(context.indent, 0): + context.stack.pop() + context.stack.pop() + return + context.block_scalar_indent = len(text) + else: + if len(text) < context.block_scalar_indent: + context.stack.pop() + context.stack.pop() + return + if text: + yield match.start(), token_class, text + context.pos = match.end() + return callback + + def parse_plain_scalar_indent(token_class): + """Process indentation spaces in a plain scalar.""" + def callback(lexer, match, context): + text = match.group() + if len(text) <= context.indent: + context.stack.pop() + context.stack.pop() + return + if text: + yield match.start(), token_class, text + context.pos = match.end() + return callback + + tokens = { + # the root rules + 'root': [ + # ignored whitespaces + (r'[ ]+(?=#|$)', Text), + # line breaks + (r'\n+', Text), + # a comment + (r'#[^\n]*', Comment.Single), + # the '%YAML' directive + (r'^%YAML(?=[ ]|$)', reset_indent(Name.Tag), 'yaml-directive'), + # the %TAG directive + (r'^%TAG(?=[ ]|$)', reset_indent(Name.Tag), 'tag-directive'), + # document start and document end indicators + (r'^(?:---|\.\.\.)(?=[ ]|$)', reset_indent(Name.Namespace), + 'block-line'), + # indentation spaces + (r'[ ]*(?!\s|$)', save_indent(Text, start=True), + ('block-line', 'indentation')), + ], + + # trailing whitespaces after directives or a block scalar indicator + 'ignored-line': [ + # ignored whitespaces + (r'[ ]+(?=#|$)', Text), + # a comment + (r'#[^\n]*', Comment.Single), + # line break + (r'\n', Text, '#pop:2'), + ], + + # the %YAML directive + 'yaml-directive': [ + # the version number + (r'([ ]+)([0-9]+\.[0-9]+)', + bygroups(Text, Number), 'ignored-line'), + ], + + # the %TAG directive + 'tag-directive': [ + # a tag handle and the corresponding prefix + (r'([ ]+)(!|![\w-]*!)' + r'([ ]+)(!|!?[\w;/?:@&=+$,.!~*\'()\[\]%-]+)', + bygroups(Text, Keyword.Type, Text, Keyword.Type), + 'ignored-line'), + ], + + # block scalar indicators and indentation spaces + 'indentation': [ + # trailing whitespaces are ignored + (r'[ ]*$', something(Text), '#pop:2'), + # whitespaces preceding block collection indicators + (r'[ ]+(?=[?:-](?:[ ]|$))', save_indent(Text)), + # block collection indicators + (r'[?:-](?=[ ]|$)', set_indent(Punctuation.Indicator)), + # the beginning a block line + (r'[ ]*', save_indent(Text), '#pop'), + ], + + # an indented line in the block context + 'block-line': [ + # the line end + (r'[ ]*(?=#|$)', something(Text), '#pop'), + # whitespaces separating tokens + (r'[ ]+', Text), + # key with colon + (r'''([^#,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''', + bygroups(Name.Tag, set_indent(Punctuation, implicit=True))), + # tags, anchors and aliases, + include('descriptors'), + # block collections and scalars + include('block-nodes'), + # flow collections and quoted scalars + include('flow-nodes'), + # a plain scalar + (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`-]|[?:-]\S)', + something(Name.Variable), + 'plain-scalar-in-block-context'), + ], + + # tags, anchors, aliases + 'descriptors': [ + # a full-form tag + (r'!<[\w#;/?:@&=+$,.!~*\'()\[\]%-]+>', Keyword.Type), + # a tag in the form '!', '!suffix' or '!handle!suffix' + (r'!(?:[\w-]+!)?' + r'[\w#;/?:@&=+$,.!~*\'()\[\]%-]*', Keyword.Type), + # an anchor + (r'&[\w-]+', Name.Label), + # an alias + (r'\*[\w-]+', Name.Variable), + ], + + # block collections and scalars + 'block-nodes': [ + # implicit key + (r':(?=[ ]|$)', set_indent(Punctuation.Indicator, implicit=True)), + # literal and folded scalars + (r'[|>]', Punctuation.Indicator, + ('block-scalar-content', 'block-scalar-header')), + ], + + # flow collections and quoted scalars + 'flow-nodes': [ + # a flow sequence + (r'\[', Punctuation.Indicator, 'flow-sequence'), + # a flow mapping + (r'\{', Punctuation.Indicator, 'flow-mapping'), + # a single-quoted scalar + (r'\'', String, 'single-quoted-scalar'), + # a double-quoted scalar + (r'\"', String, 'double-quoted-scalar'), + ], + + # the content of a flow collection + 'flow-collection': [ + # whitespaces + (r'[ ]+', Text), + # line breaks + (r'\n+', Text), + # a comment + (r'#[^\n]*', Comment.Single), + # simple indicators + (r'[?:,]', Punctuation.Indicator), + # tags, anchors and aliases + include('descriptors'), + # nested collections and quoted scalars + include('flow-nodes'), + # a plain scalar + (r'(?=[^\s?:,\[\]{}#&*!|>\'"%@`])', + something(Name.Variable), + 'plain-scalar-in-flow-context'), + ], + + # a flow sequence indicated by '[' and ']' + 'flow-sequence': [ + # include flow collection rules + include('flow-collection'), + # the closing indicator + (r'\]', Punctuation.Indicator, '#pop'), + ], + + # a flow mapping indicated by '{' and '}' + 'flow-mapping': [ + # key with colon + (r'''([^,:?\[\]{}"'\n]+)(:)(?=[ ]|$)''', + bygroups(Name.Tag, Punctuation)), + # include flow collection rules + include('flow-collection'), + # the closing indicator + (r'\}', Punctuation.Indicator, '#pop'), + ], + + # block scalar lines + 'block-scalar-content': [ + # line break + (r'\n', Text), + # empty line + (r'^[ ]+$', + parse_block_scalar_empty_line(Text, Name.Constant)), + # indentation spaces (we may leave the state here) + (r'^[ ]*', parse_block_scalar_indent(Text)), + # line content + (r'[\S\t ]+', Name.Constant), + ], + + # the content of a literal or folded scalar + 'block-scalar-header': [ + # indentation indicator followed by chomping flag + (r'([1-9])?[+-]?(?=[ ]|$)', + set_block_scalar_indent(Punctuation.Indicator), + 'ignored-line'), + # chomping flag followed by indentation indicator + (r'[+-]?([1-9])?(?=[ ]|$)', + set_block_scalar_indent(Punctuation.Indicator), + 'ignored-line'), + ], + + # ignored and regular whitespaces in quoted scalars + 'quoted-scalar-whitespaces': [ + # leading and trailing whitespaces are ignored + (r'^[ ]+', Text), + (r'[ ]+$', Text), + # line breaks are ignored + (r'\n+', Text), + # other whitespaces are a part of the value + (r'[ ]+', Name.Variable), + ], + + # single-quoted scalars + 'single-quoted-scalar': [ + # include whitespace and line break rules + include('quoted-scalar-whitespaces'), + # escaping of the quote character + (r'\'\'', String.Escape), + # regular non-whitespace characters + (r'[^\s\']+', String), + # the closing quote + (r'\'', String, '#pop'), + ], + + # double-quoted scalars + 'double-quoted-scalar': [ + # include whitespace and line break rules + include('quoted-scalar-whitespaces'), + # escaping of special characters + (r'\\[0abt\tn\nvfre "\\N_LP]', String), + # escape codes + (r'\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})', + String.Escape), + # regular non-whitespace characters + (r'[^\s"\\]+', String), + # the closing quote + (r'"', String, '#pop'), + ], + + # the beginning of a new line while scanning a plain scalar + 'plain-scalar-in-block-context-new-line': [ + # empty lines + (r'^[ ]+$', Text), + # line breaks + (r'\n+', Text), + # document start and document end indicators + (r'^(?=---|\.\.\.)', something(Name.Namespace), '#pop:3'), + # indentation spaces (we may leave the block line state here) + (r'^[ ]*', parse_plain_scalar_indent(Text), '#pop'), + ], + + # a plain scalar in the block context + 'plain-scalar-in-block-context': [ + # the scalar ends with the ':' indicator + (r'[ ]*(?=:[ ]|:$)', something(Text), '#pop'), + # the scalar ends with whitespaces followed by a comment + (r'[ ]+(?=#)', Text, '#pop'), + # trailing whitespaces are ignored + (r'[ ]+$', Text), + # line breaks are ignored + (r'\n+', Text, 'plain-scalar-in-block-context-new-line'), + # other whitespaces are a part of the value + (r'[ ]+', Literal.Scalar.Plain), + # regular non-whitespace characters + (r'(?::(?!\s)|[^\s:])+', Literal.Scalar.Plain), + ], + + # a plain scalar is the flow context + 'plain-scalar-in-flow-context': [ + # the scalar ends with an indicator character + (r'[ ]*(?=[,:?\[\]{}])', something(Text), '#pop'), + # the scalar ends with a comment + (r'[ ]+(?=#)', Text, '#pop'), + # leading and trailing whitespaces are ignored + (r'^[ ]+', Text), + (r'[ ]+$', Text), + # line breaks are ignored + (r'\n+', Text), + # other whitespaces are a part of the value + (r'[ ]+', Name.Variable), + # regular non-whitespace characters + (r'[^\s,:?\[\]{}]+', Name.Variable), + ], + + } + + def get_tokens_unprocessed(self, text=None, context=None): + if context is None: + context = YamlLexerContext(text, 0) + return super(YamlLexer, self).get_tokens_unprocessed(text, context) + + +class JsonLexer(RegexLexer): + """ + For JSON data structures. + + .. versionadded:: 1.5 + """ + + name = 'JSON' + aliases = ['json'] + filenames = ['*.json'] + mimetypes = ['application/json'] + + flags = re.DOTALL + + # integer part of a number + int_part = r'-?(0|[1-9]\d*)' + + # fractional part of a number + frac_part = r'\.\d+' + + # exponential part of a number + exp_part = r'[eE](\+|-)?\d+' + + tokens = { + 'whitespace': [ + (r'\s+', Text), + ], + + # represents a simple terminal value + 'simplevalue': [ + (r'(true|false|null)\b', Keyword.Constant), + (('%(int_part)s(%(frac_part)s%(exp_part)s|' + '%(exp_part)s|%(frac_part)s)') % vars(), + Number.Float), + (int_part, Number.Integer), + (r'"(\\\\|\\"|[^"])*"', String.Double), + ], + + + # the right hand side of an object, after the attribute name + 'objectattribute': [ + include('value'), + (r':', Punctuation), + # comma terminates the attribute but expects more + (r',', Punctuation, '#pop'), + # a closing bracket terminates the entire object, so pop twice + (r'\}', Punctuation, '#pop:2'), + ], + + # a json object - { attr, attr, ... } + 'objectvalue': [ + include('whitespace'), + (r'"(\\\\|\\"|[^"])*"', Name.Tag, 'objectattribute'), + (r'\}', Punctuation, '#pop'), + ], + + # json array - [ value, value, ... } + 'arrayvalue': [ + include('whitespace'), + include('value'), + (r',', Punctuation), + (r'\]', Punctuation, '#pop'), + ], + + # a json value - either a simple value or a complex value (object or array) + 'value': [ + include('whitespace'), + include('simplevalue'), + (r'\{', Punctuation, 'objectvalue'), + (r'\[', Punctuation, 'arrayvalue'), + ], + + # the root of a json document whould be a value + 'root': [ + include('value'), + ], + } + + +class JsonBareObjectLexer(JsonLexer): + """ + For JSON data structures (with missing object curly braces). + + .. versionadded:: 2.2 + """ + + name = 'JSONBareObject' + aliases = ['json-object'] + filenames = [] + mimetypes = ['application/json-object'] + + tokens = { + 'root': [ + (r'\}', Error), + include('objectvalue'), + ], + 'objectattribute': [ + (r'\}', Error), + inherit, + ], + } + + +class JsonLdLexer(JsonLexer): + """ + For `JSON-LD <http://json-ld.org/>`_ linked data. + + .. versionadded:: 2.0 + """ + + name = 'JSON-LD' + aliases = ['jsonld', 'json-ld'] + filenames = ['*.jsonld'] + mimetypes = ['application/ld+json'] + + tokens = { + 'objectvalue': [ + (r'"@(context|id|value|language|type|container|list|set|' + r'reverse|index|base|vocab|graph)"', Name.Decorator, + 'objectattribute'), + inherit, + ], + } |