diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/python/Pygments/py3/pygments/lexers/special.py | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/python/Pygments/py3/pygments/lexers/special.py')
-rw-r--r-- | contrib/python/Pygments/py3/pygments/lexers/special.py | 119 |
1 files changed, 119 insertions, 0 deletions
"""
    pygments.lexers.special
    ~~~~~~~~~~~~~~~~~~~~~~~

    Special lexers.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import ast
import re

from pygments.lexer import Lexer
from pygments.token import Token, Error, Text, Generic
from pygments.util import get_choice_opt


__all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer']


class TextLexer(Lexer):
    """
    "Null" lexer, doesn't highlight anything.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    priority = 0.01

    def get_tokens_unprocessed(self, text):
        """Yield the whole input as one ``Text`` token at offset 0."""
        yield 0, Text, text

    # NOTE(review): declared without ``self``; presumably the ``Lexer`` base
    # machinery turns this into a static analyser -- confirm against
    # ``pygments.lexer`` before changing the signature.
    def analyse_text(text):
        return TextLexer.priority


class OutputLexer(Lexer):
    """
    Simple lexer that highlights everything as ``Token.Generic.Output``.

    .. versionadded:: 2.10
    """
    name = 'Text output'
    aliases = ['output']

    def get_tokens_unprocessed(self, text):
        """Yield the whole input as one ``Generic.Output`` token at offset 0."""
        yield 0, Generic.Output, text


# Maps a token type name as written in the raw stream (e.g. "Token.Name")
# to the resolved token type, so each name is resolved only once.
_ttype_cache = {}

# One newline-terminated line per match; a trailing fragment without a
# newline is never matched.
line_re = re.compile('.*?\n')


class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = []
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def _decompress(self, data):
        """
        Decompress *data* (bytes) according to ``self.compress``.

        Returns the decompressed bytes, or ``None`` when the data is not a
        valid stream for the selected algorithm.
        """
        if self.compress == 'gz':
            import gzip
            import zlib
            try:
                return gzip.decompress(data)
            except (OSError, EOFError, zlib.error):
                # OSError covers a bad header or CRC; EOFError and
                # zlib.error cover truncated or corrupt streams.
                return None
        if self.compress == 'bz2':
            import bz2
            try:
                return bz2.decompress(data)
            except (OSError, ValueError):
                # bz2 reports invalid data as OSError and a truncated
                # stream as ValueError.
                return None
        return data

    def get_tokens(self, text):
        """
        Yield ``(tokentype, value)`` pairs recreated from raw token data.

        *text* may be ``str`` or ``bytes``.  When a compression algorithm is
        selected and the data cannot be decompressed, the raw input is
        yielded as a single ``Error`` token and nothing else.
        """
        # Only 'gz' and 'bz2' need the bytes round trip.  'none' means "no
        # compression" and must behave like '', otherwise str input with
        # characters outside latin-1 would fail to encode.
        if self.compress in ('gz', 'bz2'):
            if isinstance(text, str):
                text = text.encode('latin1')
            decompressed = self._decompress(text)
            if decompressed is None:
                yield Error, text.decode('latin1')
                # Stop here: lexing the undecodable data as well would emit
                # the same input a second time as per-line Error tokens.
                return
            text = decompressed
        if isinstance(text, bytes):
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional.
        text = text.strip('\n') + '\n'
        for i, t, v in self.get_tokens_unprocessed(text):
            yield t, v

    def get_tokens_unprocessed(self, text):
        """
        Yield ``(offset, tokentype, value)`` for every line of *text*.

        Each line is expected to look like ``<token name>\\t<str literal>``.
        A line that cannot be parsed is yielded verbatim as an ``Error``
        token.  The offset is the running length of the values yielded so
        far.
        """
        length = 0
        for match in line_re.finditer(text):
            line = match.group()
            try:
                ttypestr, val = line.rstrip().split('\t', 1)
                ttype = _ttype_cache.get(ttypestr)
                # Compare with None: the root ``Token`` type is falsy, so a
                # truthiness test would treat a cached "Token" as a miss.
                if ttype is None:
                    ttype = Token
                    # The first dotted component (normally "Token") is
                    # skipped; the rest are resolved attribute by attribute.
                    for part in ttypestr.split('.')[1:]:
                        if not part or not part[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, part)
                    _ttype_cache[ttypestr] = ttype
                val = ast.literal_eval(val)
                if not isinstance(val, str):
                    raise ValueError('expected str')
            except (SyntaxError, TypeError, ValueError, RecursionError):
                # literal_eval can also raise TypeError (e.g. "{[]: 1}") and
                # RecursionError on malformed input; report those lines as
                # errors instead of crashing the lexer.
                val = line
                ttype = Error
            yield length, ttype, val
            length += len(val)