aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/Pygments/py3/pygments/lexers/special.py
blob: 7bdfaa81e0fad810f8d07cd30100529d2ce41fb2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
"""
    pygments.lexers.special
    ~~~~~~~~~~~~~~~~~~~~~~~

    Special lexers.

    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS. 
    :license: BSD, see LICENSE for details.
"""

import ast 
import re

from pygments.lexer import Lexer
from pygments.token import Token, Error, Text, Generic 
from pygments.util import get_choice_opt 


__all__ = ['TextLexer', 'OutputLexer', 'RawTokenLexer'] 


class TextLexer(Lexer):
    """
    A do-nothing ("null") lexer: the entire input is emitted unchanged as a
    single ``Token.Text`` token, so nothing gets highlighted.
    """
    name = 'Text only'
    aliases = ['text']
    filenames = ['*.txt']
    mimetypes = ['text/plain']
    # Very low priority so any lexer with a real match wins over plain text.
    priority = 0.01

    def get_tokens_unprocessed(self, text):
        # One token covering the whole input, starting at offset 0.
        yield (0, Text, text)

    def analyse_text(text):
        # Plain text always "matches", but only at this lexer's low priority.
        return TextLexer.priority

 
class OutputLexer(Lexer):
    """
    Trivial lexer that marks the whole input as ``Token.Generic.Output``
    (e.g. for showing captured program output verbatim).

    .. versionadded:: 2.10
    """
    name = 'Text output'
    aliases = ['output']

    def get_tokens_unprocessed(self, text):
        # Single token spanning the entire input, at offset 0.
        yield (0, Generic.Output, text)
 
 
# Cache mapping raw token-name strings (e.g. "Token.Name.Builtin") to their
# resolved token type objects; shared by all RawTokenLexer instances so the
# attribute-walk in get_tokens_unprocessed runs once per distinct name.
_ttype_cache = {}

# Matches a single line of input including its trailing newline.
line_re = re.compile('.*?\n')


class RawTokenLexer(Lexer):
    """
    Recreate a token stream formatted with the `RawTokenFormatter`.

    Each input line has the form ``Token.Type<TAB><python string literal>``;
    lines that cannot be parsed are yielded as ``Error`` tokens.

    Additional options accepted:

    `compress`
        If set to ``"gz"`` or ``"bz2"``, decompress the token stream with
        the given compression algorithm before lexing (default: ``""``).
    """
    name = 'Raw token data'
    aliases = []
    filenames = []
    mimetypes = ['application/x-pygments-tokens']

    def __init__(self, **options):
        # Accept '' / 'none' (no decompression) as well as 'gz' / 'bz2'.
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        Lexer.__init__(self, **options)

    def get_tokens(self, text):
        if self.compress:
            # Decompression works on bytes; the raw-token format is latin1.
            if isinstance(text, str):
                text = text.encode('latin1')
            try:
                if self.compress == 'gz':
                    import gzip
                    text = gzip.decompress(text)
                elif self.compress == 'bz2':
                    import bz2
                    text = bz2.decompress(text)
            except OSError:
                # Bad compressed data: surface it as one big Error token,
                # then fall through and lex whatever we have (best effort).
                yield Error, text.decode('latin1')
        if isinstance(text, bytes):
            text = text.decode('latin1')

        # do not call Lexer.get_tokens() because stripping is not optional.
        text = text.strip('\n') + '\n'
        for _, ttype, value in self.get_tokens_unprocessed(text):
            yield ttype, value

    def get_tokens_unprocessed(self, text):
        pos = 0
        for match in line_re.finditer(text):
            line = match.group()
            try:
                ttypestr, raw = line.rstrip().split('\t', 1)
                ttype = _ttype_cache.get(ttypestr)
                if not ttype:
                    # Resolve "Token.A.B" by attribute-walking from Token,
                    # validating each component along the way.
                    ttype = Token
                    for part in ttypestr.split('.')[1:]:
                        if not part or not part[0].isupper():
                            raise ValueError('malformed token name')
                        ttype = getattr(ttype, part)
                    _ttype_cache[ttypestr] = ttype
                # The value is a Python string literal, e.g. "'foo\\n'".
                value = ast.literal_eval(raw)
                if not isinstance(value, str):
                    raise ValueError('expected str')
            except (SyntaxError, ValueError):
                # Unparseable line: emit it verbatim as an Error token.
                value = line
                ttype = Error
            yield pos, ttype, value
            pos += len(value)