aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/Pygments/py3/pygments/lexers/oberon.py
blob: 266506f62e8e2accbe9c80b4608d487eaf61ce91 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
""" 
    pygments.lexers.oberon 
    ~~~~~~~~~~~~~~~~~~~~~~ 
 
    Lexers for Oberon family languages. 
 
    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details. 
""" 
 
import re 
 
from pygments.lexer import RegexLexer, include, words 
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ 
    Number, Punctuation 
 
__all__ = ['ComponentPascalLexer'] 
 
 
class ComponentPascalLexer(RegexLexer):
    """
    For `Component Pascal <http://www.oberon.ch/pdf/CP-Lang.pdf>`_ source code.

    .. versionadded:: 2.1
    """
    name = 'Component Pascal'
    aliases = ['componentpascal', 'cp']
    filenames = ['*.cp', '*.cps']
    mimetypes = ['text/x-component-pascal']

    flags = re.MULTILINE | re.DOTALL

    tokens = {
        'root': [
            include('whitespace'),
            include('comments'),
            # Operators are tried before punctuation: ':' and '.' are also
            # punctuation characters, so ':=' and '..' would otherwise be
            # consumed one character at a time and never reach their
            # dedicated operator rules.
            include('operators'),
            include('punctuation'),
            include('numliterals'),
            include('strings'),
            include('builtins'),
            include('identifiers'),
        ],
        'whitespace': [
            (r'\n+', Text),  # blank lines
            (r'\s+', Text),  # whitespace
        ],
        'comments': [
            # The first character after '(*' must not be '$'; such text is
            # not treated as a comment by this rule.
            (r'\(\*([^$].*?)\*\)', Comment.Multiline),
            # TODO: nested comments (* (* ... *) ... (* ... *) *) not supported!
        ],
        'punctuation': [
            (r'[()\[\]{},.:;|]', Punctuation),
        ],
        'numliterals': [
            # Char codes and hex numbers must start with a decimal digit;
            # otherwise identifiers made of A-F letters plus a trailing
            # X/H/L (e.g. ``DEAL`` or ``AX``) would be lexed as numbers,
            # because these rules run before the identifier rule.
            (r'[0-9][0-9A-F]*X\b', Number.Hex),             # char code
            (r'[0-9][0-9A-F]*[HL]\b', Number.Hex),          # hexadecimal number
            # The sign of the scale factor is optional (``1.0E5``).
            (r'[0-9]+\.[0-9]+E[+-]?[0-9]+', Number.Float),  # real number
            (r'[0-9]+\.[0-9]+', Number.Float),              # real number
            (r'[0-9]+', Number.Integer),                    # decimal whole number
        ],
        'strings': [
            (r"'[^\n']*'", String),  # single quoted string
            (r'"[^\n"]*"', String),  # double quoted string
        ],
        'operators': [
            # Arithmetic Operators
            (r'[+-]', Operator),
            (r'[*/]', Operator),
            # Relational Operators
            (r'[=#<>]', Operator),
            # Dereferencing Operator
            (r'\^', Operator),
            # Logical AND Operator
            (r'&', Operator),
            # Logical NOT Operator
            (r'~', Operator),
            # Assignment Symbol
            (r':=', Operator),
            # Range Constructor
            (r'\.\.', Operator),
            (r'\$', Operator),
        ],
        'identifiers': [
            (r'([a-zA-Z_$][\w$]*)', Name),
        ],
        'builtins': [
            (words((
                'ANYPTR', 'ANYREC', 'BOOLEAN', 'BYTE', 'CHAR', 'INTEGER', 'LONGINT',
                'REAL', 'SET', 'SHORTCHAR', 'SHORTINT', 'SHORTREAL'
                ), suffix=r'\b'), Keyword.Type),
            # 'SHORTCHAR' and 'SHORTINT' are intentionally absent below: the
            # Keyword.Type rule above always matches them first.
            (words((
                'ABS', 'ABSTRACT', 'ARRAY', 'ASH', 'ASSERT', 'BEGIN', 'BITS', 'BY',
                'CAP', 'CASE', 'CHR', 'CLOSE', 'CONST', 'DEC', 'DIV', 'DO', 'ELSE',
                'ELSIF', 'EMPTY', 'END', 'ENTIER', 'EXCL', 'EXIT', 'EXTENSIBLE', 'FOR',
                'HALT', 'IF', 'IMPORT', 'IN', 'INC', 'INCL', 'IS', 'LEN', 'LIMITED',
                'LONG', 'LOOP', 'MAX', 'MIN', 'MOD', 'MODULE', 'NEW', 'ODD', 'OF',
                'OR', 'ORD', 'OUT', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN',
                'SHORT', 'SIZE', 'THEN', 'TYPE', 'TO', 'UNTIL',
                'VAR', 'WHILE', 'WITH'
                ), suffix=r'\b'), Keyword.Reserved),
            (r'(TRUE|FALSE|NIL|INF)\b', Keyword.Constant),
        ]
    }

    def analyse_text(text):
        """Return a small score for each common Pascal keyword found in *text*.

        The only other lexer using .cp is the C++ one, so we check for a few
        common Pascal keywords here. Those are unfortunately quite common
        across various business languages as well.
        """
        # NOTE(review): this heuristic historically tested 'END' twice, so
        # 'END' carries double weight. The weighting is kept so that existing
        # scores do not change; confirm whether another keyword was intended.
        weights = (('BEGIN', 0.01), ('END', 0.02), ('PROCEDURE', 0.01))
        result = 0
        for keyword, weight in weights:
            if keyword in text:
                result += weight

        return result