diff options
author | ilezhankin <ilezhankin@yandex-team.ru> | 2022-02-10 16:45:55 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:55 +0300 |
commit | 1d125034f06575234f83f24f08677955133f140e (patch) | |
tree | ec05fbbd61dc118d5de37f206ab978cff58774bd /contrib/python/Pygments/py2/pygments/lexers/modula2.py | |
parent | 3a7a498715ef1b66f5054455421b845e45e3a653 (diff) | |
download | ydb-1d125034f06575234f83f24f08677955133f140e.tar.gz |
Restoring authorship annotation for <ilezhankin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/python/Pygments/py2/pygments/lexers/modula2.py')
-rw-r--r-- | contrib/python/Pygments/py2/pygments/lexers/modula2.py | 3118 |
1 files changed, 1559 insertions, 1559 deletions
diff --git a/contrib/python/Pygments/py2/pygments/lexers/modula2.py b/contrib/python/Pygments/py2/pygments/lexers/modula2.py index 4fd84dab8f..4c0fde8aab 100644 --- a/contrib/python/Pygments/py2/pygments/lexers/modula2.py +++ b/contrib/python/Pygments/py2/pygments/lexers/modula2.py @@ -1,1561 +1,1561 @@ -# -*- coding: utf-8 -*- -""" - pygments.lexers.modula2 - ~~~~~~~~~~~~~~~~~~~~~~~ - - Multi-Dialect Lexer for Modula-2. - +# -*- coding: utf-8 -*- +""" + pygments.lexers.modula2 + ~~~~~~~~~~~~~~~~~~~~~~~ + + Multi-Dialect Lexer for Modula-2. + :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS. - :license: BSD, see LICENSE for details. -""" - -import re - -from pygments.lexer import RegexLexer, include -from pygments.util import get_bool_opt, get_list_opt -from pygments.token import Text, Comment, Operator, Keyword, Name, \ - String, Number, Punctuation, Error - -__all__ = ['Modula2Lexer'] - - -# Multi-Dialect Modula-2 Lexer -class Modula2Lexer(RegexLexer): - """ - For `Modula-2 <http://www.modula2.org/>`_ source code. - - The Modula-2 lexer supports several dialects. By default, it operates in - fallback mode, recognising the *combined* literals, punctuation symbols - and operators of all supported dialects, and the *combined* reserved words - and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not - differentiating between library defined identifiers. - - To select a specific dialect, a dialect option may be passed - or a dialect tag may be embedded into a source file. - - Dialect Options: - - `m2pim` - Select PIM Modula-2 dialect. - `m2iso` - Select ISO Modula-2 dialect. - `m2r10` - Select Modula-2 R10 dialect. - `objm2` - Select Objective Modula-2 dialect. - - The PIM and ISO dialect options may be qualified with a language extension. - - Language Extensions: - - `+aglet` - Select Aglet Modula-2 extensions, available with m2iso. - `+gm2` - Select GNU Modula-2 extensions, available with m2pim. 
- `+p1` - Select p1 Modula-2 extensions, available with m2iso. - `+xds` - Select XDS Modula-2 extensions, available with m2iso. - - - Passing a Dialect Option via Unix Commandline Interface - - Dialect options may be passed to the lexer using the `dialect` key. - Only one such option should be passed. If multiple dialect options are - passed, the first valid option is used, any subsequent options are ignored. - - Examples: - - `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input` - Use ISO dialect to render input to HTML output - `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input` - Use ISO dialect with p1 extensions to render input to RTF output - - - Embedding a Dialect Option within a source file - - A dialect option may be embedded in a source file in form of a dialect - tag, a specially formatted comment that specifies a dialect option. - - Dialect Tag EBNF:: - - dialectTag : - OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ; - - dialectOption : - 'm2pim' | 'm2iso' | 'm2r10' | 'objm2' | - 'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ; - - Prefix : '!' ; - - OpeningCommentDelim : '(*' ; - - ClosingCommentDelim : '*)' ; - - No whitespace is permitted between the tokens of a dialect tag. - - In the event that a source file contains multiple dialect tags, the first - tag that contains a valid dialect option will be used and any subsequent - dialect tags will be ignored. Ideally, a dialect tag should be placed - at the beginning of a source file. - - An embedded dialect tag overrides a dialect option set via command line. - - Examples: - - ``(*!m2r10*) DEFINITION MODULE Foobar; ...`` - Use Modula2 R10 dialect to render this source file. - ``(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...`` - Use PIM dialect with GNU extensions to render this source file. 
- - - Algol Publication Mode: - - In Algol publication mode, source text is rendered for publication of - algorithms in scientific papers and academic texts, following the format - of the Revised Algol-60 Language Report. It is activated by passing - one of two corresponding styles as an option: - - `algol` - render reserved words lowercase underline boldface - and builtins lowercase boldface italic - `algol_nu` - render reserved words lowercase boldface (no underlining) - and builtins lowercase boldface italic - - The lexer automatically performs the required lowercase conversion when - this mode is activated. - - Example: - - ``$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input`` - Render input file in Algol publication mode to LaTeX output. - - - Rendering Mode of First Class ADT Identifiers: - - The rendering of standard library first class ADT identifiers is controlled - by option flag "treat_stdlib_adts_as_builtins". - - When this option is turned on, standard library ADT identifiers are rendered - as builtins. When it is turned off, they are rendered as ordinary library - identifiers. - - `treat_stdlib_adts_as_builtins` (default: On) - - The option is useful for dialects that support ADTs as first class objects - and provide ADTs in the standard library that would otherwise be built-in. - - At present, only Modula-2 R10 supports library ADTs as first class objects - and therefore, no ADT identifiers are defined for any other dialects. - - Example: - - ``$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...`` - Render standard library ADTs as ordinary library types. - - .. versionadded:: 1.3 - - .. versionchanged:: 2.1 - Added multi-dialect support. 
- """ - name = 'Modula-2' - aliases = ['modula2', 'm2'] - filenames = ['*.def', '*.mod'] - mimetypes = ['text/x-modula2'] - - flags = re.MULTILINE | re.DOTALL - - tokens = { - 'whitespace': [ - (r'\n+', Text), # blank lines - (r'\s+', Text), # whitespace - ], - 'dialecttags': [ - # PIM Dialect Tag - (r'\(\*!m2pim\*\)', Comment.Special), - # ISO Dialect Tag - (r'\(\*!m2iso\*\)', Comment.Special), - # M2R10 Dialect Tag - (r'\(\*!m2r10\*\)', Comment.Special), - # ObjM2 Dialect Tag - (r'\(\*!objm2\*\)', Comment.Special), - # Aglet Extensions Dialect Tag - (r'\(\*!m2iso\+aglet\*\)', Comment.Special), - # GNU Extensions Dialect Tag - (r'\(\*!m2pim\+gm2\*\)', Comment.Special), - # p1 Extensions Dialect Tag - (r'\(\*!m2iso\+p1\*\)', Comment.Special), - # XDS Extensions Dialect Tag - (r'\(\*!m2iso\+xds\*\)', Comment.Special), - ], - 'identifiers': [ - (r'([a-zA-Z_$][\w$]*)', Name), - ], - 'prefixed_number_literals': [ - # - # Base-2, whole number - (r'0b[01]+(\'[01]+)*', Number.Bin), - # - # Base-16, whole number - (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex), - ], - 'plain_number_literals': [ - # - # Base-10, real number with exponent - (r'[0-9]+(\'[0-9]+)*' # integral part - r'\.[0-9]+(\'[0-9]+)*' # fractional part - r'[eE][+-]?[0-9]+(\'[0-9]+)*', # exponent - Number.Float), - # - # Base-10, real number without exponent - (r'[0-9]+(\'[0-9]+)*' # integral part - r'\.[0-9]+(\'[0-9]+)*', # fractional part - Number.Float), - # - # Base-10, whole number - (r'[0-9]+(\'[0-9]+)*', Number.Integer), - ], - 'suffixed_number_literals': [ - # - # Base-8, whole number - (r'[0-7]+B', Number.Oct), - # - # Base-8, character code - (r'[0-7]+C', Number.Oct), - # - # Base-16, number - (r'[0-9A-F]+H', Number.Hex), - ], - 'string_literals': [ - (r"'(\\\\|\\'|[^'])*'", String), # single quoted string - (r'"(\\\\|\\"|[^"])*"', String), # double quoted string - ], - 'digraph_operators': [ - # Dot Product Operator - (r'\*\.', Operator), - # Array Concatenation Operator - (r'\+>', Operator), # 
M2R10 + ObjM2 - # Inequality Operator - (r'<>', Operator), # ISO + PIM - # Less-Or-Equal, Subset - (r'<=', Operator), - # Greater-Or-Equal, Superset - (r'>=', Operator), - # Identity Operator - (r'==', Operator), # M2R10 + ObjM2 - # Type Conversion Operator - (r'::', Operator), # M2R10 + ObjM2 - # Assignment Symbol - (r':=', Operator), - # Postfix Increment Mutator - (r'\+\+', Operator), # M2R10 + ObjM2 - # Postfix Decrement Mutator - (r'--', Operator), # M2R10 + ObjM2 - ], - 'unigraph_operators': [ - # Arithmetic Operators - (r'[+-]', Operator), - (r'[*/]', Operator), - # ISO 80000-2 compliant Set Difference Operator - (r'\\', Operator), # M2R10 + ObjM2 - # Relational Operators - (r'[=#<>]', Operator), - # Dereferencing Operator - (r'\^', Operator), - # Dereferencing Operator Synonym - (r'@', Operator), # ISO - # Logical AND Operator Synonym - (r'&', Operator), # PIM + ISO - # Logical NOT Operator Synonym - (r'~', Operator), # PIM + ISO - # Smalltalk Message Prefix - (r'`', Operator), # ObjM2 - ], - 'digraph_punctuation': [ - # Range Constructor - (r'\.\.', Punctuation), - # Opening Chevron Bracket - (r'<<', Punctuation), # M2R10 + ISO - # Closing Chevron Bracket - (r'>>', Punctuation), # M2R10 + ISO - # Blueprint Punctuation - (r'->', Punctuation), # M2R10 + ISO - # Distinguish |# and # in M2 R10 - (r'\|#', Punctuation), - # Distinguish ## and # in M2 R10 - (r'##', Punctuation), - # Distinguish |* and * in M2 R10 - (r'\|\*', Punctuation), - ], - 'unigraph_punctuation': [ - # Common Punctuation + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import RegexLexer, include +from pygments.util import get_bool_opt, get_list_opt +from pygments.token import Text, Comment, Operator, Keyword, Name, \ + String, Number, Punctuation, Error + +__all__ = ['Modula2Lexer'] + + +# Multi-Dialect Modula-2 Lexer +class Modula2Lexer(RegexLexer): + """ + For `Modula-2 <http://www.modula2.org/>`_ source code. 
+ + The Modula-2 lexer supports several dialects. By default, it operates in + fallback mode, recognising the *combined* literals, punctuation symbols + and operators of all supported dialects, and the *combined* reserved words + and builtins of PIM Modula-2, ISO Modula-2 and Modula-2 R10, while not + differentiating between library defined identifiers. + + To select a specific dialect, a dialect option may be passed + or a dialect tag may be embedded into a source file. + + Dialect Options: + + `m2pim` + Select PIM Modula-2 dialect. + `m2iso` + Select ISO Modula-2 dialect. + `m2r10` + Select Modula-2 R10 dialect. + `objm2` + Select Objective Modula-2 dialect. + + The PIM and ISO dialect options may be qualified with a language extension. + + Language Extensions: + + `+aglet` + Select Aglet Modula-2 extensions, available with m2iso. + `+gm2` + Select GNU Modula-2 extensions, available with m2pim. + `+p1` + Select p1 Modula-2 extensions, available with m2iso. + `+xds` + Select XDS Modula-2 extensions, available with m2iso. + + + Passing a Dialect Option via Unix Commandline Interface + + Dialect options may be passed to the lexer using the `dialect` key. + Only one such option should be passed. If multiple dialect options are + passed, the first valid option is used, any subsequent options are ignored. + + Examples: + + `$ pygmentize -O full,dialect=m2iso -f html -o /path/to/output /path/to/input` + Use ISO dialect to render input to HTML output + `$ pygmentize -O full,dialect=m2iso+p1 -f rtf -o /path/to/output /path/to/input` + Use ISO dialect with p1 extensions to render input to RTF output + + + Embedding a Dialect Option within a source file + + A dialect option may be embedded in a source file in form of a dialect + tag, a specially formatted comment that specifies a dialect option. 
+ + Dialect Tag EBNF:: + + dialectTag : + OpeningCommentDelim Prefix dialectOption ClosingCommentDelim ; + + dialectOption : + 'm2pim' | 'm2iso' | 'm2r10' | 'objm2' | + 'm2iso+aglet' | 'm2pim+gm2' | 'm2iso+p1' | 'm2iso+xds' ; + + Prefix : '!' ; + + OpeningCommentDelim : '(*' ; + + ClosingCommentDelim : '*)' ; + + No whitespace is permitted between the tokens of a dialect tag. + + In the event that a source file contains multiple dialect tags, the first + tag that contains a valid dialect option will be used and any subsequent + dialect tags will be ignored. Ideally, a dialect tag should be placed + at the beginning of a source file. + + An embedded dialect tag overrides a dialect option set via command line. + + Examples: + + ``(*!m2r10*) DEFINITION MODULE Foobar; ...`` + Use Modula2 R10 dialect to render this source file. + ``(*!m2pim+gm2*) DEFINITION MODULE Bazbam; ...`` + Use PIM dialect with GNU extensions to render this source file. + + + Algol Publication Mode: + + In Algol publication mode, source text is rendered for publication of + algorithms in scientific papers and academic texts, following the format + of the Revised Algol-60 Language Report. It is activated by passing + one of two corresponding styles as an option: + + `algol` + render reserved words lowercase underline boldface + and builtins lowercase boldface italic + `algol_nu` + render reserved words lowercase boldface (no underlining) + and builtins lowercase boldface italic + + The lexer automatically performs the required lowercase conversion when + this mode is activated. + + Example: + + ``$ pygmentize -O full,style=algol -f latex -o /path/to/output /path/to/input`` + Render input file in Algol publication mode to LaTeX output. + + + Rendering Mode of First Class ADT Identifiers: + + The rendering of standard library first class ADT identifiers is controlled + by option flag "treat_stdlib_adts_as_builtins". 
+ + When this option is turned on, standard library ADT identifiers are rendered + as builtins. When it is turned off, they are rendered as ordinary library + identifiers. + + `treat_stdlib_adts_as_builtins` (default: On) + + The option is useful for dialects that support ADTs as first class objects + and provide ADTs in the standard library that would otherwise be built-in. + + At present, only Modula-2 R10 supports library ADTs as first class objects + and therefore, no ADT identifiers are defined for any other dialects. + + Example: + + ``$ pygmentize -O full,dialect=m2r10,treat_stdlib_adts_as_builtins=Off ...`` + Render standard library ADTs as ordinary library types. + + .. versionadded:: 1.3 + + .. versionchanged:: 2.1 + Added multi-dialect support. + """ + name = 'Modula-2' + aliases = ['modula2', 'm2'] + filenames = ['*.def', '*.mod'] + mimetypes = ['text/x-modula2'] + + flags = re.MULTILINE | re.DOTALL + + tokens = { + 'whitespace': [ + (r'\n+', Text), # blank lines + (r'\s+', Text), # whitespace + ], + 'dialecttags': [ + # PIM Dialect Tag + (r'\(\*!m2pim\*\)', Comment.Special), + # ISO Dialect Tag + (r'\(\*!m2iso\*\)', Comment.Special), + # M2R10 Dialect Tag + (r'\(\*!m2r10\*\)', Comment.Special), + # ObjM2 Dialect Tag + (r'\(\*!objm2\*\)', Comment.Special), + # Aglet Extensions Dialect Tag + (r'\(\*!m2iso\+aglet\*\)', Comment.Special), + # GNU Extensions Dialect Tag + (r'\(\*!m2pim\+gm2\*\)', Comment.Special), + # p1 Extensions Dialect Tag + (r'\(\*!m2iso\+p1\*\)', Comment.Special), + # XDS Extensions Dialect Tag + (r'\(\*!m2iso\+xds\*\)', Comment.Special), + ], + 'identifiers': [ + (r'([a-zA-Z_$][\w$]*)', Name), + ], + 'prefixed_number_literals': [ + # + # Base-2, whole number + (r'0b[01]+(\'[01]+)*', Number.Bin), + # + # Base-16, whole number + (r'0[ux][0-9A-F]+(\'[0-9A-F]+)*', Number.Hex), + ], + 'plain_number_literals': [ + # + # Base-10, real number with exponent + (r'[0-9]+(\'[0-9]+)*' # integral part + r'\.[0-9]+(\'[0-9]+)*' # fractional part + 
r'[eE][+-]?[0-9]+(\'[0-9]+)*', # exponent + Number.Float), + # + # Base-10, real number without exponent + (r'[0-9]+(\'[0-9]+)*' # integral part + r'\.[0-9]+(\'[0-9]+)*', # fractional part + Number.Float), + # + # Base-10, whole number + (r'[0-9]+(\'[0-9]+)*', Number.Integer), + ], + 'suffixed_number_literals': [ + # + # Base-8, whole number + (r'[0-7]+B', Number.Oct), + # + # Base-8, character code + (r'[0-7]+C', Number.Oct), + # + # Base-16, number + (r'[0-9A-F]+H', Number.Hex), + ], + 'string_literals': [ + (r"'(\\\\|\\'|[^'])*'", String), # single quoted string + (r'"(\\\\|\\"|[^"])*"', String), # double quoted string + ], + 'digraph_operators': [ + # Dot Product Operator + (r'\*\.', Operator), + # Array Concatenation Operator + (r'\+>', Operator), # M2R10 + ObjM2 + # Inequality Operator + (r'<>', Operator), # ISO + PIM + # Less-Or-Equal, Subset + (r'<=', Operator), + # Greater-Or-Equal, Superset + (r'>=', Operator), + # Identity Operator + (r'==', Operator), # M2R10 + ObjM2 + # Type Conversion Operator + (r'::', Operator), # M2R10 + ObjM2 + # Assignment Symbol + (r':=', Operator), + # Postfix Increment Mutator + (r'\+\+', Operator), # M2R10 + ObjM2 + # Postfix Decrement Mutator + (r'--', Operator), # M2R10 + ObjM2 + ], + 'unigraph_operators': [ + # Arithmetic Operators + (r'[+-]', Operator), + (r'[*/]', Operator), + # ISO 80000-2 compliant Set Difference Operator + (r'\\', Operator), # M2R10 + ObjM2 + # Relational Operators + (r'[=#<>]', Operator), + # Dereferencing Operator + (r'\^', Operator), + # Dereferencing Operator Synonym + (r'@', Operator), # ISO + # Logical AND Operator Synonym + (r'&', Operator), # PIM + ISO + # Logical NOT Operator Synonym + (r'~', Operator), # PIM + ISO + # Smalltalk Message Prefix + (r'`', Operator), # ObjM2 + ], + 'digraph_punctuation': [ + # Range Constructor + (r'\.\.', Punctuation), + # Opening Chevron Bracket + (r'<<', Punctuation), # M2R10 + ISO + # Closing Chevron Bracket + (r'>>', Punctuation), # M2R10 + ISO + # Blueprint 
Punctuation + (r'->', Punctuation), # M2R10 + ISO + # Distinguish |# and # in M2 R10 + (r'\|#', Punctuation), + # Distinguish ## and # in M2 R10 + (r'##', Punctuation), + # Distinguish |* and * in M2 R10 + (r'\|\*', Punctuation), + ], + 'unigraph_punctuation': [ + # Common Punctuation (r'[()\[\]{},.:;|]', Punctuation), - # Case Label Separator Synonym - (r'!', Punctuation), # ISO - # Blueprint Punctuation - (r'\?', Punctuation), # M2R10 + ObjM2 - ], - 'comments': [ - # Single Line Comment - (r'^//.*?\n', Comment.Single), # M2R10 + ObjM2 - # Block Comment - (r'\(\*([^$].*?)\*\)', Comment.Multiline), - # Template Block Comment - (r'/\*(.*?)\*/', Comment.Multiline), # M2R10 + ObjM2 - ], - 'pragmas': [ - # ISO Style Pragmas - (r'<\*.*?\*>', Comment.Preproc), # ISO, M2R10 + ObjM2 - # Pascal Style Pragmas - (r'\(\*\$.*?\*\)', Comment.Preproc), # PIM - ], - 'root': [ - include('whitespace'), - include('dialecttags'), - include('pragmas'), - include('comments'), - include('identifiers'), - include('suffixed_number_literals'), # PIM + ISO - include('prefixed_number_literals'), # M2R10 + ObjM2 - include('plain_number_literals'), - include('string_literals'), - include('digraph_punctuation'), - include('digraph_operators'), - include('unigraph_punctuation'), - include('unigraph_operators'), - ] - } - -# C o m m o n D a t a s e t s - - # Common Reserved Words Dataset - common_reserved_words = ( - # 37 common reserved words - 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', - 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF', - 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT', - 'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN', - 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE', - ) - - # Common Builtins Dataset - common_builtins = ( - # 16 common builtins - 'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER', - 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL', - 'TRUE', - ) - - # Common 
Pseudo-Module Builtins Dataset - common_pseudo_builtins = ( - # 4 common pseudo builtins - 'ADDRESS', 'BYTE', 'WORD', 'ADR' - ) - -# P I M M o d u l a - 2 D a t a s e t s - - # Lexemes to Mark as Error Tokens for PIM Modula-2 - pim_lexemes_to_reject = ( - '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', - '+>', '->', '<<', '>>', '|#', '##', - ) - - # PIM Modula-2 Additional Reserved Words Dataset - pim_additional_reserved_words = ( - # 3 additional reserved words - 'EXPORT', 'QUALIFIED', 'WITH', - ) - - # PIM Modula-2 Additional Builtins Dataset - pim_additional_builtins = ( - # 16 additional builtins - 'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH', - 'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL', - ) - - # PIM Modula-2 Additional Pseudo-Module Builtins Dataset - pim_additional_pseudo_builtins = ( - # 5 additional pseudo builtins - 'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER', - ) - -# I S O M o d u l a - 2 D a t a s e t s - - # Lexemes to Mark as Error Tokens for ISO Modula-2 - iso_lexemes_to_reject = ( - '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->', - '<<', '>>', '|#', '##', - ) - - # ISO Modula-2 Additional Reserved Words Dataset - iso_additional_reserved_words = ( - # 9 additional reserved words (ISO 10514-1) - 'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 'PACKEDSET', 'QUALIFIED', - 'REM', 'RETRY', 'WITH', - # 10 additional reserved words (ISO 10514-2 & ISO 10514-3) - 'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY', - 'REVEAL', 'TRACED', 'UNSAFEGUARDED', - ) - - # ISO Modula-2 Additional Builtins Dataset - iso_additional_builtins = ( - # 26 additional builtins (ISO 10514-1) - 'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', - 'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH', - 'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE', - 'TRUNC', 'UNINTERRUBTIBLE', 'VAL', - # 5 additional builtins (ISO 
10514-2 & ISO 10514-3) - 'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF', - ) - - # ISO Modula-2 Additional Pseudo-Module Builtins Dataset - iso_additional_pseudo_builtins = ( - # 14 additional builtins (SYSTEM) - 'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC', - 'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR', - 'ROTATE', 'SHIFT', 'CAST', 'TSIZE', - # 13 additional builtins (COROUTINES) - 'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER', - 'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN', - 'NEWCOROUTINE', 'PROT', 'TRANSFER', - # 9 additional builtins (EXCEPTIONS) - 'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber', - 'ExceptionSource', 'GetMessage', 'IsCurrentSource', - 'IsExceptionalExecution', 'RAISE', - # 3 additional builtins (TERMINATION) - 'TERMINATION', 'IsTerminating', 'HasHalted', - # 4 additional builtins (M2EXCEPTION) - 'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception', - 'indexException', 'rangeException', 'caseSelectException', - 'invalidLocation', 'functionException', 'wholeValueException', - 'wholeDivException', 'realValueException', 'realDivException', - 'complexValueException', 'complexDivException', 'protException', - 'sysException', 'coException', 'exException', - ) - -# M o d u l a - 2 R 1 0 D a t a s e t s - - # Lexemes to Mark as Error Tokens for Modula-2 R10 - m2r10_lexemes_to_reject = ( - '!', '`', '@', '$', '%', '&', '<>', - ) - - # Modula-2 R10 reserved words in addition to the common set - m2r10_additional_reserved_words = ( - # 12 additional reserved words - 'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE', - 'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN', - # 2 additional reserved words with symbolic assembly option - 'ASM', 'REG', - ) - - # Modula-2 R10 builtins in addition to the common set - m2r10_additional_builtins = ( - # 26 additional builtins - 'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD', - 'OCTET', 
'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT', - 'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE', - 'UNICHAR', 'WRITE', 'WRITEF', - ) - - # Modula-2 R10 Additional Pseudo-Module Builtins Dataset - m2r10_additional_pseudo_builtins = ( - # 13 additional builtins (TPROPERTIES) - 'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL', - 'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION', - 'TMAXEXP', 'TMINEXP', - # 4 additional builtins (CONVERSION) - 'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL', - # 35 additional builtins (UNSAFE) - 'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC', - 'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC', - 'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR', - 'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT', - 'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC', - # 11 additional builtins (ATOMIC) - 'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND', - 'BWNAND', 'BWOR', 'BWXOR', - # 7 additional builtins (COMPILER) - 'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT', - 'HASH', - # 5 additional builtins (ASSEMBLER) - 'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE', - ) - -# O b j e c t i v e M o d u l a - 2 D a t a s e t s - - # Lexemes to Mark as Error Tokens for Objective Modula-2 - objm2_lexemes_to_reject = ( - '!', '$', '%', '&', '<>', - ) - - # Objective Modula-2 Extensions - # reserved words in addition to Modula-2 R10 - objm2_additional_reserved_words = ( - # 16 additional reserved words - 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD', - 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC', - 'SUPER', 'TRY', - ) - - # Objective Modula-2 Extensions - # builtins in addition to Modula-2 R10 - objm2_additional_builtins = ( - # 3 additional builtins - 'OBJECT', 'NO', 'YES', - ) - - # Objective Modula-2 Extensions - # pseudo-module builtins in addition to 
Modula-2 R10 - objm2_additional_pseudo_builtins = ( - # None - ) - -# A g l e t M o d u l a - 2 D a t a s e t s - - # Aglet Extensions - # reserved words in addition to ISO Modula-2 - aglet_additional_reserved_words = ( - # None - ) - - # Aglet Extensions - # builtins in addition to ISO Modula-2 - aglet_additional_builtins = ( - # 9 additional builtins - 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16', - 'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32', - ) - - # Aglet Modula-2 Extensions - # pseudo-module builtins in addition to ISO Modula-2 - aglet_additional_pseudo_builtins = ( - # None - ) - -# G N U M o d u l a - 2 D a t a s e t s - - # GNU Extensions - # reserved words in addition to PIM Modula-2 - gm2_additional_reserved_words = ( - # 10 additional reserved words - 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__', - '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE', - ) - - # GNU Extensions - # builtins in addition to PIM Modula-2 - gm2_additional_builtins = ( - # 21 additional builtins - 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16', - 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96', - 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64', - 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW', - ) - - # GNU Extensions - # pseudo-module builtins in addition to PIM Modula-2 - gm2_additional_pseudo_builtins = ( - # None - ) - -# p 1 M o d u l a - 2 D a t a s e t s - - # p1 Extensions - # reserved words in addition to ISO Modula-2 - p1_additional_reserved_words = ( - # None - ) - - # p1 Extensions - # builtins in addition to ISO Modula-2 - p1_additional_builtins = ( - # None - ) - - # p1 Modula-2 Extensions - # pseudo-module builtins in addition to ISO Modula-2 - p1_additional_pseudo_builtins = ( - # 1 additional builtin - 'BCD', - ) - -# X D S M o d u l a - 2 D a t a s e t s - - # XDS Extensions - # reserved words in addition to ISO Modula-2 - 
xds_additional_reserved_words = ( - # 1 additional reserved word - 'SEQ', - ) - - # XDS Extensions - # builtins in addition to ISO Modula-2 - xds_additional_builtins = ( - # 9 additional builtins - 'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN', - 'LONGCARD', 'SHORTCARD', 'SHORTINT', - ) - - # XDS Modula-2 Extensions - # pseudo-module builtins in addition to ISO Modula-2 - xds_additional_pseudo_builtins = ( - # 22 additional builtins (SYSTEM) - 'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8', - 'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE', - 'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void' - # 3 additional builtins (COMPILER) - 'COMPILER', 'OPTION', 'EQUATION' - ) - -# P I M S t a n d a r d L i b r a r y D a t a s e t s - - # PIM Modula-2 Standard Library Modules Dataset - pim_stdlib_module_identifiers = ( - 'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage', - ) - - # PIM Modula-2 Standard Library Types Dataset - pim_stdlib_type_identifiers = ( - 'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission', - 'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand', - 'DirectoryCommand', - ) - - # PIM Modula-2 Standard Library Procedures Dataset - pim_stdlib_proc_identifiers = ( - 'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn', - 'Create', 'Lookup', 'Close', 'Delete', 'Rename', 'SetRead', 'SetWrite', - 'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset', - 'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar', - 'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName', - 'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput', - 'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd', - 'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd', - 'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp', - 'ln', 'sin', 'cos', 'arctan', 'entier', 'ALLOCATE', 'DEALLOCATE', - ) - - # PIM Modula-2 Standard 
Library Variables Dataset - pim_stdlib_var_identifiers = ( - 'Done', 'termCH', 'in', 'out' - ) - - # PIM Modula-2 Standard Library Constants Dataset - pim_stdlib_const_identifiers = ( - 'EOL', - ) - -# I S O S t a n d a r d L i b r a r y D a t a s e t s - - # ISO Modula-2 Standard Library Modules Dataset - iso_stdlib_module_identifiers = ( - # TO DO - ) - - # ISO Modula-2 Standard Library Types Dataset - iso_stdlib_type_identifiers = ( - # TO DO - ) - - # ISO Modula-2 Standard Library Procedures Dataset - iso_stdlib_proc_identifiers = ( - # TO DO - ) - - # ISO Modula-2 Standard Library Variables Dataset - iso_stdlib_var_identifiers = ( - # TO DO - ) - - # ISO Modula-2 Standard Library Constants Dataset - iso_stdlib_const_identifiers = ( - # TO DO - ) - -# M 2 R 1 0 S t a n d a r d L i b r a r y D a t a s e t s - - # Modula-2 R10 Standard Library ADTs Dataset - m2r10_stdlib_adt_identifiers = ( - 'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET', - 'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD', - 'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT', - 'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64', - 'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8', - 'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8', - 'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16', - 'INTEGER32', 'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32', - 'INT64', 'INT128', 'STRING', 'UNISTRING', - ) - - # Modula-2 R10 Standard Library Blueprints Dataset - m2r10_stdlib_blueprint_identifiers = ( - 'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar', - 'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal', - 'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray', - 'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet', - 'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet', - 'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension', - 'ProtoIO', 
'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath', - ) - - # Modula-2 R10 Standard Library Modules Dataset - m2r10_stdlib_module_identifiers = ( - 'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO', - 'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO', - 'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath', - 'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath', - 'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport', - ) - - # Modula-2 R10 Standard Library Types Dataset - m2r10_stdlib_type_identifiers = ( - 'File', 'Status', - # TO BE COMPLETED - ) - - # Modula-2 R10 Standard Library Procedures Dataset - m2r10_stdlib_proc_identifiers = ( - 'ALLOCATE', 'DEALLOCATE', 'SIZE', - # TO BE COMPLETED - ) - - # Modula-2 R10 Standard Library Variables Dataset - m2r10_stdlib_var_identifiers = ( - 'stdIn', 'stdOut', 'stdErr', - ) - - # Modula-2 R10 Standard Library Constants Dataset - m2r10_stdlib_const_identifiers = ( - 'pi', 'tau', - ) - -# D i a l e c t s - - # Dialect modes - dialects = ( - 'unknown', - 'm2pim', 'm2iso', 'm2r10', 'objm2', - 'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds', - ) - -# D a t a b a s e s - - # Lexemes to Mark as Errors Database - lexemes_to_reject_db = { - # Lexemes to reject for unknown dialect - 'unknown': ( - # LEAVE THIS EMPTY - ), - # Lexemes to reject for PIM Modula-2 - 'm2pim': ( - pim_lexemes_to_reject, - ), - # Lexemes to reject for ISO Modula-2 - 'm2iso': ( - iso_lexemes_to_reject, - ), - # Lexemes to reject for Modula-2 R10 - 'm2r10': ( - m2r10_lexemes_to_reject, - ), - # Lexemes to reject for Objective Modula-2 - 'objm2': ( - objm2_lexemes_to_reject, - ), - # Lexemes to reject for Aglet Modula-2 - 'm2iso+aglet': ( - iso_lexemes_to_reject, - ), - # Lexemes to reject for GNU Modula-2 - 'm2pim+gm2': ( - pim_lexemes_to_reject, - ), - # Lexemes to reject for p1 Modula-2 - 'm2iso+p1': ( - iso_lexemes_to_reject, - ), - # Lexemes to reject for XDS Modula-2 - 'm2iso+xds': ( - 
iso_lexemes_to_reject, - ), - } - - # Reserved Words Database - reserved_words_db = { - # Reserved words for unknown dialect - 'unknown': ( - common_reserved_words, - pim_additional_reserved_words, - iso_additional_reserved_words, - m2r10_additional_reserved_words, - ), - - # Reserved words for PIM Modula-2 - 'm2pim': ( - common_reserved_words, - pim_additional_reserved_words, - ), - - # Reserved words for Modula-2 R10 - 'm2iso': ( - common_reserved_words, - iso_additional_reserved_words, - ), - - # Reserved words for ISO Modula-2 - 'm2r10': ( - common_reserved_words, - m2r10_additional_reserved_words, - ), - - # Reserved words for Objective Modula-2 - 'objm2': ( - common_reserved_words, - m2r10_additional_reserved_words, - objm2_additional_reserved_words, - ), - - # Reserved words for Aglet Modula-2 Extensions - 'm2iso+aglet': ( - common_reserved_words, - iso_additional_reserved_words, - aglet_additional_reserved_words, - ), - - # Reserved words for GNU Modula-2 Extensions - 'm2pim+gm2': ( - common_reserved_words, - pim_additional_reserved_words, - gm2_additional_reserved_words, - ), - - # Reserved words for p1 Modula-2 Extensions - 'm2iso+p1': ( - common_reserved_words, - iso_additional_reserved_words, - p1_additional_reserved_words, - ), - - # Reserved words for XDS Modula-2 Extensions - 'm2iso+xds': ( - common_reserved_words, - iso_additional_reserved_words, - xds_additional_reserved_words, - ), - } - - # Builtins Database - builtins_db = { - # Builtins for unknown dialect - 'unknown': ( - common_builtins, - pim_additional_builtins, - iso_additional_builtins, - m2r10_additional_builtins, - ), - - # Builtins for PIM Modula-2 - 'm2pim': ( - common_builtins, - pim_additional_builtins, - ), - - # Builtins for ISO Modula-2 - 'm2iso': ( - common_builtins, - iso_additional_builtins, - ), - - # Builtins for ISO Modula-2 - 'm2r10': ( - common_builtins, - m2r10_additional_builtins, - ), - - # Builtins for Objective Modula-2 - 'objm2': ( - common_builtins, - 
m2r10_additional_builtins, - objm2_additional_builtins, - ), - - # Builtins for Aglet Modula-2 Extensions - 'm2iso+aglet': ( - common_builtins, - iso_additional_builtins, - aglet_additional_builtins, - ), - - # Builtins for GNU Modula-2 Extensions - 'm2pim+gm2': ( - common_builtins, - pim_additional_builtins, - gm2_additional_builtins, - ), - - # Builtins for p1 Modula-2 Extensions - 'm2iso+p1': ( - common_builtins, - iso_additional_builtins, - p1_additional_builtins, - ), - - # Builtins for XDS Modula-2 Extensions - 'm2iso+xds': ( - common_builtins, - iso_additional_builtins, - xds_additional_builtins, - ), - } - - # Pseudo-Module Builtins Database - pseudo_builtins_db = { - # Builtins for unknown dialect - 'unknown': ( - common_pseudo_builtins, - pim_additional_pseudo_builtins, - iso_additional_pseudo_builtins, - m2r10_additional_pseudo_builtins, - ), - - # Builtins for PIM Modula-2 - 'm2pim': ( - common_pseudo_builtins, - pim_additional_pseudo_builtins, - ), - - # Builtins for ISO Modula-2 - 'm2iso': ( - common_pseudo_builtins, - iso_additional_pseudo_builtins, - ), - - # Builtins for ISO Modula-2 - 'm2r10': ( - common_pseudo_builtins, - m2r10_additional_pseudo_builtins, - ), - - # Builtins for Objective Modula-2 - 'objm2': ( - common_pseudo_builtins, - m2r10_additional_pseudo_builtins, - objm2_additional_pseudo_builtins, - ), - - # Builtins for Aglet Modula-2 Extensions - 'm2iso+aglet': ( - common_pseudo_builtins, - iso_additional_pseudo_builtins, - aglet_additional_pseudo_builtins, - ), - - # Builtins for GNU Modula-2 Extensions - 'm2pim+gm2': ( - common_pseudo_builtins, - pim_additional_pseudo_builtins, - gm2_additional_pseudo_builtins, - ), - - # Builtins for p1 Modula-2 Extensions - 'm2iso+p1': ( - common_pseudo_builtins, - iso_additional_pseudo_builtins, - p1_additional_pseudo_builtins, - ), - - # Builtins for XDS Modula-2 Extensions - 'm2iso+xds': ( - common_pseudo_builtins, - iso_additional_pseudo_builtins, - xds_additional_pseudo_builtins, - ), - } - - 
# Standard Library ADTs Database - stdlib_adts_db = { - # Empty entry for unknown dialect - 'unknown': ( - # LEAVE THIS EMPTY - ), - # Standard Library ADTs for PIM Modula-2 - 'm2pim': ( - # No first class library types - ), - - # Standard Library ADTs for ISO Modula-2 - 'm2iso': ( - # No first class library types - ), - - # Standard Library ADTs for Modula-2 R10 - 'm2r10': ( - m2r10_stdlib_adt_identifiers, - ), - - # Standard Library ADTs for Objective Modula-2 - 'objm2': ( - m2r10_stdlib_adt_identifiers, - ), - - # Standard Library ADTs for Aglet Modula-2 - 'm2iso+aglet': ( - # No first class library types - ), - - # Standard Library ADTs for GNU Modula-2 - 'm2pim+gm2': ( - # No first class library types - ), - - # Standard Library ADTs for p1 Modula-2 - 'm2iso+p1': ( - # No first class library types - ), - - # Standard Library ADTs for XDS Modula-2 - 'm2iso+xds': ( - # No first class library types - ), - } - - # Standard Library Modules Database - stdlib_modules_db = { - # Empty entry for unknown dialect - 'unknown': ( - # LEAVE THIS EMPTY - ), - # Standard Library Modules for PIM Modula-2 - 'm2pim': ( - pim_stdlib_module_identifiers, - ), - - # Standard Library Modules for ISO Modula-2 - 'm2iso': ( - iso_stdlib_module_identifiers, - ), - - # Standard Library Modules for Modula-2 R10 - 'm2r10': ( - m2r10_stdlib_blueprint_identifiers, - m2r10_stdlib_module_identifiers, - m2r10_stdlib_adt_identifiers, - ), - - # Standard Library Modules for Objective Modula-2 - 'objm2': ( - m2r10_stdlib_blueprint_identifiers, - m2r10_stdlib_module_identifiers, - ), - - # Standard Library Modules for Aglet Modula-2 - 'm2iso+aglet': ( - iso_stdlib_module_identifiers, - ), - - # Standard Library Modules for GNU Modula-2 - 'm2pim+gm2': ( - pim_stdlib_module_identifiers, - ), - - # Standard Library Modules for p1 Modula-2 - 'm2iso+p1': ( - iso_stdlib_module_identifiers, - ), - - # Standard Library Modules for XDS Modula-2 - 'm2iso+xds': ( - iso_stdlib_module_identifiers, - ), - } - - # 
Standard Library Types Database - stdlib_types_db = { - # Empty entry for unknown dialect - 'unknown': ( - # LEAVE THIS EMPTY - ), - # Standard Library Types for PIM Modula-2 - 'm2pim': ( - pim_stdlib_type_identifiers, - ), - - # Standard Library Types for ISO Modula-2 - 'm2iso': ( - iso_stdlib_type_identifiers, - ), - - # Standard Library Types for Modula-2 R10 - 'm2r10': ( - m2r10_stdlib_type_identifiers, - ), - - # Standard Library Types for Objective Modula-2 - 'objm2': ( - m2r10_stdlib_type_identifiers, - ), - - # Standard Library Types for Aglet Modula-2 - 'm2iso+aglet': ( - iso_stdlib_type_identifiers, - ), - - # Standard Library Types for GNU Modula-2 - 'm2pim+gm2': ( - pim_stdlib_type_identifiers, - ), - - # Standard Library Types for p1 Modula-2 - 'm2iso+p1': ( - iso_stdlib_type_identifiers, - ), - - # Standard Library Types for XDS Modula-2 - 'm2iso+xds': ( - iso_stdlib_type_identifiers, - ), - } - - # Standard Library Procedures Database - stdlib_procedures_db = { - # Empty entry for unknown dialect - 'unknown': ( - # LEAVE THIS EMPTY - ), - # Standard Library Procedures for PIM Modula-2 - 'm2pim': ( - pim_stdlib_proc_identifiers, - ), - - # Standard Library Procedures for ISO Modula-2 - 'm2iso': ( - iso_stdlib_proc_identifiers, - ), - - # Standard Library Procedures for Modula-2 R10 - 'm2r10': ( - m2r10_stdlib_proc_identifiers, - ), - - # Standard Library Procedures for Objective Modula-2 - 'objm2': ( - m2r10_stdlib_proc_identifiers, - ), - - # Standard Library Procedures for Aglet Modula-2 - 'm2iso+aglet': ( - iso_stdlib_proc_identifiers, - ), - - # Standard Library Procedures for GNU Modula-2 - 'm2pim+gm2': ( - pim_stdlib_proc_identifiers, - ), - - # Standard Library Procedures for p1 Modula-2 - 'm2iso+p1': ( - iso_stdlib_proc_identifiers, - ), - - # Standard Library Procedures for XDS Modula-2 - 'm2iso+xds': ( - iso_stdlib_proc_identifiers, - ), - } - - # Standard Library Variables Database - stdlib_variables_db = { - # Empty entry for unknown 
dialect - 'unknown': ( - # LEAVE THIS EMPTY - ), - # Standard Library Variables for PIM Modula-2 - 'm2pim': ( - pim_stdlib_var_identifiers, - ), - - # Standard Library Variables for ISO Modula-2 - 'm2iso': ( - iso_stdlib_var_identifiers, - ), - - # Standard Library Variables for Modula-2 R10 - 'm2r10': ( - m2r10_stdlib_var_identifiers, - ), - - # Standard Library Variables for Objective Modula-2 - 'objm2': ( - m2r10_stdlib_var_identifiers, - ), - - # Standard Library Variables for Aglet Modula-2 - 'm2iso+aglet': ( - iso_stdlib_var_identifiers, - ), - - # Standard Library Variables for GNU Modula-2 - 'm2pim+gm2': ( - pim_stdlib_var_identifiers, - ), - - # Standard Library Variables for p1 Modula-2 - 'm2iso+p1': ( - iso_stdlib_var_identifiers, - ), - - # Standard Library Variables for XDS Modula-2 - 'm2iso+xds': ( - iso_stdlib_var_identifiers, - ), - } - - # Standard Library Constants Database - stdlib_constants_db = { - # Empty entry for unknown dialect - 'unknown': ( - # LEAVE THIS EMPTY - ), - # Standard Library Constants for PIM Modula-2 - 'm2pim': ( - pim_stdlib_const_identifiers, - ), - - # Standard Library Constants for ISO Modula-2 - 'm2iso': ( - iso_stdlib_const_identifiers, - ), - - # Standard Library Constants for Modula-2 R10 - 'm2r10': ( - m2r10_stdlib_const_identifiers, - ), - - # Standard Library Constants for Objective Modula-2 - 'objm2': ( - m2r10_stdlib_const_identifiers, - ), - - # Standard Library Constants for Aglet Modula-2 - 'm2iso+aglet': ( - iso_stdlib_const_identifiers, - ), - - # Standard Library Constants for GNU Modula-2 - 'm2pim+gm2': ( - pim_stdlib_const_identifiers, - ), - - # Standard Library Constants for p1 Modula-2 - 'm2iso+p1': ( - iso_stdlib_const_identifiers, - ), - - # Standard Library Constants for XDS Modula-2 - 'm2iso+xds': ( - iso_stdlib_const_identifiers, - ), - } - -# M e t h o d s - - # initialise a lexer instance - def __init__(self, **options): - # - # check dialect options - # - dialects = get_list_opt(options, 
'dialect', []) - # - for dialect_option in dialects: - if dialect_option in self.dialects[1:-1]: - # valid dialect option found - self.set_dialect(dialect_option) - break - # - # Fallback Mode (DEFAULT) - else: - # no valid dialect option - self.set_dialect('unknown') - # - self.dialect_set_by_tag = False - # - # check style options - # - styles = get_list_opt(options, 'style', []) - # - # use lowercase mode for Algol style - if 'algol' in styles or 'algol_nu' in styles: - self.algol_publication_mode = True - else: - self.algol_publication_mode = False - # - # Check option flags - # - self.treat_stdlib_adts_as_builtins = get_bool_opt( - options, 'treat_stdlib_adts_as_builtins', True) - # - # call superclass initialiser - RegexLexer.__init__(self, **options) - - # Set lexer to a specified dialect - def set_dialect(self, dialect_id): - # - # if __debug__: - # print 'entered set_dialect with arg: ', dialect_id - # - # check dialect name against known dialects - if dialect_id not in self.dialects: - dialect = 'unknown' # default - else: - dialect = dialect_id - # - # compose lexemes to reject set - lexemes_to_reject_set = set() - # add each list of reject lexemes for this dialect - for list in self.lexemes_to_reject_db[dialect]: - lexemes_to_reject_set.update(set(list)) - # - # compose reserved words set - reswords_set = set() - # add each list of reserved words for this dialect - for list in self.reserved_words_db[dialect]: - reswords_set.update(set(list)) - # - # compose builtins set - builtins_set = set() - # add each list of builtins for this dialect excluding reserved words - for list in self.builtins_db[dialect]: - builtins_set.update(set(list).difference(reswords_set)) - # - # compose pseudo-builtins set - pseudo_builtins_set = set() - # add each list of builtins for this dialect excluding reserved words - for list in self.pseudo_builtins_db[dialect]: - pseudo_builtins_set.update(set(list).difference(reswords_set)) - # - # compose ADTs set - adts_set = set() - # 
add each list of ADTs for this dialect excluding reserved words - for list in self.stdlib_adts_db[dialect]: - adts_set.update(set(list).difference(reswords_set)) - # - # compose modules set - modules_set = set() - # add each list of builtins for this dialect excluding builtins - for list in self.stdlib_modules_db[dialect]: - modules_set.update(set(list).difference(builtins_set)) - # - # compose types set - types_set = set() - # add each list of types for this dialect excluding builtins - for list in self.stdlib_types_db[dialect]: - types_set.update(set(list).difference(builtins_set)) - # - # compose procedures set - procedures_set = set() - # add each list of procedures for this dialect excluding builtins - for list in self.stdlib_procedures_db[dialect]: - procedures_set.update(set(list).difference(builtins_set)) - # - # compose variables set - variables_set = set() - # add each list of variables for this dialect excluding builtins - for list in self.stdlib_variables_db[dialect]: - variables_set.update(set(list).difference(builtins_set)) - # - # compose constants set - constants_set = set() - # add each list of constants for this dialect excluding builtins - for list in self.stdlib_constants_db[dialect]: - constants_set.update(set(list).difference(builtins_set)) - # - # update lexer state - self.dialect = dialect - self.lexemes_to_reject = lexemes_to_reject_set - self.reserved_words = reswords_set - self.builtins = builtins_set - self.pseudo_builtins = pseudo_builtins_set - self.adts = adts_set - self.modules = modules_set - self.types = types_set - self.procedures = procedures_set - self.variables = variables_set - self.constants = constants_set - # - # if __debug__: - # print 'exiting set_dialect' - # print ' self.dialect: ', self.dialect - # print ' self.lexemes_to_reject: ', self.lexemes_to_reject - # print ' self.reserved_words: ', self.reserved_words - # print ' self.builtins: ', self.builtins - # print ' self.pseudo_builtins: ', self.pseudo_builtins - # 
print ' self.adts: ', self.adts - # print ' self.modules: ', self.modules - # print ' self.types: ', self.types - # print ' self.procedures: ', self.procedures - # print ' self.variables: ', self.variables - # print ' self.types: ', self.types - # print ' self.constants: ', self.constants - - # Extracts a dialect name from a dialect tag comment string and checks - # the extracted name against known dialects. If a match is found, the - # matching name is returned, otherwise dialect id 'unknown' is returned - def get_dialect_from_dialect_tag(self, dialect_tag): - # - # if __debug__: - # print 'entered get_dialect_from_dialect_tag with arg: ', dialect_tag - # - # constants - left_tag_delim = '(*!' - right_tag_delim = '*)' - left_tag_delim_len = len(left_tag_delim) - right_tag_delim_len = len(right_tag_delim) - indicator_start = left_tag_delim_len - indicator_end = -(right_tag_delim_len) - # - # check comment string for dialect indicator - if len(dialect_tag) > (left_tag_delim_len + right_tag_delim_len) \ - and dialect_tag.startswith(left_tag_delim) \ - and dialect_tag.endswith(right_tag_delim): - # - # if __debug__: - # print 'dialect tag found' - # - # extract dialect indicator - indicator = dialect_tag[indicator_start:indicator_end] - # - # if __debug__: - # print 'extracted: ', indicator - # - # check against known dialects - for index in range(1, len(self.dialects)): - # - # if __debug__: - # print 'dialects[', index, ']: ', self.dialects[index] - # - if indicator == self.dialects[index]: - # - # if __debug__: - # print 'matching dialect found' - # - # indicator matches known dialect - return indicator - else: - # indicator does not match any dialect - return 'unknown' # default - else: - # invalid indicator string - return 'unknown' # default - - # intercept the token stream, modify token attributes and return them - def get_tokens_unprocessed(self, text): - for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): - # - # check for dialect tag if 
dialect has not been set by tag - if not self.dialect_set_by_tag and token == Comment.Special: - indicated_dialect = self.get_dialect_from_dialect_tag(value) - if indicated_dialect != 'unknown': - # token is a dialect indicator - # reset reserved words and builtins - self.set_dialect(indicated_dialect) - self.dialect_set_by_tag = True - # - # check for reserved words, predefined and stdlib identifiers - if token is Name: - if value in self.reserved_words: - token = Keyword.Reserved - if self.algol_publication_mode: - value = value.lower() - # - elif value in self.builtins: - token = Name.Builtin - if self.algol_publication_mode: - value = value.lower() - # - elif value in self.pseudo_builtins: - token = Name.Builtin.Pseudo - if self.algol_publication_mode: - value = value.lower() - # - elif value in self.adts: - if not self.treat_stdlib_adts_as_builtins: - token = Name.Namespace - else: - token = Name.Builtin.Pseudo - if self.algol_publication_mode: - value = value.lower() - # - elif value in self.modules: - token = Name.Namespace - # - elif value in self.types: - token = Name.Class - # - elif value in self.procedures: - token = Name.Function - # - elif value in self.variables: - token = Name.Variable - # - elif value in self.constants: - token = Name.Constant - # - elif token in Number: - # - # mark prefix number literals as error for PIM and ISO dialects - if self.dialect not in ('unknown', 'm2r10', 'objm2'): - if "'" in value or value[0:2] in ('0b', '0x', '0u'): - token = Error - # - elif self.dialect in ('m2r10', 'objm2'): - # mark base-8 number literals as errors for M2 R10 and ObjM2 - if token is Number.Oct: - token = Error - # mark suffix base-16 literals as errors for M2 R10 and ObjM2 - elif token is Number.Hex and 'H' in value: - token = Error - # mark real numbers with E as errors for M2 R10 and ObjM2 - elif token is Number.Float and 'E' in value: - token = Error - # - elif token in Comment: - # - # mark single line comment as error for PIM and ISO 
dialects - if token is Comment.Single: - if self.dialect not in ('unknown', 'm2r10', 'objm2'): - token = Error - # - if token is Comment.Preproc: - # mark ISO pragma as error for PIM dialects - if value.startswith('<*') and \ - self.dialect.startswith('m2pim'): - token = Error - # mark PIM pragma as comment for other dialects - elif value.startswith('(*$') and \ - self.dialect != 'unknown' and \ - not self.dialect.startswith('m2pim'): - token = Comment.Multiline - # - else: # token is neither Name nor Comment - # - # mark lexemes matching the dialect's error token set as errors - if value in self.lexemes_to_reject: - token = Error - # - # substitute lexemes when in Algol mode - if self.algol_publication_mode: - if value == '#': - value = u'≠' - elif value == '<=': - value = u'≤' - elif value == '>=': - value = u'≥' - elif value == '==': - value = u'≡' - elif value == '*.': - value = u'•' - - # return result - yield index, token, value + # Case Label Separator Synonym + (r'!', Punctuation), # ISO + # Blueprint Punctuation + (r'\?', Punctuation), # M2R10 + ObjM2 + ], + 'comments': [ + # Single Line Comment + (r'^//.*?\n', Comment.Single), # M2R10 + ObjM2 + # Block Comment + (r'\(\*([^$].*?)\*\)', Comment.Multiline), + # Template Block Comment + (r'/\*(.*?)\*/', Comment.Multiline), # M2R10 + ObjM2 + ], + 'pragmas': [ + # ISO Style Pragmas + (r'<\*.*?\*>', Comment.Preproc), # ISO, M2R10 + ObjM2 + # Pascal Style Pragmas + (r'\(\*\$.*?\*\)', Comment.Preproc), # PIM + ], + 'root': [ + include('whitespace'), + include('dialecttags'), + include('pragmas'), + include('comments'), + include('identifiers'), + include('suffixed_number_literals'), # PIM + ISO + include('prefixed_number_literals'), # M2R10 + ObjM2 + include('plain_number_literals'), + include('string_literals'), + include('digraph_punctuation'), + include('digraph_operators'), + include('unigraph_punctuation'), + include('unigraph_operators'), + ] + } + +# C o m m o n D a t a s e t s + + # Common Reserved Words 
Dataset + common_reserved_words = ( + # 37 common reserved words + 'AND', 'ARRAY', 'BEGIN', 'BY', 'CASE', 'CONST', 'DEFINITION', 'DIV', + 'DO', 'ELSE', 'ELSIF', 'END', 'EXIT', 'FOR', 'FROM', 'IF', + 'IMPLEMENTATION', 'IMPORT', 'IN', 'LOOP', 'MOD', 'MODULE', 'NOT', + 'OF', 'OR', 'POINTER', 'PROCEDURE', 'RECORD', 'REPEAT', 'RETURN', + 'SET', 'THEN', 'TO', 'TYPE', 'UNTIL', 'VAR', 'WHILE', + ) + + # Common Builtins Dataset + common_builtins = ( + # 16 common builtins + 'ABS', 'BOOLEAN', 'CARDINAL', 'CHAR', 'CHR', 'FALSE', 'INTEGER', + 'LONGINT', 'LONGREAL', 'MAX', 'MIN', 'NIL', 'ODD', 'ORD', 'REAL', + 'TRUE', + ) + + # Common Pseudo-Module Builtins Dataset + common_pseudo_builtins = ( + # 4 common pseudo builtins + 'ADDRESS', 'BYTE', 'WORD', 'ADR' + ) + +# P I M M o d u l a - 2 D a t a s e t s + + # Lexemes to Mark as Error Tokens for PIM Modula-2 + pim_lexemes_to_reject = ( + '!', '`', '@', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', + '+>', '->', '<<', '>>', '|#', '##', + ) + + # PIM Modula-2 Additional Reserved Words Dataset + pim_additional_reserved_words = ( + # 3 additional reserved words + 'EXPORT', 'QUALIFIED', 'WITH', + ) + + # PIM Modula-2 Additional Builtins Dataset + pim_additional_builtins = ( + # 16 additional builtins + 'BITSET', 'CAP', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', 'HALT', 'HIGH', + 'INC', 'INCL', 'NEW', 'NIL', 'PROC', 'SIZE', 'TRUNC', 'VAL', + ) + + # PIM Modula-2 Additional Pseudo-Module Builtins Dataset + pim_additional_pseudo_builtins = ( + # 5 additional pseudo builtins + 'SYSTEM', 'PROCESS', 'TSIZE', 'NEWPROCESS', 'TRANSFER', + ) + +# I S O M o d u l a - 2 D a t a s e t s + + # Lexemes to Mark as Error Tokens for ISO Modula-2 + iso_lexemes_to_reject = ( + '`', '$', '%', '?', '\\', '==', '++', '--', '::', '*.', '+>', '->', + '<<', '>>', '|#', '##', + ) + + # ISO Modula-2 Additional Reserved Words Dataset + iso_additional_reserved_words = ( + # 9 additional reserved words (ISO 10514-1) + 'EXCEPT', 'EXPORT', 'FINALLY', 'FORWARD', 
'PACKEDSET', 'QUALIFIED', + 'REM', 'RETRY', 'WITH', + # 10 additional reserved words (ISO 10514-2 & ISO 10514-3) + 'ABSTRACT', 'AS', 'CLASS', 'GUARD', 'INHERIT', 'OVERRIDE', 'READONLY', + 'REVEAL', 'TRACED', 'UNSAFEGUARDED', + ) + + # ISO Modula-2 Additional Builtins Dataset + iso_additional_builtins = ( + # 26 additional builtins (ISO 10514-1) + 'BITSET', 'CAP', 'CMPLX', 'COMPLEX', 'DEC', 'DISPOSE', 'EXCL', 'FLOAT', + 'HALT', 'HIGH', 'IM', 'INC', 'INCL', 'INT', 'INTERRUPTIBLE', 'LENGTH', + 'LFLOAT', 'LONGCOMPLEX', 'NEW', 'PROC', 'PROTECTION', 'RE', 'SIZE', + 'TRUNC', 'UNINTERRUBTIBLE', 'VAL', + # 5 additional builtins (ISO 10514-2 & ISO 10514-3) + 'CREATE', 'DESTROY', 'EMPTY', 'ISMEMBER', 'SELF', + ) + + # ISO Modula-2 Additional Pseudo-Module Builtins Dataset + iso_additional_pseudo_builtins = ( + # 14 additional builtins (SYSTEM) + 'SYSTEM', 'BITSPERLOC', 'LOCSPERBYTE', 'LOCSPERWORD', 'LOC', + 'ADDADR', 'SUBADR', 'DIFADR', 'MAKEADR', 'ADR', + 'ROTATE', 'SHIFT', 'CAST', 'TSIZE', + # 13 additional builtins (COROUTINES) + 'COROUTINES', 'ATTACH', 'COROUTINE', 'CURRENT', 'DETACH', 'HANDLER', + 'INTERRUPTSOURCE', 'IOTRANSFER', 'IsATTACHED', 'LISTEN', + 'NEWCOROUTINE', 'PROT', 'TRANSFER', + # 9 additional builtins (EXCEPTIONS) + 'EXCEPTIONS', 'AllocateSource', 'CurrentNumber', 'ExceptionNumber', + 'ExceptionSource', 'GetMessage', 'IsCurrentSource', + 'IsExceptionalExecution', 'RAISE', + # 3 additional builtins (TERMINATION) + 'TERMINATION', 'IsTerminating', 'HasHalted', + # 4 additional builtins (M2EXCEPTION) + 'M2EXCEPTION', 'M2Exceptions', 'M2Exception', 'IsM2Exception', + 'indexException', 'rangeException', 'caseSelectException', + 'invalidLocation', 'functionException', 'wholeValueException', + 'wholeDivException', 'realValueException', 'realDivException', + 'complexValueException', 'complexDivException', 'protException', + 'sysException', 'coException', 'exException', + ) + +# M o d u l a - 2 R 1 0 D a t a s e t s + + # Lexemes to Mark as Error Tokens for Modula-2 
R10 + m2r10_lexemes_to_reject = ( + '!', '`', '@', '$', '%', '&', '<>', + ) + + # Modula-2 R10 reserved words in addition to the common set + m2r10_additional_reserved_words = ( + # 12 additional reserved words + 'ALIAS', 'ARGLIST', 'BLUEPRINT', 'COPY', 'GENLIB', 'INDETERMINATE', + 'NEW', 'NONE', 'OPAQUE', 'REFERENTIAL', 'RELEASE', 'RETAIN', + # 2 additional reserved words with symbolic assembly option + 'ASM', 'REG', + ) + + # Modula-2 R10 builtins in addition to the common set + m2r10_additional_builtins = ( + # 26 additional builtins + 'CARDINAL', 'COUNT', 'EMPTY', 'EXISTS', 'INSERT', 'LENGTH', 'LONGCARD', + 'OCTET', 'PTR', 'PRED', 'READ', 'READNEW', 'REMOVE', 'RETRIEVE', 'SORT', + 'STORE', 'SUBSET', 'SUCC', 'TLIMIT', 'TMAX', 'TMIN', 'TRUE', 'TSIZE', + 'UNICHAR', 'WRITE', 'WRITEF', + ) + + # Modula-2 R10 Additional Pseudo-Module Builtins Dataset + m2r10_additional_pseudo_builtins = ( + # 13 additional builtins (TPROPERTIES) + 'TPROPERTIES', 'PROPERTY', 'LITERAL', 'TPROPERTY', 'TLITERAL', + 'TBUILTIN', 'TDYN', 'TREFC', 'TNIL', 'TBASE', 'TPRECISION', + 'TMAXEXP', 'TMINEXP', + # 4 additional builtins (CONVERSION) + 'CONVERSION', 'TSXFSIZE', 'SXF', 'VAL', + # 35 additional builtins (UNSAFE) + 'UNSAFE', 'CAST', 'INTRINSIC', 'AVAIL', 'ADD', 'SUB', 'ADDC', 'SUBC', + 'FETCHADD', 'FETCHSUB', 'SHL', 'SHR', 'ASHR', 'ROTL', 'ROTR', 'ROTLC', + 'ROTRC', 'BWNOT', 'BWAND', 'BWOR', 'BWXOR', 'BWNAND', 'BWNOR', + 'SETBIT', 'TESTBIT', 'LSBIT', 'MSBIT', 'CSBITS', 'BAIL', 'HALT', + 'TODO', 'FFI', 'ADDR', 'VARGLIST', 'VARGC', + # 11 additional builtins (ATOMIC) + 'ATOMIC', 'INTRINSIC', 'AVAIL', 'SWAP', 'CAS', 'INC', 'DEC', 'BWAND', + 'BWNAND', 'BWOR', 'BWXOR', + # 7 additional builtins (COMPILER) + 'COMPILER', 'DEBUG', 'MODNAME', 'PROCNAME', 'LINENUM', 'DEFAULT', + 'HASH', + # 5 additional builtins (ASSEMBLER) + 'ASSEMBLER', 'REGISTER', 'SETREG', 'GETREG', 'CODE', + ) + +# O b j e c t i v e M o d u l a - 2 D a t a s e t s + + # Lexemes to Mark as Error Tokens for Objective Modula-2 + 
objm2_lexemes_to_reject = ( + '!', '$', '%', '&', '<>', + ) + + # Objective Modula-2 Extensions + # reserved words in addition to Modula-2 R10 + objm2_additional_reserved_words = ( + # 16 additional reserved words + 'BYCOPY', 'BYREF', 'CLASS', 'CONTINUE', 'CRITICAL', 'INOUT', 'METHOD', + 'ON', 'OPTIONAL', 'OUT', 'PRIVATE', 'PROTECTED', 'PROTOCOL', 'PUBLIC', + 'SUPER', 'TRY', + ) + + # Objective Modula-2 Extensions + # builtins in addition to Modula-2 R10 + objm2_additional_builtins = ( + # 3 additional builtins + 'OBJECT', 'NO', 'YES', + ) + + # Objective Modula-2 Extensions + # pseudo-module builtins in addition to Modula-2 R10 + objm2_additional_pseudo_builtins = ( + # None + ) + +# A g l e t M o d u l a - 2 D a t a s e t s + + # Aglet Extensions + # reserved words in addition to ISO Modula-2 + aglet_additional_reserved_words = ( + # None + ) + + # Aglet Extensions + # builtins in addition to ISO Modula-2 + aglet_additional_builtins = ( + # 9 additional builtins + 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16', + 'CARDINAL32', 'INTEGER8', 'INTEGER16', 'INTEGER32', + ) + + # Aglet Modula-2 Extensions + # pseudo-module builtins in addition to ISO Modula-2 + aglet_additional_pseudo_builtins = ( + # None + ) + +# G N U M o d u l a - 2 D a t a s e t s + + # GNU Extensions + # reserved words in addition to PIM Modula-2 + gm2_additional_reserved_words = ( + # 10 additional reserved words + 'ASM', '__ATTRIBUTE__', '__BUILTIN__', '__COLUMN__', '__DATE__', + '__FILE__', '__FUNCTION__', '__LINE__', '__MODULE__', 'VOLATILE', + ) + + # GNU Extensions + # builtins in addition to PIM Modula-2 + gm2_additional_builtins = ( + # 21 additional builtins + 'BITSET8', 'BITSET16', 'BITSET32', 'CARDINAL8', 'CARDINAL16', + 'CARDINAL32', 'CARDINAL64', 'COMPLEX32', 'COMPLEX64', 'COMPLEX96', + 'COMPLEX128', 'INTEGER8', 'INTEGER16', 'INTEGER32', 'INTEGER64', + 'REAL8', 'REAL16', 'REAL32', 'REAL96', 'REAL128', 'THROW', + ) + + # GNU Extensions + # pseudo-module builtins in 
addition to PIM Modula-2 + gm2_additional_pseudo_builtins = ( + # None + ) + +# p 1 M o d u l a - 2 D a t a s e t s + + # p1 Extensions + # reserved words in addition to ISO Modula-2 + p1_additional_reserved_words = ( + # None + ) + + # p1 Extensions + # builtins in addition to ISO Modula-2 + p1_additional_builtins = ( + # None + ) + + # p1 Modula-2 Extensions + # pseudo-module builtins in addition to ISO Modula-2 + p1_additional_pseudo_builtins = ( + # 1 additional builtin + 'BCD', + ) + +# X D S M o d u l a - 2 D a t a s e t s + + # XDS Extensions + # reserved words in addition to ISO Modula-2 + xds_additional_reserved_words = ( + # 1 additional reserved word + 'SEQ', + ) + + # XDS Extensions + # builtins in addition to ISO Modula-2 + xds_additional_builtins = ( + # 9 additional builtins + 'ASH', 'ASSERT', 'DIFFADR_TYPE', 'ENTIER', 'INDEX', 'LEN', + 'LONGCARD', 'SHORTCARD', 'SHORTINT', + ) + + # XDS Modula-2 Extensions + # pseudo-module builtins in addition to ISO Modula-2 + xds_additional_pseudo_builtins = ( + # 22 additional builtins (SYSTEM) + 'PROCESS', 'NEWPROCESS', 'BOOL8', 'BOOL16', 'BOOL32', 'CARD8', + 'CARD16', 'CARD32', 'INT8', 'INT16', 'INT32', 'REF', 'MOVE', + 'FILL', 'GET', 'PUT', 'CC', 'int', 'unsigned', 'size_t', 'void' + # 3 additional builtins (COMPILER) + 'COMPILER', 'OPTION', 'EQUATION' + ) + +# P I M S t a n d a r d L i b r a r y D a t a s e t s + + # PIM Modula-2 Standard Library Modules Dataset + pim_stdlib_module_identifiers = ( + 'Terminal', 'FileSystem', 'InOut', 'RealInOut', 'MathLib0', 'Storage', + ) + + # PIM Modula-2 Standard Library Types Dataset + pim_stdlib_type_identifiers = ( + 'Flag', 'FlagSet', 'Response', 'Command', 'Lock', 'Permission', + 'MediumType', 'File', 'FileProc', 'DirectoryProc', 'FileCommand', + 'DirectoryCommand', + ) + + # PIM Modula-2 Standard Library Procedures Dataset + pim_stdlib_proc_identifiers = ( + 'Read', 'BusyRead', 'ReadAgain', 'Write', 'WriteString', 'WriteLn', + 'Create', 'Lookup', 'Close', 'Delete', 
'Rename', 'SetRead', 'SetWrite', + 'SetModify', 'SetOpen', 'Doio', 'SetPos', 'GetPos', 'Length', 'Reset', + 'Again', 'ReadWord', 'WriteWord', 'ReadChar', 'WriteChar', + 'CreateMedium', 'DeleteMedium', 'AssignName', 'DeassignName', + 'ReadMedium', 'LookupMedium', 'OpenInput', 'OpenOutput', 'CloseInput', + 'CloseOutput', 'ReadString', 'ReadInt', 'ReadCard', 'ReadWrd', + 'WriteInt', 'WriteCard', 'WriteOct', 'WriteHex', 'WriteWrd', + 'ReadReal', 'WriteReal', 'WriteFixPt', 'WriteRealOct', 'sqrt', 'exp', + 'ln', 'sin', 'cos', 'arctan', 'entier', 'ALLOCATE', 'DEALLOCATE', + ) + + # PIM Modula-2 Standard Library Variables Dataset + pim_stdlib_var_identifiers = ( + 'Done', 'termCH', 'in', 'out' + ) + + # PIM Modula-2 Standard Library Constants Dataset + pim_stdlib_const_identifiers = ( + 'EOL', + ) + +# I S O S t a n d a r d L i b r a r y D a t a s e t s + + # ISO Modula-2 Standard Library Modules Dataset + iso_stdlib_module_identifiers = ( + # TO DO + ) + + # ISO Modula-2 Standard Library Types Dataset + iso_stdlib_type_identifiers = ( + # TO DO + ) + + # ISO Modula-2 Standard Library Procedures Dataset + iso_stdlib_proc_identifiers = ( + # TO DO + ) + + # ISO Modula-2 Standard Library Variables Dataset + iso_stdlib_var_identifiers = ( + # TO DO + ) + + # ISO Modula-2 Standard Library Constants Dataset + iso_stdlib_const_identifiers = ( + # TO DO + ) + +# M 2 R 1 0 S t a n d a r d L i b r a r y D a t a s e t s + + # Modula-2 R10 Standard Library ADTs Dataset + m2r10_stdlib_adt_identifiers = ( + 'BCD', 'LONGBCD', 'BITSET', 'SHORTBITSET', 'LONGBITSET', + 'LONGLONGBITSET', 'COMPLEX', 'LONGCOMPLEX', 'SHORTCARD', 'LONGLONGCARD', + 'SHORTINT', 'LONGLONGINT', 'POSINT', 'SHORTPOSINT', 'LONGPOSINT', + 'LONGLONGPOSINT', 'BITSET8', 'BITSET16', 'BITSET32', 'BITSET64', + 'BITSET128', 'BS8', 'BS16', 'BS32', 'BS64', 'BS128', 'CARDINAL8', + 'CARDINAL16', 'CARDINAL32', 'CARDINAL64', 'CARDINAL128', 'CARD8', + 'CARD16', 'CARD32', 'CARD64', 'CARD128', 'INTEGER8', 'INTEGER16', + 'INTEGER32', 
'INTEGER64', 'INTEGER128', 'INT8', 'INT16', 'INT32', + 'INT64', 'INT128', 'STRING', 'UNISTRING', + ) + + # Modula-2 R10 Standard Library Blueprints Dataset + m2r10_stdlib_blueprint_identifiers = ( + 'ProtoRoot', 'ProtoComputational', 'ProtoNumeric', 'ProtoScalar', + 'ProtoNonScalar', 'ProtoCardinal', 'ProtoInteger', 'ProtoReal', + 'ProtoComplex', 'ProtoVector', 'ProtoTuple', 'ProtoCompArray', + 'ProtoCollection', 'ProtoStaticArray', 'ProtoStaticSet', + 'ProtoStaticString', 'ProtoArray', 'ProtoString', 'ProtoSet', + 'ProtoMultiSet', 'ProtoDictionary', 'ProtoMultiDict', 'ProtoExtension', + 'ProtoIO', 'ProtoCardMath', 'ProtoIntMath', 'ProtoRealMath', + ) + + # Modula-2 R10 Standard Library Modules Dataset + m2r10_stdlib_module_identifiers = ( + 'ASCII', 'BooleanIO', 'CharIO', 'UnicharIO', 'OctetIO', + 'CardinalIO', 'LongCardIO', 'IntegerIO', 'LongIntIO', 'RealIO', + 'LongRealIO', 'BCDIO', 'LongBCDIO', 'CardMath', 'LongCardMath', + 'IntMath', 'LongIntMath', 'RealMath', 'LongRealMath', 'BCDMath', + 'LongBCDMath', 'FileIO', 'FileSystem', 'Storage', 'IOSupport', + ) + + # Modula-2 R10 Standard Library Types Dataset + m2r10_stdlib_type_identifiers = ( + 'File', 'Status', + # TO BE COMPLETED + ) + + # Modula-2 R10 Standard Library Procedures Dataset + m2r10_stdlib_proc_identifiers = ( + 'ALLOCATE', 'DEALLOCATE', 'SIZE', + # TO BE COMPLETED + ) + + # Modula-2 R10 Standard Library Variables Dataset + m2r10_stdlib_var_identifiers = ( + 'stdIn', 'stdOut', 'stdErr', + ) + + # Modula-2 R10 Standard Library Constants Dataset + m2r10_stdlib_const_identifiers = ( + 'pi', 'tau', + ) + +# D i a l e c t s + + # Dialect modes + dialects = ( + 'unknown', + 'm2pim', 'm2iso', 'm2r10', 'objm2', + 'm2iso+aglet', 'm2pim+gm2', 'm2iso+p1', 'm2iso+xds', + ) + +# D a t a b a s e s + + # Lexemes to Mark as Errors Database + lexemes_to_reject_db = { + # Lexemes to reject for unknown dialect + 'unknown': ( + # LEAVE THIS EMPTY + ), + # Lexemes to reject for PIM Modula-2 + 'm2pim': ( + 
pim_lexemes_to_reject, + ), + # Lexemes to reject for ISO Modula-2 + 'm2iso': ( + iso_lexemes_to_reject, + ), + # Lexemes to reject for Modula-2 R10 + 'm2r10': ( + m2r10_lexemes_to_reject, + ), + # Lexemes to reject for Objective Modula-2 + 'objm2': ( + objm2_lexemes_to_reject, + ), + # Lexemes to reject for Aglet Modula-2 + 'm2iso+aglet': ( + iso_lexemes_to_reject, + ), + # Lexemes to reject for GNU Modula-2 + 'm2pim+gm2': ( + pim_lexemes_to_reject, + ), + # Lexemes to reject for p1 Modula-2 + 'm2iso+p1': ( + iso_lexemes_to_reject, + ), + # Lexemes to reject for XDS Modula-2 + 'm2iso+xds': ( + iso_lexemes_to_reject, + ), + } + + # Reserved Words Database + reserved_words_db = { + # Reserved words for unknown dialect + 'unknown': ( + common_reserved_words, + pim_additional_reserved_words, + iso_additional_reserved_words, + m2r10_additional_reserved_words, + ), + + # Reserved words for PIM Modula-2 + 'm2pim': ( + common_reserved_words, + pim_additional_reserved_words, + ), + + # Reserved words for Modula-2 R10 + 'm2iso': ( + common_reserved_words, + iso_additional_reserved_words, + ), + + # Reserved words for ISO Modula-2 + 'm2r10': ( + common_reserved_words, + m2r10_additional_reserved_words, + ), + + # Reserved words for Objective Modula-2 + 'objm2': ( + common_reserved_words, + m2r10_additional_reserved_words, + objm2_additional_reserved_words, + ), + + # Reserved words for Aglet Modula-2 Extensions + 'm2iso+aglet': ( + common_reserved_words, + iso_additional_reserved_words, + aglet_additional_reserved_words, + ), + + # Reserved words for GNU Modula-2 Extensions + 'm2pim+gm2': ( + common_reserved_words, + pim_additional_reserved_words, + gm2_additional_reserved_words, + ), + + # Reserved words for p1 Modula-2 Extensions + 'm2iso+p1': ( + common_reserved_words, + iso_additional_reserved_words, + p1_additional_reserved_words, + ), + + # Reserved words for XDS Modula-2 Extensions + 'm2iso+xds': ( + common_reserved_words, + iso_additional_reserved_words, + 
xds_additional_reserved_words, + ), + } + + # Builtins Database + builtins_db = { + # Builtins for unknown dialect + 'unknown': ( + common_builtins, + pim_additional_builtins, + iso_additional_builtins, + m2r10_additional_builtins, + ), + + # Builtins for PIM Modula-2 + 'm2pim': ( + common_builtins, + pim_additional_builtins, + ), + + # Builtins for ISO Modula-2 + 'm2iso': ( + common_builtins, + iso_additional_builtins, + ), + + # Builtins for ISO Modula-2 + 'm2r10': ( + common_builtins, + m2r10_additional_builtins, + ), + + # Builtins for Objective Modula-2 + 'objm2': ( + common_builtins, + m2r10_additional_builtins, + objm2_additional_builtins, + ), + + # Builtins for Aglet Modula-2 Extensions + 'm2iso+aglet': ( + common_builtins, + iso_additional_builtins, + aglet_additional_builtins, + ), + + # Builtins for GNU Modula-2 Extensions + 'm2pim+gm2': ( + common_builtins, + pim_additional_builtins, + gm2_additional_builtins, + ), + + # Builtins for p1 Modula-2 Extensions + 'm2iso+p1': ( + common_builtins, + iso_additional_builtins, + p1_additional_builtins, + ), + + # Builtins for XDS Modula-2 Extensions + 'm2iso+xds': ( + common_builtins, + iso_additional_builtins, + xds_additional_builtins, + ), + } + + # Pseudo-Module Builtins Database + pseudo_builtins_db = { + # Builtins for unknown dialect + 'unknown': ( + common_pseudo_builtins, + pim_additional_pseudo_builtins, + iso_additional_pseudo_builtins, + m2r10_additional_pseudo_builtins, + ), + + # Builtins for PIM Modula-2 + 'm2pim': ( + common_pseudo_builtins, + pim_additional_pseudo_builtins, + ), + + # Builtins for ISO Modula-2 + 'm2iso': ( + common_pseudo_builtins, + iso_additional_pseudo_builtins, + ), + + # Builtins for ISO Modula-2 + 'm2r10': ( + common_pseudo_builtins, + m2r10_additional_pseudo_builtins, + ), + + # Builtins for Objective Modula-2 + 'objm2': ( + common_pseudo_builtins, + m2r10_additional_pseudo_builtins, + objm2_additional_pseudo_builtins, + ), + + # Builtins for Aglet Modula-2 Extensions + 
'm2iso+aglet': ( + common_pseudo_builtins, + iso_additional_pseudo_builtins, + aglet_additional_pseudo_builtins, + ), + + # Builtins for GNU Modula-2 Extensions + 'm2pim+gm2': ( + common_pseudo_builtins, + pim_additional_pseudo_builtins, + gm2_additional_pseudo_builtins, + ), + + # Builtins for p1 Modula-2 Extensions + 'm2iso+p1': ( + common_pseudo_builtins, + iso_additional_pseudo_builtins, + p1_additional_pseudo_builtins, + ), + + # Builtins for XDS Modula-2 Extensions + 'm2iso+xds': ( + common_pseudo_builtins, + iso_additional_pseudo_builtins, + xds_additional_pseudo_builtins, + ), + } + + # Standard Library ADTs Database + stdlib_adts_db = { + # Empty entry for unknown dialect + 'unknown': ( + # LEAVE THIS EMPTY + ), + # Standard Library ADTs for PIM Modula-2 + 'm2pim': ( + # No first class library types + ), + + # Standard Library ADTs for ISO Modula-2 + 'm2iso': ( + # No first class library types + ), + + # Standard Library ADTs for Modula-2 R10 + 'm2r10': ( + m2r10_stdlib_adt_identifiers, + ), + + # Standard Library ADTs for Objective Modula-2 + 'objm2': ( + m2r10_stdlib_adt_identifiers, + ), + + # Standard Library ADTs for Aglet Modula-2 + 'm2iso+aglet': ( + # No first class library types + ), + + # Standard Library ADTs for GNU Modula-2 + 'm2pim+gm2': ( + # No first class library types + ), + + # Standard Library ADTs for p1 Modula-2 + 'm2iso+p1': ( + # No first class library types + ), + + # Standard Library ADTs for XDS Modula-2 + 'm2iso+xds': ( + # No first class library types + ), + } + + # Standard Library Modules Database + stdlib_modules_db = { + # Empty entry for unknown dialect + 'unknown': ( + # LEAVE THIS EMPTY + ), + # Standard Library Modules for PIM Modula-2 + 'm2pim': ( + pim_stdlib_module_identifiers, + ), + + # Standard Library Modules for ISO Modula-2 + 'm2iso': ( + iso_stdlib_module_identifiers, + ), + + # Standard Library Modules for Modula-2 R10 + 'm2r10': ( + m2r10_stdlib_blueprint_identifiers, + m2r10_stdlib_module_identifiers, + 
m2r10_stdlib_adt_identifiers, + ), + + # Standard Library Modules for Objective Modula-2 + 'objm2': ( + m2r10_stdlib_blueprint_identifiers, + m2r10_stdlib_module_identifiers, + ), + + # Standard Library Modules for Aglet Modula-2 + 'm2iso+aglet': ( + iso_stdlib_module_identifiers, + ), + + # Standard Library Modules for GNU Modula-2 + 'm2pim+gm2': ( + pim_stdlib_module_identifiers, + ), + + # Standard Library Modules for p1 Modula-2 + 'm2iso+p1': ( + iso_stdlib_module_identifiers, + ), + + # Standard Library Modules for XDS Modula-2 + 'm2iso+xds': ( + iso_stdlib_module_identifiers, + ), + } + + # Standard Library Types Database + stdlib_types_db = { + # Empty entry for unknown dialect + 'unknown': ( + # LEAVE THIS EMPTY + ), + # Standard Library Types for PIM Modula-2 + 'm2pim': ( + pim_stdlib_type_identifiers, + ), + + # Standard Library Types for ISO Modula-2 + 'm2iso': ( + iso_stdlib_type_identifiers, + ), + + # Standard Library Types for Modula-2 R10 + 'm2r10': ( + m2r10_stdlib_type_identifiers, + ), + + # Standard Library Types for Objective Modula-2 + 'objm2': ( + m2r10_stdlib_type_identifiers, + ), + + # Standard Library Types for Aglet Modula-2 + 'm2iso+aglet': ( + iso_stdlib_type_identifiers, + ), + + # Standard Library Types for GNU Modula-2 + 'm2pim+gm2': ( + pim_stdlib_type_identifiers, + ), + + # Standard Library Types for p1 Modula-2 + 'm2iso+p1': ( + iso_stdlib_type_identifiers, + ), + + # Standard Library Types for XDS Modula-2 + 'm2iso+xds': ( + iso_stdlib_type_identifiers, + ), + } + + # Standard Library Procedures Database + stdlib_procedures_db = { + # Empty entry for unknown dialect + 'unknown': ( + # LEAVE THIS EMPTY + ), + # Standard Library Procedures for PIM Modula-2 + 'm2pim': ( + pim_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for ISO Modula-2 + 'm2iso': ( + iso_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for Modula-2 R10 + 'm2r10': ( + m2r10_stdlib_proc_identifiers, + ), + + # Standard Library 
Procedures for Objective Modula-2 + 'objm2': ( + m2r10_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for Aglet Modula-2 + 'm2iso+aglet': ( + iso_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for GNU Modula-2 + 'm2pim+gm2': ( + pim_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for p1 Modula-2 + 'm2iso+p1': ( + iso_stdlib_proc_identifiers, + ), + + # Standard Library Procedures for XDS Modula-2 + 'm2iso+xds': ( + iso_stdlib_proc_identifiers, + ), + } + + # Standard Library Variables Database + stdlib_variables_db = { + # Empty entry for unknown dialect + 'unknown': ( + # LEAVE THIS EMPTY + ), + # Standard Library Variables for PIM Modula-2 + 'm2pim': ( + pim_stdlib_var_identifiers, + ), + + # Standard Library Variables for ISO Modula-2 + 'm2iso': ( + iso_stdlib_var_identifiers, + ), + + # Standard Library Variables for Modula-2 R10 + 'm2r10': ( + m2r10_stdlib_var_identifiers, + ), + + # Standard Library Variables for Objective Modula-2 + 'objm2': ( + m2r10_stdlib_var_identifiers, + ), + + # Standard Library Variables for Aglet Modula-2 + 'm2iso+aglet': ( + iso_stdlib_var_identifiers, + ), + + # Standard Library Variables for GNU Modula-2 + 'm2pim+gm2': ( + pim_stdlib_var_identifiers, + ), + + # Standard Library Variables for p1 Modula-2 + 'm2iso+p1': ( + iso_stdlib_var_identifiers, + ), + + # Standard Library Variables for XDS Modula-2 + 'm2iso+xds': ( + iso_stdlib_var_identifiers, + ), + } + + # Standard Library Constants Database + stdlib_constants_db = { + # Empty entry for unknown dialect + 'unknown': ( + # LEAVE THIS EMPTY + ), + # Standard Library Constants for PIM Modula-2 + 'm2pim': ( + pim_stdlib_const_identifiers, + ), + + # Standard Library Constants for ISO Modula-2 + 'm2iso': ( + iso_stdlib_const_identifiers, + ), + + # Standard Library Constants for Modula-2 R10 + 'm2r10': ( + m2r10_stdlib_const_identifiers, + ), + + # Standard Library Constants for Objective Modula-2 + 'objm2': ( + 
# M e t h o d s

# initialise a lexer instance
def __init__(self, **options):
    """Set up dialect, style and option flags, then initialise the lexer.

    Options read from ``options``:

    ``dialect``
        Read with ``get_list_opt``.  The first entry that names a
        selectable dialect is activated through ``set_dialect``; if no
        entry qualifies, the lexer falls back to dialect ``'unknown'``.
    ``style``
        Read with ``get_list_opt``.  ``'algol'`` or ``'algol_nu'`` switches
        on Algol publication mode.
    ``treat_stdlib_adts_as_builtins``
        Read with ``get_bool_opt``, default ``True``.
    """
    #
    # check dialect options
    #
    requested_dialects = get_list_opt(options, 'dialect', [])
    #
    # Every entry after the leading 'unknown' placeholder is selectable.
    # The slice has to be [1:], not [1:-1]: the latter silently drops the
    # last table entry, which get_dialect_from_dialect_tag does accept, so
    # that dialect could be chosen by tag but never by option.
    selectable_dialects = self.dialects[1:]
    for dialect_option in requested_dialects:
        if dialect_option in selectable_dialects:
            # valid dialect option found
            self.set_dialect(dialect_option)
            break
    #
    # Fallback Mode (DEFAULT)
    else:
        # no valid dialect option
        self.set_dialect('unknown')
    #
    self.dialect_set_by_tag = False
    #
    # check style options
    #
    styles = get_list_opt(options, 'style', [])
    #
    # use lowercase mode for Algol style
    self.algol_publication_mode = 'algol' in styles or 'algol_nu' in styles
    #
    # Check option flags
    #
    self.treat_stdlib_adts_as_builtins = get_bool_opt(
        options, 'treat_stdlib_adts_as_builtins', True)
    #
    # call superclass initialiser
    RegexLexer.__init__(self, **options)


# Set lexer to a specified dialect
def set_dialect(self, dialect_id):
    """Rebuild the identifier lookup sets for ``dialect_id``.

    ``dialect_id`` is checked against ``self.dialects``; an id that is not
    listed there is replaced by ``'unknown'``.  For the resulting dialect
    the identifier lists registered in each ``*_db`` table are merged into
    one set per category and stored on the instance together with
    ``self.dialect``.

    Builtins, pseudo-builtins and ADTs exclude reserved words; modules,
    types, procedures, variables and constants exclude builtins.
    """
    # check dialect name against known dialects, defaulting to 'unknown'
    dialect = dialect_id if dialect_id in self.dialects else 'unknown'

    def merged(database, excluded=frozenset()):
        # union of every identifier list registered for the dialect,
        # minus the identifiers in ``excluded``
        identifiers = set()
        for identifier_list in database[dialect]:
            identifiers.update(identifier_list)
        return identifiers - excluded

    #
    # reserved words come first: they take precedence over the builtins
    lexemes_to_reject_set = merged(self.lexemes_to_reject_db)
    reswords_set = merged(self.reserved_words_db)
    #
    # predefined identifiers, excluding reserved words
    builtins_set = merged(self.builtins_db, reswords_set)
    pseudo_builtins_set = merged(self.pseudo_builtins_db, reswords_set)
    adts_set = merged(self.stdlib_adts_db, reswords_set)
    #
    # standard library identifiers, excluding builtins
    modules_set = merged(self.stdlib_modules_db, builtins_set)
    types_set = merged(self.stdlib_types_db, builtins_set)
    procedures_set = merged(self.stdlib_procedures_db, builtins_set)
    variables_set = merged(self.stdlib_variables_db, builtins_set)
    constants_set = merged(self.stdlib_constants_db, builtins_set)
    #
    # update lexer state
    self.dialect = dialect
    self.lexemes_to_reject = lexemes_to_reject_set
    self.reserved_words = reswords_set
    self.builtins = builtins_set
    self.pseudo_builtins = pseudo_builtins_set
    self.adts = adts_set
    self.modules = modules_set
    self.types = types_set
    self.procedures = procedures_set
    self.variables = variables_set
    self.constants = constants_set


# Extracts a dialect name from a dialect tag comment string and checks
# the extracted name against known dialects.  If a match is found, the
# matching name is returned, otherwise dialect id 'unknown' is returned
def get_dialect_from_dialect_tag(self, dialect_tag):
    """Return the dialect id named by ``dialect_tag``, else ``'unknown'``.

    A dialect tag has the form ``(*!<indicator>*)`` with a non-empty
    indicator.  The indicator is returned when it equals one of the
    entries of ``self.dialects`` after the leading placeholder; in every
    other case (malformed tag, empty or unrecognised indicator) the
    result is ``'unknown'``.
    """
    left_tag_delim = '(*!'
    right_tag_delim = '*)'
    #
    # a tag needs both delimiters and at least one character between them
    if len(dialect_tag) <= len(left_tag_delim) + len(right_tag_delim) \
            or not dialect_tag.startswith(left_tag_delim) \
            or not dialect_tag.endswith(right_tag_delim):
        # invalid indicator string
        return 'unknown'  # default
    #
    # extract dialect indicator
    indicator = dialect_tag[len(left_tag_delim):-len(right_tag_delim)]
    #
    # check against known dialects, skipping the 'unknown' placeholder
    if indicator in self.dialects[1:]:
        # indicator matches known dialect
        return indicator
    # indicator does not match any dialect
    return 'unknown'  # default


# intercept the token stream, modify token attributes and return them
def get_tokens_unprocessed(self, text):
    """Yield ``(index, token, value)`` triples adjusted for the dialect.

    The triples produced by ``RegexLexer.get_tokens_unprocessed`` are
    post-processed as follows:

    * the first ``Comment.Special`` token that carries a valid dialect tag
      switches the lexer to that dialect (once per instance);
    * ``Name`` tokens are reclassified as reserved words, builtins,
      pseudo-builtins, ADTs or standard library identifiers according to
      the sets built by ``set_dialect``;
    * number literals, comments and pragmas that the active dialect does
      not accept are marked as ``Error`` (PIM pragmas outside PIM dialects
      are demoted to ``Comment.Multiline`` instead);
    * all remaining tokens are checked against ``self.lexemes_to_reject``
      and, in Algol publication mode, selected operators are replaced by
      their typographic counterparts.
    """
    # operator substitutions applied in Algol publication mode
    algol_substitutions = {
        '#': u'≠',
        '<=': u'≤',
        '>=': u'≥',
        '==': u'≡',
        '*.': u'•',
    }
    for index, token, value in RegexLexer.get_tokens_unprocessed(self, text):
        #
        # check for dialect tag if dialect has not been set by tag
        if not self.dialect_set_by_tag and token == Comment.Special:
            indicated_dialect = self.get_dialect_from_dialect_tag(value)
            if indicated_dialect != 'unknown':
                # token is a dialect indicator
                # reset reserved words and builtins
                self.set_dialect(indicated_dialect)
                self.dialect_set_by_tag = True
        #
        # check for reserved words, predefined and stdlib identifiers
        if token is Name:
            if value in self.reserved_words:
                token = Keyword.Reserved
                if self.algol_publication_mode:
                    value = value.lower()
            #
            elif value in self.builtins:
                token = Name.Builtin
                if self.algol_publication_mode:
                    value = value.lower()
            #
            elif value in self.pseudo_builtins:
                token = Name.Builtin.Pseudo
                if self.algol_publication_mode:
                    value = value.lower()
            #
            elif value in self.adts:
                if not self.treat_stdlib_adts_as_builtins:
                    token = Name.Namespace
                else:
                    token = Name.Builtin.Pseudo
                    if self.algol_publication_mode:
                        value = value.lower()
            #
            elif value in self.modules:
                token = Name.Namespace
            #
            elif value in self.types:
                token = Name.Class
            #
            elif value in self.procedures:
                token = Name.Function
            #
            elif value in self.variables:
                token = Name.Variable
            #
            elif value in self.constants:
                token = Name.Constant
        #
        elif token in Number:
            #
            # mark prefix number literals as error for PIM and ISO dialects
            if self.dialect not in ('unknown', 'm2r10', 'objm2'):
                if "'" in value or value[0:2] in ('0b', '0x', '0u'):
                    token = Error
            #
            elif self.dialect in ('m2r10', 'objm2'):
                # mark base-8 number literals as errors for M2 R10 and ObjM2
                if token is Number.Oct:
                    token = Error
                # mark suffix base-16 literals as errors for M2 R10 and ObjM2
                elif token is Number.Hex and 'H' in value:
                    token = Error
                # mark real numbers with E as errors for M2 R10 and ObjM2
                elif token is Number.Float and 'E' in value:
                    token = Error
        #
        elif token in Comment:
            #
            # mark single line comment as error for PIM and ISO dialects
            if token is Comment.Single:
                if self.dialect not in ('unknown', 'm2r10', 'objm2'):
                    token = Error
            #
            if token is Comment.Preproc:
                # mark ISO pragma as error for PIM dialects
                if value.startswith('<*') and \
                        self.dialect.startswith('m2pim'):
                    token = Error
                # mark PIM pragma as comment for other dialects
                elif value.startswith('(*$') and \
                        self.dialect != 'unknown' and \
                        not self.dialect.startswith('m2pim'):
                    token = Comment.Multiline
        #
        else:  # token is neither Name, Number nor Comment
            #
            # mark lexemes matching the dialect's error token set as errors
            if value in self.lexemes_to_reject:
                token = Error
            #
            # substitute lexemes when in Algol mode
            if self.algol_publication_mode:
                value = algol_substitutions.get(value, value)

        # return result
        yield index, token, value