aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/Pygments/py2/pygments/lexers/parsers.py
diff options
context:
space:
mode:
authorilezhankin <ilezhankin@yandex-team.ru>2022-02-10 16:45:55 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:45:55 +0300
commit1d125034f06575234f83f24f08677955133f140e (patch)
treeec05fbbd61dc118d5de37f206ab978cff58774bd /contrib/python/Pygments/py2/pygments/lexers/parsers.py
parent3a7a498715ef1b66f5054455421b845e45e3a653 (diff)
downloadydb-1d125034f06575234f83f24f08677955133f140e.tar.gz
Restoring authorship annotation for <ilezhankin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/python/Pygments/py2/pygments/lexers/parsers.py')
-rw-r--r--contrib/python/Pygments/py2/pygments/lexers/parsers.py1656
1 files changed, 828 insertions, 828 deletions
diff --git a/contrib/python/Pygments/py2/pygments/lexers/parsers.py b/contrib/python/Pygments/py2/pygments/lexers/parsers.py
index 8bcbfc5046..cdad7d9d02 100644
--- a/contrib/python/Pygments/py2/pygments/lexers/parsers.py
+++ b/contrib/python/Pygments/py2/pygments/lexers/parsers.py
@@ -1,835 +1,835 @@
-# -*- coding: utf-8 -*-
-"""
- pygments.lexers.parsers
- ~~~~~~~~~~~~~~~~~~~~~~~
-
- Lexers for parser generators.
-
+# -*- coding: utf-8 -*-
+"""
+ pygments.lexers.parsers
+ ~~~~~~~~~~~~~~~~~~~~~~~
+
+ Lexers for parser generators.
+
:copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
- :license: BSD, see LICENSE for details.
-"""
-
-import re
-
-from pygments.lexer import RegexLexer, DelegatingLexer, \
- include, bygroups, using
-from pygments.token import Punctuation, Other, Text, Comment, Operator, \
- Keyword, Name, String, Number, Whitespace
-from pygments.lexers.jvm import JavaLexer
-from pygments.lexers.c_cpp import CLexer, CppLexer
-from pygments.lexers.objective import ObjectiveCLexer
-from pygments.lexers.d import DLexer
-from pygments.lexers.dotnet import CSharpLexer
-from pygments.lexers.ruby import RubyLexer
-from pygments.lexers.python import PythonLexer
-from pygments.lexers.perl import PerlLexer
-
-__all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer',
- 'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer',
- 'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer',
- 'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer',
- # 'AntlrCLexer',
- 'AntlrCSharpLexer', 'AntlrObjectiveCLexer',
- 'AntlrJavaLexer', 'AntlrActionScriptLexer',
- 'TreetopLexer', 'EbnfLexer']
-
-
-class RagelLexer(RegexLexer):
- """
- A pure `Ragel <http://www.complang.org/ragel/>`_ lexer. Use this for
- fragments of Ragel. For ``.rl`` files, use RagelEmbeddedLexer instead
- (or one of the language-specific subclasses).
-
- .. versionadded:: 1.1
- """
-
- name = 'Ragel'
- aliases = ['ragel']
- filenames = []
-
- tokens = {
- 'whitespace': [
- (r'\s+', Whitespace)
- ],
- 'comments': [
- (r'\#.*$', Comment),
- ],
- 'keywords': [
- (r'(access|action|alphtype)\b', Keyword),
- (r'(getkey|write|machine|include)\b', Keyword),
- (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
- (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
- ],
- 'numbers': [
- (r'0x[0-9A-Fa-f]+', Number.Hex),
- (r'[+-]?[0-9]+', Number.Integer),
- ],
- 'literals': [
- (r'"(\\\\|\\"|[^"])*"', String), # double quote string
- (r"'(\\\\|\\'|[^'])*'", String), # single quote string
- (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals
- (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions
- ],
- 'identifiers': [
- (r'[a-zA-Z_]\w*', Name.Variable),
- ],
- 'operators': [
- (r',', Operator), # Join
- (r'\||&|--?', Operator), # Union, Intersection and Subtraction
- (r'\.|<:|:>>?', Operator), # Concatention
- (r':', Operator), # Label
- (r'->', Operator), # Epsilon Transition
- (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions
- (r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions
- (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions
- (r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions
- (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions
- (r'>|@|\$|%', Operator), # Transition Actions and Priorities
- (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator), # Repetition
- (r'!|\^', Operator), # Negation
- (r'\(|\)', Operator), # Grouping
- ],
- 'root': [
- include('literals'),
- include('whitespace'),
- include('comments'),
- include('keywords'),
- include('numbers'),
- include('identifiers'),
- include('operators'),
- (r'\{', Punctuation, 'host'),
- (r'=', Operator),
- (r';', Punctuation),
- ],
- 'host': [
- (r'(' + r'|'.join(( # keep host code in largest possible chunks
- r'[^{}\'"/#]+', # exclude unsafe characters
- r'[^\\]\\[{}]', # allow escaped { or }
-
- # strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
- r'//.*$\n?', # single line comment
- r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
- r'\#.*$\n?', # ruby comment
-
- # regular expression: There's no reason for it to start
- # with a * and this stops confusion with comments.
- r'/(?!\*)(\\\\|\\/|[^/])*/',
-
- # / is safe now that we've handled regex and javadoc comments
- r'/',
- )) + r')+', Other),
-
- (r'\{', Punctuation, '#push'),
- (r'\}', Punctuation, '#pop'),
- ],
- }
-
-
-class RagelEmbeddedLexer(RegexLexer):
- """
- A lexer for `Ragel`_ embedded in a host language file.
-
- This will only highlight Ragel statements. If you want host language
- highlighting then call the language-specific Ragel lexer.
-
- .. versionadded:: 1.1
- """
-
- name = 'Embedded Ragel'
- aliases = ['ragel-em']
- filenames = ['*.rl']
-
- tokens = {
- 'root': [
- (r'(' + r'|'.join(( # keep host code in largest possible chunks
- r'[^%\'"/#]+', # exclude unsafe characters
- r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them
-
- # strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
- r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
- r'//.*$\n?', # single line comment
- r'\#.*$\n?', # ruby/ragel comment
- r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression
-
- # / is safe now that we've handled regex and javadoc comments
- r'/',
- )) + r')+', Other),
-
- # Single Line FSM.
- # Please don't put a quoted newline in a single line FSM.
- # That's just mean. It will break this.
- (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation,
- using(RagelLexer),
- Punctuation, Text)),
-
- # Multi Line FSM.
- (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'),
- ],
- 'multi-line-fsm': [
- (r'(' + r'|'.join(( # keep ragel code in largest possible chunks.
- r'(' + r'|'.join((
- r'[^}\'"\[/#]', # exclude unsafe characters
- r'\}(?=[^%]|$)', # } is okay as long as it's not followed by %
- r'\}%(?=[^%]|$)', # ...well, one %'s okay, just not two...
- r'[^\\]\\[{}]', # ...and } is okay if it's escaped
-
- # allow / if it's preceded with one of these symbols
- # (ragel EOF actions)
- r'(>|\$|%|<|@|<>)/',
-
- # specifically allow regex followed immediately by *
- # so it doesn't get mistaken for a comment
- r'/(?!\*)(\\\\|\\/|[^/])*/\*',
-
- # allow / as long as it's not followed by another / or by a *
- r'/(?=[^/*]|$)',
-
- # We want to match as many of these as we can in one block.
- # Not sure if we need the + sign here,
- # does it help performance?
- )) + r')+',
-
- # strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
- r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal
- r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
- r'//.*$\n?', # single line comment
- r'\#.*$\n?', # ruby/ragel comment
- )) + r')+', using(RagelLexer)),
-
- (r'\}%%', Punctuation, '#pop'),
- ]
- }
-
- def analyse_text(text):
- return '@LANG: indep' in text
-
-
-class RagelRubyLexer(DelegatingLexer):
- """
- A lexer for `Ragel`_ in a Ruby host file.
-
- .. versionadded:: 1.1
- """
-
- name = 'Ragel in Ruby Host'
- aliases = ['ragel-ruby', 'ragel-rb']
- filenames = ['*.rl']
-
- def __init__(self, **options):
- super(RagelRubyLexer, self).__init__(RubyLexer, RagelEmbeddedLexer,
- **options)
-
- def analyse_text(text):
- return '@LANG: ruby' in text
-
-
-class RagelCLexer(DelegatingLexer):
- """
- A lexer for `Ragel`_ in a C host file.
-
- .. versionadded:: 1.1
- """
-
- name = 'Ragel in C Host'
- aliases = ['ragel-c']
- filenames = ['*.rl']
-
- def __init__(self, **options):
- super(RagelCLexer, self).__init__(CLexer, RagelEmbeddedLexer,
- **options)
-
- def analyse_text(text):
- return '@LANG: c' in text
-
-
-class RagelDLexer(DelegatingLexer):
- """
- A lexer for `Ragel`_ in a D host file.
-
- .. versionadded:: 1.1
- """
-
- name = 'Ragel in D Host'
- aliases = ['ragel-d']
- filenames = ['*.rl']
-
- def __init__(self, **options):
- super(RagelDLexer, self).__init__(DLexer, RagelEmbeddedLexer, **options)
-
- def analyse_text(text):
- return '@LANG: d' in text
-
-
-class RagelCppLexer(DelegatingLexer):
- """
- A lexer for `Ragel`_ in a CPP host file.
-
- .. versionadded:: 1.1
- """
-
- name = 'Ragel in CPP Host'
- aliases = ['ragel-cpp']
- filenames = ['*.rl']
-
- def __init__(self, **options):
- super(RagelCppLexer, self).__init__(CppLexer, RagelEmbeddedLexer, **options)
-
- def analyse_text(text):
- return '@LANG: c++' in text
-
-
-class RagelObjectiveCLexer(DelegatingLexer):
- """
- A lexer for `Ragel`_ in an Objective C host file.
-
- .. versionadded:: 1.1
- """
-
- name = 'Ragel in Objective C Host'
- aliases = ['ragel-objc']
- filenames = ['*.rl']
-
- def __init__(self, **options):
- super(RagelObjectiveCLexer, self).__init__(ObjectiveCLexer,
- RagelEmbeddedLexer,
- **options)
-
- def analyse_text(text):
- return '@LANG: objc' in text
-
-
-class RagelJavaLexer(DelegatingLexer):
- """
- A lexer for `Ragel`_ in a Java host file.
-
- .. versionadded:: 1.1
- """
-
- name = 'Ragel in Java Host'
- aliases = ['ragel-java']
- filenames = ['*.rl']
-
- def __init__(self, **options):
- super(RagelJavaLexer, self).__init__(JavaLexer, RagelEmbeddedLexer,
- **options)
-
- def analyse_text(text):
- return '@LANG: java' in text
-
-
-class AntlrLexer(RegexLexer):
- """
- Generic `ANTLR`_ Lexer.
- Should not be called directly, instead
- use DelegatingLexer for your target language.
-
- .. versionadded:: 1.1
-
- .. _ANTLR: http://www.antlr.org/
- """
-
- name = 'ANTLR'
- aliases = ['antlr']
- filenames = []
-
- _id = r'[A-Za-z]\w*'
- _TOKEN_REF = r'[A-Z]\w*'
- _RULE_REF = r'[a-z]\w*'
- _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
- _INT = r'[0-9]+'
-
- tokens = {
- 'whitespace': [
- (r'\s+', Whitespace),
- ],
- 'comments': [
- (r'//.*$', Comment),
- (r'/\*(.|\n)*?\*/', Comment),
- ],
- 'root': [
- include('whitespace'),
- include('comments'),
-
- (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
- bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
- Punctuation)),
- # optionsSpec
- (r'options\b', Keyword, 'options'),
- # tokensSpec
- (r'tokens\b', Keyword, 'tokens'),
- # attrScope
+ :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, DelegatingLexer, \
+ include, bygroups, using
+from pygments.token import Punctuation, Other, Text, Comment, Operator, \
+ Keyword, Name, String, Number, Whitespace
+from pygments.lexers.jvm import JavaLexer
+from pygments.lexers.c_cpp import CLexer, CppLexer
+from pygments.lexers.objective import ObjectiveCLexer
+from pygments.lexers.d import DLexer
+from pygments.lexers.dotnet import CSharpLexer
+from pygments.lexers.ruby import RubyLexer
+from pygments.lexers.python import PythonLexer
+from pygments.lexers.perl import PerlLexer
+
+__all__ = ['RagelLexer', 'RagelEmbeddedLexer', 'RagelCLexer', 'RagelDLexer',
+ 'RagelCppLexer', 'RagelObjectiveCLexer', 'RagelRubyLexer',
+ 'RagelJavaLexer', 'AntlrLexer', 'AntlrPythonLexer',
+ 'AntlrPerlLexer', 'AntlrRubyLexer', 'AntlrCppLexer',
+ # 'AntlrCLexer',
+ 'AntlrCSharpLexer', 'AntlrObjectiveCLexer',
+ 'AntlrJavaLexer', 'AntlrActionScriptLexer',
+ 'TreetopLexer', 'EbnfLexer']
+
+
+class RagelLexer(RegexLexer):
+ """
+ A pure `Ragel <http://www.complang.org/ragel/>`_ lexer. Use this for
+ fragments of Ragel. For ``.rl`` files, use RagelEmbeddedLexer instead
+ (or one of the language-specific subclasses).
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'Ragel'
+ aliases = ['ragel']
+ filenames = []
+
+ tokens = {
+ 'whitespace': [
+ (r'\s+', Whitespace)
+ ],
+ 'comments': [
+ (r'\#.*$', Comment),
+ ],
+ 'keywords': [
+ (r'(access|action|alphtype)\b', Keyword),
+ (r'(getkey|write|machine|include)\b', Keyword),
+ (r'(any|ascii|extend|alpha|digit|alnum|lower|upper)\b', Keyword),
+ (r'(xdigit|cntrl|graph|print|punct|space|zlen|empty)\b', Keyword)
+ ],
+ 'numbers': [
+ (r'0x[0-9A-Fa-f]+', Number.Hex),
+ (r'[+-]?[0-9]+', Number.Integer),
+ ],
+ 'literals': [
+ (r'"(\\\\|\\"|[^"])*"', String), # double quote string
+ (r"'(\\\\|\\'|[^'])*'", String), # single quote string
+ (r'\[(\\\\|\\\]|[^\]])*\]', String), # square bracket literals
+ (r'/(?!\*)(\\\\|\\/|[^/])*/', String.Regex), # regular expressions
+ ],
+ 'identifiers': [
+ (r'[a-zA-Z_]\w*', Name.Variable),
+ ],
+ 'operators': [
+ (r',', Operator), # Join
+ (r'\||&|--?', Operator), # Union, Intersection and Subtraction
+ (r'\.|<:|:>>?', Operator), # Concatention
+ (r':', Operator), # Label
+ (r'->', Operator), # Epsilon Transition
+ (r'(>|\$|%|<|@|<>)(/|eof\b)', Operator), # EOF Actions
+ (r'(>|\$|%|<|@|<>)(!|err\b)', Operator), # Global Error Actions
+ (r'(>|\$|%|<|@|<>)(\^|lerr\b)', Operator), # Local Error Actions
+ (r'(>|\$|%|<|@|<>)(~|to\b)', Operator), # To-State Actions
+ (r'(>|\$|%|<|@|<>)(\*|from\b)', Operator), # From-State Actions
+ (r'>|@|\$|%', Operator), # Transition Actions and Priorities
+ (r'\*|\?|\+|\{[0-9]*,[0-9]*\}', Operator), # Repetition
+ (r'!|\^', Operator), # Negation
+ (r'\(|\)', Operator), # Grouping
+ ],
+ 'root': [
+ include('literals'),
+ include('whitespace'),
+ include('comments'),
+ include('keywords'),
+ include('numbers'),
+ include('identifiers'),
+ include('operators'),
+ (r'\{', Punctuation, 'host'),
+ (r'=', Operator),
+ (r';', Punctuation),
+ ],
+ 'host': [
+ (r'(' + r'|'.join(( # keep host code in largest possible chunks
+ r'[^{}\'"/#]+', # exclude unsafe characters
+ r'[^\\]\\[{}]', # allow escaped { or }
+
+ # strings and comments may safely contain unsafe characters
+ r'"(\\\\|\\"|[^"])*"', # double quote string
+ r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'//.*$\n?', # single line comment
+ r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+ r'\#.*$\n?', # ruby comment
+
+ # regular expression: There's no reason for it to start
+ # with a * and this stops confusion with comments.
+ r'/(?!\*)(\\\\|\\/|[^/])*/',
+
+ # / is safe now that we've handled regex and javadoc comments
+ r'/',
+ )) + r')+', Other),
+
+ (r'\{', Punctuation, '#push'),
+ (r'\}', Punctuation, '#pop'),
+ ],
+ }
+
+
+class RagelEmbeddedLexer(RegexLexer):
+ """
+ A lexer for `Ragel`_ embedded in a host language file.
+
+ This will only highlight Ragel statements. If you want host language
+ highlighting then call the language-specific Ragel lexer.
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'Embedded Ragel'
+ aliases = ['ragel-em']
+ filenames = ['*.rl']
+
+ tokens = {
+ 'root': [
+ (r'(' + r'|'.join(( # keep host code in largest possible chunks
+ r'[^%\'"/#]+', # exclude unsafe characters
+ r'%(?=[^%]|$)', # a single % sign is okay, just not 2 of them
+
+ # strings and comments may safely contain unsafe characters
+ r'"(\\\\|\\"|[^"])*"', # double quote string
+ r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+ r'//.*$\n?', # single line comment
+ r'\#.*$\n?', # ruby/ragel comment
+ r'/(?!\*)(\\\\|\\/|[^/])*/', # regular expression
+
+ # / is safe now that we've handled regex and javadoc comments
+ r'/',
+ )) + r')+', Other),
+
+ # Single Line FSM.
+ # Please don't put a quoted newline in a single line FSM.
+ # That's just mean. It will break this.
+ (r'(%%)(?![{%])(.*)($|;)(\n?)', bygroups(Punctuation,
+ using(RagelLexer),
+ Punctuation, Text)),
+
+ # Multi Line FSM.
+ (r'(%%%%|%%)\{', Punctuation, 'multi-line-fsm'),
+ ],
+ 'multi-line-fsm': [
+ (r'(' + r'|'.join(( # keep ragel code in largest possible chunks.
+ r'(' + r'|'.join((
+ r'[^}\'"\[/#]', # exclude unsafe characters
+ r'\}(?=[^%]|$)', # } is okay as long as it's not followed by %
+ r'\}%(?=[^%]|$)', # ...well, one %'s okay, just not two...
+ r'[^\\]\\[{}]', # ...and } is okay if it's escaped
+
+ # allow / if it's preceded with one of these symbols
+ # (ragel EOF actions)
+ r'(>|\$|%|<|@|<>)/',
+
+ # specifically allow regex followed immediately by *
+ # so it doesn't get mistaken for a comment
+ r'/(?!\*)(\\\\|\\/|[^/])*/\*',
+
+ # allow / as long as it's not followed by another / or by a *
+ r'/(?=[^/*]|$)',
+
+ # We want to match as many of these as we can in one block.
+ # Not sure if we need the + sign here,
+ # does it help performance?
+ )) + r')+',
+
+ # strings and comments may safely contain unsafe characters
+ r'"(\\\\|\\"|[^"])*"', # double quote string
+ r"'(\\\\|\\'|[^'])*'", # single quote string
+ r"\[(\\\\|\\\]|[^\]])*\]", # square bracket literal
+ r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+ r'//.*$\n?', # single line comment
+ r'\#.*$\n?', # ruby/ragel comment
+ )) + r')+', using(RagelLexer)),
+
+ (r'\}%%', Punctuation, '#pop'),
+ ]
+ }
+
+ def analyse_text(text):
+ return '@LANG: indep' in text
+
+
+class RagelRubyLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in a Ruby host file.
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'Ragel in Ruby Host'
+ aliases = ['ragel-ruby', 'ragel-rb']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelRubyLexer, self).__init__(RubyLexer, RagelEmbeddedLexer,
+ **options)
+
+ def analyse_text(text):
+ return '@LANG: ruby' in text
+
+
+class RagelCLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in a C host file.
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'Ragel in C Host'
+ aliases = ['ragel-c']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelCLexer, self).__init__(CLexer, RagelEmbeddedLexer,
+ **options)
+
+ def analyse_text(text):
+ return '@LANG: c' in text
+
+
+class RagelDLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in a D host file.
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'Ragel in D Host'
+ aliases = ['ragel-d']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelDLexer, self).__init__(DLexer, RagelEmbeddedLexer, **options)
+
+ def analyse_text(text):
+ return '@LANG: d' in text
+
+
+class RagelCppLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in a CPP host file.
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'Ragel in CPP Host'
+ aliases = ['ragel-cpp']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelCppLexer, self).__init__(CppLexer, RagelEmbeddedLexer, **options)
+
+ def analyse_text(text):
+ return '@LANG: c++' in text
+
+
+class RagelObjectiveCLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in an Objective C host file.
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'Ragel in Objective C Host'
+ aliases = ['ragel-objc']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelObjectiveCLexer, self).__init__(ObjectiveCLexer,
+ RagelEmbeddedLexer,
+ **options)
+
+ def analyse_text(text):
+ return '@LANG: objc' in text
+
+
+class RagelJavaLexer(DelegatingLexer):
+ """
+ A lexer for `Ragel`_ in a Java host file.
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'Ragel in Java Host'
+ aliases = ['ragel-java']
+ filenames = ['*.rl']
+
+ def __init__(self, **options):
+ super(RagelJavaLexer, self).__init__(JavaLexer, RagelEmbeddedLexer,
+ **options)
+
+ def analyse_text(text):
+ return '@LANG: java' in text
+
+
+class AntlrLexer(RegexLexer):
+ """
+ Generic `ANTLR`_ Lexer.
+ Should not be called directly, instead
+ use DelegatingLexer for your target language.
+
+ .. versionadded:: 1.1
+
+ .. _ANTLR: http://www.antlr.org/
+ """
+
+ name = 'ANTLR'
+ aliases = ['antlr']
+ filenames = []
+
+ _id = r'[A-Za-z]\w*'
+ _TOKEN_REF = r'[A-Z]\w*'
+ _RULE_REF = r'[a-z]\w*'
+ _STRING_LITERAL = r'\'(?:\\\\|\\\'|[^\']*)\''
+ _INT = r'[0-9]+'
+
+ tokens = {
+ 'whitespace': [
+ (r'\s+', Whitespace),
+ ],
+ 'comments': [
+ (r'//.*$', Comment),
+ (r'/\*(.|\n)*?\*/', Comment),
+ ],
+ 'root': [
+ include('whitespace'),
+ include('comments'),
+
+ (r'(lexer|parser|tree)?(\s*)(grammar\b)(\s*)(' + _id + ')(;)',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Class,
+ Punctuation)),
+ # optionsSpec
+ (r'options\b', Keyword, 'options'),
+ # tokensSpec
+ (r'tokens\b', Keyword, 'tokens'),
+ # attrScope
(r'(scope)(\s*)(' + _id + r')(\s*)(\{)',
- bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
- Punctuation), 'action'),
- # exception
- (r'(catch|finally)\b', Keyword, 'exception'),
- # action
+ bygroups(Keyword, Whitespace, Name.Variable, Whitespace,
+ Punctuation), 'action'),
+ # exception
+ (r'(catch|finally)\b', Keyword, 'exception'),
+ # action
(r'(@' + _id + r')(\s*)(::)?(\s*)(' + _id + r')(\s*)(\{)',
- bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
- Name.Label, Whitespace, Punctuation), 'action'),
- # rule
- (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?',
- bygroups(Keyword, Whitespace, Name.Label, Punctuation),
- ('rule-alts', 'rule-prelims')),
- ],
- 'exception': [
- (r'\n', Whitespace, '#pop'),
- (r'\s', Whitespace),
- include('comments'),
-
- (r'\[', Punctuation, 'nested-arg-action'),
- (r'\{', Punctuation, 'action'),
- ],
- 'rule-prelims': [
- include('whitespace'),
- include('comments'),
-
- (r'returns\b', Keyword),
- (r'\[', Punctuation, 'nested-arg-action'),
- (r'\{', Punctuation, 'action'),
- # throwsSpec
- (r'(throws)(\s+)(' + _id + ')',
- bygroups(Keyword, Whitespace, Name.Label)),
- (r'(,)(\s*)(' + _id + ')',
- bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws
- # optionsSpec
- (r'options\b', Keyword, 'options'),
- # ruleScopeSpec - scope followed by target language code or name of action
- # TODO finish implementing other possibilities for scope
- # L173 ANTLRv3.g from ANTLR book
- (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation),
- 'action'),
+ bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
+ Name.Label, Whitespace, Punctuation), 'action'),
+ # rule
+ (r'((?:protected|private|public|fragment)\b)?(\s*)(' + _id + ')(!)?',
+ bygroups(Keyword, Whitespace, Name.Label, Punctuation),
+ ('rule-alts', 'rule-prelims')),
+ ],
+ 'exception': [
+ (r'\n', Whitespace, '#pop'),
+ (r'\s', Whitespace),
+ include('comments'),
+
+ (r'\[', Punctuation, 'nested-arg-action'),
+ (r'\{', Punctuation, 'action'),
+ ],
+ 'rule-prelims': [
+ include('whitespace'),
+ include('comments'),
+
+ (r'returns\b', Keyword),
+ (r'\[', Punctuation, 'nested-arg-action'),
+ (r'\{', Punctuation, 'action'),
+ # throwsSpec
+ (r'(throws)(\s+)(' + _id + ')',
+ bygroups(Keyword, Whitespace, Name.Label)),
+ (r'(,)(\s*)(' + _id + ')',
+ bygroups(Punctuation, Whitespace, Name.Label)), # Additional throws
+ # optionsSpec
+ (r'options\b', Keyword, 'options'),
+ # ruleScopeSpec - scope followed by target language code or name of action
+ # TODO finish implementing other possibilities for scope
+ # L173 ANTLRv3.g from ANTLR book
+ (r'(scope)(\s+)(\{)', bygroups(Keyword, Whitespace, Punctuation),
+ 'action'),
(r'(scope)(\s+)(' + _id + r')(\s*)(;)',
- bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
- # ruleAction
+ bygroups(Keyword, Whitespace, Name.Label, Whitespace, Punctuation)),
+ # ruleAction
(r'(@' + _id + r')(\s*)(\{)',
- bygroups(Name.Label, Whitespace, Punctuation), 'action'),
- # finished prelims, go to rule alts!
- (r':', Punctuation, '#pop')
- ],
- 'rule-alts': [
- include('whitespace'),
- include('comments'),
-
- # These might need to go in a separate 'block' state triggered by (
- (r'options\b', Keyword, 'options'),
- (r':', Punctuation),
-
- # literals
- (r"'(\\\\|\\'|[^'])*'", String),
- (r'"(\\\\|\\"|[^"])*"', String),
- (r'<<([^>]|>[^>])>>', String),
- # identifiers
- # Tokens start with capital letter.
- (r'\$?[A-Z_]\w*', Name.Constant),
- # Rules start with small letter.
- (r'\$?[a-z_]\w*', Name.Variable),
- # operators
- (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
- (r',', Punctuation),
- (r'\[', Punctuation, 'nested-arg-action'),
- (r'\{', Punctuation, 'action'),
- (r';', Punctuation, '#pop')
- ],
- 'tokens': [
- include('whitespace'),
- include('comments'),
- (r'\{', Punctuation),
- (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
+ bygroups(Name.Label, Whitespace, Punctuation), 'action'),
+ # finished prelims, go to rule alts!
+ (r':', Punctuation, '#pop')
+ ],
+ 'rule-alts': [
+ include('whitespace'),
+ include('comments'),
+
+ # These might need to go in a separate 'block' state triggered by (
+ (r'options\b', Keyword, 'options'),
+ (r':', Punctuation),
+
+ # literals
+ (r"'(\\\\|\\'|[^'])*'", String),
+ (r'"(\\\\|\\"|[^"])*"', String),
+ (r'<<([^>]|>[^>])>>', String),
+ # identifiers
+ # Tokens start with capital letter.
+ (r'\$?[A-Z_]\w*', Name.Constant),
+ # Rules start with small letter.
+ (r'\$?[a-z_]\w*', Name.Variable),
+ # operators
+ (r'(\+|\||->|=>|=|\(|\)|\.\.|\.|\?|\*|\^|!|\#|~)', Operator),
+ (r',', Punctuation),
+ (r'\[', Punctuation, 'nested-arg-action'),
+ (r'\{', Punctuation, 'action'),
+ (r';', Punctuation, '#pop')
+ ],
+ 'tokens': [
+ include('whitespace'),
+ include('comments'),
+ (r'\{', Punctuation),
+ (r'(' + _TOKEN_REF + r')(\s*)(=)?(\s*)(' + _STRING_LITERAL
+ r')?(\s*)(;)',
- bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
- String, Whitespace, Punctuation)),
- (r'\}', Punctuation, '#pop'),
- ],
- 'options': [
- include('whitespace'),
- include('comments'),
- (r'\{', Punctuation),
- (r'(' + _id + r')(\s*)(=)(\s*)(' +
+ bygroups(Name.Label, Whitespace, Punctuation, Whitespace,
+ String, Whitespace, Punctuation)),
+ (r'\}', Punctuation, '#pop'),
+ ],
+ 'options': [
+ include('whitespace'),
+ include('comments'),
+ (r'\{', Punctuation),
+ (r'(' + _id + r')(\s*)(=)(\s*)(' +
'|'.join((_id, _STRING_LITERAL, _INT, r'\*')) + r')(\s*)(;)',
- bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
- Text, Whitespace, Punctuation)),
- (r'\}', Punctuation, '#pop'),
- ],
- 'action': [
- (r'(' + r'|'.join(( # keep host code in largest possible chunks
- r'[^${}\'"/\\]+', # exclude unsafe characters
-
- # strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
- r'//.*$\n?', # single line comment
- r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
-
- # regular expression: There's no reason for it to start
- # with a * and this stops confusion with comments.
- r'/(?!\*)(\\\\|\\/|[^/])*/',
-
- # backslashes are okay, as long as we are not backslashing a %
- r'\\(?!%)',
-
- # Now that we've handled regex and javadoc comments
- # it's safe to let / through.
- r'/',
- )) + r')+', Other),
- (r'(\\)(%)', bygroups(Punctuation, Other)),
- (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
- bygroups(Name.Variable, Punctuation, Name.Property)),
- (r'\{', Punctuation, '#push'),
- (r'\}', Punctuation, '#pop'),
- ],
- 'nested-arg-action': [
- (r'(' + r'|'.join(( # keep host code in largest possible chunks.
- r'[^$\[\]\'"/]+', # exclude unsafe characters
-
- # strings and comments may safely contain unsafe characters
- r'"(\\\\|\\"|[^"])*"', # double quote string
- r"'(\\\\|\\'|[^'])*'", # single quote string
- r'//.*$\n?', # single line comment
- r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
-
- # regular expression: There's no reason for it to start
- # with a * and this stops confusion with comments.
- r'/(?!\*)(\\\\|\\/|[^/])*/',
-
- # Now that we've handled regex and javadoc comments
- # it's safe to let / through.
- r'/',
- )) + r')+', Other),
-
-
- (r'\[', Punctuation, '#push'),
- (r'\]', Punctuation, '#pop'),
- (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
- bygroups(Name.Variable, Punctuation, Name.Property)),
- (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
- ]
- }
-
- def analyse_text(text):
- return re.search(r'^\s*grammar\s+[a-zA-Z0-9]+\s*;', text, re.M)
-
-# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets
-
-# TH: I'm not aware of any language features of C++ that will cause
-# incorrect lexing of C files. Antlr doesn't appear to make a distinction,
-# so just assume they're C++. No idea how to make Objective C work in the
-# future.
-
-# class AntlrCLexer(DelegatingLexer):
-# """
-# ANTLR with C Target
-#
-# .. versionadded:: 1.1
-# """
-#
-# name = 'ANTLR With C Target'
-# aliases = ['antlr-c']
-# filenames = ['*.G', '*.g']
-#
-# def __init__(self, **options):
-# super(AntlrCLexer, self).__init__(CLexer, AntlrLexer, **options)
-#
-# def analyse_text(text):
-# return re.match(r'^\s*language\s*=\s*C\s*;', text)
-
-
-class AntlrCppLexer(DelegatingLexer):
- """
- `ANTLR`_ with CPP Target
-
- .. versionadded:: 1.1
- """
-
- name = 'ANTLR With CPP Target'
- aliases = ['antlr-cpp']
- filenames = ['*.G', '*.g']
-
- def __init__(self, **options):
- super(AntlrCppLexer, self).__init__(CppLexer, AntlrLexer, **options)
-
- def analyse_text(text):
- return AntlrLexer.analyse_text(text) and \
- re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)
-
-
-class AntlrObjectiveCLexer(DelegatingLexer):
- """
- `ANTLR`_ with Objective-C Target
-
- .. versionadded:: 1.1
- """
-
- name = 'ANTLR With ObjectiveC Target'
- aliases = ['antlr-objc']
- filenames = ['*.G', '*.g']
-
- def __init__(self, **options):
- super(AntlrObjectiveCLexer, self).__init__(ObjectiveCLexer,
- AntlrLexer, **options)
-
- def analyse_text(text):
- return AntlrLexer.analyse_text(text) and \
- re.search(r'^\s*language\s*=\s*ObjC\s*;', text)
-
-
-class AntlrCSharpLexer(DelegatingLexer):
- """
- `ANTLR`_ with C# Target
-
- .. versionadded:: 1.1
- """
-
- name = 'ANTLR With C# Target'
- aliases = ['antlr-csharp', 'antlr-c#']
- filenames = ['*.G', '*.g']
-
- def __init__(self, **options):
- super(AntlrCSharpLexer, self).__init__(CSharpLexer, AntlrLexer,
- **options)
-
- def analyse_text(text):
- return AntlrLexer.analyse_text(text) and \
- re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)
-
-
-class AntlrPythonLexer(DelegatingLexer):
- """
- `ANTLR`_ with Python Target
-
- .. versionadded:: 1.1
- """
-
- name = 'ANTLR With Python Target'
- aliases = ['antlr-python']
- filenames = ['*.G', '*.g']
-
- def __init__(self, **options):
- super(AntlrPythonLexer, self).__init__(PythonLexer, AntlrLexer,
- **options)
-
- def analyse_text(text):
- return AntlrLexer.analyse_text(text) and \
- re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)
-
-
-class AntlrJavaLexer(DelegatingLexer):
- """
- `ANTLR`_ with Java Target
-
- .. versionadded:: 1.
- """
-
- name = 'ANTLR With Java Target'
- aliases = ['antlr-java']
- filenames = ['*.G', '*.g']
-
- def __init__(self, **options):
- super(AntlrJavaLexer, self).__init__(JavaLexer, AntlrLexer,
- **options)
-
- def analyse_text(text):
- # Antlr language is Java by default
- return AntlrLexer.analyse_text(text) and 0.9
-
-
-class AntlrRubyLexer(DelegatingLexer):
- """
- `ANTLR`_ with Ruby Target
-
- .. versionadded:: 1.1
- """
-
- name = 'ANTLR With Ruby Target'
- aliases = ['antlr-ruby', 'antlr-rb']
- filenames = ['*.G', '*.g']
-
- def __init__(self, **options):
- super(AntlrRubyLexer, self).__init__(RubyLexer, AntlrLexer,
- **options)
-
- def analyse_text(text):
- return AntlrLexer.analyse_text(text) and \
- re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)
-
-
-class AntlrPerlLexer(DelegatingLexer):
- """
- `ANTLR`_ with Perl Target
-
- .. versionadded:: 1.1
- """
-
- name = 'ANTLR With Perl Target'
- aliases = ['antlr-perl']
- filenames = ['*.G', '*.g']
-
- def __init__(self, **options):
- super(AntlrPerlLexer, self).__init__(PerlLexer, AntlrLexer,
- **options)
-
- def analyse_text(text):
- return AntlrLexer.analyse_text(text) and \
- re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)
-
-
-class AntlrActionScriptLexer(DelegatingLexer):
- """
- `ANTLR`_ with ActionScript Target
-
- .. versionadded:: 1.1
- """
-
- name = 'ANTLR With ActionScript Target'
- aliases = ['antlr-as', 'antlr-actionscript']
- filenames = ['*.G', '*.g']
-
- def __init__(self, **options):
- from pygments.lexers.actionscript import ActionScriptLexer
- super(AntlrActionScriptLexer, self).__init__(ActionScriptLexer,
- AntlrLexer, **options)
-
- def analyse_text(text):
- return AntlrLexer.analyse_text(text) and \
- re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)
-
-
-class TreetopBaseLexer(RegexLexer):
- """
- A base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
- Not for direct use; use TreetopLexer instead.
-
- .. versionadded:: 1.6
- """
-
- tokens = {
- 'root': [
- include('space'),
- (r'require[ \t]+[^\n\r]+[\n\r]', Other),
- (r'module\b', Keyword.Namespace, 'module'),
- (r'grammar\b', Keyword, 'grammar'),
- ],
- 'module': [
- include('space'),
- include('end'),
- (r'module\b', Keyword, '#push'),
- (r'grammar\b', Keyword, 'grammar'),
- (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace),
- ],
- 'grammar': [
- include('space'),
- include('end'),
- (r'rule\b', Keyword, 'rule'),
- (r'include\b', Keyword, 'include'),
- (r'[A-Z]\w*', Name),
- ],
- 'include': [
- include('space'),
- (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'),
- ],
- 'rule': [
- include('space'),
- include('end'),
- (r'"(\\\\|\\"|[^"])*"', String.Double),
- (r"'(\\\\|\\'|[^'])*'", String.Single),
- (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
- (r'[A-Za-z_]\w*', Name),
- (r'[()]', Punctuation),
- (r'[?+*/&!~]', Operator),
- (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex),
- (r'([0-9]*)(\.\.)([0-9]*)',
- bygroups(Number.Integer, Operator, Number.Integer)),
- (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)),
- (r'\{', Punctuation, 'inline_module'),
- (r'\.', String.Regex),
- ],
- 'inline_module': [
- (r'\{', Other, 'ruby'),
- (r'\}', Punctuation, '#pop'),
- (r'[^{}]+', Other),
- ],
- 'ruby': [
- (r'\{', Other, '#push'),
- (r'\}', Other, '#pop'),
- (r'[^{}]+', Other),
- ],
- 'space': [
- (r'[ \t\n\r]+', Whitespace),
- (r'#[^\n]*', Comment.Single),
- ],
- 'end': [
- (r'end\b', Keyword, '#pop'),
- ],
- }
-
-
-class TreetopLexer(DelegatingLexer):
- """
- A lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
-
- .. versionadded:: 1.6
- """
-
- name = 'Treetop'
- aliases = ['treetop']
- filenames = ['*.treetop', '*.tt']
-
- def __init__(self, **options):
- super(TreetopLexer, self).__init__(RubyLexer, TreetopBaseLexer, **options)
-
-
-class EbnfLexer(RegexLexer):
- """
- Lexer for `ISO/IEC 14977 EBNF
- <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
- grammars.
-
- .. versionadded:: 2.0
- """
-
- name = 'EBNF'
- aliases = ['ebnf']
- filenames = ['*.ebnf']
- mimetypes = ['text/x-ebnf']
-
- tokens = {
- 'root': [
- include('whitespace'),
- include('comment_start'),
- include('identifier'),
- (r'=', Operator, 'production'),
- ],
- 'production': [
- include('whitespace'),
- include('comment_start'),
- include('identifier'),
- (r'"[^"]*"', String.Double),
- (r"'[^']*'", String.Single),
- (r'(\?[^?]*\?)', Name.Entity),
- (r'[\[\]{}(),|]', Punctuation),
- (r'-', Operator),
- (r';', Punctuation, '#pop'),
- (r'\.', Punctuation, '#pop'),
- ],
- 'whitespace': [
- (r'\s+', Text),
- ],
- 'comment_start': [
- (r'\(\*', Comment.Multiline, 'comment'),
- ],
- 'comment': [
- (r'[^*)]', Comment.Multiline),
- include('comment_start'),
- (r'\*\)', Comment.Multiline, '#pop'),
- (r'[*)]', Comment.Multiline),
- ],
- 'identifier': [
- (r'([a-zA-Z][\w \-]*)', Keyword),
- ],
- }
+ bygroups(Name.Variable, Whitespace, Punctuation, Whitespace,
+ Text, Whitespace, Punctuation)),
+ (r'\}', Punctuation, '#pop'),
+ ],
+ 'action': [
+ (r'(' + r'|'.join(( # keep host code in largest possible chunks
+ r'[^${}\'"/\\]+', # exclude unsafe characters
+
+ # strings and comments may safely contain unsafe characters
+ r'"(\\\\|\\"|[^"])*"', # double quote string
+ r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'//.*$\n?', # single line comment
+ r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+
+ # regular expression: There's no reason for it to start
+ # with a * and this stops confusion with comments.
+ r'/(?!\*)(\\\\|\\/|[^/])*/',
+
+ # backslashes are okay, as long as we are not backslashing a %
+ r'\\(?!%)',
+
+ # Now that we've handled regex and javadoc comments
+ # it's safe to let / through.
+ r'/',
+ )) + r')+', Other),
+ (r'(\\)(%)', bygroups(Punctuation, Other)),
+ (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
+ bygroups(Name.Variable, Punctuation, Name.Property)),
+ (r'\{', Punctuation, '#push'),
+ (r'\}', Punctuation, '#pop'),
+ ],
+ 'nested-arg-action': [
+ (r'(' + r'|'.join(( # keep host code in largest possible chunks.
+ r'[^$\[\]\'"/]+', # exclude unsafe characters
+
+ # strings and comments may safely contain unsafe characters
+ r'"(\\\\|\\"|[^"])*"', # double quote string
+ r"'(\\\\|\\'|[^'])*'", # single quote string
+ r'//.*$\n?', # single line comment
+ r'/\*(.|\n)*?\*/', # multi-line javadoc-style comment
+
+ # regular expression: There's no reason for it to start
+ # with a * and this stops confusion with comments.
+ r'/(?!\*)(\\\\|\\/|[^/])*/',
+
+ # Now that we've handled regex and javadoc comments
+ # it's safe to let / through.
+ r'/',
+ )) + r')+', Other),
+
+
+ (r'\[', Punctuation, '#push'),
+ (r'\]', Punctuation, '#pop'),
+ (r'(\$[a-zA-Z]+)(\.?)(text|value)?',
+ bygroups(Name.Variable, Punctuation, Name.Property)),
+ (r'(\\\\|\\\]|\\\[|[^\[\]])+', Other),
+ ]
+ }
+
+ def analyse_text(text):
+ return re.search(r'^\s*grammar\s+[a-zA-Z0-9]+\s*;', text, re.M)
+
+# http://www.antlr.org/wiki/display/ANTLR3/Code+Generation+Targets
+
+# TH: I'm not aware of any language features of C++ that will cause
+# incorrect lexing of C files. Antlr doesn't appear to make a distinction,
+# so just assume they're C++. No idea how to make Objective C work in the
+# future.
+
+# class AntlrCLexer(DelegatingLexer):
+# """
+# ANTLR with C Target
+#
+# .. versionadded:: 1.1
+# """
+#
+# name = 'ANTLR With C Target'
+# aliases = ['antlr-c']
+# filenames = ['*.G', '*.g']
+#
+# def __init__(self, **options):
+# super(AntlrCLexer, self).__init__(CLexer, AntlrLexer, **options)
+#
+# def analyse_text(text):
+# return re.match(r'^\s*language\s*=\s*C\s*;', text)
+
+
+class AntlrCppLexer(DelegatingLexer):
+ """
+ `ANTLR`_ with CPP Target
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'ANTLR With CPP Target'
+ aliases = ['antlr-cpp']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrCppLexer, self).__init__(CppLexer, AntlrLexer, **options)
+
+ def analyse_text(text):
+ return AntlrLexer.analyse_text(text) and \
+ re.search(r'^\s*language\s*=\s*C\s*;', text, re.M)
+
+
+class AntlrObjectiveCLexer(DelegatingLexer):
+ """
+ `ANTLR`_ with Objective-C Target
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'ANTLR With ObjectiveC Target'
+ aliases = ['antlr-objc']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrObjectiveCLexer, self).__init__(ObjectiveCLexer,
+ AntlrLexer, **options)
+
+ def analyse_text(text):
+ return AntlrLexer.analyse_text(text) and \
+ re.search(r'^\s*language\s*=\s*ObjC\s*;', text)
+
+
+class AntlrCSharpLexer(DelegatingLexer):
+ """
+ `ANTLR`_ with C# Target
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'ANTLR With C# Target'
+ aliases = ['antlr-csharp', 'antlr-c#']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrCSharpLexer, self).__init__(CSharpLexer, AntlrLexer,
+ **options)
+
+ def analyse_text(text):
+ return AntlrLexer.analyse_text(text) and \
+ re.search(r'^\s*language\s*=\s*CSharp2\s*;', text, re.M)
+
+
+class AntlrPythonLexer(DelegatingLexer):
+ """
+ `ANTLR`_ with Python Target
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'ANTLR With Python Target'
+ aliases = ['antlr-python']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrPythonLexer, self).__init__(PythonLexer, AntlrLexer,
+ **options)
+
+ def analyse_text(text):
+ return AntlrLexer.analyse_text(text) and \
+ re.search(r'^\s*language\s*=\s*Python\s*;', text, re.M)
+
+
+class AntlrJavaLexer(DelegatingLexer):
+ """
+ `ANTLR`_ with Java Target
+
+ .. versionadded:: 1.
+ """
+
+ name = 'ANTLR With Java Target'
+ aliases = ['antlr-java']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrJavaLexer, self).__init__(JavaLexer, AntlrLexer,
+ **options)
+
+ def analyse_text(text):
+ # Antlr language is Java by default
+ return AntlrLexer.analyse_text(text) and 0.9
+
+
+class AntlrRubyLexer(DelegatingLexer):
+ """
+ `ANTLR`_ with Ruby Target
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'ANTLR With Ruby Target'
+ aliases = ['antlr-ruby', 'antlr-rb']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrRubyLexer, self).__init__(RubyLexer, AntlrLexer,
+ **options)
+
+ def analyse_text(text):
+ return AntlrLexer.analyse_text(text) and \
+ re.search(r'^\s*language\s*=\s*Ruby\s*;', text, re.M)
+
+
+class AntlrPerlLexer(DelegatingLexer):
+ """
+ `ANTLR`_ with Perl Target
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'ANTLR With Perl Target'
+ aliases = ['antlr-perl']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ super(AntlrPerlLexer, self).__init__(PerlLexer, AntlrLexer,
+ **options)
+
+ def analyse_text(text):
+ return AntlrLexer.analyse_text(text) and \
+ re.search(r'^\s*language\s*=\s*Perl5\s*;', text, re.M)
+
+
+class AntlrActionScriptLexer(DelegatingLexer):
+ """
+ `ANTLR`_ with ActionScript Target
+
+ .. versionadded:: 1.1
+ """
+
+ name = 'ANTLR With ActionScript Target'
+ aliases = ['antlr-as', 'antlr-actionscript']
+ filenames = ['*.G', '*.g']
+
+ def __init__(self, **options):
+ from pygments.lexers.actionscript import ActionScriptLexer
+ super(AntlrActionScriptLexer, self).__init__(ActionScriptLexer,
+ AntlrLexer, **options)
+
+ def analyse_text(text):
+ return AntlrLexer.analyse_text(text) and \
+ re.search(r'^\s*language\s*=\s*ActionScript\s*;', text, re.M)
+
+
+class TreetopBaseLexer(RegexLexer):
+ """
+ A base lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
+ Not for direct use; use TreetopLexer instead.
+
+ .. versionadded:: 1.6
+ """
+
+ tokens = {
+ 'root': [
+ include('space'),
+ (r'require[ \t]+[^\n\r]+[\n\r]', Other),
+ (r'module\b', Keyword.Namespace, 'module'),
+ (r'grammar\b', Keyword, 'grammar'),
+ ],
+ 'module': [
+ include('space'),
+ include('end'),
+ (r'module\b', Keyword, '#push'),
+ (r'grammar\b', Keyword, 'grammar'),
+ (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Namespace),
+ ],
+ 'grammar': [
+ include('space'),
+ include('end'),
+ (r'rule\b', Keyword, 'rule'),
+ (r'include\b', Keyword, 'include'),
+ (r'[A-Z]\w*', Name),
+ ],
+ 'include': [
+ include('space'),
+ (r'[A-Z]\w*(?:::[A-Z]\w*)*', Name.Class, '#pop'),
+ ],
+ 'rule': [
+ include('space'),
+ include('end'),
+ (r'"(\\\\|\\"|[^"])*"', String.Double),
+ (r"'(\\\\|\\'|[^'])*'", String.Single),
+ (r'([A-Za-z_]\w*)(:)', bygroups(Name.Label, Punctuation)),
+ (r'[A-Za-z_]\w*', Name),
+ (r'[()]', Punctuation),
+ (r'[?+*/&!~]', Operator),
+ (r'\[(?:\\.|\[:\^?[a-z]+:\]|[^\\\]])+\]', String.Regex),
+ (r'([0-9]*)(\.\.)([0-9]*)',
+ bygroups(Number.Integer, Operator, Number.Integer)),
+ (r'(<)([^>]+)(>)', bygroups(Punctuation, Name.Class, Punctuation)),
+ (r'\{', Punctuation, 'inline_module'),
+ (r'\.', String.Regex),
+ ],
+ 'inline_module': [
+ (r'\{', Other, 'ruby'),
+ (r'\}', Punctuation, '#pop'),
+ (r'[^{}]+', Other),
+ ],
+ 'ruby': [
+ (r'\{', Other, '#push'),
+ (r'\}', Other, '#pop'),
+ (r'[^{}]+', Other),
+ ],
+ 'space': [
+ (r'[ \t\n\r]+', Whitespace),
+ (r'#[^\n]*', Comment.Single),
+ ],
+ 'end': [
+ (r'end\b', Keyword, '#pop'),
+ ],
+ }
+
+
+class TreetopLexer(DelegatingLexer):
+ """
+ A lexer for `Treetop <http://treetop.rubyforge.org/>`_ grammars.
+
+ .. versionadded:: 1.6
+ """
+
+ name = 'Treetop'
+ aliases = ['treetop']
+ filenames = ['*.treetop', '*.tt']
+
+ def __init__(self, **options):
+ super(TreetopLexer, self).__init__(RubyLexer, TreetopBaseLexer, **options)
+
+
+class EbnfLexer(RegexLexer):
+ """
+ Lexer for `ISO/IEC 14977 EBNF
+ <http://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_Form>`_
+ grammars.
+
+ .. versionadded:: 2.0
+ """
+
+ name = 'EBNF'
+ aliases = ['ebnf']
+ filenames = ['*.ebnf']
+ mimetypes = ['text/x-ebnf']
+
+ tokens = {
+ 'root': [
+ include('whitespace'),
+ include('comment_start'),
+ include('identifier'),
+ (r'=', Operator, 'production'),
+ ],
+ 'production': [
+ include('whitespace'),
+ include('comment_start'),
+ include('identifier'),
+ (r'"[^"]*"', String.Double),
+ (r"'[^']*'", String.Single),
+ (r'(\?[^?]*\?)', Name.Entity),
+ (r'[\[\]{}(),|]', Punctuation),
+ (r'-', Operator),
+ (r';', Punctuation, '#pop'),
+ (r'\.', Punctuation, '#pop'),
+ ],
+ 'whitespace': [
+ (r'\s+', Text),
+ ],
+ 'comment_start': [
+ (r'\(\*', Comment.Multiline, 'comment'),
+ ],
+ 'comment': [
+ (r'[^*)]', Comment.Multiline),
+ include('comment_start'),
+ (r'\*\)', Comment.Multiline, '#pop'),
+ (r'[*)]', Comment.Multiline),
+ ],
+ 'identifier': [
+ (r'([a-zA-Z][\w \-]*)', Keyword),
+ ],
+ }