intermediate changes

ref:cde9a383711a11544ce7e107a78147fb96cc4029
author: Devtools Arcadia <[email protected]> 2022-02-07 18:08:42 +0300
committer: Devtools Arcadia <[email protected]> 2022-02-07 18:08:42 +0300
commit: 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree: e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/python/Pygments/py3/pygments/lexers/grammar_notation.py
1 files changed, 269 insertions, 0 deletions
diff --git a/contrib/python/Pygments/py3/pygments/lexers/grammar_notation.py b/contrib/python/Pygments/py3/pygments/lexers/grammar_notation.py
new file mode 100644
index 00000000000..ff57c999173
--- /dev/null
+++ b/contrib/python/Pygments/py3/pygments/lexers/grammar_notation.py
@@ -0,0 +1,269 @@
+"""
+    pygments.lexers.grammar_notation
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for grammar notations like BNF.
+
+    :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, bygroups, include, this, using, words
+from pygments.token import Comment, Keyword, Literal, Name, Number, \
+    Operator, Punctuation, String, Text, Whitespace
+
+__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']
+
+
+class BnfLexer(RegexLexer):
+    """
+    This lexer is for grammar notations which are similar to
+    original BNF.
+
+    In order to maximize the number of notations this lexer can target,
+    we made the following design decisions:
+
+    * We don't distinguish `Terminal Symbol`.
+
+    * We do assume that `NonTerminal Symbol` are always enclosed
+      with arrow brackets.
+
+    * We do assume that `NonTerminal Symbol` may include any printable
+      ASCII character, ASCII 0x20 (space) included, except arrow brackets.
+      This assumption is for `RBNF <http://www.rfc-base.org/txt/rfc-5511.txt>`_.
+
+    * We do assume that target notation doesn't support comment.
+
+    * We don't distinguish any operators and punctuation except
+      `::=`.
+
+    These decisions may result in rather minimal highlighting,
+    which might be disappointing, but the trade-off is reasonable for us.
+
+    .. versionadded:: 2.1
+    """
+
+    name = 'BNF'
+    aliases = ['bnf']
+    filenames = ['*.bnf']
+    mimetypes = ['text/x-bnf']
+
+    tokens = {
+        'root': [
+            (r'(<)([ -;=?-~]+)(>)',  # <name>: 0x20-0x7E except '<' and '>'
+             bygroups(Punctuation, Name.Class, Punctuation)),
+
+            # the only operator we recognize
+            (r'::=', Operator),
+
+            # fallback
+            (r'[^<>:]+', Text),  # for performance
+            (r'.', Text),  # a leftover '<', '>' or ':'
+        ],
+    }
+
+
+class AbnfLexer(RegexLexer):
+    """
+    Lexer for `IETF RFC 7405 ABNF
+    <http://www.ietf.org/rfc/rfc7405.txt>`_
+    (which updates `RFC 5234 <http://www.ietf.org/rfc/rfc5234.txt>`_)
+    grammars.
+
+    .. versionadded:: 2.1
+    """
+
+    name = 'ABNF'
+    aliases = ['abnf']
+    filenames = ['*.abnf']
+    mimetypes = ['text/x-abnf']
+
+    _core_rules = (  # highlighted as Keyword by the 'root' state below
+        'ALPHA', 'BIT', 'CHAR', 'CR', 'CRLF', 'CTL', 'DIGIT',
+        'DQUOTE', 'HEXDIG', 'HTAB', 'LF', 'LWSP', 'OCTET',
+        'SP', 'VCHAR', 'WSP')
+
+    tokens = {
+        'root': [
+            # comment: ';' and the rest of the line
+            (r';.*$', Comment.Single),
+
+            # quoted string, with an optional %s / %i prefix;
+            #   a double quote cannot appear inside it and is spelled '%x22'.
+            (r'(%[si])?"[^"]*"', Literal),
+
+            # binary (but I have never seen it used...)
+            (r'%b[01]+\-[01]+\b', Literal),  # range
+            (r'%b[01]+(\.[01]+)*\b', Literal),  # concat
+
+            # decimal
+            (r'%d[0-9]+\-[0-9]+\b', Literal),  # range
+            (r'%d[0-9]+(\.[0-9]+)*\b', Literal),  # concat
+
+            # hexadecimal
+            (r'%x[0-9a-fA-F]+\-[0-9a-fA-F]+\b', Literal),  # range
+            (r'%x[0-9a-fA-F]+(\.[0-9a-fA-F]+)*\b', Literal),  # concat
+
+            # repetition (<a>*<b>element), including the bare-count nRule form
+            (r'\b[0-9]+\*[0-9]+', Operator),
+            (r'\b[0-9]+\*', Operator),
+            (r'\b[0-9]+', Operator),
+            (r'\*', Operator),
+
+            # Strictly speaking, these are not keywords but
+            # are called `Core Rules'.
+            (words(_core_rules, suffix=r'\b'), Keyword),
+
+            # nonterminals (ALPHA *(ALPHA / DIGIT / "-"))
+            (r'[a-zA-Z][a-zA-Z0-9-]*\b', Name.Class),
+
+            # operators
+            (r'(=/|=|/)', Operator),
+
+            # punctuation
+            (r'[\[\]()]', Punctuation),
+
+            # fallback
+            (r'\s+', Whitespace),
+            (r'.', Text),
+        ],
+    }
+
+
+class JsgfLexer(RegexLexer):
+    """
+    For `JSpeech Grammar Format <https://www.w3.org/TR/jsgf/>`_
+    grammars.
+
+    .. versionadded:: 2.2
+    """
+    name = 'JSGF'
+    aliases = ['jsgf']
+    filenames = ['*.jsgf']
+    mimetypes = ['application/jsgf', 'application/x-jsgf', 'text/jsgf']
+
+    flags = re.MULTILINE | re.UNICODE  # '^' and '$' also match at line boundaries
+
+    tokens = {
+        'root': [
+            include('comments'),
+            include('non-comments'),
+        ],
+        'comments': [
+            (r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),  # '/**' but not the empty '/**/'
+            (r'/\*[\w\W]*?\*/', Comment.Multiline),  # block comment; non-greedy, may span lines
+            (r'//.*$', Comment.Single),
+        ],
+        'non-comments': [
+            (r'\A#JSGF[^;]*', Comment.Preproc),  # only at the very start of the lexed text, up to the first ';'
+            (r'\s+', Whitespace),
+            (r';', Punctuation),
+            (r'[=|()\[\]*+]', Operator),
+            (r'/[^/]+/', Number.Float),  # slash-delimited span, highlighted as a number
+            (r'"', String.Double, 'string'),
+            (r'\{', String.Other, 'tag'),
+            (words(('import', 'public'), suffix=r'\b'), Keyword.Reserved),
+            (r'grammar\b', Keyword.Reserved, 'grammar name'),
+            (r'(<)(NULL|VOID)(>)',
+             bygroups(Punctuation, Name.Builtin, Punctuation)),
+            (r'<', Punctuation, 'rulename'),
+            (r'\w+|[^\s;=|()\[\]*+/"{<\w]+', Text),  # a word, or a run of characters not handled above
+        ],
+        'string': [
+            (r'"', String.Double, '#pop'),
+            (r'\\.', String.Escape),
+            (r'[^\\"]+', String.Double),
+        ],
+        'tag': [
+            (r'\}', String.Other, '#pop'),
+            (r'\\.', String.Escape),
+            (r'[^\\}]+', String.Other),
+        ],
+        'grammar name': [
+            (r';', Punctuation, '#pop'),
+            (r'\s+', Whitespace),
+            (r'\.', Punctuation),
+            (r'[^;\s.]+', Name.Namespace),
+        ],
+        'rulename': [
+            (r'>', Punctuation, '#pop'),
+            (r'\*', Punctuation),
+            (r'\s+', Whitespace),
+            (r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text, Punctuation)),  # name segment followed by '.'
+            (r'[^.>]+', Name.Constant),  # segment not followed by '.'
+        ],
+        'documentation comment': [
+            (r'\*/', Comment.Multiline, '#pop'),
+            (r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
+             r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
+             bygroups(Whitespace,Comment.Multiline, Whitespace, Comment.Special,
+                      Whitespace, using(this, state='example'))),  # tag body (up to the next tag line or '*/') is re-lexed in 'example'
+            (r'(^\s*\*?\s*)(@\S*)',
+             bygroups(Comment.Multiline, Comment.Special)),
+            (r'[^*\n@]+|\w|\W', Comment.Multiline),  # other comment text; \w|\W takes a single '*', '@' or newline
+        ],
+        'example': [
+            (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),  # leading '*' of a continuation line
+            include('non-comments'),
+            (r'.', Comment.Multiline),  # whatever 'non-comments' did not match
+        ],
+    }
+
+
+class PegLexer(RegexLexer):
+    """
+    This lexer is for `Parsing Expression Grammars
+    <https://bford.info/pub/lang/peg.pdf>`_ (PEG).
+
+    Various implementations of PEG have made different decisions
+    regarding the syntax, so let's try to be accommodating:
+
+    * `<-`, `←`, `:`, and `=` are all accepted as rule operators.
+
+    * Both `|` and `/` are choice operators.
+
+    * `^`, `↑`, and `~` are cut operators.
+
+    * A single `a-z` character immediately before a string, or
+      multiple `a-z` characters following a string, are part of the
+      string (e.g., `r"..."` or `"..."ilmsuxa`).
+
+    .. versionadded:: 2.6
+    """
+
+    name = 'PEG'
+    aliases = ['peg']
+    filenames = ['*.peg']
+    mimetypes = ['text/x-peg']
+
+    tokens = {
+        'root': [
+            # Comments
+            (r'#.*$', Comment.Single),
+
+            # All operators
+            (r'<-|[←:=/|&!?*+^↑~]', Operator),
+
+            # Other punctuation
+            (r'[()]', Punctuation),
+
+            # Keywords
+            (r'\.', Keyword),
+
+            # Character classes; a backslash escapes the next character (e.g. `[\]]`)
+            (r'(\[)([^\]\\]*(?:\\.[^\]\\]*)*)(\])',
+             bygroups(Punctuation, String, Punctuation)),
+
+            # Single and double quoted strings (with optional modifiers)
+            (r'[a-z]?"[^"\\]*(?:\\.[^"\\]*)*"[a-z]*', String.Double),
+            (r"[a-z]?'[^'\\]*(?:\\.[^'\\]*)*'[a-z]*", String.Single),
+
+            # Nonterminals are not whitespace, operators, or punctuation
+            (r'[^\s<←:=/|&!?*+\^↑~()\[\]"\'#]+', Name.Class),
+
+            # Fallback
+            (r'.', Text),
+        ],
+    }
author	Devtools Arcadia <[email protected]>	2022-02-07 18:08:42 +0300
committer	Devtools Arcadia <[email protected]>	2022-02-07 18:08:42 +0300
commit	1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree	e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/python/Pygments/py3/pygments/lexers/grammar_notation.py