diff options
author | ilezhankin <ilezhankin@yandex-team.ru> | 2022-02-10 16:45:55 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:55 +0300 |
commit | 1d125034f06575234f83f24f08677955133f140e (patch) | |
tree | ec05fbbd61dc118d5de37f206ab978cff58774bd /contrib/python/Pygments/py2/pygments/lexers/erlang.py | |
parent | 3a7a498715ef1b66f5054455421b845e45e3a653 (diff) | |
download | ydb-1d125034f06575234f83f24f08677955133f140e.tar.gz |
Restoring authorship annotation for <ilezhankin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/python/Pygments/py2/pygments/lexers/erlang.py')
-rw-r--r-- | contrib/python/Pygments/py2/pygments/lexers/erlang.py | 1004 |
1 files changed, 502 insertions, 502 deletions
diff --git a/contrib/python/Pygments/py2/pygments/lexers/erlang.py b/contrib/python/Pygments/py2/pygments/lexers/erlang.py index 07a46c80ff..c022c35167 100644 --- a/contrib/python/Pygments/py2/pygments/lexers/erlang.py +++ b/contrib/python/Pygments/py2/pygments/lexers/erlang.py @@ -1,121 +1,121 @@ -# -*- coding: utf-8 -*- -""" - pygments.lexers.erlang - ~~~~~~~~~~~~~~~~~~~~~~ - - Lexers for Erlang. - +# -*- coding: utf-8 -*- +""" + pygments.lexers.erlang + ~~~~~~~~~~~~~~~~~~~~~~ + + Lexers for Erlang. + :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS. - :license: BSD, see LICENSE for details. -""" - -import re - -from pygments.lexer import Lexer, RegexLexer, bygroups, words, do_insertions, \ - include, default -from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ - Number, Punctuation, Generic - -__all__ = ['ErlangLexer', 'ErlangShellLexer', 'ElixirConsoleLexer', - 'ElixirLexer'] - - -line_re = re.compile('.*?\n') - - -class ErlangLexer(RegexLexer): - """ - For the Erlang functional programming language. - - Blame Jeremy Thurgood (http://jerith.za.net/). - - .. versionadded:: 0.9 - """ - - name = 'Erlang' - aliases = ['erlang'] - filenames = ['*.erl', '*.hrl', '*.es', '*.escript'] - mimetypes = ['text/x-erlang'] - - keywords = ( - 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', - 'let', 'of', 'query', 'receive', 'try', 'when', - ) - - builtins = ( # See erlang(3) man page - 'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list', - 'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions', - 'byte_size', 'cancel_timer', 'check_process_code', 'delete_module', - 'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit', - 'float', 'float_to_list', 'fun_info', 'fun_to_list', - 'function_exported', 'garbage_collect', 'get', 'get_keys', - 'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary', - 'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean', - 'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list', - 'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record', - 'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom', - 'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom', - 'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple', - 'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5', - 'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor', - 'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2', - 'pid_to_list', 'port_close', 'port_command', 'port_connect', - 'port_control', 'port_call', 'port_info', 'port_to_list', - 'process_display', 'process_flag', 'process_info', 'purge_module', - 'put', 'read_timer', 'ref_to_list', 'register', 'resume_process', - 'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie', - 'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor', - 'spawn_opt', 'split_binary', 'start_timer', 'statistics', - 'suspend_process', 'system_flag', 'system_info', 'system_monitor', - 'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered', - 'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list', - 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis' - ) - - operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)' - word_operators = ( - 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor', - 'div', 'not', 'or', 'orelse', 'rem', 'xor' - ) - - atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')" - - variable_re = r'(?:[A-Z_]\w*)' - + :license: BSD, see LICENSE for details. +""" + +import re + +from pygments.lexer import Lexer, RegexLexer, bygroups, words, do_insertions, \ + include, default +from pygments.token import Text, Comment, Operator, Keyword, Name, String, \ + Number, Punctuation, Generic + +__all__ = ['ErlangLexer', 'ErlangShellLexer', 'ElixirConsoleLexer', + 'ElixirLexer'] + + +line_re = re.compile('.*?\n') + + +class ErlangLexer(RegexLexer): + """ + For the Erlang functional programming language. + + Blame Jeremy Thurgood (http://jerith.za.net/). + + .. versionadded:: 0.9 + """ + + name = 'Erlang' + aliases = ['erlang'] + filenames = ['*.erl', '*.hrl', '*.es', '*.escript'] + mimetypes = ['text/x-erlang'] + + keywords = ( + 'after', 'begin', 'case', 'catch', 'cond', 'end', 'fun', 'if', + 'let', 'of', 'query', 'receive', 'try', 'when', + ) + + builtins = ( # See erlang(3) man page + 'abs', 'append_element', 'apply', 'atom_to_list', 'binary_to_list', + 'bitstring_to_list', 'binary_to_term', 'bit_size', 'bump_reductions', + 'byte_size', 'cancel_timer', 'check_process_code', 'delete_module', + 'demonitor', 'disconnect_node', 'display', 'element', 'erase', 'exit', + 'float', 'float_to_list', 'fun_info', 'fun_to_list', + 'function_exported', 'garbage_collect', 'get', 'get_keys', + 'group_leader', 'hash', 'hd', 'integer_to_list', 'iolist_to_binary', + 'iolist_size', 'is_atom', 'is_binary', 'is_bitstring', 'is_boolean', + 'is_builtin', 'is_float', 'is_function', 'is_integer', 'is_list', + 'is_number', 'is_pid', 'is_port', 'is_process_alive', 'is_record', + 'is_reference', 'is_tuple', 'length', 'link', 'list_to_atom', + 'list_to_binary', 'list_to_bitstring', 'list_to_existing_atom', + 'list_to_float', 'list_to_integer', 'list_to_pid', 'list_to_tuple', + 'load_module', 'localtime_to_universaltime', 'make_tuple', 'md5', + 'md5_final', 'md5_update', 'memory', 'module_loaded', 'monitor', + 'monitor_node', 'node', 'nodes', 'open_port', 'phash', 'phash2', + 'pid_to_list', 'port_close', 'port_command', 'port_connect', + 'port_control', 'port_call', 'port_info', 'port_to_list', + 'process_display', 'process_flag', 'process_info', 'purge_module', + 'put', 'read_timer', 'ref_to_list', 'register', 'resume_process', + 'round', 'send', 'send_after', 'send_nosuspend', 'set_cookie', + 'setelement', 'size', 'spawn', 'spawn_link', 'spawn_monitor', + 'spawn_opt', 'split_binary', 'start_timer', 'statistics', + 'suspend_process', 'system_flag', 'system_info', 'system_monitor', + 'system_profile', 'term_to_binary', 'tl', 'trace', 'trace_delivered', + 'trace_info', 'trace_pattern', 'trunc', 'tuple_size', 'tuple_to_list', + 'universaltime_to_localtime', 'unlink', 'unregister', 'whereis' + ) + + operators = r'(\+\+?|--?|\*|/|<|>|/=|=:=|=/=|=<|>=|==?|<-|!|\?)' + word_operators = ( + 'and', 'andalso', 'band', 'bnot', 'bor', 'bsl', 'bsr', 'bxor', + 'div', 'not', 'or', 'orelse', 'rem', 'xor' + ) + + atom_re = r"(?:[a-z]\w*|'[^\n']*[^\\]')" + + variable_re = r'(?:[A-Z_]\w*)' + esc_char_re = r'[bdefnrstv\'"\\]' esc_octal_re = r'[0-7][0-7]?[0-7]?' esc_hex_re = r'(?:x[0-9a-fA-F]{2}|x\{[0-9a-fA-F]+\})' esc_ctrl_re = r'\^[a-zA-Z]' escape_re = r'(?:\\(?:'+esc_char_re+r'|'+esc_octal_re+r'|'+esc_hex_re+r'|'+esc_ctrl_re+r'))' - - macro_re = r'(?:'+variable_re+r'|'+atom_re+r')' - - base_re = r'(?:[2-9]|[12][0-9]|3[0-6])' - - tokens = { - 'root': [ - (r'\s+', Text), - (r'%.*\n', Comment), - (words(keywords, suffix=r'\b'), Keyword), - (words(builtins, suffix=r'\b'), Name.Builtin), - (words(word_operators, suffix=r'\b'), Operator.Word), - (r'^-', Punctuation, 'directive'), - (operators, Operator), - (r'"', String, 'string'), - (r'<<', Name.Label), - (r'>>', Name.Label), - ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)), - ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()', - bygroups(Name.Function, Text, Punctuation)), - (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer), - (r'[+-]?\d+', Number.Integer), - (r'[+-]?\d+.\d+', Number.Float), - (r'[]\[:_@\".{}()|;,]', Punctuation), - (variable_re, Name.Variable), - (atom_re, Name), - (r'\?'+macro_re, Name.Constant), - (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char), - (r'#'+atom_re+r'(:?\.'+atom_re+r')?', Name.Label), + + macro_re = r'(?:'+variable_re+r'|'+atom_re+r')' + + base_re = r'(?:[2-9]|[12][0-9]|3[0-6])' + + tokens = { + 'root': [ + (r'\s+', Text), + (r'%.*\n', Comment), + (words(keywords, suffix=r'\b'), Keyword), + (words(builtins, suffix=r'\b'), Name.Builtin), + (words(word_operators, suffix=r'\b'), Operator.Word), + (r'^-', Punctuation, 'directive'), + (operators, Operator), + (r'"', String, 'string'), + (r'<<', Name.Label), + (r'>>', Name.Label), + ('(' + atom_re + ')(:)', bygroups(Name.Namespace, Punctuation)), + ('(?:^|(?<=:))(' + atom_re + r')(\s*)(\()', + bygroups(Name.Function, Text, Punctuation)), + (r'[+-]?' + base_re + r'#[0-9a-zA-Z]+', Number.Integer), + (r'[+-]?\d+', Number.Integer), + (r'[+-]?\d+.\d+', Number.Float), + (r'[]\[:_@\".{}()|;,]', Punctuation), + (variable_re, Name.Variable), + (atom_re, Name), + (r'\?'+macro_re, Name.Constant), + (r'\$(?:'+escape_re+r'|\\[ %]|[^\\])', String.Char), + (r'#'+atom_re+r'(:?\.'+atom_re+r')?', Name.Label), # Erlang script shebang (r'\A#!.+\n', Comment.Hashbang), @@ -123,21 +123,21 @@ class ErlangLexer(RegexLexer): # EEP 43: Maps # http://www.erlang.org/eeps/eep-0043.html (r'#\{', Punctuation, 'map_key'), - ], - 'string': [ - (escape_re, String.Escape), - (r'"', String, '#pop'), + ], + 'string': [ + (escape_re, String.Escape), + (r'"', String, '#pop'), (r'~[0-9.*]*[~#+BPWXb-ginpswx]', String.Interpol), - (r'[^"\\~]+', String), - (r'~', String), - ], - 'directive': [ - (r'(define)(\s*)(\()('+macro_re+r')', - bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'), - (r'(record)(\s*)(\()('+macro_re+r')', - bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'), - (atom_re, Name.Entity, '#pop'), - ], + (r'[^"\\~]+', String), + (r'~', String), + ], + 'directive': [ + (r'(define)(\s*)(\()('+macro_re+r')', + bygroups(Name.Entity, Text, Punctuation, Name.Constant), '#pop'), + (r'(record)(\s*)(\()('+macro_re+r')', + bygroups(Name.Entity, Text, Punctuation, Name.Label), '#pop'), + (atom_re, Name.Entity, '#pop'), + ], 'map_key': [ include('root'), (r'=>', Punctuation, 'map_val'), @@ -149,385 +149,385 @@ class ErlangLexer(RegexLexer): (r',', Punctuation, '#pop'), (r'(?=\})', Punctuation, '#pop'), ], - } - - -class ErlangShellLexer(Lexer): - """ - Shell sessions in erl (for Erlang code). - - .. versionadded:: 1.1 - """ - name = 'Erlang erl session' - aliases = ['erl'] - filenames = ['*.erl-sh'] - mimetypes = ['text/x-erl-shellsession'] - + } + + +class ErlangShellLexer(Lexer): + """ + Shell sessions in erl (for Erlang code). + + .. versionadded:: 1.1 + """ + name = 'Erlang erl session' + aliases = ['erl'] + filenames = ['*.erl-sh'] + mimetypes = ['text/x-erl-shellsession'] + _prompt_re = re.compile(r'(?:\([\w@_.]+\))?\d+>(?=\s|\Z)') - - def get_tokens_unprocessed(self, text): - erlexer = ErlangLexer(**self.options) - - curcode = '' - insertions = [] - for match in line_re.finditer(text): - line = match.group() - m = self._prompt_re.match(line) - if m is not None: - end = m.end() - insertions.append((len(curcode), - [(0, Generic.Prompt, line[:end])])) - curcode += line[end:] - else: - if curcode: - for item in do_insertions(insertions, - erlexer.get_tokens_unprocessed(curcode)): - yield item - curcode = '' - insertions = [] - if line.startswith('*'): - yield match.start(), Generic.Traceback, line - else: - yield match.start(), Generic.Output, line - if curcode: - for item in do_insertions(insertions, - erlexer.get_tokens_unprocessed(curcode)): - yield item - - -def gen_elixir_string_rules(name, symbol, token): - states = {} - states['string_' + name] = [ - (r'[^#%s\\]+' % (symbol,), token), - include('escapes'), - (r'\\.', token), - (r'(%s)' % (symbol,), bygroups(token), "#pop"), - include('interpol') - ] - return states - - -def gen_elixir_sigstr_rules(term, token, interpol=True): - if interpol: - return [ - (r'[^#%s\\]+' % (term,), token), - include('escapes'), - (r'\\.', token), - (r'%s[a-zA-Z]*' % (term,), token, '#pop'), - include('interpol') - ] - else: - return [ - (r'[^%s\\]+' % (term,), token), - (r'\\.', token), - (r'%s[a-zA-Z]*' % (term,), token, '#pop'), - ] - - -class ElixirLexer(RegexLexer): - """ - For the `Elixir language <http://elixir-lang.org>`_. - - .. versionadded:: 1.5 - """ - - name = 'Elixir' - aliases = ['elixir', 'ex', 'exs'] - filenames = ['*.ex', '*.exs'] - mimetypes = ['text/x-elixir'] - - KEYWORD = ('fn', 'do', 'end', 'after', 'else', 'rescue', 'catch') - KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in') - BUILTIN = ( - 'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise', + + def get_tokens_unprocessed(self, text): + erlexer = ErlangLexer(**self.options) + + curcode = '' + insertions = [] + for match in line_re.finditer(text): + line = match.group() + m = self._prompt_re.match(line) + if m is not None: + end = m.end() + insertions.append((len(curcode), + [(0, Generic.Prompt, line[:end])])) + curcode += line[end:] + else: + if curcode: + for item in do_insertions(insertions, + erlexer.get_tokens_unprocessed(curcode)): + yield item + curcode = '' + insertions = [] + if line.startswith('*'): + yield match.start(), Generic.Traceback, line + else: + yield match.start(), Generic.Output, line + if curcode: + for item in do_insertions(insertions, + erlexer.get_tokens_unprocessed(curcode)): + yield item + + +def gen_elixir_string_rules(name, symbol, token): + states = {} + states['string_' + name] = [ + (r'[^#%s\\]+' % (symbol,), token), + include('escapes'), + (r'\\.', token), + (r'(%s)' % (symbol,), bygroups(token), "#pop"), + include('interpol') + ] + return states + + +def gen_elixir_sigstr_rules(term, token, interpol=True): + if interpol: + return [ + (r'[^#%s\\]+' % (term,), token), + include('escapes'), + (r'\\.', token), + (r'%s[a-zA-Z]*' % (term,), token, '#pop'), + include('interpol') + ] + else: + return [ + (r'[^%s\\]+' % (term,), token), + (r'\\.', token), + (r'%s[a-zA-Z]*' % (term,), token, '#pop'), + ] + + +class ElixirLexer(RegexLexer): + """ + For the `Elixir language <http://elixir-lang.org>`_. + + .. versionadded:: 1.5 + """ + + name = 'Elixir' + aliases = ['elixir', 'ex', 'exs'] + filenames = ['*.ex', '*.exs'] + mimetypes = ['text/x-elixir'] + + KEYWORD = ('fn', 'do', 'end', 'after', 'else', 'rescue', 'catch') + KEYWORD_OPERATOR = ('not', 'and', 'or', 'when', 'in') + BUILTIN = ( + 'case', 'cond', 'for', 'if', 'unless', 'try', 'receive', 'raise', 'quote', 'unquote', 'unquote_splicing', 'throw', 'super', - ) - BUILTIN_DECLARATION = ( - 'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop', + ) + BUILTIN_DECLARATION = ( + 'def', 'defp', 'defmodule', 'defprotocol', 'defmacro', 'defmacrop', 'defdelegate', 'defexception', 'defstruct', 'defimpl', 'defcallback', - ) - - BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias') - CONSTANT = ('nil', 'true', 'false') - - PSEUDO_VAR = ('_', '__MODULE__', '__DIR__', '__ENV__', '__CALLER__') - - OPERATORS3 = ( - '<<<', '>>>', '|||', '&&&', '^^^', '~~~', '===', '!==', - '~>>', '<~>', '|~>', '<|>', - ) - OPERATORS2 = ( - '==', '!=', '<=', '>=', '&&', '||', '<>', '++', '--', '|>', '=~', - '->', '<-', '|', '.', '=', '~>', '<~', - ) - OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&') - - PUNCTUATION = ( + ) + + BUILTIN_NAMESPACE = ('import', 'require', 'use', 'alias') + CONSTANT = ('nil', 'true', 'false') + + PSEUDO_VAR = ('_', '__MODULE__', '__DIR__', '__ENV__', '__CALLER__') + + OPERATORS3 = ( + '<<<', '>>>', '|||', '&&&', '^^^', '~~~', '===', '!==', + '~>>', '<~>', '|~>', '<|>', + ) + OPERATORS2 = ( + '==', '!=', '<=', '>=', '&&', '||', '<>', '++', '--', '|>', '=~', + '->', '<-', '|', '.', '=', '~>', '<~', + ) + OPERATORS1 = ('<', '>', '+', '-', '*', '/', '!', '^', '&') + + PUNCTUATION = ( '\\\\', '<<', '>>', '=>', '(', ')', ':', ';', ',', '[', ']', - ) - - def get_tokens_unprocessed(self, text): - for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): - if token is Name: - if value in self.KEYWORD: - yield index, Keyword, value - elif value in self.KEYWORD_OPERATOR: - yield index, Operator.Word, value - elif value in self.BUILTIN: - yield index, Keyword, value - elif value in self.BUILTIN_DECLARATION: - yield index, Keyword.Declaration, value - elif value in self.BUILTIN_NAMESPACE: - yield index, Keyword.Namespace, value - elif value in self.CONSTANT: - yield index, Name.Constant, value - elif value in self.PSEUDO_VAR: - yield index, Name.Builtin.Pseudo, value - else: - yield index, token, value - else: - yield index, token, value - - def gen_elixir_sigil_rules(): - # all valid sigil terminators (excluding heredocs) - terminators = [ - (r'\{', r'\}', 'cb'), - (r'\[', r'\]', 'sb'), - (r'\(', r'\)', 'pa'), - (r'<', r'>', 'ab'), - (r'/', r'/', 'slas'), - (r'\|', r'\|', 'pipe'), - ('"', '"', 'quot'), - ("'", "'", 'apos'), - ] - - # heredocs have slightly different rules - triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')] - - token = String.Other - states = {'sigils': []} - - for term, name in triquotes: - states['sigils'] += [ - (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc), - (name + '-end', name + '-intp')), - (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc), - (name + '-end', name + '-no-intp')), - ] - - states[name + '-end'] = [ - (r'[a-zA-Z]+', token, '#pop'), - default('#pop'), - ] - states[name + '-intp'] = [ - (r'^\s*' + term, String.Heredoc, '#pop'), - include('heredoc_interpol'), - ] - states[name + '-no-intp'] = [ - (r'^\s*' + term, String.Heredoc, '#pop'), - include('heredoc_no_interpol'), - ] - - for lterm, rterm, name in terminators: - states['sigils'] += [ - (r'~[a-z]' + lterm, token, name + '-intp'), - (r'~[A-Z]' + lterm, token, name + '-no-intp'), - ] - states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token) - states[name + '-no-intp'] = \ - gen_elixir_sigstr_rules(rterm, token, interpol=False) - - return states - - op3_re = "|".join(re.escape(s) for s in OPERATORS3) - op2_re = "|".join(re.escape(s) for s in OPERATORS2) - op1_re = "|".join(re.escape(s) for s in OPERATORS1) - ops_re = r'(?:%s|%s|%s)' % (op3_re, op2_re, op1_re) - punctuation_re = "|".join(re.escape(s) for s in PUNCTUATION) + ) + + def get_tokens_unprocessed(self, text): + for index, token, value in RegexLexer.get_tokens_unprocessed(self, text): + if token is Name: + if value in self.KEYWORD: + yield index, Keyword, value + elif value in self.KEYWORD_OPERATOR: + yield index, Operator.Word, value + elif value in self.BUILTIN: + yield index, Keyword, value + elif value in self.BUILTIN_DECLARATION: + yield index, Keyword.Declaration, value + elif value in self.BUILTIN_NAMESPACE: + yield index, Keyword.Namespace, value + elif value in self.CONSTANT: + yield index, Name.Constant, value + elif value in self.PSEUDO_VAR: + yield index, Name.Builtin.Pseudo, value + else: + yield index, token, value + else: + yield index, token, value + + def gen_elixir_sigil_rules(): + # all valid sigil terminators (excluding heredocs) + terminators = [ + (r'\{', r'\}', 'cb'), + (r'\[', r'\]', 'sb'), + (r'\(', r'\)', 'pa'), + (r'<', r'>', 'ab'), + (r'/', r'/', 'slas'), + (r'\|', r'\|', 'pipe'), + ('"', '"', 'quot'), + ("'", "'", 'apos'), + ] + + # heredocs have slightly different rules + triquotes = [(r'"""', 'triquot'), (r"'''", 'triapos')] + + token = String.Other + states = {'sigils': []} + + for term, name in triquotes: + states['sigils'] += [ + (r'(~[a-z])(%s)' % (term,), bygroups(token, String.Heredoc), + (name + '-end', name + '-intp')), + (r'(~[A-Z])(%s)' % (term,), bygroups(token, String.Heredoc), + (name + '-end', name + '-no-intp')), + ] + + states[name + '-end'] = [ + (r'[a-zA-Z]+', token, '#pop'), + default('#pop'), + ] + states[name + '-intp'] = [ + (r'^\s*' + term, String.Heredoc, '#pop'), + include('heredoc_interpol'), + ] + states[name + '-no-intp'] = [ + (r'^\s*' + term, String.Heredoc, '#pop'), + include('heredoc_no_interpol'), + ] + + for lterm, rterm, name in terminators: + states['sigils'] += [ + (r'~[a-z]' + lterm, token, name + '-intp'), + (r'~[A-Z]' + lterm, token, name + '-no-intp'), + ] + states[name + '-intp'] = gen_elixir_sigstr_rules(rterm, token) + states[name + '-no-intp'] = \ + gen_elixir_sigstr_rules(rterm, token, interpol=False) + + return states + + op3_re = "|".join(re.escape(s) for s in OPERATORS3) + op2_re = "|".join(re.escape(s) for s in OPERATORS2) + op1_re = "|".join(re.escape(s) for s in OPERATORS1) + ops_re = r'(?:%s|%s|%s)' % (op3_re, op2_re, op1_re) + punctuation_re = "|".join(re.escape(s) for s in PUNCTUATION) alnum = r'\w' - name_re = r'(?:\.\.\.|[a-z_]%s*[!?]?)' % alnum - modname_re = r'[A-Z]%(alnum)s*(?:\.[A-Z]%(alnum)s*)*' % {'alnum': alnum} - complex_name_re = r'(?:%s|%s|%s)' % (name_re, modname_re, ops_re) - special_atom_re = r'(?:\.\.\.|<<>>|%\{\}|%|\{\})' - - long_hex_char_re = r'(\\x\{)([\da-fA-F]+)(\})' - hex_char_re = r'(\\x[\da-fA-F]{1,2})' - escape_char_re = r'(\\[abdefnrstv])' - - tokens = { - 'root': [ - (r'\s+', Text), - (r'#.*$', Comment.Single), - - # Various kinds of characters - (r'(\?)' + long_hex_char_re, - bygroups(String.Char, - String.Escape, Number.Hex, String.Escape)), - (r'(\?)' + hex_char_re, - bygroups(String.Char, String.Escape)), - (r'(\?)' + escape_char_re, - bygroups(String.Char, String.Escape)), - (r'\?\\?.', String.Char), - - # '::' has to go before atoms - (r':::', String.Symbol), - (r'::', Operator), - - # atoms - (r':' + special_atom_re, String.Symbol), - (r':' + complex_name_re, String.Symbol), - (r':"', String.Symbol, 'string_double_atom'), - (r":'", String.Symbol, 'string_single_atom'), - - # [keywords: ...] - (r'(%s|%s)(:)(?=\s|\n)' % (special_atom_re, complex_name_re), - bygroups(String.Symbol, Punctuation)), - - # @attributes - (r'@' + name_re, Name.Attribute), - - # identifiers - (name_re, Name), - (r'(%%?)(%s)' % (modname_re,), bygroups(Punctuation, Name.Class)), - - # operators and punctuation - (op3_re, Operator), - (op2_re, Operator), - (punctuation_re, Punctuation), - (r'&\d', Name.Entity), # anon func arguments - (op1_re, Operator), - - # numbers - (r'0b[01]+', Number.Bin), - (r'0o[0-7]+', Number.Oct), - (r'0x[\da-fA-F]+', Number.Hex), - (r'\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?', Number.Float), - (r'\d(_?\d)*', Number.Integer), - - # strings and heredocs - (r'"""\s*', String.Heredoc, 'heredoc_double'), - (r"'''\s*$", String.Heredoc, 'heredoc_single'), - (r'"', String.Double, 'string_double'), - (r"'", String.Single, 'string_single'), - - include('sigils'), - - (r'%\{', Punctuation, 'map_key'), - (r'\{', Punctuation, 'tuple'), - ], - 'heredoc_double': [ - (r'^\s*"""', String.Heredoc, '#pop'), - include('heredoc_interpol'), - ], - 'heredoc_single': [ - (r"^\s*'''", String.Heredoc, '#pop'), - include('heredoc_interpol'), - ], - 'heredoc_interpol': [ - (r'[^#\\\n]+', String.Heredoc), - include('escapes'), - (r'\\.', String.Heredoc), - (r'\n+', String.Heredoc), - include('interpol'), - ], - 'heredoc_no_interpol': [ - (r'[^\\\n]+', String.Heredoc), - (r'\\.', String.Heredoc), - (r'\n+', String.Heredoc), - ], - 'escapes': [ - (long_hex_char_re, - bygroups(String.Escape, Number.Hex, String.Escape)), - (hex_char_re, String.Escape), - (escape_char_re, String.Escape), - ], - 'interpol': [ - (r'#\{', String.Interpol, 'interpol_string'), - ], - 'interpol_string': [ - (r'\}', String.Interpol, "#pop"), - include('root') - ], - 'map_key': [ - include('root'), - (r':', Punctuation, 'map_val'), - (r'=>', Punctuation, 'map_val'), - (r'\}', Punctuation, '#pop'), - ], - 'map_val': [ - include('root'), - (r',', Punctuation, '#pop'), - (r'(?=\})', Punctuation, '#pop'), - ], - 'tuple': [ - include('root'), - (r'\}', Punctuation, '#pop'), - ], - } - tokens.update(gen_elixir_string_rules('double', '"', String.Double)) - tokens.update(gen_elixir_string_rules('single', "'", String.Single)) - tokens.update(gen_elixir_string_rules('double_atom', '"', String.Symbol)) - tokens.update(gen_elixir_string_rules('single_atom', "'", String.Symbol)) - tokens.update(gen_elixir_sigil_rules()) - - -class ElixirConsoleLexer(Lexer): - """ - For Elixir interactive console (iex) output like: - - .. sourcecode:: iex - - iex> [head | tail] = [1,2,3] - [1,2,3] - iex> head - 1 - iex> tail - [2,3] - iex> [head | tail] - [1,2,3] - iex> length [head | tail] - 3 - - .. versionadded:: 1.5 - """ - - name = 'Elixir iex session' - aliases = ['iex'] - mimetypes = ['text/x-elixir-shellsession'] - + name_re = r'(?:\.\.\.|[a-z_]%s*[!?]?)' % alnum + modname_re = r'[A-Z]%(alnum)s*(?:\.[A-Z]%(alnum)s*)*' % {'alnum': alnum} + complex_name_re = r'(?:%s|%s|%s)' % (name_re, modname_re, ops_re) + special_atom_re = r'(?:\.\.\.|<<>>|%\{\}|%|\{\})' + + long_hex_char_re = r'(\\x\{)([\da-fA-F]+)(\})' + hex_char_re = r'(\\x[\da-fA-F]{1,2})' + escape_char_re = r'(\\[abdefnrstv])' + + tokens = { + 'root': [ + (r'\s+', Text), + (r'#.*$', Comment.Single), + + # Various kinds of characters + (r'(\?)' + long_hex_char_re, + bygroups(String.Char, + String.Escape, Number.Hex, String.Escape)), + (r'(\?)' + hex_char_re, + bygroups(String.Char, String.Escape)), + (r'(\?)' + escape_char_re, + bygroups(String.Char, String.Escape)), + (r'\?\\?.', String.Char), + + # '::' has to go before atoms + (r':::', String.Symbol), + (r'::', Operator), + + # atoms + (r':' + special_atom_re, String.Symbol), + (r':' + complex_name_re, String.Symbol), + (r':"', String.Symbol, 'string_double_atom'), + (r":'", String.Symbol, 'string_single_atom'), + + # [keywords: ...] + (r'(%s|%s)(:)(?=\s|\n)' % (special_atom_re, complex_name_re), + bygroups(String.Symbol, Punctuation)), + + # @attributes + (r'@' + name_re, Name.Attribute), + + # identifiers + (name_re, Name), + (r'(%%?)(%s)' % (modname_re,), bygroups(Punctuation, Name.Class)), + + # operators and punctuation + (op3_re, Operator), + (op2_re, Operator), + (punctuation_re, Punctuation), + (r'&\d', Name.Entity), # anon func arguments + (op1_re, Operator), + + # numbers + (r'0b[01]+', Number.Bin), + (r'0o[0-7]+', Number.Oct), + (r'0x[\da-fA-F]+', Number.Hex), + (r'\d(_?\d)*\.\d(_?\d)*([eE][-+]?\d(_?\d)*)?', Number.Float), + (r'\d(_?\d)*', Number.Integer), + + # strings and heredocs + (r'"""\s*', String.Heredoc, 'heredoc_double'), + (r"'''\s*$", String.Heredoc, 'heredoc_single'), + (r'"', String.Double, 'string_double'), + (r"'", String.Single, 'string_single'), + + include('sigils'), + + (r'%\{', Punctuation, 'map_key'), + (r'\{', Punctuation, 'tuple'), + ], + 'heredoc_double': [ + (r'^\s*"""', String.Heredoc, '#pop'), + include('heredoc_interpol'), + ], + 'heredoc_single': [ + (r"^\s*'''", String.Heredoc, '#pop'), + include('heredoc_interpol'), + ], + 'heredoc_interpol': [ + (r'[^#\\\n]+', String.Heredoc), + include('escapes'), + (r'\\.', String.Heredoc), + (r'\n+', String.Heredoc), + include('interpol'), + ], + 'heredoc_no_interpol': [ + (r'[^\\\n]+', String.Heredoc), + (r'\\.', String.Heredoc), + (r'\n+', String.Heredoc), + ], + 'escapes': [ + (long_hex_char_re, + bygroups(String.Escape, Number.Hex, String.Escape)), + (hex_char_re, String.Escape), + (escape_char_re, String.Escape), + ], + 'interpol': [ + (r'#\{', String.Interpol, 'interpol_string'), + ], + 'interpol_string': [ + (r'\}', String.Interpol, "#pop"), + include('root') + ], + 'map_key': [ + include('root'), + (r':', Punctuation, 'map_val'), + (r'=>', Punctuation, 'map_val'), + (r'\}', Punctuation, '#pop'), + ], + 'map_val': [ + include('root'), + (r',', Punctuation, '#pop'), + (r'(?=\})', Punctuation, '#pop'), + ], + 'tuple': [ + include('root'), + (r'\}', Punctuation, '#pop'), + ], + } + tokens.update(gen_elixir_string_rules('double', '"', String.Double)) + tokens.update(gen_elixir_string_rules('single', "'", String.Single)) + tokens.update(gen_elixir_string_rules('double_atom', '"', String.Symbol)) + tokens.update(gen_elixir_string_rules('single_atom', "'", String.Symbol)) + tokens.update(gen_elixir_sigil_rules()) + + +class ElixirConsoleLexer(Lexer): + """ + For Elixir interactive console (iex) output like: + + .. sourcecode:: iex + + iex> [head | tail] = [1,2,3] + [1,2,3] + iex> head + 1 + iex> tail + [2,3] + iex> [head | tail] + [1,2,3] + iex> length [head | tail] + 3 + + .. versionadded:: 1.5 + """ + + name = 'Elixir iex session' + aliases = ['iex'] + mimetypes = ['text/x-elixir-shellsession'] + _prompt_re = re.compile(r'(iex|\.{3})((?:\([\w@_.]+\))?\d+|\(\d+\))?> ') - - def get_tokens_unprocessed(self, text): - exlexer = ElixirLexer(**self.options) - - curcode = '' - in_error = False - insertions = [] - for match in line_re.finditer(text): - line = match.group() - if line.startswith(u'** '): - in_error = True - insertions.append((len(curcode), - [(0, Generic.Error, line[:-1])])) - curcode += line[-1:] - else: - m = self._prompt_re.match(line) - if m is not None: - in_error = False - end = m.end() - insertions.append((len(curcode), - [(0, Generic.Prompt, line[:end])])) - curcode += line[end:] - else: - if curcode: - for item in do_insertions( - insertions, exlexer.get_tokens_unprocessed(curcode)): - yield item - curcode = '' - insertions = [] - token = Generic.Error if in_error else Generic.Output - yield match.start(), token, line - if curcode: - for item in do_insertions( - insertions, exlexer.get_tokens_unprocessed(curcode)): - yield item + + def get_tokens_unprocessed(self, text): + exlexer = ElixirLexer(**self.options) + + curcode = '' + in_error = False + insertions = [] + for match in line_re.finditer(text): + line = match.group() + if line.startswith(u'** '): + in_error = True + insertions.append((len(curcode), + [(0, Generic.Error, line[:-1])])) + curcode += line[-1:] + else: + m = self._prompt_re.match(line) + if m is not None: + in_error = False + end = m.end() + insertions.append((len(curcode), + [(0, Generic.Prompt, line[:end])])) + curcode += line[end:] + else: + if curcode: + for item in do_insertions( + insertions, exlexer.get_tokens_unprocessed(curcode)): + yield item + curcode = '' + insertions = [] + token = Generic.Error if in_error else Generic.Output + yield match.start(), token, line + if curcode: + for item in do_insertions( + insertions, exlexer.get_tokens_unprocessed(curcode)): + yield item |