path: root/contrib/python/ipython/py2/IPython/lib/lexers.py
author    Mikhail Borisov <borisov.mikhail@gmail.com>  2022-02-10 16:45:39 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:45:39 +0300
commit    a6a92afe03e02795227d2641b49819b687f088f8 (patch)
tree      f6984a1d27d5a7ec88a6fdd6e20cd5b7693b6ece  /contrib/python/ipython/py2/IPython/lib/lexers.py
parent    c6dc8b8bd530985bc4cce0137e9a5de32f1087cb (diff)
Restoring authorship annotation for Mikhail Borisov <borisov.mikhail@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'contrib/python/ipython/py2/IPython/lib/lexers.py')
-rw-r--r--  contrib/python/ipython/py2/IPython/lib/lexers.py  1000
1 file changed, 500 insertions, 500 deletions
diff --git a/contrib/python/ipython/py2/IPython/lib/lexers.py b/contrib/python/ipython/py2/IPython/lib/lexers.py
index 9160ae1245..ec43856115 100644
--- a/contrib/python/ipython/py2/IPython/lib/lexers.py
+++ b/contrib/python/ipython/py2/IPython/lib/lexers.py
@@ -1,517 +1,517 @@
-# -*- coding: utf-8 -*-
-"""
-Defines a variety of Pygments lexers for highlighting IPython code.
-
-This includes:
-
- IPythonLexer, IPython3Lexer
- Lexers for pure IPython (python + magic/shell commands)
-
- IPythonPartialTracebackLexer, IPythonTracebackLexer
- Supports 2.x and 3.x via keyword `python3`. The partial traceback
- lexer reads everything but the Python code appearing in a traceback.
- The full lexer combines the partial lexer with an IPython lexer.
-
- IPythonConsoleLexer
- A lexer for IPython console sessions, with support for tracebacks.
-
- IPyLexer
- A friendly lexer which examines the first line of text and, from it,
- decides whether to use an IPython lexer or an IPython console lexer.
- This is probably the only lexer that needs to be explicitly added
- to Pygments.
-
-"""
-#-----------------------------------------------------------------------------
-# Copyright (c) 2013, the IPython Development Team.
-#
-# Distributed under the terms of the Modified BSD License.
-#
-# The full license is in the file COPYING.txt, distributed with this software.
-#-----------------------------------------------------------------------------
-
-# Standard library
-import re
-
-# Third party
+# -*- coding: utf-8 -*-
+"""
+Defines a variety of Pygments lexers for highlighting IPython code.
+
+This includes:
+
+ IPythonLexer, IPython3Lexer
+ Lexers for pure IPython (python + magic/shell commands)
+
+ IPythonPartialTracebackLexer, IPythonTracebackLexer
+ Supports 2.x and 3.x via keyword `python3`. The partial traceback
+ lexer reads everything but the Python code appearing in a traceback.
+ The full lexer combines the partial lexer with an IPython lexer.
+
+ IPythonConsoleLexer
+ A lexer for IPython console sessions, with support for tracebacks.
+
+ IPyLexer
+ A friendly lexer which examines the first line of text and, from it,
+ decides whether to use an IPython lexer or an IPython console lexer.
+ This is probably the only lexer that needs to be explicitly added
+ to Pygments.
+
+"""
+#-----------------------------------------------------------------------------
+# Copyright (c) 2013, the IPython Development Team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+#-----------------------------------------------------------------------------
+
+# Standard library
+import re
+
+# Third party
from pygments.lexers import BashLexer, Python3Lexer
try:
# PythonLexer was renamed to Python2Lexer in pygments 2.5
from pygments.lexers import Python2Lexer
except ImportError:
from pygments.lexers import PythonLexer as Python2Lexer
-from pygments.lexer import (
- Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
-)
-from pygments.token import (
+from pygments.lexer import (
+ Lexer, DelegatingLexer, RegexLexer, do_insertions, bygroups, using,
+)
+from pygments.token import (
Generic, Keyword, Literal, Name, Operator, Other, Text, Error,
-)
-from pygments.util import get_bool_opt
-
-# Local
-
-line_re = re.compile('.*?\n')
-
-__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
- 'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
- 'IPythonConsoleLexer', 'IPyLexer']
-
-ipython_tokens = [
- (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
- (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
- (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
- (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
- (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
- using(BashLexer), Text)),
- (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
- (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
- (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
- (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
- (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
-]
-
-def build_ipy_lexer(python3):
- """Builds IPython lexers depending on the value of `python3`.
-
- The lexer inherits from an appropriate Python lexer and then adds
- information about IPython specific keywords (i.e. magic commands,
- shell commands, etc.)
-
- Parameters
- ----------
- python3 : bool
- If `True`, then build an IPython lexer from a Python 3 lexer.
-
- """
- # It would be nice to have a single IPython lexer class which takes
- # a boolean `python3`. But since there are two Python lexer classes,
- # we will also have two IPython lexer classes.
- if python3:
- PyLexer = Python3Lexer
- name = 'IPython3'
- aliases = ['ipython3']
- doc = """IPython3 Lexer"""
- else:
+)
+from pygments.util import get_bool_opt
+
+# Local
+
+line_re = re.compile('.*?\n')
+
+__all__ = ['build_ipy_lexer', 'IPython3Lexer', 'IPythonLexer',
+ 'IPythonPartialTracebackLexer', 'IPythonTracebackLexer',
+ 'IPythonConsoleLexer', 'IPyLexer']
+
+ipython_tokens = [
+ (r"(?s)(\s*)(%%)(\w+)(.*)", bygroups(Text, Operator, Keyword, Text)),
+ (r'(?s)(^\s*)(%%!)([^\n]*\n)(.*)', bygroups(Text, Operator, Text, using(BashLexer))),
+ (r"(%%?)(\w+)(\?\??)$", bygroups(Operator, Keyword, Operator)),
+ (r"\b(\?\??)(\s*)$", bygroups(Operator, Text)),
+ (r'(%)(sx|sc|system)(.*)(\n)', bygroups(Operator, Keyword,
+ using(BashLexer), Text)),
+ (r'(%)(\w+)(.*\n)', bygroups(Operator, Keyword, Text)),
+ (r'^(!!)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
+ (r'(!)(?!=)(.+)(\n)', bygroups(Operator, using(BashLexer), Text)),
+ (r'^(\s*)(\?\??)(\s*%{0,2}[\w\.\*]*)', bygroups(Text, Operator, Text)),
+ (r'(\s*%{0,2}[\w\.\*]*)(\?\??)(\s*)$', bygroups(Text, Operator, Text)),
+]
+
+def build_ipy_lexer(python3):
+ """Builds IPython lexers depending on the value of `python3`.
+
+ The lexer inherits from an appropriate Python lexer and then adds
+ information about IPython specific keywords (i.e. magic commands,
+ shell commands, etc.)
+
+ Parameters
+ ----------
+ python3 : bool
+ If `True`, then build an IPython lexer from a Python 3 lexer.
+
+ """
+ # It would be nice to have a single IPython lexer class which takes
+ # a boolean `python3`. But since there are two Python lexer classes,
+ # we will also have two IPython lexer classes.
+ if python3:
+ PyLexer = Python3Lexer
+ name = 'IPython3'
+ aliases = ['ipython3']
+ doc = """IPython3 Lexer"""
+ else:
PyLexer = Python2Lexer
- name = 'IPython'
- aliases = ['ipython2', 'ipython']
- doc = """IPython Lexer"""
-
- tokens = PyLexer.tokens.copy()
- tokens['root'] = ipython_tokens + tokens['root']
-
- attrs = {'name': name, 'aliases': aliases, 'filenames': [],
- '__doc__': doc, 'tokens': tokens}
-
- return type(name, (PyLexer,), attrs)
-
-
-IPython3Lexer = build_ipy_lexer(python3=True)
-IPythonLexer = build_ipy_lexer(python3=False)
-
-
-class IPythonPartialTracebackLexer(RegexLexer):
- """
- Partial lexer for IPython tracebacks.
-
- Handles all the non-python output. This works for both Python 2.x and 3.x.
-
- """
- name = 'IPython Partial Traceback'
-
- tokens = {
- 'root': [
- # Tracebacks for syntax errors have a different style.
- # For both types of tracebacks, we mark the first line with
- # Generic.Traceback. For syntax errors, we mark the filename
- # as we mark the filenames for non-syntax tracebacks.
- #
- # These two regexps define how IPythonConsoleLexer finds a
- # traceback.
- #
- ## Non-syntax traceback
- (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
- ## Syntax traceback
- (r'^( File)(.*)(, line )(\d+\n)',
- bygroups(Generic.Traceback, Name.Namespace,
- Generic.Traceback, Literal.Number.Integer)),
-
- # (Exception Identifier)(Whitespace)(Traceback Message)
- (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
- bygroups(Name.Exception, Generic.Whitespace, Text)),
- # (Module/Filename)(Text)(Callee)(Function Signature)
- # Better options for callee and function signature?
- (r'(.*)( in )(.*)(\(.*\)\n)',
- bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
- # Regular line: (Whitespace)(Line Number)(Python Code)
- (r'(\s*?)(\d+)(.*?\n)',
- bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
- # Emphasized line: (Arrow)(Line Number)(Python Code)
- # Using Exception token so arrow color matches the Exception.
- (r'(-*>?\s?)(\d+)(.*?\n)',
- bygroups(Name.Exception, Literal.Number.Integer, Other)),
- # (Exception Identifier)(Message)
- (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
- bygroups(Name.Exception, Text)),
- # Tag everything else as Other, will be handled later.
- (r'.*\n', Other),
- ],
- }
-
-
-class IPythonTracebackLexer(DelegatingLexer):
- """
- IPython traceback lexer.
-
- For doctests, the tracebacks can be snipped as much as desired with the
- exception of the lines that designate a traceback. For non-syntax error
- tracebacks, this is the line of hyphens. For syntax error tracebacks,
- this is the line which lists the File and line number.
-
- """
- # The lexer inherits from DelegatingLexer. The "root" lexer is an
- # appropriate IPython lexer, which depends on the value of the boolean
- # `python3`. First, we parse with the partial IPython traceback lexer.
- # Then, any code marked with the "Other" token is delegated to the root
- # lexer.
- #
- name = 'IPython Traceback'
- aliases = ['ipythontb']
-
- def __init__(self, **options):
- self.python3 = get_bool_opt(options, 'python3', False)
- if self.python3:
- self.aliases = ['ipython3tb']
- else:
- self.aliases = ['ipython2tb', 'ipythontb']
-
- if self.python3:
- IPyLexer = IPython3Lexer
- else:
- IPyLexer = IPythonLexer
-
- DelegatingLexer.__init__(self, IPyLexer,
- IPythonPartialTracebackLexer, **options)
-
-class IPythonConsoleLexer(Lexer):
- """
- An IPython console lexer for IPython code-blocks and doctests, such as:
-
- .. code-block:: rst
-
- .. code-block:: ipythonconsole
-
- In [1]: a = 'foo'
-
- In [2]: a
- Out[2]: 'foo'
-
- In [3]: print a
- foo
-
- In [4]: 1 / 0
-
-
- Support is also provided for IPython exceptions:
-
- .. code-block:: rst
-
- .. code-block:: ipythonconsole
-
- In [1]: raise Exception
-
- ---------------------------------------------------------------------------
- Exception Traceback (most recent call last)
- <ipython-input-1-fca2ab0ca76b> in <module>()
- ----> 1 raise Exception
-
- Exception:
-
- """
- name = 'IPython console session'
- aliases = ['ipythonconsole']
- mimetypes = ['text/x-ipython-console']
-
- # The regexps used to determine what is input and what is output.
- # The default prompts for IPython are:
- #
+ name = 'IPython'
+ aliases = ['ipython2', 'ipython']
+ doc = """IPython Lexer"""
+
+ tokens = PyLexer.tokens.copy()
+ tokens['root'] = ipython_tokens + tokens['root']
+
+ attrs = {'name': name, 'aliases': aliases, 'filenames': [],
+ '__doc__': doc, 'tokens': tokens}
+
+ return type(name, (PyLexer,), attrs)
+
+
+IPython3Lexer = build_ipy_lexer(python3=True)
+IPythonLexer = build_ipy_lexer(python3=False)
+
+
+class IPythonPartialTracebackLexer(RegexLexer):
+ """
+ Partial lexer for IPython tracebacks.
+
+ Handles all the non-python output. This works for both Python 2.x and 3.x.
+
+ """
+ name = 'IPython Partial Traceback'
+
+ tokens = {
+ 'root': [
+ # Tracebacks for syntax errors have a different style.
+ # For both types of tracebacks, we mark the first line with
+ # Generic.Traceback. For syntax errors, we mark the filename
+ # as we mark the filenames for non-syntax tracebacks.
+ #
+ # These two regexps define how IPythonConsoleLexer finds a
+ # traceback.
+ #
+ ## Non-syntax traceback
+ (r'^(\^C)?(-+\n)', bygroups(Error, Generic.Traceback)),
+ ## Syntax traceback
+ (r'^( File)(.*)(, line )(\d+\n)',
+ bygroups(Generic.Traceback, Name.Namespace,
+ Generic.Traceback, Literal.Number.Integer)),
+
+ # (Exception Identifier)(Whitespace)(Traceback Message)
+ (r'(?u)(^[^\d\W]\w*)(\s*)(Traceback.*?\n)',
+ bygroups(Name.Exception, Generic.Whitespace, Text)),
+ # (Module/Filename)(Text)(Callee)(Function Signature)
+ # Better options for callee and function signature?
+ (r'(.*)( in )(.*)(\(.*\)\n)',
+ bygroups(Name.Namespace, Text, Name.Entity, Name.Tag)),
+ # Regular line: (Whitespace)(Line Number)(Python Code)
+ (r'(\s*?)(\d+)(.*?\n)',
+ bygroups(Generic.Whitespace, Literal.Number.Integer, Other)),
+ # Emphasized line: (Arrow)(Line Number)(Python Code)
+ # Using Exception token so arrow color matches the Exception.
+ (r'(-*>?\s?)(\d+)(.*?\n)',
+ bygroups(Name.Exception, Literal.Number.Integer, Other)),
+ # (Exception Identifier)(Message)
+ (r'(?u)(^[^\d\W]\w*)(:.*?\n)',
+ bygroups(Name.Exception, Text)),
+ # Tag everything else as Other, will be handled later.
+ (r'.*\n', Other),
+ ],
+ }
+
+
+class IPythonTracebackLexer(DelegatingLexer):
+ """
+ IPython traceback lexer.
+
+ For doctests, the tracebacks can be snipped as much as desired with the
+ exception of the lines that designate a traceback. For non-syntax error
+ tracebacks, this is the line of hyphens. For syntax error tracebacks,
+ this is the line which lists the File and line number.
+
+ """
+ # The lexer inherits from DelegatingLexer. The "root" lexer is an
+ # appropriate IPython lexer, which depends on the value of the boolean
+ # `python3`. First, we parse with the partial IPython traceback lexer.
+ # Then, any code marked with the "Other" token is delegated to the root
+ # lexer.
+ #
+ name = 'IPython Traceback'
+ aliases = ['ipythontb']
+
+ def __init__(self, **options):
+ self.python3 = get_bool_opt(options, 'python3', False)
+ if self.python3:
+ self.aliases = ['ipython3tb']
+ else:
+ self.aliases = ['ipython2tb', 'ipythontb']
+
+ if self.python3:
+ IPyLexer = IPython3Lexer
+ else:
+ IPyLexer = IPythonLexer
+
+ DelegatingLexer.__init__(self, IPyLexer,
+ IPythonPartialTracebackLexer, **options)
+
+class IPythonConsoleLexer(Lexer):
+ """
+ An IPython console lexer for IPython code-blocks and doctests, such as:
+
+ .. code-block:: rst
+
+ .. code-block:: ipythonconsole
+
+ In [1]: a = 'foo'
+
+ In [2]: a
+ Out[2]: 'foo'
+
+ In [3]: print a
+ foo
+
+ In [4]: 1 / 0
+
+
+ Support is also provided for IPython exceptions:
+
+ .. code-block:: rst
+
+ .. code-block:: ipythonconsole
+
+ In [1]: raise Exception
+
+ ---------------------------------------------------------------------------
+ Exception Traceback (most recent call last)
+ <ipython-input-1-fca2ab0ca76b> in <module>()
+ ----> 1 raise Exception
+
+ Exception:
+
+ """
+ name = 'IPython console session'
+ aliases = ['ipythonconsole']
+ mimetypes = ['text/x-ipython-console']
+
+ # The regexps used to determine what is input and what is output.
+ # The default prompts for IPython are:
+ #
# in = 'In [#]: '
# continuation = ' .D.: '
# template = 'Out[#]: '
- #
+ #
# Where '#' is the 'prompt number' or 'execution count', and 'D'
# is a number of dots matching the width of the execution count.
#
- in1_regex = r'In \[[0-9]+\]: '
- in2_regex = r' \.\.+\.: '
- out_regex = r'Out\[[0-9]+\]: '
-
- #: The regex to determine when a traceback starts.
- ipytb_start = re.compile(r'^(\^C)?(-+\n)|^( File)(.*)(, line )(\d+\n)')
-
- def __init__(self, **options):
- """Initialize the IPython console lexer.
-
- Parameters
- ----------
- python3 : bool
- If `True`, then the console inputs are parsed using a Python 3
- lexer. Otherwise, they are parsed using a Python 2 lexer.
- in1_regex : RegexObject
- The compiled regular expression used to detect the start
- of inputs. Although the IPython configuration setting may have a
- trailing whitespace, do not include it in the regex. If `None`,
- then the default input prompt is assumed.
- in2_regex : RegexObject
- The compiled regular expression used to detect the continuation
- of inputs. Although the IPython configuration setting may have a
- trailing whitespace, do not include it in the regex. If `None`,
- then the default input prompt is assumed.
- out_regex : RegexObject
- The compiled regular expression used to detect outputs. If `None`,
- then the default output prompt is assumed.
-
- """
- self.python3 = get_bool_opt(options, 'python3', False)
- if self.python3:
- self.aliases = ['ipython3console']
- else:
- self.aliases = ['ipython2console', 'ipythonconsole']
-
- in1_regex = options.get('in1_regex', self.in1_regex)
- in2_regex = options.get('in2_regex', self.in2_regex)
- out_regex = options.get('out_regex', self.out_regex)
-
- # So that we can work with input and output prompts which have been
- # rstrip'd (possibly by editors) we also need rstrip'd variants. If
- # we do not do this, then such prompts will be tagged as 'output'.
- # The reason we can't just use the rstrip'd variants instead is that
- # we want any whitespace associated with the prompt to be inserted
- # with the token. This allows formatted code to be modified so as to hide
- # the appearance of prompts, with the whitespace included. One example
- # use of this is in copybutton.js from the standard lib Python docs.
- in1_regex_rstrip = in1_regex.rstrip() + '\n'
- in2_regex_rstrip = in2_regex.rstrip() + '\n'
- out_regex_rstrip = out_regex.rstrip() + '\n'
-
- # Compile and save them all.
- attrs = ['in1_regex', 'in2_regex', 'out_regex',
- 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
- for attr in attrs:
- self.__setattr__(attr, re.compile(locals()[attr]))
-
- Lexer.__init__(self, **options)
-
- if self.python3:
- pylexer = IPython3Lexer
- tblexer = IPythonTracebackLexer
- else:
- pylexer = IPythonLexer
- tblexer = IPythonTracebackLexer
-
- self.pylexer = pylexer(**options)
- self.tblexer = tblexer(**options)
-
- self.reset()
-
- def reset(self):
- self.mode = 'output'
- self.index = 0
- self.buffer = u''
- self.insertions = []
-
- def buffered_tokens(self):
- """
- Generator of unprocessed tokens after doing insertions and before
- changing to a new state.
-
- """
- if self.mode == 'output':
- tokens = [(0, Generic.Output, self.buffer)]
- elif self.mode == 'input':
- tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
- else: # traceback
- tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
-
- for i, t, v in do_insertions(self.insertions, tokens):
- # All token indexes are relative to the buffer.
- yield self.index + i, t, v
-
- # Clear it all
- self.index += len(self.buffer)
- self.buffer = u''
- self.insertions = []
-
- def get_mci(self, line):
- """
- Parses the line and returns a 3-tuple: (mode, code, insertion).
-
- `mode` is the next mode (or state) of the lexer, and is always equal
- to 'input', 'output', or 'tb'.
-
- `code` is a portion of the line that should be added to the buffer
- corresponding to the next mode and eventually lexed by another lexer.
- For example, `code` could be Python code if `mode` were 'input'.
-
- `insertion` is a 3-tuple (index, token, text) representing an
- unprocessed "token" that will be inserted into the stream of tokens
- that are created from the buffer once we change modes. This is usually
- the input or output prompt.
-
- In general, the next mode depends on current mode and on the contents
- of `line`.
-
- """
- # To reduce the number of regex match checks, we have multiple
- # 'if' blocks instead of 'if-elif' blocks.
-
- # Check for possible end of input
- in2_match = self.in2_regex.match(line)
- in2_match_rstrip = self.in2_regex_rstrip.match(line)
- if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
- in2_match_rstrip:
- end_input = True
- else:
- end_input = False
- if end_input and self.mode != 'tb':
- # Only look for an end of input when not in tb mode.
- # An ellipsis could appear within the traceback.
- mode = 'output'
- code = u''
- insertion = (0, Generic.Prompt, line)
- return mode, code, insertion
-
- # Check for output prompt
- out_match = self.out_regex.match(line)
- out_match_rstrip = self.out_regex_rstrip.match(line)
- if out_match or out_match_rstrip:
- mode = 'output'
- if out_match:
- idx = out_match.end()
- else:
- idx = out_match_rstrip.end()
- code = line[idx:]
- # Use the 'heading' token for output. We cannot use Generic.Error
- # since it would conflict with exceptions.
- insertion = (0, Generic.Heading, line[:idx])
- return mode, code, insertion
-
-
- # Check for input or continuation prompt (non-stripped version)
- in1_match = self.in1_regex.match(line)
- if in1_match or (in2_match and self.mode != 'tb'):
- # New input or when not in tb, continued input.
- # We do not check for continued input when in tb since it is
- # allowable to replace a long stack with an ellipsis.
- mode = 'input'
- if in1_match:
- idx = in1_match.end()
- else: # in2_match
- idx = in2_match.end()
- code = line[idx:]
- insertion = (0, Generic.Prompt, line[:idx])
- return mode, code, insertion
-
- # Check for input or continuation prompt (stripped version)
- in1_match_rstrip = self.in1_regex_rstrip.match(line)
- if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
- # New input or when not in tb, continued input.
- # We do not check for continued input when in tb since it is
- # allowable to replace a long stack with an ellipsis.
- mode = 'input'
- if in1_match_rstrip:
- idx = in1_match_rstrip.end()
- else: # in2_match_rstrip
- idx = in2_match_rstrip.end()
- code = line[idx:]
- insertion = (0, Generic.Prompt, line[:idx])
- return mode, code, insertion
-
- # Check for traceback
- if self.ipytb_start.match(line):
- mode = 'tb'
- code = line
- insertion = None
- return mode, code, insertion
-
- # All other stuff...
- if self.mode in ('input', 'output'):
- # We assume all other text is output. Multiline input that
- # does not use the continuation marker cannot be detected.
- # For example, the 3 in the following is clearly output:
- #
- # In [1]: print 3
- # 3
- #
- # But the following second line is part of the input:
- #
- # In [2]: while True:
- # print True
- #
- # In both cases, the 2nd line will be 'output'.
- #
- mode = 'output'
- else:
- mode = 'tb'
-
- code = line
- insertion = None
-
- return mode, code, insertion
-
- def get_tokens_unprocessed(self, text):
- self.reset()
- for match in line_re.finditer(text):
- line = match.group()
- mode, code, insertion = self.get_mci(line)
-
- if mode != self.mode:
- # Yield buffered tokens before transitioning to new mode.
- for token in self.buffered_tokens():
- yield token
- self.mode = mode
-
- if insertion:
- self.insertions.append((len(self.buffer), [insertion]))
- self.buffer += code
-
+ in1_regex = r'In \[[0-9]+\]: '
+ in2_regex = r' \.\.+\.: '
+ out_regex = r'Out\[[0-9]+\]: '
+
+ #: The regex to determine when a traceback starts.
+ ipytb_start = re.compile(r'^(\^C)?(-+\n)|^( File)(.*)(, line )(\d+\n)')
+
+ def __init__(self, **options):
+ """Initialize the IPython console lexer.
+
+ Parameters
+ ----------
+ python3 : bool
+ If `True`, then the console inputs are parsed using a Python 3
+ lexer. Otherwise, they are parsed using a Python 2 lexer.
+ in1_regex : RegexObject
+ The compiled regular expression used to detect the start
+ of inputs. Although the IPython configuration setting may have a
+ trailing whitespace, do not include it in the regex. If `None`,
+ then the default input prompt is assumed.
+ in2_regex : RegexObject
+ The compiled regular expression used to detect the continuation
+ of inputs. Although the IPython configuration setting may have a
+ trailing whitespace, do not include it in the regex. If `None`,
+ then the default input prompt is assumed.
+ out_regex : RegexObject
+ The compiled regular expression used to detect outputs. If `None`,
+ then the default output prompt is assumed.
+
+ """
+ self.python3 = get_bool_opt(options, 'python3', False)
+ if self.python3:
+ self.aliases = ['ipython3console']
+ else:
+ self.aliases = ['ipython2console', 'ipythonconsole']
+
+ in1_regex = options.get('in1_regex', self.in1_regex)
+ in2_regex = options.get('in2_regex', self.in2_regex)
+ out_regex = options.get('out_regex', self.out_regex)
+
+ # So that we can work with input and output prompts which have been
+ # rstrip'd (possibly by editors) we also need rstrip'd variants. If
+ # we do not do this, then such prompts will be tagged as 'output'.
+ # The reason we can't just use the rstrip'd variants instead is that
+ # we want any whitespace associated with the prompt to be inserted
+ # with the token. This allows formatted code to be modified so as to hide
+ # the appearance of prompts, with the whitespace included. One example
+ # use of this is in copybutton.js from the standard lib Python docs.
+ in1_regex_rstrip = in1_regex.rstrip() + '\n'
+ in2_regex_rstrip = in2_regex.rstrip() + '\n'
+ out_regex_rstrip = out_regex.rstrip() + '\n'
+
+ # Compile and save them all.
+ attrs = ['in1_regex', 'in2_regex', 'out_regex',
+ 'in1_regex_rstrip', 'in2_regex_rstrip', 'out_regex_rstrip']
+ for attr in attrs:
+ self.__setattr__(attr, re.compile(locals()[attr]))
+
+ Lexer.__init__(self, **options)
+
+ if self.python3:
+ pylexer = IPython3Lexer
+ tblexer = IPythonTracebackLexer
+ else:
+ pylexer = IPythonLexer
+ tblexer = IPythonTracebackLexer
+
+ self.pylexer = pylexer(**options)
+ self.tblexer = tblexer(**options)
+
+ self.reset()
+
+ def reset(self):
+ self.mode = 'output'
+ self.index = 0
+ self.buffer = u''
+ self.insertions = []
+
+ def buffered_tokens(self):
+ """
+ Generator of unprocessed tokens after doing insertions and before
+ changing to a new state.
+
+ """
+ if self.mode == 'output':
+ tokens = [(0, Generic.Output, self.buffer)]
+ elif self.mode == 'input':
+ tokens = self.pylexer.get_tokens_unprocessed(self.buffer)
+ else: # traceback
+ tokens = self.tblexer.get_tokens_unprocessed(self.buffer)
+
+ for i, t, v in do_insertions(self.insertions, tokens):
+ # All token indexes are relative to the buffer.
+ yield self.index + i, t, v
+
+ # Clear it all
+ self.index += len(self.buffer)
+ self.buffer = u''
+ self.insertions = []
+
+ def get_mci(self, line):
+ """
+ Parses the line and returns a 3-tuple: (mode, code, insertion).
+
+ `mode` is the next mode (or state) of the lexer, and is always equal
+ to 'input', 'output', or 'tb'.
+
+ `code` is a portion of the line that should be added to the buffer
+ corresponding to the next mode and eventually lexed by another lexer.
+ For example, `code` could be Python code if `mode` were 'input'.
+
+ `insertion` is a 3-tuple (index, token, text) representing an
+ unprocessed "token" that will be inserted into the stream of tokens
+ that are created from the buffer once we change modes. This is usually
+ the input or output prompt.
+
+ In general, the next mode depends on current mode and on the contents
+ of `line`.
+
+ """
+ # To reduce the number of regex match checks, we have multiple
+ # 'if' blocks instead of 'if-elif' blocks.
+
+ # Check for possible end of input
+ in2_match = self.in2_regex.match(line)
+ in2_match_rstrip = self.in2_regex_rstrip.match(line)
+ if (in2_match and in2_match.group().rstrip() == line.rstrip()) or \
+ in2_match_rstrip:
+ end_input = True
+ else:
+ end_input = False
+ if end_input and self.mode != 'tb':
+ # Only look for an end of input when not in tb mode.
+ # An ellipsis could appear within the traceback.
+ mode = 'output'
+ code = u''
+ insertion = (0, Generic.Prompt, line)
+ return mode, code, insertion
+
+ # Check for output prompt
+ out_match = self.out_regex.match(line)
+ out_match_rstrip = self.out_regex_rstrip.match(line)
+ if out_match or out_match_rstrip:
+ mode = 'output'
+ if out_match:
+ idx = out_match.end()
+ else:
+ idx = out_match_rstrip.end()
+ code = line[idx:]
+ # Use the 'heading' token for output. We cannot use Generic.Error
+ # since it would conflict with exceptions.
+ insertion = (0, Generic.Heading, line[:idx])
+ return mode, code, insertion
+
+
+ # Check for input or continuation prompt (non-stripped version)
+ in1_match = self.in1_regex.match(line)
+ if in1_match or (in2_match and self.mode != 'tb'):
+ # New input or when not in tb, continued input.
+ # We do not check for continued input when in tb since it is
+ # allowable to replace a long stack with an ellipsis.
+ mode = 'input'
+ if in1_match:
+ idx = in1_match.end()
+ else: # in2_match
+ idx = in2_match.end()
+ code = line[idx:]
+ insertion = (0, Generic.Prompt, line[:idx])
+ return mode, code, insertion
+
+ # Check for input or continuation prompt (stripped version)
+ in1_match_rstrip = self.in1_regex_rstrip.match(line)
+ if in1_match_rstrip or (in2_match_rstrip and self.mode != 'tb'):
+ # New input or when not in tb, continued input.
+ # We do not check for continued input when in tb since it is
+ # allowable to replace a long stack with an ellipsis.
+ mode = 'input'
+ if in1_match_rstrip:
+ idx = in1_match_rstrip.end()
+ else: # in2_match_rstrip
+ idx = in2_match_rstrip.end()
+ code = line[idx:]
+ insertion = (0, Generic.Prompt, line[:idx])
+ return mode, code, insertion
+
+ # Check for traceback
+ if self.ipytb_start.match(line):
+ mode = 'tb'
+ code = line
+ insertion = None
+ return mode, code, insertion
+
+ # All other stuff...
+ if self.mode in ('input', 'output'):
+ # We assume all other text is output. Multiline input that
+ # does not use the continuation marker cannot be detected.
+ # For example, the 3 in the following is clearly output:
+ #
+ # In [1]: print 3
+ # 3
+ #
+ # But the following second line is part of the input:
+ #
+ # In [2]: while True:
+ # print True
+ #
+ # In both cases, the 2nd line will be 'output'.
+ #
+ mode = 'output'
+ else:
+ mode = 'tb'
+
+ code = line
+ insertion = None
+
+ return mode, code, insertion
+
+ def get_tokens_unprocessed(self, text):
+ self.reset()
+ for match in line_re.finditer(text):
+ line = match.group()
+ mode, code, insertion = self.get_mci(line)
+
+ if mode != self.mode:
+ # Yield buffered tokens before transitioning to new mode.
+ for token in self.buffered_tokens():
+ yield token
+ self.mode = mode
+
+ if insertion:
+ self.insertions.append((len(self.buffer), [insertion]))
+ self.buffer += code
+
for token in self.buffered_tokens():
yield token
-class IPyLexer(Lexer):
- """
- Primary lexer for all IPython-like code.
-
- This is a simple helper lexer. If the first line of the text begins with
- "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
- lexer. If not, then the entire text is parsed with an IPython lexer.
-
- The goal is to reduce the number of lexers that are registered
- with Pygments.
-
- """
- name = 'IPy session'
- aliases = ['ipy']
-
- def __init__(self, **options):
- self.python3 = get_bool_opt(options, 'python3', False)
- if self.python3:
- self.aliases = ['ipy3']
- else:
- self.aliases = ['ipy2', 'ipy']
-
- Lexer.__init__(self, **options)
-
- self.IPythonLexer = IPythonLexer(**options)
- self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
-
- def get_tokens_unprocessed(self, text):
- # Search for the input prompt anywhere; this allows code blocks to
- # begin with comments as well.
- if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
- lex = self.IPythonConsoleLexer
- else:
- lex = self.IPythonLexer
- for token in lex.get_tokens_unprocessed(text):
- yield token
-
+class IPyLexer(Lexer):
+ """
+ Primary lexer for all IPython-like code.
+
+ This is a simple helper lexer. If the first line of the text begins with
+ "In \[[0-9]+\]:", then the entire text is parsed with an IPython console
+ lexer. If not, then the entire text is parsed with an IPython lexer.
+
+ The goal is to reduce the number of lexers that are registered
+ with Pygments.
+
+ """
+ name = 'IPy session'
+ aliases = ['ipy']
+
+ def __init__(self, **options):
+ self.python3 = get_bool_opt(options, 'python3', False)
+ if self.python3:
+ self.aliases = ['ipy3']
+ else:
+ self.aliases = ['ipy2', 'ipy']
+
+ Lexer.__init__(self, **options)
+
+ self.IPythonLexer = IPythonLexer(**options)
+ self.IPythonConsoleLexer = IPythonConsoleLexer(**options)
+
+ def get_tokens_unprocessed(self, text):
+ # Search for the input prompt anywhere; this allows code blocks to
+ # begin with comments as well.
+ if re.match(r'.*(In \[[0-9]+\]:)', text.strip(), re.DOTALL):
+ lex = self.IPythonConsoleLexer
+ else:
+ lex = self.IPythonLexer
+ for token in lex.get_tokens_unprocessed(text):
+ yield token
+
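
For orientation, the following is a minimal sketch (not part of the diff above) of how the lexers defined in this file are typically used. It assumes Pygments is installed and that this module is importable as IPython.lib.lexers; the sample inputs are hypothetical. Per the module docstring, IPyLexer inspects the text for an input prompt and dispatches either to the console lexer or to the plain IPython lexer.

    # Minimal sketch, assuming Pygments is installed and this module is
    # importable as IPython.lib.lexers. The sample inputs are hypothetical.
    from pygments import highlight
    from pygments.formatters import TerminalFormatter

    from IPython.lib.lexers import IPyLexer

    # Text containing an "In [N]:" prompt: IPyLexer delegates to the
    # IPython console lexer, which separates prompts, input, and output.
    console_text = (
        "In [1]: a = 'foo'\n"
        "\n"
        "In [2]: a\n"
        "Out[2]: 'foo'\n"
    )

    # Text with no prompt anywhere: IPyLexer falls back to the plain
    # IPython lexer, which extends the Python lexer with magics and
    # shell escapes such as %timeit or !ls.
    plain_code = "%timeit sum(range(10))\n"

    lexer = IPyLexer(python3=True)
    print(highlight(console_text, lexer, TerminalFormatter()))
    print(highlight(plain_code, lexer, TerminalFormatter()))

Passing the lexer instance directly to highlight() avoids registering anything with Pygments, which fits the docstring's note that IPyLexer is probably the only lexer worth adding to Pygments explicitly.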