intermediate changes

ref:cde9a383711a11544ce7e107a78147fb96cc4029
author: Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer: Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit: 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree: e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/python/prompt-toolkit/py2/prompt_toolkit/layout/lexers.py
download: ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
1 files changed, 320 insertions, 0 deletions
diff --git a/contrib/python/prompt-toolkit/py2/prompt_toolkit/layout/lexers.py b/contrib/python/prompt-toolkit/py2/prompt_toolkit/layout/lexers.py
new file mode 100644
index 00000000000..a928fd82264
--- /dev/null
+++ b/contrib/python/prompt-toolkit/py2/prompt_toolkit/layout/lexers.py
@@ -0,0 +1,320 @@
+"""
+Lexer interface and implementation.
+Used for syntax highlighting.
+"""
+from __future__ import unicode_literals
+from abc import ABCMeta, abstractmethod
+from six import with_metaclass
+from six.moves import range
+
+from prompt_toolkit.token import Token
+from prompt_toolkit.filters import to_cli_filter
+from .utils import split_lines
+
+import re
+import six
+
+__all__ = (
+    'Lexer',
+    'SimpleLexer',
+    'PygmentsLexer',
+    'SyntaxSync',
+    'SyncFromStart',
+    'RegexSync',
+)
+
+
+class Lexer(with_metaclass(ABCMeta, object)):
+    """
+    Base class for all lexers.
+    """
+    @abstractmethod
+    def lex_document(self, cli, document):
+        """
+        Takes a :class:`~prompt_toolkit.document.Document` and returns a
+        callable that takes a line number and returns the tokens for that line.
+        """
+
+
+class SimpleLexer(Lexer):
+    """
+    Lexer that doesn't do any tokenizing and returns the whole input as one token.
+
+    :param token: The `Token` for this lexer.
+    """
+    # `default_token` parameter is deprecated!
+    def __init__(self, token=Token, default_token=None):
+        self.token = token
+
+        if default_token is not None:
+            self.token = default_token
+
+    def lex_document(self, cli, document):
+        lines = document.lines
+
+        def get_line(lineno):
+            " Return the tokens for the given line. "
+            try:
+                return [(self.token, lines[lineno])]
+            except IndexError:
+                return []
+        return get_line
+
+
+class SyntaxSync(with_metaclass(ABCMeta, object)):
+    """
+    Syntax synchroniser. This is a tool that finds a start position for the
+    lexer. This is especially important when editing big documents; we don't
+    want to start the highlighting by running the lexer from the beginning of
+    the file. That is very slow when editing.
+    """
+    @abstractmethod
+    def get_sync_start_position(self, document, lineno):
+        """
+        Return the position from where we can start lexing as a (row, column)
+        tuple.
+
+        :param document: `Document` instance that contains all the lines.
+        :param lineno: The line that we want to highlight. (We need to return
+            this line, or an earlier position.)
+        """
+
+class SyncFromStart(SyntaxSync):
+    """
+    Always start the syntax highlighting from the beginning.
+    """
+    def get_sync_start_position(self, document, lineno):
+        return 0, 0
+
+
+class RegexSync(SyntaxSync):
+    """
+    Synchronize by starting at a line that matches the given regex pattern.
+    """
+    # Never go more than this amount of lines backwards for synchronisation.
+    # That would be too CPU intensive.
+    MAX_BACKWARDS = 500
+
+    # Start lexing at the start, if we are in the first 'n' lines and no
+    # synchronisation position was found.
+    FROM_START_IF_NO_SYNC_POS_FOUND = 100
+
+    def __init__(self, pattern):
+        assert isinstance(pattern, six.text_type)
+        self._compiled_pattern = re.compile(pattern)
+
+    def get_sync_start_position(self, document, lineno):
+        " Scan backwards, and find a possible position to start. "
+        pattern = self._compiled_pattern
+        lines = document.lines
+
+        # Scan upwards, until we find a point where we can start the syntax
+        # synchronisation.
+        for i in range(lineno, max(-1, lineno - self.MAX_BACKWARDS), -1):
+            match = pattern.match(lines[i])
+            if match:
+                return i, match.start()
+
+        # No synchronisation point found. If we aren't that far from the
+        # beginning, start at the very beginning, otherwise, just try to start
+        # at the current line.
+        if lineno < self.FROM_START_IF_NO_SYNC_POS_FOUND:
+            return 0, 0
+        else:
+            return lineno, 0
+
+    @classmethod
+    def from_pygments_lexer_cls(cls, lexer_cls):
+        """
+        Create a :class:`.RegexSync` instance for this Pygments lexer class.
+        """
+        patterns = {
+            # For Python, start highlighting at any class/def block.
+            'Python':   r'^\s*(class|def)\s+',
+            'Python 3': r'^\s*(class|def)\s+',
+
+            # For HTML, start at any open/close tag definition.
+            'HTML': r'<[/a-zA-Z]',
+
+            # For javascript, start at a function.
+            'JavaScript': r'\bfunction\b'
+
+            # TODO: Add definitions for other languages.
+            #       By default, we start at every possible line.
+        }
+        p = patterns.get(lexer_cls.name, '^')
+        return cls(p)
+
+
+class PygmentsLexer(Lexer):
+    """
+    Lexer that calls a pygments lexer.
+
+    Example::
+
+        from pygments.lexers import HtmlLexer
+        lexer = PygmentsLexer(HtmlLexer)
+
+    Note: Don't forget to also load a Pygments compatible style. E.g.::
+
+        from prompt_toolkit.styles.from_pygments import style_from_pygments
+        from pygments.styles import get_style_by_name
+        style = style_from_pygments(get_style_by_name('monokai'))
+
+    :param pygments_lexer_cls: A `Lexer` from Pygments.
+    :param sync_from_start: Start lexing at the start of the document. This
+        will always give the best results, but it will be slow for bigger
+        documents. (When the last part of the document is display, then the
+        whole document will be lexed by Pygments on every key stroke.) It is
+        recommended to disable this for inputs that are expected to be more
+        than 1,000 lines.
+    :param syntax_sync: `SyntaxSync` object.
+    """
+    # Minimum amount of lines to go backwards when starting the parser.
+    # This is important when the lines are retrieved in reverse order, or when
+    # scrolling upwards. (Due to the complexity of calculating the vertical
+    # scroll offset in the `Window` class, lines are not always retrieved in
+    # order.)
+    MIN_LINES_BACKWARDS = 50
+
+    # When a parser was started this amount of lines back, read the parser
+    # until we get the current line. Otherwise, start a new parser.
+    # (This should probably be bigger than MIN_LINES_BACKWARDS.)
+    REUSE_GENERATOR_MAX_DISTANCE = 100
+
+    def __init__(self, pygments_lexer_cls, sync_from_start=True, syntax_sync=None):
+        assert syntax_sync is None or isinstance(syntax_sync, SyntaxSync)
+
+        self.pygments_lexer_cls = pygments_lexer_cls
+        self.sync_from_start = to_cli_filter(sync_from_start)
+
+        # Instantiate the Pygments lexer.
+        self.pygments_lexer = pygments_lexer_cls(
+            stripnl=False,
+            stripall=False,
+            ensurenl=False)
+
+        # Create syntax sync instance.
+        self.syntax_sync = syntax_sync or RegexSync.from_pygments_lexer_cls(pygments_lexer_cls)
+
+    @classmethod
+    def from_filename(cls, filename, sync_from_start=True):
+        """
+        Create a `Lexer` from a filename.
+        """
+        # Inline imports: the Pygments dependency is optional!
+        from pygments.util import ClassNotFound
+        from pygments.lexers import get_lexer_for_filename
+
+        try:
+            pygments_lexer = get_lexer_for_filename(filename)
+        except ClassNotFound:
+            return SimpleLexer()
+        else:
+            return cls(pygments_lexer.__class__, sync_from_start=sync_from_start)
+
+    def lex_document(self, cli, document):
+        """
+        Create a lexer function that takes a line number and returns the list
+        of (Token, text) tuples as the Pygments lexer returns for that line.
+        """
+        # Cache of already lexed lines.
+        cache = {}
+
+        # Pygments generators that are currently lexing.
+        line_generators = {}  # Map lexer generator to the line number.
+
+        def get_syntax_sync():
+            " The Syntax synchronisation objcet that we currently use. "
+            if self.sync_from_start(cli):
+                return SyncFromStart()
+            else:
+                return self.syntax_sync
+
+        def find_closest_generator(i):
+            " Return a generator close to line 'i', or None if none was fonud. "
+            for generator, lineno in line_generators.items():
+                if lineno < i and i - lineno < self.REUSE_GENERATOR_MAX_DISTANCE:
+                    return generator
+
+        def create_line_generator(start_lineno, column=0):
+            """
+            Create a generator that yields the lexed lines.
+            Each iteration it yields a (line_number, [(token, text), ...]) tuple.
+            """
+            def get_tokens():
+                text = '\n'.join(document.lines[start_lineno:])[column:]
+
+                # We call `get_tokens_unprocessed`, because `get_tokens` will
+                # still replace \r\n and \r by \n.  (We don't want that,
+                # Pygments should return exactly the same amount of text, as we
+                # have given as input.)
+                for _, t, v in self.pygments_lexer.get_tokens_unprocessed(text):
+                    yield t, v
+
+            return enumerate(split_lines(get_tokens()), start_lineno)
+
+        def get_generator(i):
+            """
+            Find an already started generator that is close, or create a new one.
+            """
+            # Find closest line generator.
+            generator = find_closest_generator(i)
+            if generator:
+                return generator
+
+            # No generator found. Determine starting point for the syntax
+            # synchronisation first.
+
+            # Go at least x lines back. (Make scrolling upwards more
+            # efficient.)
+            i = max(0, i - self.MIN_LINES_BACKWARDS)
+
+            if i == 0:
+                row = 0
+                column = 0
+            else:
+                row, column = get_syntax_sync().get_sync_start_position(document, i)
+
+            # Find generator close to this point, or otherwise create a new one.
+            generator = find_closest_generator(i)
+            if generator:
+                return generator
+            else:
+                generator = create_line_generator(row, column)
+
+            # If the column is not 0, ignore the first line. (Which is
+            # incomplete. This happens when the synchronisation algorithm tells
+            # us to start parsing in the middle of a line.)
+            if column:
+                next(generator)
+                row += 1
+
+            line_generators[generator] = row
+            return generator
+
+        def get_line(i):
+            " Return the tokens for a given line number. "
+            try:
+                return cache[i]
+            except KeyError:
+                generator = get_generator(i)
+
+                # Exhaust the generator, until we find the requested line.
+                for num, line in generator:
+                    cache[num] = line
+                    if num == i:
+                        line_generators[generator] = i
+
+                        # Remove the next item from the cache.
+                        # (It could happen that it's already there, because of
+                        # another generator that started filling these lines,
+                        # but we want to synchronise these lines with the
+                        # current lexer's state.)
+                        if num + 1 in cache:
+                            del cache[num + 1]
+
+                        return cache[num]
+            return []
+
+        return get_line
author	Devtools Arcadia <arcadia-devtools@yandex-team.ru>	2022-02-07 18:08:42 +0300
committer	Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>	2022-02-07 18:08:42 +0300
commit	1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree	e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/python/prompt-toolkit/py2/prompt_toolkit/layout/lexers.py
download	ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz