aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/Pygments/py3/pygments/lexer.py
diff options
context:
space:
mode:
authorrobot-contrib <robot-contrib@yandex-team.com>2023-12-09 00:19:25 +0300
committerrobot-contrib <robot-contrib@yandex-team.com>2023-12-09 00:50:41 +0300
commit83b8a2f9228353759e59a093cb3c1270ea2c9d5b (patch)
treea90f4f91780c0613bea19f33ff8af8e93a335e8b /contrib/python/Pygments/py3/pygments/lexer.py
parent460528e80f26d04487dc242b7333d45bbeb43a4d (diff)
downloadydb-83b8a2f9228353759e59a093cb3c1270ea2c9d5b.tar.gz
Update contrib/python/Pygments/py3 to 2.17.2
Diffstat (limited to 'contrib/python/Pygments/py3/pygments/lexer.py')
-rw-r--r--contrib/python/Pygments/py3/pygments/lexer.py33
1 file changed, 20 insertions, 13 deletions
diff --git a/contrib/python/Pygments/py3/pygments/lexer.py b/contrib/python/Pygments/py3/pygments/lexer.py
index 93d90bfbe6..eb5403e798 100644
--- a/contrib/python/Pygments/py3/pygments/lexer.py
+++ b/contrib/python/Pygments/py3/pygments/lexer.py
@@ -199,20 +199,9 @@ class Lexer(metaclass=LexerMeta):
it's the same as if the return values was ``0.0``.
"""
- def get_tokens(self, text, unfiltered=False):
- """
- This method is the basic interface of a lexer. It is called by
- the `highlight()` function. It must process the text and return an
- iterable of ``(tokentype, value)`` pairs from `text`.
+ def _preprocess_lexer_input(self, text):
+ """Apply preprocessing such as decoding the input, removing BOM and normalizing newlines."""
- Normally, you don't need to override this method. The default
- implementation processes the options recognized by all lexers
- (`stripnl`, `stripall` and so on), and then yields all tokens
- from `get_tokens_unprocessed()`, with the ``index`` dropped.
-
- If `unfiltered` is set to `True`, the filtering mechanism is
- bypassed even if filters are defined.
- """
if not isinstance(text, str):
if self.encoding == 'guess':
text, _ = guess_decode(text)
@@ -255,6 +244,24 @@ class Lexer(metaclass=LexerMeta):
if self.ensurenl and not text.endswith('\n'):
text += '\n'
+ return text
+
+ def get_tokens(self, text, unfiltered=False):
+ """
+ This method is the basic interface of a lexer. It is called by
+ the `highlight()` function. It must process the text and return an
+ iterable of ``(tokentype, value)`` pairs from `text`.
+
+ Normally, you don't need to override this method. The default
+ implementation processes the options recognized by all lexers
+ (`stripnl`, `stripall` and so on), and then yields all tokens
+ from `get_tokens_unprocessed()`, with the ``index`` dropped.
+
+ If `unfiltered` is set to `True`, the filtering mechanism is
+ bypassed even if filters are defined.
+ """
+ text = self._preprocess_lexer_input(text)
+
def streamer():
for _, t, v in self.get_tokens_unprocessed(text):
yield t, v