aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/Pygments/py3/pygments/lexer.py
diff options
context:
space:
mode:
authorrobot-contrib <robot-contrib@yandex-team.com>2023-12-09 00:19:25 +0300
committerrobot-contrib <robot-contrib@yandex-team.com>2023-12-09 00:50:41 +0300
commit83b8a2f9228353759e59a093cb3c1270ea2c9d5b (patch)
treea90f4f91780c0613bea19f33ff8af8e93a335e8b /contrib/python/Pygments/py3/pygments/lexer.py
parent460528e80f26d04487dc242b7333d45bbeb43a4d (diff)
downloadydb-83b8a2f9228353759e59a093cb3c1270ea2c9d5b.tar.gz
Update contrib/python/Pygments/py3 to 2.17.2
Diffstat (limited to 'contrib/python/Pygments/py3/pygments/lexer.py')
-rw-r--r--contrib/python/Pygments/py3/pygments/lexer.py33
1 file changed, 20 insertions, 13 deletions
diff --git a/contrib/python/Pygments/py3/pygments/lexer.py b/contrib/python/Pygments/py3/pygments/lexer.py
index 93d90bfbe6..eb5403e798 100644
--- a/contrib/python/Pygments/py3/pygments/lexer.py
+++ b/contrib/python/Pygments/py3/pygments/lexer.py
@@ -199,20 +199,9 @@ class Lexer(metaclass=LexerMeta):
it's the same as if the return values was ``0.0``.
"""
- def get_tokens(self, text, unfiltered=False):
- """
- This method is the basic interface of a lexer. It is called by
- the `highlight()` function. It must process the text and return an
- iterable of ``(tokentype, value)`` pairs from `text`.
+ def _preprocess_lexer_input(self, text):
+ """Apply preprocessing such as decoding the input, removing BOM and normalizing newlines."""
- Normally, you don't need to override this method. The default
- implementation processes the options recognized by all lexers
- (`stripnl`, `stripall` and so on), and then yields all tokens
- from `get_tokens_unprocessed()`, with the ``index`` dropped.
-
- If `unfiltered` is set to `True`, the filtering mechanism is
- bypassed even if filters are defined.
- """
if not isinstance(text, str):
if self.encoding == 'guess':
text, _ = guess_decode(text)
@@ -255,6 +244,24 @@ class Lexer(metaclass=LexerMeta):
if self.ensurenl and not text.endswith('\n'):
text += '\n'
+ return text
+
+ def get_tokens(self, text, unfiltered=False):
+ """
+ This method is the basic interface of a lexer. It is called by
+ the `highlight()` function. It must process the text and return an
+ iterable of ``(tokentype, value)`` pairs from `text`.
+
+ Normally, you don't need to override this method. The default
+ implementation processes the options recognized by all lexers
+ (`stripnl`, `stripall` and so on), and then yields all tokens
+ from `get_tokens_unprocessed()`, with the ``index`` dropped.
+
+ If `unfiltered` is set to `True`, the filtering mechanism is
+ bypassed even if filters are defined.
+ """
+ text = self._preprocess_lexer_input(text)
+
def streamer():
for _, t, v in self.get_tokens_unprocessed(text):
yield t, v