author | floatdrop <floatdrop@yandex-team.ru> | 2022-02-10 16:47:15 +0300
---|---|---
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:15 +0300
commit | 4267de875ca703ff841f2e025723dadc78f3cc02 (patch) |
tree | 9814fbd1c3effac9b8377c5d604b367b14e2db55 /contrib/python/Jinja2/py3/jinja2/lexer.py |
parent | e63b84f1d39557d9e46ac380b1f388271894293c (diff) |
download | ydb-4267de875ca703ff841f2e025723dadc78f3cc02.tar.gz |
Restoring authorship annotation for <floatdrop@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/python/Jinja2/py3/jinja2/lexer.py')
-rw-r--r-- | contrib/python/Jinja2/py3/jinja2/lexer.py | 624 |
1 file changed, 312 insertions(+), 312 deletions(-)
diff --git a/contrib/python/Jinja2/py3/jinja2/lexer.py b/contrib/python/Jinja2/py3/jinja2/lexer.py index 2817bf989f3..c25ab0f6901 100644 --- a/contrib/python/Jinja2/py3/jinja2/lexer.py +++ b/contrib/python/Jinja2/py3/jinja2/lexer.py @@ -2,26 +2,26 @@ is used to do some preprocessing. It filters out invalid operators like the bitshift operators we don't allow in templates. It separates template code and python code in expressions. -""" -import re +""" +import re import typing as t from ast import literal_eval -from collections import deque +from collections import deque from sys import intern - + from ._identifier import pattern as name_re from .exceptions import TemplateSyntaxError from .utils import LRUCache - + if t.TYPE_CHECKING: import typing_extensions as te from .environment import Environment -# cache for the lexers. Exists in order to be able to have multiple -# environments with the same lexer +# cache for the lexers. Exists in order to be able to have multiple +# environments with the same lexer _lexer_cache: t.MutableMapping[t.Tuple, "Lexer"] = LRUCache(50) # type: ignore - -# static regular expressions + +# static regular expressions whitespace_re = re.compile(r"\s+") newline_re = re.compile(r"(\r\n|\r|\n)") string_re = re.compile( @@ -56,8 +56,8 @@ float_re = re.compile( """, re.IGNORECASE | re.VERBOSE, ) - -# internal the tokens and keep references to them + +# internal the tokens and keep references to them TOKEN_ADD = intern("add") TOKEN_ASSIGN = intern("assign") TOKEN_COLON = intern("colon") @@ -107,9 +107,9 @@ TOKEN_LINECOMMENT = intern("linecomment") TOKEN_DATA = intern("data") TOKEN_INITIAL = intern("initial") TOKEN_EOF = intern("eof") - -# bind operators to token types -operators = { + +# bind operators to token types +operators = { "+": TOKEN_ADD, "-": TOKEN_SUB, "/": TOKEN_DIV, @@ -136,14 +136,14 @@ operators = { "|": TOKEN_PIPE, ",": TOKEN_COMMA, ";": TOKEN_SEMICOLON, -} - +} + reverse_operators = {v: k for k, v in operators.items()} assert len(operators) == len(reverse_operators), "operators dropped" operator_re = re.compile( f"({'|'.join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x)))})" ) - + ignored_tokens = frozenset( [ TOKEN_COMMENT_BEGIN, @@ -158,13 +158,13 @@ ignored_tokens = frozenset( ignore_if_empty = frozenset( [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT] ) - - + + def _describe_token_type(token_type: str) -> str: - if token_type in reverse_operators: - return reverse_operators[token_type] + if token_type in reverse_operators: + return reverse_operators[token_type] - return { + return { TOKEN_COMMENT_BEGIN: "begin of comment", TOKEN_COMMENT_END: "end of comment", TOKEN_COMMENT: "comment", @@ -177,41 +177,41 @@ def _describe_token_type(token_type: str) -> str: TOKEN_LINESTATEMENT_END: "end of line statement", TOKEN_DATA: "template data / text", TOKEN_EOF: "end of template", - }.get(token_type, token_type) - - + }.get(token_type, token_type) + + def describe_token(token: "Token") -> str: - """Returns a description of the token.""" + """Returns a description of the token.""" if token.type == TOKEN_NAME: - return token.value + return token.value + + return _describe_token_type(token.type) + - return _describe_token_type(token.type) - - def describe_token_expr(expr: str) -> str: - """Like `describe_token` but for token expressions.""" + """Like `describe_token` but for token expressions.""" if ":" in expr: type, value = expr.split(":", 1) if type == TOKEN_NAME: - return value - else: - type = expr + return value + else: + 
type = expr + + return _describe_token_type(type) + - return _describe_token_type(type) - - def count_newlines(value: str) -> int: - """Count the number of newline characters in the string. This is - useful for extensions that filter a stream. - """ - return len(newline_re.findall(value)) - - + """Count the number of newline characters in the string. This is + useful for extensions that filter a stream. + """ + return len(newline_re.findall(value)) + + def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]: - """Compiles all the rules from the environment into a list of rules.""" - e = re.escape - rules = [ + """Compiles all the rules from the environment into a list of rules.""" + e = re.escape + rules = [ ( len(environment.comment_start_string), TOKEN_COMMENT_BEGIN, @@ -227,9 +227,9 @@ def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]: TOKEN_VARIABLE_BEGIN, e(environment.variable_start_string), ), - ] - - if environment.line_statement_prefix is not None: + ] + + if environment.line_statement_prefix is not None: rules.append( ( len(environment.line_statement_prefix), @@ -237,7 +237,7 @@ def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]: r"^[ \t\v]*" + e(environment.line_statement_prefix), ) ) - if environment.line_comment_prefix is not None: + if environment.line_comment_prefix is not None: rules.append( ( len(environment.line_comment_prefix), @@ -245,25 +245,25 @@ def compile_rules(environment: "Environment") -> t.List[t.Tuple[str, str]]: r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix), ) ) - - return [x[1:] for x in sorted(rules, reverse=True)] - - + + return [x[1:] for x in sorted(rules, reverse=True)] + + class Failure: - """Class that raises a `TemplateSyntaxError` if called. - Used by the `Lexer` to specify known errors. - """ - + """Class that raises a `TemplateSyntaxError` if called. + Used by the `Lexer` to specify known errors. + """ + def __init__( self, message: str, cls: t.Type[TemplateSyntaxError] = TemplateSyntaxError ) -> None: - self.message = message - self.error_class = cls - + self.message = message + self.error_class = cls + def __call__(self, lineno: int, filename: str) -> "te.NoReturn": - raise self.error_class(self.message, lineno, filename) - - + raise self.error_class(self.message, lineno, filename) + + class Token(t.NamedTuple): lineno: int type: str @@ -271,141 +271,141 @@ class Token(t.NamedTuple): def __str__(self) -> str: return describe_token(self) - + def test(self, expr: str) -> bool: - """Test a token against a token expression. This can either be a - token type or ``'token_type:token_value'``. This can only test - against string values and types. - """ - # here we do a regular string equality check as test_any is usually - # passed an iterable of not interned strings. - if self.type == expr: - return True + """Test a token against a token expression. This can either be a + token type or ``'token_type:token_value'``. This can only test + against string values and types. + """ + # here we do a regular string equality check as test_any is usually + # passed an iterable of not interned strings. 
+ if self.type == expr: + return True if ":" in expr: return expr.split(":", 1) == [self.type, self.value] - return False - + return False + def test_any(self, *iterable: str) -> bool: - """Test against multiple token expressions.""" + """Test against multiple token expressions.""" return any(self.test(expr) for expr in iterable) - - + + class TokenStreamIterator: - """The iterator for tokenstreams. Iterate over the stream - until the eof token is reached. - """ - + """The iterator for tokenstreams. Iterate over the stream + until the eof token is reached. + """ + def __init__(self, stream: "TokenStream") -> None: - self.stream = stream - + self.stream = stream + def __iter__(self) -> "TokenStreamIterator": - return self - + return self + def __next__(self) -> Token: - token = self.stream.current + token = self.stream.current - if token.type is TOKEN_EOF: - self.stream.close() + if token.type is TOKEN_EOF: + self.stream.close() raise StopIteration - next(self.stream) - return token - - + next(self.stream) + return token + + class TokenStream: - """A token stream is an iterable that yields :class:`Token`\\s. The - parser however does not iterate over it but calls :meth:`next` to go - one token ahead. The current active token is stored as :attr:`current`. - """ - + """A token stream is an iterable that yields :class:`Token`\\s. The + parser however does not iterate over it but calls :meth:`next` to go + one token ahead. The current active token is stored as :attr:`current`. + """ + def __init__( self, generator: t.Iterable[Token], name: t.Optional[str], filename: t.Optional[str], ): - self._iter = iter(generator) + self._iter = iter(generator) self._pushed: "te.Deque[Token]" = deque() - self.name = name - self.filename = filename - self.closed = False + self.name = name + self.filename = filename + self.closed = False self.current = Token(1, TOKEN_INITIAL, "") - next(self) - + next(self) + def __iter__(self) -> TokenStreamIterator: - return TokenStreamIterator(self) - + return TokenStreamIterator(self) + def __bool__(self) -> bool: - return bool(self._pushed) or self.current.type is not TOKEN_EOF + return bool(self._pushed) or self.current.type is not TOKEN_EOF @property def eos(self) -> bool: """Are we at the end of the stream?""" return not self - + def push(self, token: Token) -> None: - """Push a token back to the stream.""" - self._pushed.append(token) - + """Push a token back to the stream.""" + self._pushed.append(token) + def look(self) -> Token: - """Look at the next token.""" - old_token = next(self) - result = self.current - self.push(result) - self.current = old_token - return result - + """Look at the next token.""" + old_token = next(self) + result = self.current + self.push(result) + self.current = old_token + return result + def skip(self, n: int = 1) -> None: - """Got n tokens ahead.""" + """Got n tokens ahead.""" for _ in range(n): - next(self) - + next(self) + def next_if(self, expr: str) -> t.Optional[Token]: - """Perform the token test and return the token if it matched. - Otherwise the return value is `None`. - """ - if self.current.test(expr): - return next(self) - + """Perform the token test and return the token if it matched. + Otherwise the return value is `None`. 
+ """ + if self.current.test(expr): + return next(self) + return None def skip_if(self, expr: str) -> bool: - """Like :meth:`next_if` but only returns `True` or `False`.""" - return self.next_if(expr) is not None - + """Like :meth:`next_if` but only returns `True` or `False`.""" + return self.next_if(expr) is not None + def __next__(self) -> Token: - """Go one token ahead and return the old one. - - Use the built-in :func:`next` instead of calling this directly. - """ - rv = self.current - - if self._pushed: - self.current = self._pushed.popleft() - elif self.current.type is not TOKEN_EOF: - try: - self.current = next(self._iter) - except StopIteration: - self.close() - - return rv - + """Go one token ahead and return the old one. + + Use the built-in :func:`next` instead of calling this directly. + """ + rv = self.current + + if self._pushed: + self.current = self._pushed.popleft() + elif self.current.type is not TOKEN_EOF: + try: + self.current = next(self._iter) + except StopIteration: + self.close() + + return rv + def close(self) -> None: - """Close the stream.""" + """Close the stream.""" self.current = Token(self.current.lineno, TOKEN_EOF, "") self._iter = iter(()) - self.closed = True - + self.closed = True + def expect(self, expr: str) -> Token: - """Expect a given token type and return it. This accepts the same - argument as :meth:`jinja2.lexer.Token.test`. - """ - if not self.current.test(expr): - expr = describe_token_expr(expr) + """Expect a given token type and return it. This accepts the same + argument as :meth:`jinja2.lexer.Token.test`. + """ + if not self.current.test(expr): + expr = describe_token_expr(expr) - if self.current.type is TOKEN_EOF: + if self.current.type is TOKEN_EOF: raise TemplateSyntaxError( f"unexpected end of template, expected {expr!r}.", self.current.lineno, @@ -419,12 +419,12 @@ class TokenStream: self.name, self.filename, ) - + return next(self) - + def get_lexer(environment: "Environment") -> "Lexer": - """Return a lexer which is probably cached.""" + """Return a lexer which is probably cached.""" key = ( environment.block_start_string, environment.block_end_string, @@ -439,14 +439,14 @@ def get_lexer(environment: "Environment") -> "Lexer": environment.newline_sequence, environment.keep_trailing_newline, ) - lexer = _lexer_cache.get(key) + lexer = _lexer_cache.get(key) - if lexer is None: + if lexer is None: _lexer_cache[key] = lexer = Lexer(environment) - return lexer - - + return lexer + + class OptionalLStrip(tuple): """A special tuple for marking a point in the state that can have lstrip applied. @@ -467,21 +467,21 @@ class _Rule(t.NamedTuple): class Lexer: - """Class that implements a lexer for a given environment. Automatically - created by the environment class, usually you don't have to do that. - - Note that the lexer is not automatically bound to an environment. - Multiple environments can share the same lexer. - """ - + """Class that implements a lexer for a given environment. Automatically + created by the environment class, usually you don't have to do that. + + Note that the lexer is not automatically bound to an environment. + Multiple environments can share the same lexer. 
+ """ + def __init__(self, environment: "Environment") -> None: - # shortcuts - e = re.escape - + # shortcuts + e = re.escape + def c(x: str) -> t.Pattern[str]: return re.compile(x, re.M | re.S) - # lexing rules for tags + # lexing rules for tags tag_rules: t.List[_Rule] = [ _Rule(whitespace_re, TOKEN_WHITESPACE, None), _Rule(float_re, TOKEN_FLOAT, None), @@ -489,31 +489,31 @@ class Lexer: _Rule(name_re, TOKEN_NAME, None), _Rule(string_re, TOKEN_STRING, None), _Rule(operator_re, TOKEN_OPERATOR, None), - ] - - # assemble the root lexing rule. because "|" is ungreedy - # we have to sort by length so that the lexer continues working - # as expected when we have parsing rules like <% for block and - # <%= for variables. (if someone wants asp like syntax) - # variables are just part of the rules if variable processing - # is required. - root_tag_rules = compile_rules(environment) - + ] + + # assemble the root lexing rule. because "|" is ungreedy + # we have to sort by length so that the lexer continues working + # as expected when we have parsing rules like <% for block and + # <%= for variables. (if someone wants asp like syntax) + # variables are just part of the rules if variable processing + # is required. + root_tag_rules = compile_rules(environment) + block_start_re = e(environment.block_start_string) block_end_re = e(environment.block_end_string) comment_end_re = e(environment.comment_end_string) variable_end_re = e(environment.variable_end_string) - # block suffix if trimming is enabled + # block suffix if trimming is enabled block_suffix_re = "\\n?" if environment.trim_blocks else "" - + # If lstrip is enabled, it should not be applied if there is any # non-whitespace between the newline and block. self.lstrip_unless_re = c(r"[^ \t]") if environment.lstrip_blocks else None - - self.newline_sequence = environment.newline_sequence - self.keep_trailing_newline = environment.keep_trailing_newline - + + self.newline_sequence = environment.newline_sequence + self.keep_trailing_newline = environment.keep_trailing_newline + root_raw_re = ( fr"(?P<raw_begin>{block_start_re}(\-|\+|)\s*raw\s*" fr"(?:\-{block_end_re}\s*|{block_end_re}))" @@ -522,20 +522,20 @@ class Lexer: [root_raw_re] + [fr"(?P<{n}>{r}(\-|\+|))" for n, r in root_tag_rules] ) - # global lexing rules + # global lexing rules self.rules: t.Dict[str, t.List[_Rule]] = { "root": [ - # directives + # directives _Rule( c(fr"(.*?)(?:{root_parts_re})"), OptionalLStrip(TOKEN_DATA, "#bygroup"), # type: ignore "#bygroup", ), - # data + # data _Rule(c(".+"), TOKEN_DATA, None), - ], - # comments - TOKEN_COMMENT_BEGIN: [ + ], + # comments + TOKEN_COMMENT_BEGIN: [ _Rule( c( fr"(.*?)((?:\+{comment_end_re}|\-{comment_end_re}\s*" @@ -545,9 +545,9 @@ class Lexer: "#pop", ), _Rule(c(r"(.)"), (Failure("Missing end of comment tag"),), None), - ], - # blocks - TOKEN_BLOCK_BEGIN: [ + ], + # blocks + TOKEN_BLOCK_BEGIN: [ _Rule( c( fr"(?:\+{block_end_re}|\-{block_end_re}\s*" @@ -558,8 +558,8 @@ class Lexer: ), ] + tag_rules, - # variables - TOKEN_VARIABLE_BEGIN: [ + # variables + TOKEN_VARIABLE_BEGIN: [ _Rule( c(fr"\-{variable_end_re}\s*|{variable_end_re}"), TOKEN_VARIABLE_END, @@ -567,8 +567,8 @@ class Lexer: ) ] + tag_rules, - # raw block - TOKEN_RAW_BEGIN: [ + # raw block + TOKEN_RAW_BEGIN: [ _Rule( c( fr"(.*?)((?:{block_start_re}(\-|\+|))\s*endraw\s*" @@ -579,28 +579,28 @@ class Lexer: "#pop", ), _Rule(c(r"(.)"), (Failure("Missing end of raw directive"),), None), - ], - # line statements - TOKEN_LINESTATEMENT_BEGIN: [ + ], + # line statements + 
TOKEN_LINESTATEMENT_BEGIN: [ _Rule(c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop") ] + tag_rules, - # line comments - TOKEN_LINECOMMENT_BEGIN: [ + # line comments + TOKEN_LINECOMMENT_BEGIN: [ _Rule( c(r"(.*?)()(?=\n|$)"), (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END), "#pop", ) ], - } - + } + def _normalize_newlines(self, value: str) -> str: """Replace all newlines with the configured sequence in strings and template data. """ - return newline_re.sub(self.newline_sequence, value) - + return newline_re.sub(self.newline_sequence, value) + def tokenize( self, source: str, @@ -609,21 +609,21 @@ class Lexer: state: t.Optional[str] = None, ) -> TokenStream: """Calls tokeniter + tokenize and wraps it in a token stream.""" - stream = self.tokeniter(source, name, filename, state) - return TokenStream(self.wrap(stream, name, filename), name, filename) - + stream = self.tokeniter(source, name, filename, state) + return TokenStream(self.wrap(stream, name, filename), name, filename) + def wrap( self, stream: t.Iterable[t.Tuple[int, str, str]], name: t.Optional[str] = None, filename: t.Optional[str] = None, ) -> t.Iterator[Token]: - """This is called with the stream as returned by `tokenize` and wraps - every token in a :class:`Token` and converts the value. - """ + """This is called with the stream as returned by `tokenize` and wraps + every token in a :class:`Token` and converts the value. + """ for lineno, token, value_str in stream: - if token in ignored_tokens: - continue + if token in ignored_tokens: + continue value: t.Any = value_str @@ -631,9 +631,9 @@ class Lexer: token = TOKEN_BLOCK_BEGIN elif token == TOKEN_LINESTATEMENT_END: token = TOKEN_BLOCK_END - # we are not interested in those tokens in the parser + # we are not interested in those tokens in the parser elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END): - continue + continue elif token == TOKEN_DATA: value = self._normalize_newlines(value_str) elif token == "keyword": @@ -642,18 +642,18 @@ class Lexer: value = value_str if not value.isidentifier(): - raise TemplateSyntaxError( + raise TemplateSyntaxError( "Invalid character in identifier", lineno, name, filename ) elif token == TOKEN_STRING: - # try to unescape string - try: + # try to unescape string + try: value = ( self._normalize_newlines(value_str[1:-1]) .encode("ascii", "backslashreplace") .decode("unicode-escape") ) - except Exception as e: + except Exception as e: msg = str(e).split(":")[-1].strip() raise TemplateSyntaxError(msg, lineno, name, filename) from e elif token == TOKEN_INTEGER: @@ -664,8 +664,8 @@ class Lexer: elif token == TOKEN_OPERATOR: token = operators[value_str] - yield Token(lineno, token, value) - + yield Token(lineno, token, value) + def tokeniter( self, source: str, @@ -673,56 +673,56 @@ class Lexer: filename: t.Optional[str] = None, state: t.Optional[str] = None, ) -> t.Iterator[t.Tuple[int, str, str]]: - """This method tokenizes the text and returns the tokens in a + """This method tokenizes the text and returns the tokens in a generator. Use this method if you just want to tokenize a template. .. versionchanged:: 3.0 Only ``\\n``, ``\\r\\n`` and ``\\r`` are treated as line breaks. 
- """ + """ lines = newline_re.split(source)[::2] if not self.keep_trailing_newline and lines[-1] == "": del lines[-1] source = "\n".join(lines) - pos = 0 - lineno = 1 + pos = 0 + lineno = 1 stack = ["root"] if state is not None and state != "root": assert state in ("variable", "block"), "invalid state" stack.append(state + "_begin") - statetokens = self.rules[stack[-1]] - source_length = len(source) + statetokens = self.rules[stack[-1]] + source_length = len(source) balancing_stack: t.List[str] = [] lstrip_unless_re = self.lstrip_unless_re newlines_stripped = 0 line_starting = True - + while True: - # tokenizer loop - for regex, tokens, new_state in statetokens: - m = regex.match(source, pos) - - # if no match we try again with the next rule - if m is None: - continue - - # we only match blocks and variables if braces / parentheses - # are balanced. continue parsing with the lower rule which - # is the operator rule. do this only if the end tags look - # like operators + # tokenizer loop + for regex, tokens, new_state in statetokens: + m = regex.match(source, pos) + + # if no match we try again with the next rule + if m is None: + continue + + # we only match blocks and variables if braces / parentheses + # are balanced. continue parsing with the lower rule which + # is the operator rule. do this only if the end tags look + # like operators if balancing_stack and tokens in ( TOKEN_VARIABLE_END, TOKEN_BLOCK_END, TOKEN_LINESTATEMENT_END, ): - continue - - # tuples support more options - if isinstance(tokens, tuple): + continue + + # tuples support more options + if isinstance(tokens, tuple): groups = m.groups() if isinstance(tokens, OptionalLStrip): @@ -756,39 +756,39 @@ class Lexer: if not lstrip_unless_re.search(text, l_pos): groups = [text[:l_pos], *groups[1:]] - for idx, token in enumerate(tokens): - # failure group - if token.__class__ is Failure: - raise token(lineno, filename) - # bygroup is a bit more complex, in that case we - # yield for the current token the first named - # group that matched + for idx, token in enumerate(tokens): + # failure group + if token.__class__ is Failure: + raise token(lineno, filename) + # bygroup is a bit more complex, in that case we + # yield for the current token the first named + # group that matched elif token == "#bygroup": for key, value in m.groupdict().items(): - if value is not None: - yield lineno, key, value + if value is not None: + yield lineno, key, value lineno += value.count("\n") - break - else: + break + else: raise RuntimeError( f"{regex!r} wanted to resolve the token dynamically" " but no group matched" ) - # normal group - else: + # normal group + else: data = groups[idx] - if data or token not in ignore_if_empty: - yield lineno, token, data + if data or token not in ignore_if_empty: + yield lineno, token, data lineno += data.count("\n") + newlines_stripped newlines_stripped = 0 - - # strings as token just are yielded as it. - else: - data = m.group() - # update brace/parentheses balance + # strings as token just are yielded as it. 
+ else: + data = m.group() + + # update brace/parentheses balance if tokens == TOKEN_OPERATOR: if data == "{": balancing_stack.append("}") @@ -797,14 +797,14 @@ class Lexer: elif data == "[": balancing_stack.append("]") elif data in ("}", ")", "]"): - if not balancing_stack: + if not balancing_stack: raise TemplateSyntaxError( f"unexpected '{data}'", lineno, name, filename ) - expected_op = balancing_stack.pop() + expected_op = balancing_stack.pop() - if expected_op != data: + if expected_op != data: raise TemplateSyntaxError( f"unexpected '{data}', expected '{expected_op}'", lineno, @@ -812,58 +812,58 @@ class Lexer: filename, ) - # yield items - if data or tokens not in ignore_if_empty: - yield lineno, tokens, data + # yield items + if data or tokens not in ignore_if_empty: + yield lineno, tokens, data lineno += data.count("\n") - + line_starting = m.group()[-1:] == "\n" - # fetch new position into new variable so that we can check - # if there is a internal parsing error which would result - # in an infinite loop - pos2 = m.end() - - # handle state changes - if new_state is not None: - # remove the uppermost state + # fetch new position into new variable so that we can check + # if there is a internal parsing error which would result + # in an infinite loop + pos2 = m.end() + + # handle state changes + if new_state is not None: + # remove the uppermost state if new_state == "#pop": - stack.pop() - # resolve the new state by group checking + stack.pop() + # resolve the new state by group checking elif new_state == "#bygroup": for key, value in m.groupdict().items(): - if value is not None: - stack.append(key) - break - else: + if value is not None: + stack.append(key) + break + else: raise RuntimeError( f"{regex!r} wanted to resolve the new state dynamically" f" but no group matched" ) - # direct state name given - else: - stack.append(new_state) - - statetokens = self.rules[stack[-1]] - # we are still at the same position and no stack change. - # this means a loop without break condition, avoid that and - # raise error - elif pos2 == pos: + # direct state name given + else: + stack.append(new_state) + + statetokens = self.rules[stack[-1]] + # we are still at the same position and no stack change. + # this means a loop without break condition, avoid that and + # raise error + elif pos2 == pos: raise RuntimeError( f"{regex!r} yielded empty string without stack change" ) - # publish new function and start again - pos = pos2 - break - # if loop terminated without break we haven't found a single match - # either we are at the end of the file or we have a problem - else: - # end of text - if pos >= source_length: - return - - # something went wrong + # publish new function and start again + pos = pos2 + break + # if loop terminated without break we haven't found a single match + # either we are at the end of the file or we have a problem + else: + # end of text + if pos >= source_length: + return + + # something went wrong raise TemplateSyntaxError( f"unexpected char {source[pos]!r} at {pos}", lineno, name, filename ) |
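
The file touched by this commit is Jinja2's template lexer (`jinja2/lexer.py`); the hunks above only restore authorship annotations, so each `-`/`+` pair differs in whitespace only. For reference, the sketch below shows one way the machinery defined in this file is normally driven through the public `jinja2.Environment` API. It is an illustrative sketch, not part of this commit: the template string and the `name` variable are made up, and `Environment.lex()` / `Environment.lexer` are assumed to behave as in upstream Jinja2 3.x.

```python
from jinja2 import Environment

env = Environment()
source = "Hello {{ name }}!"

# Environment.lex() exposes Lexer.tokeniter(): raw (lineno, token_type, value)
# tuples, before they are wrapped into Token namedtuples by Lexer.wrap().
for lineno, token_type, value in env.lex(source):
    print(lineno, token_type, repr(value))

# Lexer.tokenize() wraps the same output in a TokenStream, which the parser
# drives with next() and the Token.test()/TokenStream.expect() helpers above.
stream = env.lexer.tokenize(source)
while not stream.eos:
    token = next(stream)
    if token.test("name:name"):
        print("variable 'name' referenced on line", token.lineno)
```

The first loop prints every raw token (template data, `variable_begin`, the `name` token, and so on); the second shows the stream interface the parser actually consumes, stopping once the `eof` token makes `stream.eos` true.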