diff options
author | floatdrop <floatdrop@yandex-team.ru> | 2022-02-10 16:47:15 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:47:15 +0300 |
commit | 4267de875ca703ff841f2e025723dadc78f3cc02 (patch) | |
tree | 9814fbd1c3effac9b8377c5d604b367b14e2db55 /contrib/python/Jinja2/py2/jinja2/lexer.py | |
parent | e63b84f1d39557d9e46ac380b1f388271894293c (diff) | |
download | ydb-4267de875ca703ff841f2e025723dadc78f3cc02.tar.gz |
Restoring authorship annotation for <floatdrop@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/python/Jinja2/py2/jinja2/lexer.py')
-rw-r--r-- | contrib/python/Jinja2/py2/jinja2/lexer.py | 820 |
1 files changed, 410 insertions, 410 deletions
diff --git a/contrib/python/Jinja2/py2/jinja2/lexer.py b/contrib/python/Jinja2/py2/jinja2/lexer.py index 075484b88b..552356a12d 100644 --- a/contrib/python/Jinja2/py2/jinja2/lexer.py +++ b/contrib/python/Jinja2/py2/jinja2/lexer.py @@ -1,26 +1,26 @@ -# -*- coding: utf-8 -*- +# -*- coding: utf-8 -*- """Implements a Jinja / Python combination lexer. The ``Lexer`` class is used to do some preprocessing. It filters out invalid operators like the bitshift operators we don't allow in templates. It separates template code and python code in expressions. -""" -import re +""" +import re from ast import literal_eval -from collections import deque -from operator import itemgetter - +from collections import deque +from operator import itemgetter + from ._compat import implements_iterator from ._compat import intern from ._compat import iteritems from ._compat import text_type from .exceptions import TemplateSyntaxError from .utils import LRUCache - -# cache for the lexers. Exists in order to be able to have multiple -# environments with the same lexer -_lexer_cache = LRUCache(50) - -# static regular expressions + +# cache for the lexers. Exists in order to be able to have multiple +# environments with the same lexer +_lexer_cache = LRUCache(50) + +# static regular expressions whitespace_re = re.compile(r"\s+", re.U) newline_re = re.compile(r"(\r\n|\r|\n)") string_re = re.compile( @@ -40,22 +40,22 @@ float_re = re.compile( """, re.IGNORECASE | re.VERBOSE, ) - -try: - # check if this Python supports Unicode identifiers + +try: + # check if this Python supports Unicode identifiers compile("föö", "<unknown>", "eval") -except SyntaxError: +except SyntaxError: # Python 2, no Unicode support, use ASCII identifiers name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*") - check_ident = False -else: + check_ident = False +else: # Unicode support, import generated re pattern and set flag to use # str.isidentifier to validate during lexing. from ._identifier import pattern as name_re - check_ident = True - -# internal the tokens and keep references to them + check_ident = True + +# internal the tokens and keep references to them TOKEN_ADD = intern("add") TOKEN_ASSIGN = intern("assign") TOKEN_COLON = intern("colon") @@ -105,9 +105,9 @@ TOKEN_LINECOMMENT = intern("linecomment") TOKEN_DATA = intern("data") TOKEN_INITIAL = intern("initial") TOKEN_EOF = intern("eof") - -# bind operators to token types -operators = { + +# bind operators to token types +operators = { "+": TOKEN_ADD, "-": TOKEN_SUB, "/": TOKEN_DIV, @@ -134,14 +134,14 @@ operators = { "|": TOKEN_PIPE, ",": TOKEN_COMMA, ";": TOKEN_SEMICOLON, -} - -reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) +} + +reverse_operators = dict([(v, k) for k, v in iteritems(operators)]) assert len(operators) == len(reverse_operators), "operators dropped" operator_re = re.compile( "(%s)" % "|".join(re.escape(x) for x in sorted(operators, key=lambda x: -len(x))) ) - + ignored_tokens = frozenset( [ TOKEN_COMMENT_BEGIN, @@ -156,12 +156,12 @@ ignored_tokens = frozenset( ignore_if_empty = frozenset( [TOKEN_WHITESPACE, TOKEN_DATA, TOKEN_COMMENT, TOKEN_LINECOMMENT] ) - - -def _describe_token_type(token_type): - if token_type in reverse_operators: - return reverse_operators[token_type] - return { + + +def _describe_token_type(token_type): + if token_type in reverse_operators: + return reverse_operators[token_type] + return { TOKEN_COMMENT_BEGIN: "begin of comment", TOKEN_COMMENT_END: "end of comment", TOKEN_COMMENT: "comment", @@ -174,38 +174,38 @@ def _describe_token_type(token_type): TOKEN_LINESTATEMENT_END: "end of line statement", TOKEN_DATA: "template data / text", TOKEN_EOF: "end of template", - }.get(token_type, token_type) - - -def describe_token(token): - """Returns a description of the token.""" + }.get(token_type, token_type) + + +def describe_token(token): + """Returns a description of the token.""" if token.type == TOKEN_NAME: - return token.value - return _describe_token_type(token.type) - - -def describe_token_expr(expr): - """Like `describe_token` but for token expressions.""" + return token.value + return _describe_token_type(token.type) + + +def describe_token_expr(expr): + """Like `describe_token` but for token expressions.""" if ":" in expr: type, value = expr.split(":", 1) if type == TOKEN_NAME: - return value - else: - type = expr - return _describe_token_type(type) - - -def count_newlines(value): - """Count the number of newline characters in the string. This is - useful for extensions that filter a stream. - """ - return len(newline_re.findall(value)) - - -def compile_rules(environment): - """Compiles all the rules from the environment into a list of rules.""" - e = re.escape - rules = [ + return value + else: + type = expr + return _describe_token_type(type) + + +def count_newlines(value): + """Count the number of newline characters in the string. This is + useful for extensions that filter a stream. + """ + return len(newline_re.findall(value)) + + +def compile_rules(environment): + """Compiles all the rules from the environment into a list of rules.""" + e = re.escape + rules = [ ( len(environment.comment_start_string), TOKEN_COMMENT_BEGIN, @@ -221,9 +221,9 @@ def compile_rules(environment): TOKEN_VARIABLE_BEGIN, e(environment.variable_start_string), ), - ] - - if environment.line_statement_prefix is not None: + ] + + if environment.line_statement_prefix is not None: rules.append( ( len(environment.line_statement_prefix), @@ -231,7 +231,7 @@ def compile_rules(environment): r"^[ \t\v]*" + e(environment.line_statement_prefix), ) ) - if environment.line_comment_prefix is not None: + if environment.line_comment_prefix is not None: rules.append( ( len(environment.line_comment_prefix), @@ -239,169 +239,169 @@ def compile_rules(environment): r"(?:^|(?<=\S))[^\S\r\n]*" + e(environment.line_comment_prefix), ) ) - - return [x[1:] for x in sorted(rules, reverse=True)] - - -class Failure(object): - """Class that raises a `TemplateSyntaxError` if called. - Used by the `Lexer` to specify known errors. - """ - - def __init__(self, message, cls=TemplateSyntaxError): - self.message = message - self.error_class = cls - - def __call__(self, lineno, filename): - raise self.error_class(self.message, lineno, filename) - - -class Token(tuple): - """Token class.""" - - __slots__ = () - lineno, type, value = (property(itemgetter(x)) for x in range(3)) - - def __new__(cls, lineno, type, value): - return tuple.__new__(cls, (lineno, intern(str(type)), value)) - - def __str__(self): - if self.type in reverse_operators: - return reverse_operators[self.type] + + return [x[1:] for x in sorted(rules, reverse=True)] + + +class Failure(object): + """Class that raises a `TemplateSyntaxError` if called. + Used by the `Lexer` to specify known errors. + """ + + def __init__(self, message, cls=TemplateSyntaxError): + self.message = message + self.error_class = cls + + def __call__(self, lineno, filename): + raise self.error_class(self.message, lineno, filename) + + +class Token(tuple): + """Token class.""" + + __slots__ = () + lineno, type, value = (property(itemgetter(x)) for x in range(3)) + + def __new__(cls, lineno, type, value): + return tuple.__new__(cls, (lineno, intern(str(type)), value)) + + def __str__(self): + if self.type in reverse_operators: + return reverse_operators[self.type] elif self.type == "name": - return self.value - return self.type - - def test(self, expr): - """Test a token against a token expression. This can either be a - token type or ``'token_type:token_value'``. This can only test - against string values and types. - """ - # here we do a regular string equality check as test_any is usually - # passed an iterable of not interned strings. - if self.type == expr: - return True + return self.value + return self.type + + def test(self, expr): + """Test a token against a token expression. This can either be a + token type or ``'token_type:token_value'``. This can only test + against string values and types. + """ + # here we do a regular string equality check as test_any is usually + # passed an iterable of not interned strings. + if self.type == expr: + return True elif ":" in expr: return expr.split(":", 1) == [self.type, self.value] - return False - - def test_any(self, *iterable): - """Test against multiple token expressions.""" - for expr in iterable: - if self.test(expr): - return True - return False - - def __repr__(self): + return False + + def test_any(self, *iterable): + """Test against multiple token expressions.""" + for expr in iterable: + if self.test(expr): + return True + return False + + def __repr__(self): return "Token(%r, %r, %r)" % (self.lineno, self.type, self.value) - - -@implements_iterator -class TokenStreamIterator(object): - """The iterator for tokenstreams. Iterate over the stream - until the eof token is reached. - """ - - def __init__(self, stream): - self.stream = stream - - def __iter__(self): - return self - - def __next__(self): - token = self.stream.current - if token.type is TOKEN_EOF: - self.stream.close() - raise StopIteration() - next(self.stream) - return token - - -@implements_iterator -class TokenStream(object): - """A token stream is an iterable that yields :class:`Token`\\s. The - parser however does not iterate over it but calls :meth:`next` to go - one token ahead. The current active token is stored as :attr:`current`. - """ - - def __init__(self, generator, name, filename): - self._iter = iter(generator) - self._pushed = deque() - self.name = name - self.filename = filename - self.closed = False + + +@implements_iterator +class TokenStreamIterator(object): + """The iterator for tokenstreams. Iterate over the stream + until the eof token is reached. + """ + + def __init__(self, stream): + self.stream = stream + + def __iter__(self): + return self + + def __next__(self): + token = self.stream.current + if token.type is TOKEN_EOF: + self.stream.close() + raise StopIteration() + next(self.stream) + return token + + +@implements_iterator +class TokenStream(object): + """A token stream is an iterable that yields :class:`Token`\\s. The + parser however does not iterate over it but calls :meth:`next` to go + one token ahead. The current active token is stored as :attr:`current`. + """ + + def __init__(self, generator, name, filename): + self._iter = iter(generator) + self._pushed = deque() + self.name = name + self.filename = filename + self.closed = False self.current = Token(1, TOKEN_INITIAL, "") - next(self) - - def __iter__(self): - return TokenStreamIterator(self) - - def __bool__(self): - return bool(self._pushed) or self.current.type is not TOKEN_EOF - - __nonzero__ = __bool__ # py2 - + next(self) + + def __iter__(self): + return TokenStreamIterator(self) + + def __bool__(self): + return bool(self._pushed) or self.current.type is not TOKEN_EOF + + __nonzero__ = __bool__ # py2 + @property def eos(self): """Are we at the end of the stream?""" return not self - - def push(self, token): - """Push a token back to the stream.""" - self._pushed.append(token) - - def look(self): - """Look at the next token.""" - old_token = next(self) - result = self.current - self.push(result) - self.current = old_token - return result - - def skip(self, n=1): - """Got n tokens ahead.""" + + def push(self, token): + """Push a token back to the stream.""" + self._pushed.append(token) + + def look(self): + """Look at the next token.""" + old_token = next(self) + result = self.current + self.push(result) + self.current = old_token + return result + + def skip(self, n=1): + """Got n tokens ahead.""" for _ in range(n): - next(self) - - def next_if(self, expr): - """Perform the token test and return the token if it matched. - Otherwise the return value is `None`. - """ - if self.current.test(expr): - return next(self) - - def skip_if(self, expr): - """Like :meth:`next_if` but only returns `True` or `False`.""" - return self.next_if(expr) is not None - - def __next__(self): - """Go one token ahead and return the old one. - - Use the built-in :func:`next` instead of calling this directly. - """ - rv = self.current - if self._pushed: - self.current = self._pushed.popleft() - elif self.current.type is not TOKEN_EOF: - try: - self.current = next(self._iter) - except StopIteration: - self.close() - return rv - - def close(self): - """Close the stream.""" + next(self) + + def next_if(self, expr): + """Perform the token test and return the token if it matched. + Otherwise the return value is `None`. + """ + if self.current.test(expr): + return next(self) + + def skip_if(self, expr): + """Like :meth:`next_if` but only returns `True` or `False`.""" + return self.next_if(expr) is not None + + def __next__(self): + """Go one token ahead and return the old one. + + Use the built-in :func:`next` instead of calling this directly. + """ + rv = self.current + if self._pushed: + self.current = self._pushed.popleft() + elif self.current.type is not TOKEN_EOF: + try: + self.current = next(self._iter) + except StopIteration: + self.close() + return rv + + def close(self): + """Close the stream.""" self.current = Token(self.current.lineno, TOKEN_EOF, "") - self._iter = None - self.closed = True - - def expect(self, expr): - """Expect a given token type and return it. This accepts the same - argument as :meth:`jinja2.lexer.Token.test`. - """ - if not self.current.test(expr): - expr = describe_token_expr(expr) - if self.current.type is TOKEN_EOF: + self._iter = None + self.closed = True + + def expect(self, expr): + """Expect a given token type and return it. This accepts the same + argument as :meth:`jinja2.lexer.Token.test`. + """ + if not self.current.test(expr): + expr = describe_token_expr(expr) + if self.current.type is TOKEN_EOF: raise TemplateSyntaxError( "unexpected end of template, expected %r." % expr, self.current.lineno, @@ -414,14 +414,14 @@ class TokenStream(object): self.name, self.filename, ) - try: - return self.current - finally: - next(self) - - -def get_lexer(environment): - """Return a lexer which is probably cached.""" + try: + return self.current + finally: + next(self) + + +def get_lexer(environment): + """Return a lexer which is probably cached.""" key = ( environment.block_start_string, environment.block_end_string, @@ -436,13 +436,13 @@ def get_lexer(environment): environment.newline_sequence, environment.keep_trailing_newline, ) - lexer = _lexer_cache.get(key) - if lexer is None: - lexer = Lexer(environment) - _lexer_cache[key] = lexer - return lexer - - + lexer = _lexer_cache.get(key) + if lexer is None: + lexer = Lexer(environment) + _lexer_cache[key] = lexer + return lexer + + class OptionalLStrip(tuple): """A special tuple for marking a point in the state that can have lstrip applied. @@ -456,53 +456,53 @@ class OptionalLStrip(tuple): return super(OptionalLStrip, cls).__new__(cls, members) -class Lexer(object): - """Class that implements a lexer for a given environment. Automatically - created by the environment class, usually you don't have to do that. - - Note that the lexer is not automatically bound to an environment. - Multiple environments can share the same lexer. - """ - - def __init__(self, environment): - # shortcuts - e = re.escape - +class Lexer(object): + """Class that implements a lexer for a given environment. Automatically + created by the environment class, usually you don't have to do that. + + Note that the lexer is not automatically bound to an environment. + Multiple environments can share the same lexer. + """ + + def __init__(self, environment): + # shortcuts + e = re.escape + def c(x): return re.compile(x, re.M | re.S) - # lexing rules for tags - tag_rules = [ - (whitespace_re, TOKEN_WHITESPACE, None), - (float_re, TOKEN_FLOAT, None), - (integer_re, TOKEN_INTEGER, None), - (name_re, TOKEN_NAME, None), - (string_re, TOKEN_STRING, None), + # lexing rules for tags + tag_rules = [ + (whitespace_re, TOKEN_WHITESPACE, None), + (float_re, TOKEN_FLOAT, None), + (integer_re, TOKEN_INTEGER, None), + (name_re, TOKEN_NAME, None), + (string_re, TOKEN_STRING, None), (operator_re, TOKEN_OPERATOR, None), - ] - - # assemble the root lexing rule. because "|" is ungreedy - # we have to sort by length so that the lexer continues working - # as expected when we have parsing rules like <% for block and - # <%= for variables. (if someone wants asp like syntax) - # variables are just part of the rules if variable processing - # is required. - root_tag_rules = compile_rules(environment) - - # block suffix if trimming is enabled + ] + + # assemble the root lexing rule. because "|" is ungreedy + # we have to sort by length so that the lexer continues working + # as expected when we have parsing rules like <% for block and + # <%= for variables. (if someone wants asp like syntax) + # variables are just part of the rules if variable processing + # is required. + root_tag_rules = compile_rules(environment) + + # block suffix if trimming is enabled block_suffix_re = environment.trim_blocks and "\\n?" or "" - + # If lstrip is enabled, it should not be applied if there is any # non-whitespace between the newline and block. self.lstrip_unless_re = c(r"[^ \t]") if environment.lstrip_blocks else None - - self.newline_sequence = environment.newline_sequence - self.keep_trailing_newline = environment.keep_trailing_newline - - # global lexing rules - self.rules = { + + self.newline_sequence = environment.newline_sequence + self.keep_trailing_newline = environment.keep_trailing_newline + + # global lexing rules + self.rules = { "root": [ - # directives + # directives ( c( "(.*?)(?:%s)" @@ -524,11 +524,11 @@ class Lexer(object): OptionalLStrip(TOKEN_DATA, "#bygroup"), "#bygroup", ), - # data + # data (c(".+"), TOKEN_DATA, None), - ], - # comments - TOKEN_COMMENT_BEGIN: [ + ], + # comments + TOKEN_COMMENT_BEGIN: [ ( c( r"(.*?)((?:\-%s\s*|%s)%s)" @@ -542,9 +542,9 @@ class Lexer(object): "#pop", ), (c("(.)"), (Failure("Missing end of comment tag"),), None), - ], - # blocks - TOKEN_BLOCK_BEGIN: [ + ], + # blocks + TOKEN_BLOCK_BEGIN: [ ( c( r"(?:\-%s\s*|%s)%s" @@ -559,8 +559,8 @@ class Lexer(object): ), ] + tag_rules, - # variables - TOKEN_VARIABLE_BEGIN: [ + # variables + TOKEN_VARIABLE_BEGIN: [ ( c( r"\-%s\s*|%s" @@ -574,8 +574,8 @@ class Lexer(object): ) ] + tag_rules, - # raw block - TOKEN_RAW_BEGIN: [ + # raw block + TOKEN_RAW_BEGIN: [ ( c( r"(.*?)((?:%s(\-|\+|))\s*endraw\s*(?:\-%s\s*|%s%s))" @@ -590,121 +590,121 @@ class Lexer(object): "#pop", ), (c("(.)"), (Failure("Missing end of raw directive"),), None), - ], - # line statements - TOKEN_LINESTATEMENT_BEGIN: [ + ], + # line statements + TOKEN_LINESTATEMENT_BEGIN: [ (c(r"\s*(\n|$)"), TOKEN_LINESTATEMENT_END, "#pop") ] + tag_rules, - # line comments - TOKEN_LINECOMMENT_BEGIN: [ + # line comments + TOKEN_LINECOMMENT_BEGIN: [ ( c(r"(.*?)()(?=\n|$)"), (TOKEN_LINECOMMENT, TOKEN_LINECOMMENT_END), "#pop", ) ], - } - - def _normalize_newlines(self, value): - """Called for strings and template data to normalize it to unicode.""" - return newline_re.sub(self.newline_sequence, value) - - def tokenize(self, source, name=None, filename=None, state=None): + } + + def _normalize_newlines(self, value): + """Called for strings and template data to normalize it to unicode.""" + return newline_re.sub(self.newline_sequence, value) + + def tokenize(self, source, name=None, filename=None, state=None): """Calls tokeniter + tokenize and wraps it in a token stream.""" - stream = self.tokeniter(source, name, filename, state) - return TokenStream(self.wrap(stream, name, filename), name, filename) - - def wrap(self, stream, name=None, filename=None): - """This is called with the stream as returned by `tokenize` and wraps - every token in a :class:`Token` and converts the value. - """ - for lineno, token, value in stream: - if token in ignored_tokens: - continue + stream = self.tokeniter(source, name, filename, state) + return TokenStream(self.wrap(stream, name, filename), name, filename) + + def wrap(self, stream, name=None, filename=None): + """This is called with the stream as returned by `tokenize` and wraps + every token in a :class:`Token` and converts the value. + """ + for lineno, token, value in stream: + if token in ignored_tokens: + continue elif token == TOKEN_LINESTATEMENT_BEGIN: token = TOKEN_BLOCK_BEGIN elif token == TOKEN_LINESTATEMENT_END: token = TOKEN_BLOCK_END - # we are not interested in those tokens in the parser + # we are not interested in those tokens in the parser elif token in (TOKEN_RAW_BEGIN, TOKEN_RAW_END): - continue + continue elif token == TOKEN_DATA: - value = self._normalize_newlines(value) + value = self._normalize_newlines(value) elif token == "keyword": - token = value + token = value elif token == TOKEN_NAME: - value = str(value) - if check_ident and not value.isidentifier(): - raise TemplateSyntaxError( + value = str(value) + if check_ident and not value.isidentifier(): + raise TemplateSyntaxError( "Invalid character in identifier", lineno, name, filename ) elif token == TOKEN_STRING: - # try to unescape string - try: + # try to unescape string + try: value = ( self._normalize_newlines(value[1:-1]) .encode("ascii", "backslashreplace") .decode("unicode-escape") ) - except Exception as e: + except Exception as e: msg = str(e).split(":")[-1].strip() - raise TemplateSyntaxError(msg, lineno, name, filename) + raise TemplateSyntaxError(msg, lineno, name, filename) elif token == TOKEN_INTEGER: value = int(value.replace("_", "")) elif token == TOKEN_FLOAT: # remove all "_" first to support more Python versions value = literal_eval(value.replace("_", "")) elif token == TOKEN_OPERATOR: - token = operators[value] - yield Token(lineno, token, value) - - def tokeniter(self, source, name, filename=None, state=None): - """This method tokenizes the text and returns the tokens in a - generator. Use this method if you just want to tokenize a template. - """ - source = text_type(source) - lines = source.splitlines() - if self.keep_trailing_newline and source: + token = operators[value] + yield Token(lineno, token, value) + + def tokeniter(self, source, name, filename=None, state=None): + """This method tokenizes the text and returns the tokens in a + generator. Use this method if you just want to tokenize a template. + """ + source = text_type(source) + lines = source.splitlines() + if self.keep_trailing_newline and source: for newline in ("\r\n", "\r", "\n"): - if source.endswith(newline): + if source.endswith(newline): lines.append("") - break + break source = "\n".join(lines) - pos = 0 - lineno = 1 + pos = 0 + lineno = 1 stack = ["root"] if state is not None and state != "root": assert state in ("variable", "block"), "invalid state" stack.append(state + "_begin") - statetokens = self.rules[stack[-1]] - source_length = len(source) - balancing_stack = [] + statetokens = self.rules[stack[-1]] + source_length = len(source) + balancing_stack = [] lstrip_unless_re = self.lstrip_unless_re newlines_stripped = 0 line_starting = True - - while 1: - # tokenizer loop - for regex, tokens, new_state in statetokens: - m = regex.match(source, pos) - # if no match we try again with the next rule - if m is None: - continue - - # we only match blocks and variables if braces / parentheses - # are balanced. continue parsing with the lower rule which - # is the operator rule. do this only if the end tags look - # like operators + + while 1: + # tokenizer loop + for regex, tokens, new_state in statetokens: + m = regex.match(source, pos) + # if no match we try again with the next rule + if m is None: + continue + + # we only match blocks and variables if braces / parentheses + # are balanced. continue parsing with the lower rule which + # is the operator rule. do this only if the end tags look + # like operators if balancing_stack and tokens in ( TOKEN_VARIABLE_END, TOKEN_BLOCK_END, TOKEN_LINESTATEMENT_END, ): - continue - - # tuples support more options - if isinstance(tokens, tuple): + continue + + # tuples support more options + if isinstance(tokens, tuple): groups = m.groups() if isinstance(tokens, OptionalLStrip): @@ -738,37 +738,37 @@ class Lexer(object): if not lstrip_unless_re.search(text, l_pos): groups = (text[:l_pos],) + groups[1:] - for idx, token in enumerate(tokens): - # failure group - if token.__class__ is Failure: - raise token(lineno, filename) - # bygroup is a bit more complex, in that case we - # yield for the current token the first named - # group that matched + for idx, token in enumerate(tokens): + # failure group + if token.__class__ is Failure: + raise token(lineno, filename) + # bygroup is a bit more complex, in that case we + # yield for the current token the first named + # group that matched elif token == "#bygroup": - for key, value in iteritems(m.groupdict()): - if value is not None: - yield lineno, key, value + for key, value in iteritems(m.groupdict()): + if value is not None: + yield lineno, key, value lineno += value.count("\n") - break - else: + break + else: raise RuntimeError( "%r wanted to resolve " "the token dynamically" " but no group matched" % regex ) - # normal group - else: + # normal group + else: data = groups[idx] - if data or token not in ignore_if_empty: - yield lineno, token, data + if data or token not in ignore_if_empty: + yield lineno, token, data lineno += data.count("\n") + newlines_stripped newlines_stripped = 0 - - # strings as token just are yielded as it. - else: - data = m.group() - # update brace/parentheses balance + + # strings as token just are yielded as it. + else: + data = m.group() + # update brace/parentheses balance if tokens == TOKEN_OPERATOR: if data == "{": balancing_stack.append("}") @@ -777,12 +777,12 @@ class Lexer(object): elif data == "[": balancing_stack.append("]") elif data in ("}", ")", "]"): - if not balancing_stack: + if not balancing_stack: raise TemplateSyntaxError( "unexpected '%s'" % data, lineno, name, filename ) - expected_op = balancing_stack.pop() - if expected_op != data: + expected_op = balancing_stack.pop() + if expected_op != data: raise TemplateSyntaxError( "unexpected '%s', " "expected '%s'" % (data, expected_op), @@ -790,56 +790,56 @@ class Lexer(object): name, filename, ) - # yield items - if data or tokens not in ignore_if_empty: - yield lineno, tokens, data + # yield items + if data or tokens not in ignore_if_empty: + yield lineno, tokens, data lineno += data.count("\n") - + line_starting = m.group()[-1:] == "\n" - # fetch new position into new variable so that we can check - # if there is a internal parsing error which would result - # in an infinite loop - pos2 = m.end() - - # handle state changes - if new_state is not None: - # remove the uppermost state + # fetch new position into new variable so that we can check + # if there is a internal parsing error which would result + # in an infinite loop + pos2 = m.end() + + # handle state changes + if new_state is not None: + # remove the uppermost state if new_state == "#pop": - stack.pop() - # resolve the new state by group checking + stack.pop() + # resolve the new state by group checking elif new_state == "#bygroup": - for key, value in iteritems(m.groupdict()): - if value is not None: - stack.append(key) - break - else: + for key, value in iteritems(m.groupdict()): + if value is not None: + stack.append(key) + break + else: raise RuntimeError( "%r wanted to resolve the " "new state dynamically but" " no group matched" % regex ) - # direct state name given - else: - stack.append(new_state) - statetokens = self.rules[stack[-1]] - # we are still at the same position and no stack change. - # this means a loop without break condition, avoid that and - # raise error - elif pos2 == pos: + # direct state name given + else: + stack.append(new_state) + statetokens = self.rules[stack[-1]] + # we are still at the same position and no stack change. + # this means a loop without break condition, avoid that and + # raise error + elif pos2 == pos: raise RuntimeError( "%r yielded empty string without stack change" % regex ) - # publish new function and start again - pos = pos2 - break - # if loop terminated without break we haven't found a single match - # either we are at the end of the file or we have a problem - else: - # end of text - if pos >= source_length: - return - # something went wrong + # publish new function and start again + pos = pos2 + break + # if loop terminated without break we haven't found a single match + # either we are at the end of the file or we have a problem + else: + # end of text + if pos >= source_length: + return + # something went wrong raise TemplateSyntaxError( "unexpected char %r at %d" % (source[pos], pos), lineno, |