diff options
Diffstat (limited to 'contrib/tools/python3/Lib/tokenize.py')
| -rw-r--r-- | contrib/tools/python3/Lib/tokenize.py | 32 |
1 files changed, 22 insertions, 10 deletions
diff --git a/contrib/tools/python3/Lib/tokenize.py b/contrib/tools/python3/Lib/tokenize.py index 553c1ca4388..7ca552c4fc5 100644 --- a/contrib/tools/python3/Lib/tokenize.py +++ b/contrib/tools/python3/Lib/tokenize.py @@ -41,7 +41,7 @@ blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) import token __all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding", - "untokenize", "TokenInfo"] + "untokenize", "TokenInfo", "open", "TokenError"] del token class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')): @@ -162,8 +162,6 @@ tabsize = 8 class TokenError(Exception): pass -class StopTokenizing(Exception): pass - class Untokenizer: def __init__(self): @@ -171,6 +169,7 @@ class Untokenizer: self.prev_row = 1 self.prev_col = 0 self.prev_type = None + self.prev_line = "" self.encoding = None def add_whitespace(self, start): @@ -178,14 +177,28 @@ class Untokenizer: if row < self.prev_row or row == self.prev_row and col < self.prev_col: raise ValueError("start ({},{}) precedes previous end ({},{})" .format(row, col, self.prev_row, self.prev_col)) - row_offset = row - self.prev_row - if row_offset: - self.tokens.append("\\\n" * row_offset) - self.prev_col = 0 + self.add_backslash_continuation(start) col_offset = col - self.prev_col if col_offset: self.tokens.append(" " * col_offset) + def add_backslash_continuation(self, start): + """Add backslash continuation characters if the row has increased + without encountering a newline token. + + This also inserts the correct amount of whitespace before the backslash. + """ + row = start[0] + row_offset = row - self.prev_row + if row_offset == 0: + return + + newline = '\r\n' if self.prev_line.endswith('\r\n') else '\n' + line = self.prev_line.rstrip('\\\r\n') + ws = ''.join(_itertools.takewhile(str.isspace, reversed(line))) + self.tokens.append(ws + f"\\{newline}" * row_offset) + self.prev_col = 0 + def escape_brackets(self, token): characters = [] consume_until_next_bracket = False @@ -245,8 +258,6 @@ class Untokenizer: end_line, end_col = end extra_chars = last_line.count("{{") + last_line.count("}}") end = (end_line, end_col + extra_chars) - elif tok_type in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END): - self.tokens.append(" ") self.add_whitespace(start) self.tokens.append(token) @@ -255,6 +266,7 @@ class Untokenizer: self.prev_row += 1 self.prev_col = 0 self.prev_type = tok_type + self.prev_line = line return "".join(self.tokens) def compat(self, token, iterable): @@ -333,7 +345,7 @@ def untokenize(iterable): def _get_normal_name(orig_enc): - """Imitates get_normal_name in tokenizer.c.""" + """Imitates get_normal_name in Parser/tokenizer/helpers.c.""" # Only care about the first 12 characters. enc = orig_enc[:12].lower().replace("_", "-") if enc == "utf-8" or enc.startswith("utf-8-"): |
