summary refs log tree commit diff stats
path: root/contrib/tools/python3/Lib/tokenize.py
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tools/python3/Lib/tokenize.py')
-rw-r--r-- contrib/tools/python3/Lib/tokenize.py 32
1 files changed, 22 insertions, 10 deletions
diff --git a/contrib/tools/python3/Lib/tokenize.py b/contrib/tools/python3/Lib/tokenize.py
index 553c1ca4388..7ca552c4fc5 100644
--- a/contrib/tools/python3/Lib/tokenize.py
+++ b/contrib/tools/python3/Lib/tokenize.py
@@ -41,7 +41,7 @@ blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
import token
__all__ = token.__all__ + ["tokenize", "generate_tokens", "detect_encoding",
- "untokenize", "TokenInfo"]
+ "untokenize", "TokenInfo", "open", "TokenError"]
del token
class TokenInfo(collections.namedtuple('TokenInfo', 'type string start end line')):
@@ -162,8 +162,6 @@ tabsize = 8
class TokenError(Exception): pass
-class StopTokenizing(Exception): pass
-
class Untokenizer:
def __init__(self):
@@ -171,6 +169,7 @@ class Untokenizer:
self.prev_row = 1
self.prev_col = 0
self.prev_type = None
+ self.prev_line = ""
self.encoding = None
def add_whitespace(self, start):
@@ -178,14 +177,28 @@ class Untokenizer:
if row < self.prev_row or row == self.prev_row and col < self.prev_col:
raise ValueError("start ({},{}) precedes previous end ({},{})"
.format(row, col, self.prev_row, self.prev_col))
- row_offset = row - self.prev_row
- if row_offset:
- self.tokens.append("\\\n" * row_offset)
- self.prev_col = 0
+ self.add_backslash_continuation(start)
col_offset = col - self.prev_col
if col_offset:
self.tokens.append(" " * col_offset)
+ def add_backslash_continuation(self, start):
+ """Add backslash continuation characters if the row has increased
+ without encountering a newline token.
+
+ This also inserts the correct amount of whitespace before the backslash.
+ """
+ row = start[0]
+ row_offset = row - self.prev_row
+ if row_offset == 0:
+ return
+
+ newline = '\r\n' if self.prev_line.endswith('\r\n') else '\n'
+ line = self.prev_line.rstrip('\\\r\n')
+ ws = ''.join(_itertools.takewhile(str.isspace, reversed(line)))
+ self.tokens.append(ws + f"\\{newline}" * row_offset)
+ self.prev_col = 0
+
def escape_brackets(self, token):
characters = []
consume_until_next_bracket = False
@@ -245,8 +258,6 @@ class Untokenizer:
end_line, end_col = end
extra_chars = last_line.count("{{") + last_line.count("}}")
end = (end_line, end_col + extra_chars)
- elif tok_type in (STRING, FSTRING_START) and self.prev_type in (STRING, FSTRING_END):
- self.tokens.append(" ")
self.add_whitespace(start)
self.tokens.append(token)
@@ -255,6 +266,7 @@ class Untokenizer:
self.prev_row += 1
self.prev_col = 0
self.prev_type = tok_type
+ self.prev_line = line
return "".join(self.tokens)
def compat(self, token, iterable):
@@ -333,7 +345,7 @@ def untokenize(iterable):
def _get_normal_name(orig_enc):
- """Imitates get_normal_name in tokenizer.c."""
+ """Imitates get_normal_name in Parser/tokenizer/helpers.c."""
# Only care about the first 12 characters.
enc = orig_enc[:12].lower().replace("_", "-")
if enc == "utf-8" or enc.startswith("utf-8-"):