diff options
| author | shadchin <[email protected]> | 2026-02-07 19:56:35 +0300 |
|---|---|---|
| committer | shadchin <[email protected]> | 2026-02-07 20:23:53 +0300 |
| commit | 19d43a3e6fb4cb8ea11747d7d7bca7a3542fbb44 (patch) | |
| tree | 0b1418938140a0b6470953bef6069454ffdf1bd0 /contrib/tools/python3/Lib/email | |
| parent | 0879409bfc0891ab8103828a3bdbf0e960475fec (diff) | |
Update Python 3 to 3.13.12
commit_hash:71d3efea437a769b2b7910d196120bb02587046e
Diffstat (limited to 'contrib/tools/python3/Lib/email')
| -rw-r--r-- | contrib/tools/python3/Lib/email/_encoded_words.py | 2 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/_header_value_parser.py | 50 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/feedparser.py | 7 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/generator.py | 12 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/headerregistry.py | 14 |
5 files changed, 78 insertions, 7 deletions
diff --git a/contrib/tools/python3/Lib/email/_encoded_words.py b/contrib/tools/python3/Lib/email/_encoded_words.py index 6795a606de0..05a34a4c105 100644 --- a/contrib/tools/python3/Lib/email/_encoded_words.py +++ b/contrib/tools/python3/Lib/email/_encoded_words.py @@ -219,7 +219,7 @@ def encode(string, charset='utf-8', encoding=None, lang=''): """ if charset == 'unknown-8bit': - bstring = string.encode('ascii', 'surrogateescape') + bstring = string.encode('utf-8', 'surrogateescape') else: bstring = string.encode(charset) if encoding is None: diff --git a/contrib/tools/python3/Lib/email/_header_value_parser.py b/contrib/tools/python3/Lib/email/_header_value_parser.py index 91243378dc0..51727688c05 100644 --- a/contrib/tools/python3/Lib/email/_header_value_parser.py +++ b/contrib/tools/python3/Lib/email/_header_value_parser.py @@ -101,6 +101,12 @@ def make_quoted_pairs(value): return str(value).replace('\\', '\\\\').replace('"', '\\"') +def make_parenthesis_pairs(value): + """Escape parenthesis and backslash for use within a comment.""" + return str(value).replace('\\', '\\\\') \ + .replace('(', '\\(').replace(')', '\\)') + + def quote_string(value): escaped = make_quoted_pairs(value) return f'"{escaped}"' @@ -874,6 +880,12 @@ class MessageID(MsgID): class InvalidMessageID(MessageID): token_type = 'invalid-message-id' +class MessageIDList(TokenList): + token_type = 'message-id-list' + + @property + def message_ids(self): + return [x for x in self if x.token_type=='msg-id'] class Header(TokenList): token_type = 'header' @@ -933,7 +945,7 @@ class WhiteSpaceTerminal(Terminal): return ' ' def startswith_fws(self): - return True + return self and self[0] in WSP class ValueTerminal(Terminal): @@ -2171,6 +2183,32 @@ def parse_message_id(value): return message_id +def parse_message_ids(value): + """in-reply-to = "In-Reply-To:" 1*msg-id CRLF + references = "References:" 1*msg-id CRLF + """ + message_id_list = MessageIDList() + while value: + if value[0] == ',': + # message id list separated with commas - this is invalid, + # but happens rather frequently in the wild + message_id_list.defects.append( + errors.InvalidHeaderDefect("comma in msg-id list")) + message_id_list.append( + WhiteSpaceTerminal(' ', 'invalid-comma-replacement')) + value = value[1:] + continue + try: + token, value = get_msg_id(value) + message_id_list.append(token) + except errors.HeaderParseError as ex: + token = get_unstructured(value) + message_id_list.append(InvalidMessageID(token)) + message_id_list.defects.append( + errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex))) + break + return message_id_list + # # XXX: As I begin to add additional header parsers, I'm realizing we probably # have two level of parser routines: the get_XXX methods that get a token in @@ -2788,6 +2826,9 @@ def _steal_trailing_WSP_if_exists(lines): if lines and lines[-1] and lines[-1][-1] in WSP: wsp = lines[-1][-1] lines[-1] = lines[-1][:-1] + # gh-142006: if the line is now empty, remove it entirely. + if not lines[-1]: + lines.pop() return wsp def _refold_parse_tree(parse_tree, *, policy): @@ -2924,6 +2965,13 @@ def _refold_parse_tree(parse_tree, *, policy): [ValueTerminal(make_quoted_pairs(p), 'ptext') for p in newparts] + [ValueTerminal('"', 'ptext')]) + if part.token_type == 'comment': + newparts = ( + [ValueTerminal('(', 'ptext')] + + [ValueTerminal(make_parenthesis_pairs(p), 'ptext') + if p.token_type == 'ptext' else p + for p in newparts] + + [ValueTerminal(')', 'ptext')]) if not part.as_ew_allowed: wrap_as_ew_blocked += 1 newparts.append(end_ew_not_allowed) diff --git a/contrib/tools/python3/Lib/email/feedparser.py b/contrib/tools/python3/Lib/email/feedparser.py index bc773f38030..8e60f1d1181 100644 --- a/contrib/tools/python3/Lib/email/feedparser.py +++ b/contrib/tools/python3/Lib/email/feedparser.py @@ -504,10 +504,9 @@ class FeedParser: self._input.unreadline(line) return else: - # Weirdly placed unix-from line. Note this as a defect - # and ignore it. + # Weirdly placed unix-from line. defect = errors.MisplacedEnvelopeHeaderDefect(line) - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue # Split the line on the colon separating field name from value. # There will always be a colon, because if there wasn't the part of @@ -519,7 +518,7 @@ class FeedParser: # message. Track the error but keep going. if i == 0: defect = errors.InvalidHeaderDefect("Missing header name.") - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue assert i>0, "_parse_headers fed line with no : and no leading WS" diff --git a/contrib/tools/python3/Lib/email/generator.py b/contrib/tools/python3/Lib/email/generator.py index ce94f5c56fe..a03eb1fbbc9 100644 --- a/contrib/tools/python3/Lib/email/generator.py +++ b/contrib/tools/python3/Lib/email/generator.py @@ -22,6 +22,7 @@ NL = '\n' # XXX: no longer used by the code below. NLCRE = re.compile(r'\r\n|\r|\n') fcre = re.compile(r'^From ', re.MULTILINE) NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') +NEWLINE_WITHOUT_FWSP_BYTES = re.compile(br'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') class Generator: @@ -429,7 +430,16 @@ class BytesGenerator(Generator): # This is almost the same as the string version, except for handling # strings with 8bit bytes. for h, v in msg.raw_items(): - self._fp.write(self.policy.fold_binary(h, v)) + folded = self.policy.fold_binary(h, v) + if self.policy.verify_generated_headers: + linesep = self.policy.linesep.encode() + if not folded.endswith(linesep): + raise HeaderWriteError( + f'folded header does not end with {linesep!r}: {folded!r}') + if NEWLINE_WITHOUT_FWSP_BYTES.search(folded.removesuffix(linesep)): + raise HeaderWriteError( + f'folded header contains newline: {folded!r}') + self._fp.write(folded) # A blank line always separates headers from body self.write(self._NL) diff --git a/contrib/tools/python3/Lib/email/headerregistry.py b/contrib/tools/python3/Lib/email/headerregistry.py index 543141dc427..0e8698efc0b 100644 --- a/contrib/tools/python3/Lib/email/headerregistry.py +++ b/contrib/tools/python3/Lib/email/headerregistry.py @@ -534,6 +534,18 @@ class MessageIDHeader: kwds['defects'].extend(parse_tree.all_defects) +class ReferencesHeader: + + max_count = 1 + value_parser = staticmethod(parser.parse_message_ids) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + + # The header factory # _default_header_map = { @@ -557,6 +569,8 @@ _default_header_map = { 'content-disposition': ContentDispositionHeader, 'content-transfer-encoding': ContentTransferEncodingHeader, 'message-id': MessageIDHeader, + 'in-reply-to': ReferencesHeader, + 'references': ReferencesHeader, } class HeaderRegistry: |
