diff options
Diffstat (limited to 'contrib/tools/python3/Lib/email')
| -rw-r--r-- | contrib/tools/python3/Lib/email/_encoded_words.py | 2 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/_header_value_parser.py | 139 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/_parseaddr.py | 18 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/_policybase.py | 2 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/contentmanager.py | 12 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/feedparser.py | 26 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/generator.py | 14 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/header.py | 17 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/headerregistry.py | 14 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/message.py | 32 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/mime/audio.py | 11 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/parser.py | 14 | ||||
| -rw-r--r-- | contrib/tools/python3/Lib/email/utils.py | 22 |
13 files changed, 215 insertions, 108 deletions
diff --git a/contrib/tools/python3/Lib/email/_encoded_words.py b/contrib/tools/python3/Lib/email/_encoded_words.py index 6795a606de0..05a34a4c105 100644 --- a/contrib/tools/python3/Lib/email/_encoded_words.py +++ b/contrib/tools/python3/Lib/email/_encoded_words.py @@ -219,7 +219,7 @@ def encode(string, charset='utf-8', encoding=None, lang=''): """ if charset == 'unknown-8bit': - bstring = string.encode('ascii', 'surrogateescape') + bstring = string.encode('utf-8', 'surrogateescape') else: bstring = string.encode(charset) if encoding is None: diff --git a/contrib/tools/python3/Lib/email/_header_value_parser.py b/contrib/tools/python3/Lib/email/_header_value_parser.py index 3d845c09d41..03fedd99539 100644 --- a/contrib/tools/python3/Lib/email/_header_value_parser.py +++ b/contrib/tools/python3/Lib/email/_header_value_parser.py @@ -80,7 +80,8 @@ from email import utils # Useful constants and functions # -WSP = set(' \t') +_WSP = ' \t' +WSP = set(_WSP) CFWS_LEADER = WSP | set('(') SPECIALS = set(r'()<>@,:;.\"[]') ATOM_ENDS = SPECIALS | WSP @@ -101,6 +102,12 @@ def make_quoted_pairs(value): return str(value).replace('\\', '\\\\').replace('"', '\\"') +def make_parenthesis_pairs(value): + """Escape parenthesis and backslash for use within a comment.""" + return str(value).replace('\\', '\\\\') \ + .replace('(', '\\(').replace(')', '\\)') + + def quote_string(value): escaped = make_quoted_pairs(value) return f'"{escaped}"' @@ -874,6 +881,12 @@ class MessageID(MsgID): class InvalidMessageID(MessageID): token_type = 'invalid-message-id' +class MessageIDList(TokenList): + token_type = 'message-id-list' + + @property + def message_ids(self): + return [x for x in self if x.token_type=='msg-id'] class Header(TokenList): token_type = 'header' @@ -933,7 +946,7 @@ class WhiteSpaceTerminal(Terminal): return ' ' def startswith_fws(self): - return True + return self and self[0] in WSP class ValueTerminal(Terminal): @@ -1020,6 +1033,8 @@ def _get_ptext_to_endchars(value, endchars): a 
flag that is True iff there were any quoted printables decoded. """ + if not value: + return '', '', False fragment, *remainder = _wsp_splitter(value, 1) vchars = [] escape = False @@ -1053,7 +1068,7 @@ def get_fws(value): fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws') return fws, newvalue -def get_encoded_word(value): +def get_encoded_word(value, terminal_type='vtext'): """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?=" """ @@ -1092,7 +1107,7 @@ def get_encoded_word(value): ew.append(token) continue chars, *remainder = _wsp_splitter(text, 1) - vtext = ValueTerminal(chars, 'vtext') + vtext = ValueTerminal(chars, terminal_type) _validate_xtext(vtext) ew.append(vtext) text = ''.join(remainder) @@ -1134,7 +1149,7 @@ def get_unstructured(value): valid_ew = True if value.startswith('=?'): try: - token, value = get_encoded_word(value) + token, value = get_encoded_word(value, 'utext') except _InvalidEwError: valid_ew = False except errors.HeaderParseError: @@ -1163,7 +1178,7 @@ def get_unstructured(value): # the parser to go in an infinite loop. 
if valid_ew and rfc2047_matcher.search(tok): tok, *remainder = value.partition('=?') - vtext = ValueTerminal(tok, 'vtext') + vtext = ValueTerminal(tok, 'utext') _validate_xtext(vtext) unstructured.append(vtext) value = ''.join(remainder) @@ -1573,7 +1588,7 @@ def get_dtext(value): def _check_for_early_dl_end(value, domain_literal): if value: return False - domain_literal.append(errors.InvalidHeaderDefect( + domain_literal.defects.append(errors.InvalidHeaderDefect( "end of input inside domain-literal")) domain_literal.append(ValueTerminal(']', 'domain-literal-end')) return True @@ -1592,9 +1607,9 @@ def get_domain_literal(value): raise errors.HeaderParseError("expected '[' at start of domain-literal " "but found '{}'".format(value)) value = value[1:] + domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if _check_for_early_dl_end(value, domain_literal): return domain_literal, value - domain_literal.append(ValueTerminal('[', 'domain-literal-start')) if value[0] in WSP: token, value = get_fws(value) domain_literal.append(token) @@ -2169,6 +2184,32 @@ def parse_message_id(value): return message_id +def parse_message_ids(value): + """in-reply-to = "In-Reply-To:" 1*msg-id CRLF + references = "References:" 1*msg-id CRLF + """ + message_id_list = MessageIDList() + while value: + if value[0] == ',': + # message id list separated with commas - this is invalid, + # but happens rather frequently in the wild + message_id_list.defects.append( + errors.InvalidHeaderDefect("comma in msg-id list")) + message_id_list.append( + WhiteSpaceTerminal(' ', 'invalid-comma-replacement')) + value = value[1:] + continue + try: + token, value = get_msg_id(value) + message_id_list.append(token) + except errors.HeaderParseError as ex: + token = get_unstructured(value) + message_id_list.append(InvalidMessageID(token)) + message_id_list.defects.append( + errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex))) + break + return message_id_list + # # XXX: As I begin to add 
additional header parsers, I'm realizing we probably # have two level of parser routines: the get_XXX methods that get a token in @@ -2786,8 +2827,12 @@ def _steal_trailing_WSP_if_exists(lines): if lines and lines[-1] and lines[-1][-1] in WSP: wsp = lines[-1][-1] lines[-1] = lines[-1][:-1] + # gh-142006: if the line is now empty, remove it entirely. + if not lines[-1]: + lines.pop() return wsp + def _refold_parse_tree(parse_tree, *, policy): """Return string of contents of parse_tree folded according to RFC rules. @@ -2796,11 +2841,9 @@ def _refold_parse_tree(parse_tree, *, policy): maxlen = policy.max_line_length or sys.maxsize encoding = 'utf-8' if policy.utf8 else 'us-ascii' lines = [''] # Folded lines to be output - leading_whitespace = '' # When we have whitespace between two encoded - # words, we may need to encode the whitespace - # at the beginning of the second word. - last_ew = None # Points to the last encoded character if there's an ew on - # the line + last_word_is_ew = False + last_ew = None # if there is an encoded word in the last line of lines, + # points to the encoded word's first character last_charset = None wrap_as_ew_blocked = 0 want_encoding = False # This is set to True if we need to encode this part @@ -2813,7 +2856,7 @@ def _refold_parse_tree(parse_tree, *, policy): continue tstr = str(part) if not want_encoding: - if part.token_type == 'ptext': + if part.token_type in ('ptext', 'vtext'): # Encode if tstr contains special characters. want_encoding = not SPECIALSNL.isdisjoint(tstr) else: @@ -2835,6 +2878,7 @@ def _refold_parse_tree(parse_tree, *, policy): if part.token_type == 'mime-parameters': # Mime parameter folding (using RFC2231) is extra special. _fold_mime_parameters(part, lines, maxlen, encoding) + last_word_is_ew = False continue if want_encoding and not wrap_as_ew_blocked: @@ -2851,6 +2895,7 @@ def _refold_parse_tree(parse_tree, *, policy): # XXX what if encoded_part has no leading FWS? 
lines.append(newline) lines[-1] += encoded_part + last_word_is_ew = False continue # Either this is not a major syntactic break, so we don't # want it on a line by itself even if it fits, or it @@ -2869,11 +2914,16 @@ def _refold_parse_tree(parse_tree, *, policy): (last_charset == 'unknown-8bit' or last_charset == 'utf-8' and charset != 'us-ascii')): last_ew = None - last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew, - part.ew_combine_allowed, charset, leading_whitespace) - # This whitespace has been added to the lines in _fold_as_ew() - # so clear it now. - leading_whitespace = '' + last_ew = _fold_as_ew( + tstr, + lines, + maxlen, + last_ew, + part.ew_combine_allowed, + charset, + last_word_is_ew, + ) + last_word_is_ew = True last_charset = charset want_encoding = False continue @@ -2886,28 +2936,19 @@ def _refold_parse_tree(parse_tree, *, policy): if len(tstr) <= maxlen - len(lines[-1]): lines[-1] += tstr + last_word_is_ew = last_word_is_ew and not bool(tstr.strip(_WSP)) continue # This part is too long to fit. The RFC wants us to break at # "major syntactic breaks", so unless we don't consider this # to be one, check if it will fit on the next line by itself. - leading_whitespace = '' if (part.syntactic_break and len(tstr) + 1 <= maxlen): newline = _steal_trailing_WSP_if_exists(lines) if newline or part.startswith_fws(): - # We're going to fold the data onto a new line here. Due to - # the way encoded strings handle continuation lines, we need to - # be prepared to encode any whitespace if the next line turns - # out to start with an encoded word. 
lines.append(newline + tstr) - - whitespace_accumulator = [] - for char in lines[-1]: - if char not in WSP: - break - whitespace_accumulator.append(char) - leading_whitespace = ''.join(whitespace_accumulator) + last_word_is_ew = (last_word_is_ew + and not bool(lines[-1].strip(_WSP))) last_ew = None continue if not hasattr(part, 'encode'): @@ -2922,6 +2963,13 @@ def _refold_parse_tree(parse_tree, *, policy): [ValueTerminal(make_quoted_pairs(p), 'ptext') for p in newparts] + [ValueTerminal('"', 'ptext')]) + if part.token_type == 'comment': + newparts = ( + [ValueTerminal('(', 'ptext')] + + [ValueTerminal(make_parenthesis_pairs(p), 'ptext') + if p.token_type == 'ptext' else p + for p in newparts] + + [ValueTerminal(')', 'ptext')]) if not part.as_ew_allowed: wrap_as_ew_blocked += 1 newparts.append(end_ew_not_allowed) @@ -2940,10 +2988,11 @@ def _refold_parse_tree(parse_tree, *, policy): else: # We can't fold it onto the next line either... lines[-1] += tstr + last_word_is_ew = last_word_is_ew and not bool(tstr.strip(_WSP)) return policy.linesep.join(lines) + policy.linesep -def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, leading_whitespace): +def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, last_word_is_ew): """Fold string to_encode into lines as encoded word, combining if allowed. Return the new value for last_ew, or None if ew_combine_allowed is False. @@ -2958,6 +3007,16 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, to_encode = str( get_unstructured(lines[-1][last_ew:] + to_encode)) lines[-1] = lines[-1][:last_ew] + elif last_word_is_ew: + # If we are following up an encoded word with another encoded word, + # any white space between the two will be ignored when decoded. + # Therefore, we encode all to-be-displayed whitespace in the second + # encoded word. 
+ len_without_wsp = len(lines[-1].rstrip(_WSP)) + leading_whitespace = lines[-1][len_without_wsp:] + lines[-1] = (lines[-1][:len_without_wsp] + + (' ' if leading_whitespace else '')) + to_encode = leading_whitespace + to_encode elif to_encode[0] in WSP: # We're joining this to non-encoded text, so don't encode # the leading blank. @@ -2986,20 +3045,13 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, while to_encode: remaining_space = maxlen - len(lines[-1]) - text_space = remaining_space - chrome_len - len(leading_whitespace) + text_space = remaining_space - chrome_len if text_space <= 0: - lines.append(' ') + newline = _steal_trailing_WSP_if_exists(lines) + lines.append(newline or ' ') + new_last_ew = len(lines[-1]) continue - # If we are at the start of a continuation line, prepend whitespace - # (we only want to do this when the line starts with an encoded word - # but if we're folding in this helper function, then we know that we - # are going to be writing out an encoded word.) 
- if len(lines) > 1 and len(lines[-1]) == 1 and leading_whitespace: - encoded_word = _ew.encode(leading_whitespace, charset=encode_as) - lines[-1] += encoded_word - leading_whitespace = '' - to_encode_word = to_encode[:text_space] encoded_word = _ew.encode(to_encode_word, charset=encode_as) excess = len(encoded_word) - remaining_space @@ -3011,7 +3063,6 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, excess = len(encoded_word) - remaining_space lines[-1] += encoded_word to_encode = to_encode[len(to_encode_word):] - leading_whitespace = '' if to_encode: lines.append(' ') diff --git a/contrib/tools/python3/Lib/email/_parseaddr.py b/contrib/tools/python3/Lib/email/_parseaddr.py index febe411355d..565af0cf361 100644 --- a/contrib/tools/python3/Lib/email/_parseaddr.py +++ b/contrib/tools/python3/Lib/email/_parseaddr.py @@ -13,7 +13,7 @@ __all__ = [ 'quote', ] -import time, calendar +import time SPACE = ' ' EMPTYSTRING = '' @@ -146,8 +146,9 @@ def _parsedate_tz(data): return None # Check for a yy specified in two-digit format, then convert it to the # appropriate four-digit format, according to the POSIX standard. RFC 822 - # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) - # mandates a 4-digit yy. For more information, see the documentation for + # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) already + # mandated a 4-digit yy, and RFC 5322 (which obsoletes RFC 2822) continues + # this requirement. For more information, see the documentation for # the time module. if yy < 100: # The year is between 1969 and 1999 (inclusive). 
@@ -194,6 +195,9 @@ def mktime_tz(data): # No zone info, so localtime is better assumption than GMT return time.mktime(data[:8] + (-1,)) else: + # Delay the import, since mktime_tz is rarely used + import calendar + t = calendar.timegm(data) return t - data[9] @@ -230,9 +234,11 @@ class AddrlistClass: self.CR = '\r\n' self.FWS = self.LWS + self.CR self.atomends = self.specials + self.LWS + self.CR - # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it - # is obsolete syntax. RFC 2822 requires that we recognize obsolete - # syntax, so allow dots in phrases. + # Note that RFC 2822 section 4.1 introduced '.' as obs-phrase to handle + # existing practice (periods in display names), even though it was not + # allowed in RFC 822. RFC 5322 section 4.1 (which obsoletes RFC 2822) + # continues this requirement. We must recognize obsolete syntax, so + # allow dots in phrases. self.phraseends = self.atomends.replace('.', '') self.field = field self.commentlist = [] diff --git a/contrib/tools/python3/Lib/email/_policybase.py b/contrib/tools/python3/Lib/email/_policybase.py index c9f0d743090..0d486c90a9c 100644 --- a/contrib/tools/python3/Lib/email/_policybase.py +++ b/contrib/tools/python3/Lib/email/_policybase.py @@ -370,7 +370,7 @@ class Compat32(Policy): h = value if h is not None: # The Header class interprets a value of None for maxlinelen as the - # default value of 78, as recommended by RFC 2822. + # default value of 78, as recommended by RFC 5322 section 2.1.1. 
maxlinelen = 0 if self.max_line_length is not None: maxlinelen = self.max_line_length diff --git a/contrib/tools/python3/Lib/email/contentmanager.py b/contrib/tools/python3/Lib/email/contentmanager.py index b4f5830bead..11d1536db27 100644 --- a/contrib/tools/python3/Lib/email/contentmanager.py +++ b/contrib/tools/python3/Lib/email/contentmanager.py @@ -2,6 +2,7 @@ import binascii import email.charset import email.message import email.errors +import sys from email import quoprimime class ContentManager: @@ -142,13 +143,15 @@ def _encode_base64(data, max_line_length): def _encode_text(string, charset, cte, policy): + # If max_line_length is 0 or None, there is no limit. + maxlen = policy.max_line_length or sys.maxsize lines = string.encode(charset).splitlines() linesep = policy.linesep.encode('ascii') def embedded_body(lines): return linesep.join(lines) + linesep def normal_body(lines): return b'\n'.join(lines) + b'\n' if cte is None: # Use heuristics to decide on the "best" encoding. - if max((len(x) for x in lines), default=0) <= policy.max_line_length: + if max(map(len, lines), default=0) <= maxlen: try: return '7bit', normal_body(lines).decode('ascii') except UnicodeDecodeError: @@ -156,8 +159,7 @@ def _encode_text(string, charset, cte, policy): if policy.cte_type == '8bit': return '8bit', normal_body(lines).decode('ascii', 'surrogateescape') sniff = embedded_body(lines[:10]) - sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), - policy.max_line_length) + sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), maxlen) sniff_base64 = binascii.b2a_base64(sniff) # This is a little unfair to qp; it includes lineseps, base64 doesn't. 
if len(sniff_qp) > len(sniff_base64): @@ -172,9 +174,9 @@ def _encode_text(string, charset, cte, policy): data = normal_body(lines).decode('ascii', 'surrogateescape') elif cte == 'quoted-printable': data = quoprimime.body_encode(normal_body(lines).decode('latin-1'), - policy.max_line_length) + maxlen) elif cte == 'base64': - data = _encode_base64(embedded_body(lines), policy.max_line_length) + data = _encode_base64(embedded_body(lines), maxlen) else: raise ValueError("Unknown content transfer encoding {}".format(cte)) return cte, data diff --git a/contrib/tools/python3/Lib/email/feedparser.py b/contrib/tools/python3/Lib/email/feedparser.py index c2881d9bc52..8e60f1d1181 100644 --- a/contrib/tools/python3/Lib/email/feedparser.py +++ b/contrib/tools/python3/Lib/email/feedparser.py @@ -32,11 +32,13 @@ NLCRE = re.compile(r'\r\n|\r|\n') NLCRE_bol = re.compile(r'(\r\n|\r|\n)') NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z') NLCRE_crack = re.compile(r'(\r\n|\r|\n)') -# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character +# RFC 5322 section 3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character # except controls, SP, and ":". headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])') EMPTYSTRING = '' NL = '\n' +boundaryendRE = re.compile( + r'(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$') NeedMoreData = object() @@ -292,7 +294,7 @@ class FeedParser: return if self._cur.get_content_maintype() == 'message': # The message claims to be a message/* type, then what follows is - # another RFC 2822 message. + # another RFC 5322 message. for retval in self._parsegen(): if retval is NeedMoreData: yield NeedMoreData @@ -327,9 +329,10 @@ class FeedParser: # this onto the input stream until we've scanned past the # preamble. 
separator = '--' + boundary - boundaryre = re.compile( - '(?P<sep>' + re.escape(separator) + - r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$') + def boundarymatch(line): + if not line.startswith(separator): + return None + return boundaryendRE.match(line, len(separator)) capturing_preamble = True preamble = [] linesep = False @@ -341,7 +344,7 @@ class FeedParser: continue if line == '': break - mo = boundaryre.match(line) + mo = boundarymatch(line) if mo: # If we're looking at the end boundary, we're done with # this multipart. If there was a newline at the end of @@ -373,13 +376,13 @@ class FeedParser: if line is NeedMoreData: yield NeedMoreData continue - mo = boundaryre.match(line) + mo = boundarymatch(line) if not mo: self._input.unreadline(line) break # Recurse to parse this subpart; the input stream points # at the subpart's first line. - self._input.push_eof_matcher(boundaryre.match) + self._input.push_eof_matcher(boundarymatch) for retval in self._parsegen(): if retval is NeedMoreData: yield NeedMoreData @@ -501,10 +504,9 @@ class FeedParser: self._input.unreadline(line) return else: - # Weirdly placed unix-from line. Note this as a defect - # and ignore it. + # Weirdly placed unix-from line. defect = errors.MisplacedEnvelopeHeaderDefect(line) - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue # Split the line on the colon separating field name from value. # There will always be a colon, because if there wasn't the part of @@ -516,7 +518,7 @@ class FeedParser: # message. Track the error but keep going. 
if i == 0: defect = errors.InvalidHeaderDefect("Missing header name.") - self._cur.defects.append(defect) + self.policy.handle_defect(self._cur, defect) continue assert i>0, "_parse_headers fed line with no : and no leading WS" diff --git a/contrib/tools/python3/Lib/email/generator.py b/contrib/tools/python3/Lib/email/generator.py index 47b9df8f4e6..a03eb1fbbc9 100644 --- a/contrib/tools/python3/Lib/email/generator.py +++ b/contrib/tools/python3/Lib/email/generator.py @@ -22,6 +22,7 @@ NL = '\n' # XXX: no longer used by the code below. NLCRE = re.compile(r'\r\n|\r|\n') fcre = re.compile(r'^From ', re.MULTILINE) NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') +NEWLINE_WITHOUT_FWSP_BYTES = re.compile(br'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]') class Generator: @@ -50,7 +51,7 @@ class Generator: expanded to 8 spaces) than maxheaderlen, the header will split as defined in the Header class. Set maxheaderlen to zero to disable header wrapping. The default is 78, as recommended (but not required) - by RFC 2822. + by RFC 5322 section 2.1.1. The policy keyword specifies a policy object that controls a number of aspects of the generator's operation. If no policy is specified, @@ -429,7 +430,16 @@ class BytesGenerator(Generator): # This is almost the same as the string version, except for handling # strings with 8bit bytes. 
for h, v in msg.raw_items(): - self._fp.write(self.policy.fold_binary(h, v)) + folded = self.policy.fold_binary(h, v) + if self.policy.verify_generated_headers: + linesep = self.policy.linesep.encode() + if not folded.endswith(linesep): + raise HeaderWriteError( + f'folded header does not end with {linesep!r}: {folded!r}') + if NEWLINE_WITHOUT_FWSP_BYTES.search(folded.removesuffix(linesep)): + raise HeaderWriteError( + f'folded header contains newline: {folded!r}') + self._fp.write(folded) # A blank line always separates headers from body self.write(self._NL) diff --git a/contrib/tools/python3/Lib/email/header.py b/contrib/tools/python3/Lib/email/header.py index 984851a7d9a..a0aadb97ca6 100644 --- a/contrib/tools/python3/Lib/email/header.py +++ b/contrib/tools/python3/Lib/email/header.py @@ -59,16 +59,22 @@ _max_append = email.quoprimime._max_append def decode_header(header): """Decode a message header value without converting charset. - Returns a list of (string, charset) pairs containing each of the decoded - parts of the header. Charset is None for non-encoded parts of the header, - otherwise a lower-case string containing the name of the character set - specified in the encoded string. + For historical reasons, this function may return either: + + 1. A list of length 1 containing a pair (str, None). + 2. A list of (bytes, charset) pairs containing each of the decoded + parts of the header. Charset is None for non-encoded parts of the header, + otherwise a lower-case string containing the name of the character set + specified in the encoded string. header may be a string that may or may not contain RFC2047 encoded words, or it may be a Header object. An email.errors.HeaderParseError may be raised when certain decoding error occurs (e.g. a base64 decoding exception). + + This function exists for backwards compatibility only. For new code, we + recommend using email.headerregistry.HeaderRegistry instead. 
""" # If it is a Header object, we can just return the encoded chunks. if hasattr(header, '_chunks'): @@ -161,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None, This function takes one of those sequence of pairs and returns a Header instance. Optional maxlinelen, header_name, and continuation_ws are as in the Header constructor. + + This function exists for backwards compatibility only, and is not + recommended for use in new code. """ h = Header(maxlinelen=maxlinelen, header_name=header_name, continuation_ws=continuation_ws) diff --git a/contrib/tools/python3/Lib/email/headerregistry.py b/contrib/tools/python3/Lib/email/headerregistry.py index 543141dc427..0e8698efc0b 100644 --- a/contrib/tools/python3/Lib/email/headerregistry.py +++ b/contrib/tools/python3/Lib/email/headerregistry.py @@ -534,6 +534,18 @@ class MessageIDHeader: kwds['defects'].extend(parse_tree.all_defects) +class ReferencesHeader: + + max_count = 1 + value_parser = staticmethod(parser.parse_message_ids) + + @classmethod + def parse(cls, value, kwds): + kwds['parse_tree'] = parse_tree = cls.value_parser(value) + kwds['decoded'] = str(parse_tree) + kwds['defects'].extend(parse_tree.all_defects) + + # The header factory # _default_header_map = { @@ -557,6 +569,8 @@ _default_header_map = { 'content-disposition': ContentDispositionHeader, 'content-transfer-encoding': ContentTransferEncodingHeader, 'message-id': MessageIDHeader, + 'in-reply-to': ReferencesHeader, + 'references': ReferencesHeader, } class HeaderRegistry: diff --git a/contrib/tools/python3/Lib/email/message.py b/contrib/tools/python3/Lib/email/message.py index 6b7c3a23777..80f01d66a33 100644 --- a/contrib/tools/python3/Lib/email/message.py +++ b/contrib/tools/python3/Lib/email/message.py @@ -74,19 +74,25 @@ def _parseparam(s): # RDM This might be a Header, so for now stringify it. 
s = ';' + str(s) plist = [] - while s[:1] == ';': - s = s[1:] - end = s.find(';') - while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2: - end = s.find(';', end + 1) + start = 0 + while s.find(';', start) == start: + start += 1 + end = s.find(';', start) + ind, diff = start, 0 + while end > 0: + diff += s.count('"', ind, end) - s.count('\\"', ind, end) + if diff % 2 == 0: + break + end, ind = ind, s.find(';', end + 1) if end < 0: end = len(s) - f = s[:end] - if '=' in f: - i = f.index('=') - f = f[:i].strip().lower() + '=' + f[i+1:].strip() + i = s.find('=', start, end) + if i == -1: + f = s[start:end] + else: + f = s[start:i].rstrip().lower() + '=' + s[i+1:end].lstrip() plist.append(f.strip()) - s = s[end:] + start = end return plist @@ -135,7 +141,7 @@ def _decode_uu(encoded): class Message: """Basic message object. - A message object is defined as something that has a bunch of RFC 2822 + A message object is defined as something that has a bunch of RFC 5322 headers and a payload. It may optionally have an envelope header (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a multipart or a message/rfc822), then the payload is a list of Message @@ -313,6 +319,8 @@ class Message: # If it does happen, turn the string into bytes in a way # guaranteed not to fail. 
bpayload = payload.encode('raw-unicode-escape') + else: + bpayload = payload if cte == 'quoted-printable': return quopri.decodestring(bpayload) elif cte == 'base64': @@ -564,7 +572,7 @@ class Message: msg.add_header('content-disposition', 'attachment', filename='bud.gif') msg.add_header('content-disposition', 'attachment', - filename=('utf-8', '', Fußballer.ppt')) + filename=('utf-8', '', 'Fußballer.ppt')) msg.add_header('content-disposition', 'attachment', filename='Fußballer.ppt')) """ diff --git a/contrib/tools/python3/Lib/email/mime/audio.py b/contrib/tools/python3/Lib/email/mime/audio.py index 065819b2a21..aa0c4905cbb 100644 --- a/contrib/tools/python3/Lib/email/mime/audio.py +++ b/contrib/tools/python3/Lib/email/mime/audio.py @@ -6,7 +6,6 @@ __all__ = ['MIMEAudio'] -from io import BytesIO from email import encoders from email.mime.nonmultipart import MIMENonMultipart @@ -59,10 +58,8 @@ def _what(data): # sndhdr.what() had a pretty cruddy interface, unfortunately. This is why # we re-do it here. It would be easier to reverse engineer the Unix 'file' # command and use the standard 'magic' file, as shipped with a modern Unix. 
- hdr = data[:512] - fakefile = BytesIO(hdr) for testfn in _rules: - if res := testfn(hdr, fakefile): + if res := testfn(data): return res else: return None @@ -74,7 +71,7 @@ def rule(rulefunc): @rule -def _aiff(h, f): +def _aiff(h): if not h.startswith(b'FORM'): return None if h[8:12] in {b'AIFC', b'AIFF'}: @@ -84,7 +81,7 @@ def _aiff(h, f): @rule -def _au(h, f): +def _au(h): if h.startswith(b'.snd'): return 'basic' else: @@ -92,7 +89,7 @@ def _au(h, f): @rule -def _wav(h, f): +def _wav(h): # 'RIFF' <len> 'WAVE' 'fmt ' <len> if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ': return None diff --git a/contrib/tools/python3/Lib/email/parser.py b/contrib/tools/python3/Lib/email/parser.py index 06d99b17f2f..e3003118ce1 100644 --- a/contrib/tools/python3/Lib/email/parser.py +++ b/contrib/tools/python3/Lib/email/parser.py @@ -2,7 +2,7 @@ # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter # Contact: email-sig@python.org -"""A parser of RFC 2822 and MIME email messages.""" +"""A parser of RFC 5322 and MIME email messages.""" __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser', 'FeedParser', 'BytesFeedParser'] @@ -15,14 +15,14 @@ from email._policybase import compat32 class Parser: def __init__(self, _class=None, *, policy=compat32): - """Parser of RFC 2822 and MIME email messages. + """Parser of RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The string must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceded by a `Unix-from' header. The + The string must be formatted as a block of RFC 5322 headers and header + continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the string or by a blank line. 
@@ -75,14 +75,14 @@ class HeaderParser(Parser): class BytesParser: def __init__(self, *args, **kw): - """Parser of binary RFC 2822 and MIME email messages. + """Parser of binary RFC 5322 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. - The input must be formatted as a block of RFC 2822 headers and header - continuation lines, optionally preceded by a `Unix-from' header. The + The input must be formatted as a block of RFC 5322 headers and header + continuation lines, optionally preceded by a 'Unix-from' header. The header block is terminated either by the end of the input or by a blank line. diff --git a/contrib/tools/python3/Lib/email/utils.py b/contrib/tools/python3/Lib/email/utils.py index e53abc8b840..e4d35f06abc 100644 --- a/contrib/tools/python3/Lib/email/utils.py +++ b/contrib/tools/python3/Lib/email/utils.py @@ -25,8 +25,6 @@ __all__ = [ import os import re import time -import random -import socket import datetime import urllib.parse @@ -36,9 +34,6 @@ from email._parseaddr import mktime_tz from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz -# Intrapackage imports -from email.charset import Charset - COMMASPACE = ', ' EMPTYSTRING = '' UEMPTYSTRING = '' @@ -95,6 +90,8 @@ def formataddr(pair, charset='utf-8'): name.encode('ascii') except UnicodeEncodeError: if isinstance(charset, str): + # lazy import to improve module import time + from email.charset import Charset charset = Charset(charset) encoded_name = charset.header_encode(name) return "%s <%s>" % (encoded_name, address) @@ -297,6 +294,11 @@ def make_msgid(idstring=None, domain=None): portion of the message id after the '@'. It defaults to the locally defined hostname. 
""" + # Lazy imports to speedup module import time + # (no other functions in email.utils need these modules) + import random + import socket + timeval = int(time.time()*100) pid = os.getpid() randint = random.getrandbits(64) @@ -415,8 +417,14 @@ def decode_params(params): for name, continuations in rfc2231_params.items(): value = [] extended = False - # Sort by number - continuations.sort() + # Sort by number, treating None as 0 if there is no 0, + # and ignore it if there is already a 0. + has_zero = any(x[0] == 0 for x in continuations) + if has_zero: + continuations = [x for x in continuations if x[0] is not None] + else: + continuations = [(x[0] or 0, x[1], x[2]) for x in continuations] + continuations.sort(key=lambda x: x[0]) # And now append all values in numerical order, converting # %-encodings for the encoded segments. If any of the # continuation names ends in a *, then the entire string, after |
