summary | refs | log | tree | commit | diff | stats
path: root/contrib/tools/python3/Lib/email
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tools/python3/Lib/email')
-rw-r--r-- contrib/tools/python3/Lib/email/_encoded_words.py | 2
-rw-r--r-- contrib/tools/python3/Lib/email/_header_value_parser.py | 139
-rw-r--r-- contrib/tools/python3/Lib/email/_parseaddr.py | 18
-rw-r--r-- contrib/tools/python3/Lib/email/_policybase.py | 2
-rw-r--r-- contrib/tools/python3/Lib/email/contentmanager.py | 12
-rw-r--r-- contrib/tools/python3/Lib/email/feedparser.py | 26
-rw-r--r-- contrib/tools/python3/Lib/email/generator.py | 14
-rw-r--r-- contrib/tools/python3/Lib/email/header.py | 17
-rw-r--r-- contrib/tools/python3/Lib/email/headerregistry.py | 14
-rw-r--r-- contrib/tools/python3/Lib/email/message.py | 32
-rw-r--r-- contrib/tools/python3/Lib/email/mime/audio.py | 11
-rw-r--r-- contrib/tools/python3/Lib/email/parser.py | 14
-rw-r--r-- contrib/tools/python3/Lib/email/utils.py | 22
13 files changed, 215 insertions, 108 deletions
diff --git a/contrib/tools/python3/Lib/email/_encoded_words.py b/contrib/tools/python3/Lib/email/_encoded_words.py
index 6795a606de0..05a34a4c105 100644
--- a/contrib/tools/python3/Lib/email/_encoded_words.py
+++ b/contrib/tools/python3/Lib/email/_encoded_words.py
@@ -219,7 +219,7 @@ def encode(string, charset='utf-8', encoding=None, lang=''):
"""
if charset == 'unknown-8bit':
- bstring = string.encode('ascii', 'surrogateescape')
+ bstring = string.encode('utf-8', 'surrogateescape')
else:
bstring = string.encode(charset)
if encoding is None:
diff --git a/contrib/tools/python3/Lib/email/_header_value_parser.py b/contrib/tools/python3/Lib/email/_header_value_parser.py
index 3d845c09d41..03fedd99539 100644
--- a/contrib/tools/python3/Lib/email/_header_value_parser.py
+++ b/contrib/tools/python3/Lib/email/_header_value_parser.py
@@ -80,7 +80,8 @@ from email import utils
# Useful constants and functions
#
-WSP = set(' \t')
+_WSP = ' \t'
+WSP = set(_WSP)
CFWS_LEADER = WSP | set('(')
SPECIALS = set(r'()<>@,:;.\"[]')
ATOM_ENDS = SPECIALS | WSP
@@ -101,6 +102,12 @@ def make_quoted_pairs(value):
return str(value).replace('\\', '\\\\').replace('"', '\\"')
+def make_parenthesis_pairs(value):
+ """Escape parenthesis and backslash for use within a comment."""
+ return str(value).replace('\\', '\\\\') \
+ .replace('(', '\\(').replace(')', '\\)')
+
+
def quote_string(value):
escaped = make_quoted_pairs(value)
return f'"{escaped}"'
@@ -874,6 +881,12 @@ class MessageID(MsgID):
class InvalidMessageID(MessageID):
token_type = 'invalid-message-id'
+class MessageIDList(TokenList):
+ token_type = 'message-id-list'
+
+ @property
+ def message_ids(self):
+ return [x for x in self if x.token_type=='msg-id']
class Header(TokenList):
token_type = 'header'
@@ -933,7 +946,7 @@ class WhiteSpaceTerminal(Terminal):
return ' '
def startswith_fws(self):
- return True
+ return self and self[0] in WSP
class ValueTerminal(Terminal):
@@ -1020,6 +1033,8 @@ def _get_ptext_to_endchars(value, endchars):
a flag that is True iff there were any quoted printables decoded.
"""
+ if not value:
+ return '', '', False
fragment, *remainder = _wsp_splitter(value, 1)
vchars = []
escape = False
@@ -1053,7 +1068,7 @@ def get_fws(value):
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
return fws, newvalue
-def get_encoded_word(value):
+def get_encoded_word(value, terminal_type='vtext'):
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
"""
@@ -1092,7 +1107,7 @@ def get_encoded_word(value):
ew.append(token)
continue
chars, *remainder = _wsp_splitter(text, 1)
- vtext = ValueTerminal(chars, 'vtext')
+ vtext = ValueTerminal(chars, terminal_type)
_validate_xtext(vtext)
ew.append(vtext)
text = ''.join(remainder)
@@ -1134,7 +1149,7 @@ def get_unstructured(value):
valid_ew = True
if value.startswith('=?'):
try:
- token, value = get_encoded_word(value)
+ token, value = get_encoded_word(value, 'utext')
except _InvalidEwError:
valid_ew = False
except errors.HeaderParseError:
@@ -1163,7 +1178,7 @@ def get_unstructured(value):
# the parser to go in an infinite loop.
if valid_ew and rfc2047_matcher.search(tok):
tok, *remainder = value.partition('=?')
- vtext = ValueTerminal(tok, 'vtext')
+ vtext = ValueTerminal(tok, 'utext')
_validate_xtext(vtext)
unstructured.append(vtext)
value = ''.join(remainder)
@@ -1573,7 +1588,7 @@ def get_dtext(value):
def _check_for_early_dl_end(value, domain_literal):
if value:
return False
- domain_literal.append(errors.InvalidHeaderDefect(
+ domain_literal.defects.append(errors.InvalidHeaderDefect(
"end of input inside domain-literal"))
domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
return True
@@ -1592,9 +1607,9 @@ def get_domain_literal(value):
raise errors.HeaderParseError("expected '[' at start of domain-literal "
"but found '{}'".format(value))
value = value[1:]
+ domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
if _check_for_early_dl_end(value, domain_literal):
return domain_literal, value
- domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
if value[0] in WSP:
token, value = get_fws(value)
domain_literal.append(token)
@@ -2169,6 +2184,32 @@ def parse_message_id(value):
return message_id
+def parse_message_ids(value):
+ """in-reply-to = "In-Reply-To:" 1*msg-id CRLF
+ references = "References:" 1*msg-id CRLF
+ """
+ message_id_list = MessageIDList()
+ while value:
+ if value[0] == ',':
+ # message id list separated with commas - this is invalid,
+ # but happens rather frequently in the wild
+ message_id_list.defects.append(
+ errors.InvalidHeaderDefect("comma in msg-id list"))
+ message_id_list.append(
+ WhiteSpaceTerminal(' ', 'invalid-comma-replacement'))
+ value = value[1:]
+ continue
+ try:
+ token, value = get_msg_id(value)
+ message_id_list.append(token)
+ except errors.HeaderParseError as ex:
+ token = get_unstructured(value)
+ message_id_list.append(InvalidMessageID(token))
+ message_id_list.defects.append(
+ errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex)))
+ break
+ return message_id_list
+
#
# XXX: As I begin to add additional header parsers, I'm realizing we probably
# have two level of parser routines: the get_XXX methods that get a token in
@@ -2786,8 +2827,12 @@ def _steal_trailing_WSP_if_exists(lines):
if lines and lines[-1] and lines[-1][-1] in WSP:
wsp = lines[-1][-1]
lines[-1] = lines[-1][:-1]
+ # gh-142006: if the line is now empty, remove it entirely.
+ if not lines[-1]:
+ lines.pop()
return wsp
+
def _refold_parse_tree(parse_tree, *, policy):
"""Return string of contents of parse_tree folded according to RFC rules.
@@ -2796,11 +2841,9 @@ def _refold_parse_tree(parse_tree, *, policy):
maxlen = policy.max_line_length or sys.maxsize
encoding = 'utf-8' if policy.utf8 else 'us-ascii'
lines = [''] # Folded lines to be output
- leading_whitespace = '' # When we have whitespace between two encoded
- # words, we may need to encode the whitespace
- # at the beginning of the second word.
- last_ew = None # Points to the last encoded character if there's an ew on
- # the line
+ last_word_is_ew = False
+ last_ew = None # if there is an encoded word in the last line of lines,
+ # points to the encoded word's first character
last_charset = None
wrap_as_ew_blocked = 0
want_encoding = False # This is set to True if we need to encode this part
@@ -2813,7 +2856,7 @@ def _refold_parse_tree(parse_tree, *, policy):
continue
tstr = str(part)
if not want_encoding:
- if part.token_type == 'ptext':
+ if part.token_type in ('ptext', 'vtext'):
# Encode if tstr contains special characters.
want_encoding = not SPECIALSNL.isdisjoint(tstr)
else:
@@ -2835,6 +2878,7 @@ def _refold_parse_tree(parse_tree, *, policy):
if part.token_type == 'mime-parameters':
# Mime parameter folding (using RFC2231) is extra special.
_fold_mime_parameters(part, lines, maxlen, encoding)
+ last_word_is_ew = False
continue
if want_encoding and not wrap_as_ew_blocked:
@@ -2851,6 +2895,7 @@ def _refold_parse_tree(parse_tree, *, policy):
# XXX what if encoded_part has no leading FWS?
lines.append(newline)
lines[-1] += encoded_part
+ last_word_is_ew = False
continue
# Either this is not a major syntactic break, so we don't
# want it on a line by itself even if it fits, or it
@@ -2869,11 +2914,16 @@ def _refold_parse_tree(parse_tree, *, policy):
(last_charset == 'unknown-8bit' or
last_charset == 'utf-8' and charset != 'us-ascii')):
last_ew = None
- last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew,
- part.ew_combine_allowed, charset, leading_whitespace)
- # This whitespace has been added to the lines in _fold_as_ew()
- # so clear it now.
- leading_whitespace = ''
+ last_ew = _fold_as_ew(
+ tstr,
+ lines,
+ maxlen,
+ last_ew,
+ part.ew_combine_allowed,
+ charset,
+ last_word_is_ew,
+ )
+ last_word_is_ew = True
last_charset = charset
want_encoding = False
continue
@@ -2886,28 +2936,19 @@ def _refold_parse_tree(parse_tree, *, policy):
if len(tstr) <= maxlen - len(lines[-1]):
lines[-1] += tstr
+ last_word_is_ew = last_word_is_ew and not bool(tstr.strip(_WSP))
continue
# This part is too long to fit. The RFC wants us to break at
# "major syntactic breaks", so unless we don't consider this
# to be one, check if it will fit on the next line by itself.
- leading_whitespace = ''
if (part.syntactic_break and
len(tstr) + 1 <= maxlen):
newline = _steal_trailing_WSP_if_exists(lines)
if newline or part.startswith_fws():
- # We're going to fold the data onto a new line here. Due to
- # the way encoded strings handle continuation lines, we need to
- # be prepared to encode any whitespace if the next line turns
- # out to start with an encoded word.
lines.append(newline + tstr)
-
- whitespace_accumulator = []
- for char in lines[-1]:
- if char not in WSP:
- break
- whitespace_accumulator.append(char)
- leading_whitespace = ''.join(whitespace_accumulator)
+ last_word_is_ew = (last_word_is_ew
+ and not bool(lines[-1].strip(_WSP)))
last_ew = None
continue
if not hasattr(part, 'encode'):
@@ -2922,6 +2963,13 @@ def _refold_parse_tree(parse_tree, *, policy):
[ValueTerminal(make_quoted_pairs(p), 'ptext')
for p in newparts] +
[ValueTerminal('"', 'ptext')])
+ if part.token_type == 'comment':
+ newparts = (
+ [ValueTerminal('(', 'ptext')] +
+ [ValueTerminal(make_parenthesis_pairs(p), 'ptext')
+ if p.token_type == 'ptext' else p
+ for p in newparts] +
+ [ValueTerminal(')', 'ptext')])
if not part.as_ew_allowed:
wrap_as_ew_blocked += 1
newparts.append(end_ew_not_allowed)
@@ -2940,10 +2988,11 @@ def _refold_parse_tree(parse_tree, *, policy):
else:
# We can't fold it onto the next line either...
lines[-1] += tstr
+ last_word_is_ew = last_word_is_ew and not bool(tstr.strip(_WSP))
return policy.linesep.join(lines) + policy.linesep
-def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, leading_whitespace):
+def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, last_word_is_ew):
"""Fold string to_encode into lines as encoded word, combining if allowed.
Return the new value for last_ew, or None if ew_combine_allowed is False.
@@ -2958,6 +3007,16 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
to_encode = str(
get_unstructured(lines[-1][last_ew:] + to_encode))
lines[-1] = lines[-1][:last_ew]
+ elif last_word_is_ew:
+ # If we are following up an encoded word with another encoded word,
+ # any white space between the two will be ignored when decoded.
+ # Therefore, we encode all to-be-displayed whitespace in the second
+ # encoded word.
+ len_without_wsp = len(lines[-1].rstrip(_WSP))
+ leading_whitespace = lines[-1][len_without_wsp:]
+ lines[-1] = (lines[-1][:len_without_wsp]
+ + (' ' if leading_whitespace else ''))
+ to_encode = leading_whitespace + to_encode
elif to_encode[0] in WSP:
# We're joining this to non-encoded text, so don't encode
# the leading blank.
@@ -2986,20 +3045,13 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
while to_encode:
remaining_space = maxlen - len(lines[-1])
- text_space = remaining_space - chrome_len - len(leading_whitespace)
+ text_space = remaining_space - chrome_len
if text_space <= 0:
- lines.append(' ')
+ newline = _steal_trailing_WSP_if_exists(lines)
+ lines.append(newline or ' ')
+ new_last_ew = len(lines[-1])
continue
- # If we are at the start of a continuation line, prepend whitespace
- # (we only want to do this when the line starts with an encoded word
- # but if we're folding in this helper function, then we know that we
- # are going to be writing out an encoded word.)
- if len(lines) > 1 and len(lines[-1]) == 1 and leading_whitespace:
- encoded_word = _ew.encode(leading_whitespace, charset=encode_as)
- lines[-1] += encoded_word
- leading_whitespace = ''
-
to_encode_word = to_encode[:text_space]
encoded_word = _ew.encode(to_encode_word, charset=encode_as)
excess = len(encoded_word) - remaining_space
@@ -3011,7 +3063,6 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
excess = len(encoded_word) - remaining_space
lines[-1] += encoded_word
to_encode = to_encode[len(to_encode_word):]
- leading_whitespace = ''
if to_encode:
lines.append(' ')
diff --git a/contrib/tools/python3/Lib/email/_parseaddr.py b/contrib/tools/python3/Lib/email/_parseaddr.py
index febe411355d..565af0cf361 100644
--- a/contrib/tools/python3/Lib/email/_parseaddr.py
+++ b/contrib/tools/python3/Lib/email/_parseaddr.py
@@ -13,7 +13,7 @@ __all__ = [
'quote',
]
-import time, calendar
+import time
SPACE = ' '
EMPTYSTRING = ''
@@ -146,8 +146,9 @@ def _parsedate_tz(data):
return None
# Check for a yy specified in two-digit format, then convert it to the
# appropriate four-digit format, according to the POSIX standard. RFC 822
- # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
- # mandates a 4-digit yy. For more information, see the documentation for
+ # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) already
+ # mandated a 4-digit yy, and RFC 5322 (which obsoletes RFC 2822) continues
+ # this requirement. For more information, see the documentation for
# the time module.
if yy < 100:
# The year is between 1969 and 1999 (inclusive).
@@ -194,6 +195,9 @@ def mktime_tz(data):
# No zone info, so localtime is better assumption than GMT
return time.mktime(data[:8] + (-1,))
else:
+ # Delay the import, since mktime_tz is rarely used
+ import calendar
+
t = calendar.timegm(data)
return t - data[9]
@@ -230,9 +234,11 @@ class AddrlistClass:
self.CR = '\r\n'
self.FWS = self.LWS + self.CR
self.atomends = self.specials + self.LWS + self.CR
- # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
- # is obsolete syntax. RFC 2822 requires that we recognize obsolete
- # syntax, so allow dots in phrases.
+ # Note that RFC 2822 section 4.1 introduced '.' as obs-phrase to handle
+ # existing practice (periods in display names), even though it was not
+ # allowed in RFC 822. RFC 5322 section 4.1 (which obsoletes RFC 2822)
+ # continues this requirement. We must recognize obsolete syntax, so
+ # allow dots in phrases.
self.phraseends = self.atomends.replace('.', '')
self.field = field
self.commentlist = []
diff --git a/contrib/tools/python3/Lib/email/_policybase.py b/contrib/tools/python3/Lib/email/_policybase.py
index c9f0d743090..0d486c90a9c 100644
--- a/contrib/tools/python3/Lib/email/_policybase.py
+++ b/contrib/tools/python3/Lib/email/_policybase.py
@@ -370,7 +370,7 @@ class Compat32(Policy):
h = value
if h is not None:
# The Header class interprets a value of None for maxlinelen as the
- # default value of 78, as recommended by RFC 2822.
+ # default value of 78, as recommended by RFC 5322 section 2.1.1.
maxlinelen = 0
if self.max_line_length is not None:
maxlinelen = self.max_line_length
diff --git a/contrib/tools/python3/Lib/email/contentmanager.py b/contrib/tools/python3/Lib/email/contentmanager.py
index b4f5830bead..11d1536db27 100644
--- a/contrib/tools/python3/Lib/email/contentmanager.py
+++ b/contrib/tools/python3/Lib/email/contentmanager.py
@@ -2,6 +2,7 @@ import binascii
import email.charset
import email.message
import email.errors
+import sys
from email import quoprimime
class ContentManager:
@@ -142,13 +143,15 @@ def _encode_base64(data, max_line_length):
def _encode_text(string, charset, cte, policy):
+ # If max_line_length is 0 or None, there is no limit.
+ maxlen = policy.max_line_length or sys.maxsize
lines = string.encode(charset).splitlines()
linesep = policy.linesep.encode('ascii')
def embedded_body(lines): return linesep.join(lines) + linesep
def normal_body(lines): return b'\n'.join(lines) + b'\n'
if cte is None:
# Use heuristics to decide on the "best" encoding.
- if max((len(x) for x in lines), default=0) <= policy.max_line_length:
+ if max(map(len, lines), default=0) <= maxlen:
try:
return '7bit', normal_body(lines).decode('ascii')
except UnicodeDecodeError:
@@ -156,8 +159,7 @@ def _encode_text(string, charset, cte, policy):
if policy.cte_type == '8bit':
return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
sniff = embedded_body(lines[:10])
- sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
- policy.max_line_length)
+ sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), maxlen)
sniff_base64 = binascii.b2a_base64(sniff)
# This is a little unfair to qp; it includes lineseps, base64 doesn't.
if len(sniff_qp) > len(sniff_base64):
@@ -172,9 +174,9 @@ def _encode_text(string, charset, cte, policy):
data = normal_body(lines).decode('ascii', 'surrogateescape')
elif cte == 'quoted-printable':
data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
- policy.max_line_length)
+ maxlen)
elif cte == 'base64':
- data = _encode_base64(embedded_body(lines), policy.max_line_length)
+ data = _encode_base64(embedded_body(lines), maxlen)
else:
raise ValueError("Unknown content transfer encoding {}".format(cte))
return cte, data
diff --git a/contrib/tools/python3/Lib/email/feedparser.py b/contrib/tools/python3/Lib/email/feedparser.py
index c2881d9bc52..8e60f1d1181 100644
--- a/contrib/tools/python3/Lib/email/feedparser.py
+++ b/contrib/tools/python3/Lib/email/feedparser.py
@@ -32,11 +32,13 @@ NLCRE = re.compile(r'\r\n|\r|\n')
NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
-# RFC 2822 $3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
+# RFC 5322 section 3.6.8 Optional fields. ftext is %d33-57 / %d59-126, Any character
# except controls, SP, and ":".
headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
EMPTYSTRING = ''
NL = '\n'
+boundaryendRE = re.compile(
+ r'(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
NeedMoreData = object()
@@ -292,7 +294,7 @@ class FeedParser:
return
if self._cur.get_content_maintype() == 'message':
# The message claims to be a message/* type, then what follows is
- # another RFC 2822 message.
+ # another RFC 5322 message.
for retval in self._parsegen():
if retval is NeedMoreData:
yield NeedMoreData
@@ -327,9 +329,10 @@ class FeedParser:
# this onto the input stream until we've scanned past the
# preamble.
separator = '--' + boundary
- boundaryre = re.compile(
- '(?P<sep>' + re.escape(separator) +
- r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
+ def boundarymatch(line):
+ if not line.startswith(separator):
+ return None
+ return boundaryendRE.match(line, len(separator))
capturing_preamble = True
preamble = []
linesep = False
@@ -341,7 +344,7 @@ class FeedParser:
continue
if line == '':
break
- mo = boundaryre.match(line)
+ mo = boundarymatch(line)
if mo:
# If we're looking at the end boundary, we're done with
# this multipart. If there was a newline at the end of
@@ -373,13 +376,13 @@ class FeedParser:
if line is NeedMoreData:
yield NeedMoreData
continue
- mo = boundaryre.match(line)
+ mo = boundarymatch(line)
if not mo:
self._input.unreadline(line)
break
# Recurse to parse this subpart; the input stream points
# at the subpart's first line.
- self._input.push_eof_matcher(boundaryre.match)
+ self._input.push_eof_matcher(boundarymatch)
for retval in self._parsegen():
if retval is NeedMoreData:
yield NeedMoreData
@@ -501,10 +504,9 @@ class FeedParser:
self._input.unreadline(line)
return
else:
- # Weirdly placed unix-from line. Note this as a defect
- # and ignore it.
+ # Weirdly placed unix-from line.
defect = errors.MisplacedEnvelopeHeaderDefect(line)
- self._cur.defects.append(defect)
+ self.policy.handle_defect(self._cur, defect)
continue
# Split the line on the colon separating field name from value.
# There will always be a colon, because if there wasn't the part of
@@ -516,7 +518,7 @@ class FeedParser:
# message. Track the error but keep going.
if i == 0:
defect = errors.InvalidHeaderDefect("Missing header name.")
- self._cur.defects.append(defect)
+ self.policy.handle_defect(self._cur, defect)
continue
assert i>0, "_parse_headers fed line with no : and no leading WS"
diff --git a/contrib/tools/python3/Lib/email/generator.py b/contrib/tools/python3/Lib/email/generator.py
index 47b9df8f4e6..a03eb1fbbc9 100644
--- a/contrib/tools/python3/Lib/email/generator.py
+++ b/contrib/tools/python3/Lib/email/generator.py
@@ -22,6 +22,7 @@ NL = '\n' # XXX: no longer used by the code below.
NLCRE = re.compile(r'\r\n|\r|\n')
fcre = re.compile(r'^From ', re.MULTILINE)
NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]')
+NEWLINE_WITHOUT_FWSP_BYTES = re.compile(br'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]')
class Generator:
@@ -50,7 +51,7 @@ class Generator:
expanded to 8 spaces) than maxheaderlen, the header will split as
defined in the Header class. Set maxheaderlen to zero to disable
header wrapping. The default is 78, as recommended (but not required)
- by RFC 2822.
+ by RFC 5322 section 2.1.1.
The policy keyword specifies a policy object that controls a number of
aspects of the generator's operation. If no policy is specified,
@@ -429,7 +430,16 @@ class BytesGenerator(Generator):
# This is almost the same as the string version, except for handling
# strings with 8bit bytes.
for h, v in msg.raw_items():
- self._fp.write(self.policy.fold_binary(h, v))
+ folded = self.policy.fold_binary(h, v)
+ if self.policy.verify_generated_headers:
+ linesep = self.policy.linesep.encode()
+ if not folded.endswith(linesep):
+ raise HeaderWriteError(
+ f'folded header does not end with {linesep!r}: {folded!r}')
+ if NEWLINE_WITHOUT_FWSP_BYTES.search(folded.removesuffix(linesep)):
+ raise HeaderWriteError(
+ f'folded header contains newline: {folded!r}')
+ self._fp.write(folded)
# A blank line always separates headers from body
self.write(self._NL)
diff --git a/contrib/tools/python3/Lib/email/header.py b/contrib/tools/python3/Lib/email/header.py
index 984851a7d9a..a0aadb97ca6 100644
--- a/contrib/tools/python3/Lib/email/header.py
+++ b/contrib/tools/python3/Lib/email/header.py
@@ -59,16 +59,22 @@ _max_append = email.quoprimime._max_append
def decode_header(header):
"""Decode a message header value without converting charset.
- Returns a list of (string, charset) pairs containing each of the decoded
- parts of the header. Charset is None for non-encoded parts of the header,
- otherwise a lower-case string containing the name of the character set
- specified in the encoded string.
+ For historical reasons, this function may return either:
+
+ 1. A list of length 1 containing a pair (str, None).
+ 2. A list of (bytes, charset) pairs containing each of the decoded
+ parts of the header. Charset is None for non-encoded parts of the header,
+ otherwise a lower-case string containing the name of the character set
+ specified in the encoded string.
header may be a string that may or may not contain RFC2047 encoded words,
or it may be a Header object.
An email.errors.HeaderParseError may be raised when certain decoding error
occurs (e.g. a base64 decoding exception).
+
+ This function exists for backwards compatibility only. For new code, we
+ recommend using email.headerregistry.HeaderRegistry instead.
"""
# If it is a Header object, we can just return the encoded chunks.
if hasattr(header, '_chunks'):
@@ -161,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None,
This function takes one of those sequence of pairs and returns a Header
instance. Optional maxlinelen, header_name, and continuation_ws are as in
the Header constructor.
+
+ This function exists for backwards compatibility only, and is not
+ recommended for use in new code.
"""
h = Header(maxlinelen=maxlinelen, header_name=header_name,
continuation_ws=continuation_ws)
diff --git a/contrib/tools/python3/Lib/email/headerregistry.py b/contrib/tools/python3/Lib/email/headerregistry.py
index 543141dc427..0e8698efc0b 100644
--- a/contrib/tools/python3/Lib/email/headerregistry.py
+++ b/contrib/tools/python3/Lib/email/headerregistry.py
@@ -534,6 +534,18 @@ class MessageIDHeader:
kwds['defects'].extend(parse_tree.all_defects)
+class ReferencesHeader:
+
+ max_count = 1
+ value_parser = staticmethod(parser.parse_message_ids)
+
+ @classmethod
+ def parse(cls, value, kwds):
+ kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+ kwds['decoded'] = str(parse_tree)
+ kwds['defects'].extend(parse_tree.all_defects)
+
+
# The header factory #
_default_header_map = {
@@ -557,6 +569,8 @@ _default_header_map = {
'content-disposition': ContentDispositionHeader,
'content-transfer-encoding': ContentTransferEncodingHeader,
'message-id': MessageIDHeader,
+ 'in-reply-to': ReferencesHeader,
+ 'references': ReferencesHeader,
}
class HeaderRegistry:
diff --git a/contrib/tools/python3/Lib/email/message.py b/contrib/tools/python3/Lib/email/message.py
index 6b7c3a23777..80f01d66a33 100644
--- a/contrib/tools/python3/Lib/email/message.py
+++ b/contrib/tools/python3/Lib/email/message.py
@@ -74,19 +74,25 @@ def _parseparam(s):
# RDM This might be a Header, so for now stringify it.
s = ';' + str(s)
plist = []
- while s[:1] == ';':
- s = s[1:]
- end = s.find(';')
- while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
- end = s.find(';', end + 1)
+ start = 0
+ while s.find(';', start) == start:
+ start += 1
+ end = s.find(';', start)
+ ind, diff = start, 0
+ while end > 0:
+ diff += s.count('"', ind, end) - s.count('\\"', ind, end)
+ if diff % 2 == 0:
+ break
+ end, ind = ind, s.find(';', end + 1)
if end < 0:
end = len(s)
- f = s[:end]
- if '=' in f:
- i = f.index('=')
- f = f[:i].strip().lower() + '=' + f[i+1:].strip()
+ i = s.find('=', start, end)
+ if i == -1:
+ f = s[start:end]
+ else:
+ f = s[start:i].rstrip().lower() + '=' + s[i+1:end].lstrip()
plist.append(f.strip())
- s = s[end:]
+ start = end
return plist
@@ -135,7 +141,7 @@ def _decode_uu(encoded):
class Message:
"""Basic message object.
- A message object is defined as something that has a bunch of RFC 2822
+ A message object is defined as something that has a bunch of RFC 5322
headers and a payload. It may optionally have an envelope header
(a.k.a. Unix-From or From_ header). If the message is a container (i.e. a
multipart or a message/rfc822), then the payload is a list of Message
@@ -313,6 +319,8 @@ class Message:
# If it does happen, turn the string into bytes in a way
# guaranteed not to fail.
bpayload = payload.encode('raw-unicode-escape')
+ else:
+ bpayload = payload
if cte == 'quoted-printable':
return quopri.decodestring(bpayload)
elif cte == 'base64':
@@ -564,7 +572,7 @@ class Message:
msg.add_header('content-disposition', 'attachment', filename='bud.gif')
msg.add_header('content-disposition', 'attachment',
- filename=('utf-8', '', Fußballer.ppt'))
+ filename=('utf-8', '', 'Fußballer.ppt'))
msg.add_header('content-disposition', 'attachment',
filename='Fußballer.ppt'))
"""
diff --git a/contrib/tools/python3/Lib/email/mime/audio.py b/contrib/tools/python3/Lib/email/mime/audio.py
index 065819b2a21..aa0c4905cbb 100644
--- a/contrib/tools/python3/Lib/email/mime/audio.py
+++ b/contrib/tools/python3/Lib/email/mime/audio.py
@@ -6,7 +6,6 @@
__all__ = ['MIMEAudio']
-from io import BytesIO
from email import encoders
from email.mime.nonmultipart import MIMENonMultipart
@@ -59,10 +58,8 @@ def _what(data):
# sndhdr.what() had a pretty cruddy interface, unfortunately. This is why
# we re-do it here. It would be easier to reverse engineer the Unix 'file'
# command and use the standard 'magic' file, as shipped with a modern Unix.
- hdr = data[:512]
- fakefile = BytesIO(hdr)
for testfn in _rules:
- if res := testfn(hdr, fakefile):
+ if res := testfn(data):
return res
else:
return None
@@ -74,7 +71,7 @@ def rule(rulefunc):
@rule
-def _aiff(h, f):
+def _aiff(h):
if not h.startswith(b'FORM'):
return None
if h[8:12] in {b'AIFC', b'AIFF'}:
@@ -84,7 +81,7 @@ def _aiff(h, f):
@rule
-def _au(h, f):
+def _au(h):
if h.startswith(b'.snd'):
return 'basic'
else:
@@ -92,7 +89,7 @@ def _au(h, f):
@rule
-def _wav(h, f):
+def _wav(h):
# 'RIFF' <len> 'WAVE' 'fmt ' <len>
if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
return None
diff --git a/contrib/tools/python3/Lib/email/parser.py b/contrib/tools/python3/Lib/email/parser.py
index 06d99b17f2f..e3003118ce1 100644
--- a/contrib/tools/python3/Lib/email/parser.py
+++ b/contrib/tools/python3/Lib/email/parser.py
@@ -2,7 +2,7 @@
# Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
# Contact: email-sig@python.org
-"""A parser of RFC 2822 and MIME email messages."""
+"""A parser of RFC 5322 and MIME email messages."""
__all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
'FeedParser', 'BytesFeedParser']
@@ -15,14 +15,14 @@ from email._policybase import compat32
class Parser:
def __init__(self, _class=None, *, policy=compat32):
- """Parser of RFC 2822 and MIME email messages.
+ """Parser of RFC 5322 and MIME email messages.
Creates an in-memory object tree representing the email message, which
can then be manipulated and turned over to a Generator to return the
textual representation of the message.
- The string must be formatted as a block of RFC 2822 headers and header
- continuation lines, optionally preceded by a `Unix-from' header. The
+ The string must be formatted as a block of RFC 5322 headers and header
+ continuation lines, optionally preceded by a 'Unix-from' header. The
header block is terminated either by the end of the string or by a
blank line.
@@ -75,14 +75,14 @@ class HeaderParser(Parser):
class BytesParser:
def __init__(self, *args, **kw):
- """Parser of binary RFC 2822 and MIME email messages.
+ """Parser of binary RFC 5322 and MIME email messages.
Creates an in-memory object tree representing the email message, which
can then be manipulated and turned over to a Generator to return the
textual representation of the message.
- The input must be formatted as a block of RFC 2822 headers and header
- continuation lines, optionally preceded by a `Unix-from' header. The
+ The input must be formatted as a block of RFC 5322 headers and header
+ continuation lines, optionally preceded by a 'Unix-from' header. The
header block is terminated either by the end of the input or by a
blank line.
diff --git a/contrib/tools/python3/Lib/email/utils.py b/contrib/tools/python3/Lib/email/utils.py
index e53abc8b840..e4d35f06abc 100644
--- a/contrib/tools/python3/Lib/email/utils.py
+++ b/contrib/tools/python3/Lib/email/utils.py
@@ -25,8 +25,6 @@ __all__ = [
import os
import re
import time
-import random
-import socket
import datetime
import urllib.parse
@@ -36,9 +34,6 @@ from email._parseaddr import mktime_tz
from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
-# Intrapackage imports
-from email.charset import Charset
-
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = ''
@@ -95,6 +90,8 @@ def formataddr(pair, charset='utf-8'):
name.encode('ascii')
except UnicodeEncodeError:
if isinstance(charset, str):
+ # lazy import to improve module import time
+ from email.charset import Charset
charset = Charset(charset)
encoded_name = charset.header_encode(name)
return "%s <%s>" % (encoded_name, address)
@@ -297,6 +294,11 @@ def make_msgid(idstring=None, domain=None):
portion of the message id after the '@'. It defaults to the locally
defined hostname.
"""
+ # Lazy imports to speedup module import time
+ # (no other functions in email.utils need these modules)
+ import random
+ import socket
+
timeval = int(time.time()*100)
pid = os.getpid()
randint = random.getrandbits(64)
@@ -415,8 +417,14 @@ def decode_params(params):
for name, continuations in rfc2231_params.items():
value = []
extended = False
- # Sort by number
- continuations.sort()
+ # Sort by number, treating None as 0 if there is no 0,
+ # and ignore it if there is already a 0.
+ has_zero = any(x[0] == 0 for x in continuations)
+ if has_zero:
+ continuations = [x for x in continuations if x[0] is not None]
+ else:
+ continuations = [(x[0] or 0, x[1], x[2]) for x in continuations]
+ continuations.sort(key=lambda x: x[0])
# And now append all values in numerical order, converting
# %-encodings for the encoded segments. If any of the
# continuation names ends in a *, then the entire string, after