13 files changed, 215 insertions, 108 deletions
diff --git a/contrib/tools/python3/Lib/email/_encoded_words.py b/contrib/tools/python3/Lib/email/_encoded_words.py
index 6795a606de0..05a34a4c105 100644
--- a/contrib/tools/python3/Lib/email/_encoded_words.py
+++ b/contrib/tools/python3/Lib/email/_encoded_words.py
@@ -219,7 +219,7 @@ def encode(string, charset='utf-8', encoding=None, lang=''):
 
     """
     if charset == 'unknown-8bit':
-        bstring = string.encode('ascii', 'surrogateescape')
+        bstring = string.encode('utf-8', 'surrogateescape')
     else:
         bstring = string.encode(charset)
     if encoding is None:
diff --git a/contrib/tools/python3/Lib/email/_header_value_parser.py b/contrib/tools/python3/Lib/email/_header_value_parser.py
index 3d845c09d41..03fedd99539 100644
--- a/contrib/tools/python3/Lib/email/_header_value_parser.py
+++ b/contrib/tools/python3/Lib/email/_header_value_parser.py
@@ -80,7 +80,8 @@ from email import utils
 # Useful constants and functions
 #
 
-WSP = set(' \t')
+_WSP = ' \t'
+WSP = set(_WSP)
 CFWS_LEADER = WSP | set('(')
 SPECIALS = set(r'()<>@,:;.\"[]')
 ATOM_ENDS = SPECIALS | WSP
@@ -101,6 +102,12 @@ def make_quoted_pairs(value):
     return str(value).replace('\\', '\\\\').replace('"', '\\"')
 
 
+def make_parenthesis_pairs(value):
+    """Escape parenthesis and backslash for use within a comment."""
+    return str(value).replace('\\', '\\\\') \
+        .replace('(', '\\(').replace(')', '\\)')
+
+
 def quote_string(value):
     escaped = make_quoted_pairs(value)
     return f'"{escaped}"'
@@ -874,6 +881,12 @@ class MessageID(MsgID):
 class InvalidMessageID(MessageID):
     token_type = 'invalid-message-id'
 
+class MessageIDList(TokenList):
+    token_type = 'message-id-list'
+
+    @property
+    def message_ids(self):
+        return [x for x in self if x.token_type=='msg-id']
 
 class Header(TokenList):
     token_type = 'header'
@@ -933,7 +946,7 @@ class WhiteSpaceTerminal(Terminal):
         return ' '
 
     def startswith_fws(self):
-        return True
+        return self and self[0] in WSP
 
 
 class ValueTerminal(Terminal):
@@ -1020,6 +1033,8 @@ def _get_ptext_to_endchars(value, endchars):
     a flag that is True iff there were any quoted printables decoded.
 
     """
+    if not value:
+        return '', '', False
     fragment, *remainder = _wsp_splitter(value, 1)
     vchars = []
     escape = False
@@ -1053,7 +1068,7 @@ def get_fws(value):
     fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
     return fws, newvalue
 
-def get_encoded_word(value):
+def get_encoded_word(value, terminal_type='vtext'):
     """ encoded-word = "=?" charset "?" encoding "?" encoded-text "?="
 
     """
@@ -1092,7 +1107,7 @@ def get_encoded_word(value):
             ew.append(token)
             continue
         chars, *remainder = _wsp_splitter(text, 1)
-        vtext = ValueTerminal(chars, 'vtext')
+        vtext = ValueTerminal(chars, terminal_type)
         _validate_xtext(vtext)
         ew.append(vtext)
         text = ''.join(remainder)
@@ -1134,7 +1149,7 @@ def get_unstructured(value):
         valid_ew = True
         if value.startswith('=?'):
             try:
-                token, value = get_encoded_word(value)
+                token, value = get_encoded_word(value, 'utext')
             except _InvalidEwError:
                 valid_ew = False
             except errors.HeaderParseError:
@@ -1163,7 +1178,7 @@ def get_unstructured(value):
         # the parser to go in an infinite loop.
         if valid_ew and rfc2047_matcher.search(tok):
             tok, *remainder = value.partition('=?')
-        vtext = ValueTerminal(tok, 'vtext')
+        vtext = ValueTerminal(tok, 'utext')
         _validate_xtext(vtext)
         unstructured.append(vtext)
         value = ''.join(remainder)
@@ -1573,7 +1588,7 @@ def get_dtext(value):
 def _check_for_early_dl_end(value, domain_literal):
     if value:
         return False
-    domain_literal.append(errors.InvalidHeaderDefect(
+    domain_literal.defects.append(errors.InvalidHeaderDefect(
         "end of input inside domain-literal"))
     domain_literal.append(ValueTerminal(']', 'domain-literal-end'))
     return True
@@ -1592,9 +1607,9 @@ def get_domain_literal(value):
         raise errors.HeaderParseError("expected '[' at start of domain-literal "
                 "but found '{}'".format(value))
     value = value[1:]
+    domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
     if _check_for_early_dl_end(value, domain_literal):
         return domain_literal, value
-    domain_literal.append(ValueTerminal('[', 'domain-literal-start'))
     if value[0] in WSP:
         token, value = get_fws(value)
         domain_literal.append(token)
@@ -2169,6 +2184,32 @@ def parse_message_id(value):
 
     return message_id
 
+def parse_message_ids(value):
+    """in-reply-to     =   "In-Reply-To:" 1*msg-id CRLF
+       references      =   "References:" 1*msg-id CRLF
+    """
+    message_id_list = MessageIDList()
+    while value:
+        if value[0] == ',':
+            # message id list separated with commas - this is invalid,
+            # but happens rather frequently in the wild
+            message_id_list.defects.append(
+                errors.InvalidHeaderDefect("comma in msg-id list"))
+            message_id_list.append(
+                WhiteSpaceTerminal(' ', 'invalid-comma-replacement'))
+            value = value[1:]
+            continue
+        try:
+            token, value = get_msg_id(value)
+            message_id_list.append(token)
+        except errors.HeaderParseError as ex:
+            token = get_unstructured(value)
+            message_id_list.append(InvalidMessageID(token))
+            message_id_list.defects.append(
+                errors.InvalidHeaderDefect("Invalid msg-id: {!r}".format(ex)))
+            break
+    return message_id_list
+
 #
 # XXX: As I begin to add additional header parsers, I'm realizing we probably
 # have two level of parser routines: the get_XXX methods that get a token in
@@ -2786,8 +2827,12 @@ def _steal_trailing_WSP_if_exists(lines):
     if lines and lines[-1] and lines[-1][-1] in WSP:
         wsp = lines[-1][-1]
         lines[-1] = lines[-1][:-1]
+        # gh-142006: if the line is now empty, remove it entirely.
+        if not lines[-1]:
+            lines.pop()
     return wsp
 
+
 def _refold_parse_tree(parse_tree, *, policy):
     """Return string of contents of parse_tree folded according to RFC rules.
 
@@ -2796,11 +2841,9 @@ def _refold_parse_tree(parse_tree, *, policy):
     maxlen = policy.max_line_length or sys.maxsize
     encoding = 'utf-8' if policy.utf8 else 'us-ascii'
     lines = ['']  # Folded lines to be output
-    leading_whitespace = ''  # When we have whitespace between two encoded
-                             # words, we may need to encode the whitespace
-                             # at the beginning of the second word.
-    last_ew = None  # Points to the last encoded character if there's an ew on
-                    # the line
+    last_word_is_ew = False
+    last_ew = None  # if there is an encoded word in the last line of lines,
+                    # points to the encoded word's first character
     last_charset = None
     wrap_as_ew_blocked = 0
     want_encoding = False  # This is set to True if we need to encode this part
@@ -2813,7 +2856,7 @@ def _refold_parse_tree(parse_tree, *, policy):
             continue
         tstr = str(part)
         if not want_encoding:
-            if part.token_type == 'ptext':
+            if part.token_type in ('ptext', 'vtext'):
                 # Encode if tstr contains special characters.
                 want_encoding = not SPECIALSNL.isdisjoint(tstr)
             else:
@@ -2835,6 +2878,7 @@ def _refold_parse_tree(parse_tree, *, policy):
         if part.token_type == 'mime-parameters':
             # Mime parameter folding (using RFC2231) is extra special.
             _fold_mime_parameters(part, lines, maxlen, encoding)
+            last_word_is_ew = False
             continue
 
         if want_encoding and not wrap_as_ew_blocked:
@@ -2851,6 +2895,7 @@ def _refold_parse_tree(parse_tree, *, policy):
                             # XXX what if encoded_part has no leading FWS?
                             lines.append(newline)
                         lines[-1] += encoded_part
+                        last_word_is_ew = False
                         continue
                 # Either this is not a major syntactic break, so we don't
                 # want it on a line by itself even if it fits, or it
@@ -2869,11 +2914,16 @@ def _refold_parse_tree(parse_tree, *, policy):
                     (last_charset == 'unknown-8bit' or
                      last_charset == 'utf-8' and charset != 'us-ascii')):
                     last_ew = None
-                last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew,
-                                      part.ew_combine_allowed, charset, leading_whitespace)
-                # This whitespace has been added to the lines in _fold_as_ew()
-                # so clear it now.
-                leading_whitespace = ''
+                last_ew = _fold_as_ew(
+                    tstr,
+                    lines,
+                    maxlen,
+                    last_ew,
+                    part.ew_combine_allowed,
+                    charset,
+                    last_word_is_ew,
+                )
+                last_word_is_ew = True
                 last_charset = charset
                 want_encoding = False
                 continue
@@ -2886,28 +2936,19 @@ def _refold_parse_tree(parse_tree, *, policy):
 
         if len(tstr) <= maxlen - len(lines[-1]):
             lines[-1] += tstr
+            last_word_is_ew = last_word_is_ew and not bool(tstr.strip(_WSP))
             continue
 
         # This part is too long to fit.  The RFC wants us to break at
         # "major syntactic breaks", so unless we don't consider this
         # to be one, check if it will fit on the next line by itself.
-        leading_whitespace = ''
         if (part.syntactic_break and
                 len(tstr) + 1 <= maxlen):
             newline = _steal_trailing_WSP_if_exists(lines)
             if newline or part.startswith_fws():
-                # We're going to fold the data onto a new line here.  Due to
-                # the way encoded strings handle continuation lines, we need to
-                # be prepared to encode any whitespace if the next line turns
-                # out to start with an encoded word.
                 lines.append(newline + tstr)
-
-                whitespace_accumulator = []
-                for char in lines[-1]:
-                    if char not in WSP:
-                        break
-                    whitespace_accumulator.append(char)
-                leading_whitespace = ''.join(whitespace_accumulator)
+                last_word_is_ew = (last_word_is_ew
+                                   and not bool(lines[-1].strip(_WSP)))
                 last_ew = None
                 continue
         if not hasattr(part, 'encode'):
@@ -2922,6 +2963,13 @@ def _refold_parse_tree(parse_tree, *, policy):
                     [ValueTerminal(make_quoted_pairs(p), 'ptext')
                      for p in newparts] +
                     [ValueTerminal('"', 'ptext')])
+            if part.token_type == 'comment':
+                newparts = (
+                    [ValueTerminal('(', 'ptext')] +
+                    [ValueTerminal(make_parenthesis_pairs(p), 'ptext')
+                     if p.token_type == 'ptext' else p
+                     for p in newparts] +
+                    [ValueTerminal(')', 'ptext')])
             if not part.as_ew_allowed:
                 wrap_as_ew_blocked += 1
                 newparts.append(end_ew_not_allowed)
@@ -2940,10 +2988,11 @@ def _refold_parse_tree(parse_tree, *, policy):
         else:
             # We can't fold it onto the next line either...
             lines[-1] += tstr
+        last_word_is_ew = last_word_is_ew and not bool(tstr.strip(_WSP))
 
     return policy.linesep.join(lines) + policy.linesep
 
-def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, leading_whitespace):
+def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset, last_word_is_ew):
     """Fold string to_encode into lines as encoded word, combining if allowed.
     Return the new value for last_ew, or None if ew_combine_allowed is False.
 
@@ -2958,6 +3007,16 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
         to_encode = str(
             get_unstructured(lines[-1][last_ew:] + to_encode))
         lines[-1] = lines[-1][:last_ew]
+    elif last_word_is_ew:
+        # If we are following up an encoded word with another encoded word,
+        # any white space between the two will be ignored when decoded.
+        # Therefore, we encode all to-be-displayed whitespace in the second
+        # encoded word.
+        len_without_wsp = len(lines[-1].rstrip(_WSP))
+        leading_whitespace = lines[-1][len_without_wsp:]
+        lines[-1] = (lines[-1][:len_without_wsp]
+                     + (' ' if leading_whitespace else ''))
+        to_encode = leading_whitespace + to_encode
     elif to_encode[0] in WSP:
         # We're joining this to non-encoded text, so don't encode
         # the leading blank.
@@ -2986,20 +3045,13 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
 
     while to_encode:
         remaining_space = maxlen - len(lines[-1])
-        text_space = remaining_space - chrome_len - len(leading_whitespace)
+        text_space = remaining_space - chrome_len
         if text_space <= 0:
-            lines.append(' ')
+            newline = _steal_trailing_WSP_if_exists(lines)
+            lines.append(newline or ' ')
+            new_last_ew = len(lines[-1])
             continue
 
-        # If we are at the start of a continuation line, prepend whitespace
-        # (we only want to do this when the line starts with an encoded word
-        # but if we're folding in this helper function, then we know that we
-        # are going to be writing out an encoded word.)
-        if len(lines) > 1 and len(lines[-1]) == 1 and leading_whitespace:
-            encoded_word = _ew.encode(leading_whitespace, charset=encode_as)
-            lines[-1] += encoded_word
-            leading_whitespace = ''
-
         to_encode_word = to_encode[:text_space]
         encoded_word = _ew.encode(to_encode_word, charset=encode_as)
         excess = len(encoded_word) - remaining_space
@@ -3011,7 +3063,6 @@ def _fold_as_ew(to_encode, lines, maxlen, last_ew, ew_combine_allowed, charset,
             excess = len(encoded_word) - remaining_space
         lines[-1] += encoded_word
         to_encode = to_encode[len(to_encode_word):]
-        leading_whitespace = ''
 
         if to_encode:
             lines.append(' ')
diff --git a/contrib/tools/python3/Lib/email/_parseaddr.py b/contrib/tools/python3/Lib/email/_parseaddr.py
index febe411355d..565af0cf361 100644
--- a/contrib/tools/python3/Lib/email/_parseaddr.py
+++ b/contrib/tools/python3/Lib/email/_parseaddr.py
@@ -13,7 +13,7 @@ __all__ = [
     'quote',
     ]
 
-import time, calendar
+import time
 
 SPACE = ' '
 EMPTYSTRING = ''
@@ -146,8 +146,9 @@ def _parsedate_tz(data):
         return None
     # Check for a yy specified in two-digit format, then convert it to the
     # appropriate four-digit format, according to the POSIX standard. RFC 822
-    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
-    # mandates a 4-digit yy. For more information, see the documentation for
+    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822) already
+    # mandated a 4-digit yy, and RFC 5322 (which obsoletes RFC 2822) continues
+    # this requirement. For more information, see the documentation for
     # the time module.
     if yy < 100:
         # The year is between 1969 and 1999 (inclusive).
@@ -194,6 +195,9 @@ def mktime_tz(data):
         # No zone info, so localtime is better assumption than GMT
         return time.mktime(data[:8] + (-1,))
     else:
+        # Delay the import, since mktime_tz is rarely used
+        import calendar
+
         t = calendar.timegm(data)
         return t - data[9]
 
@@ -230,9 +234,11 @@ class AddrlistClass:
         self.CR = '\r\n'
         self.FWS = self.LWS + self.CR
         self.atomends = self.specials + self.LWS + self.CR
-        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
-        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
-        # syntax, so allow dots in phrases.
+        # Note that RFC 2822 section 4.1 introduced '.' as obs-phrase to handle
+        # existing practice (periods in display names), even though it was not
+        # allowed in RFC 822. RFC 5322 section 4.1 (which obsoletes RFC 2822)
+        # continues this requirement. We must recognize obsolete syntax, so
+        # allow dots in phrases.
         self.phraseends = self.atomends.replace('.', '')
         self.field = field
         self.commentlist = []
diff --git a/contrib/tools/python3/Lib/email/_policybase.py b/contrib/tools/python3/Lib/email/_policybase.py
index c9f0d743090..0d486c90a9c 100644
--- a/contrib/tools/python3/Lib/email/_policybase.py
+++ b/contrib/tools/python3/Lib/email/_policybase.py
@@ -370,7 +370,7 @@ class Compat32(Policy):
             h = value
         if h is not None:
             # The Header class interprets a value of None for maxlinelen as the
-            # default value of 78, as recommended by RFC 2822.
+            # default value of 78, as recommended by RFC 5322 section 2.1.1.
             maxlinelen = 0
             if self.max_line_length is not None:
                 maxlinelen = self.max_line_length
diff --git a/contrib/tools/python3/Lib/email/contentmanager.py b/contrib/tools/python3/Lib/email/contentmanager.py
index b4f5830bead..11d1536db27 100644
--- a/contrib/tools/python3/Lib/email/contentmanager.py
+++ b/contrib/tools/python3/Lib/email/contentmanager.py
@@ -2,6 +2,7 @@ import binascii
 import email.charset
 import email.message
 import email.errors
+import sys
 from email import quoprimime
 
 class ContentManager:
@@ -142,13 +143,15 @@ def _encode_base64(data, max_line_length):
 
 
 def _encode_text(string, charset, cte, policy):
+    # If max_line_length is 0 or None, there is no limit.
+    maxlen = policy.max_line_length or sys.maxsize
     lines = string.encode(charset).splitlines()
     linesep = policy.linesep.encode('ascii')
     def embedded_body(lines): return linesep.join(lines) + linesep
     def normal_body(lines): return b'\n'.join(lines) + b'\n'
     if cte is None:
         # Use heuristics to decide on the "best" encoding.
-        if max((len(x) for x in lines), default=0) <= policy.max_line_length:
+        if max(map(len, lines), default=0) <= maxlen:
             try:
                 return '7bit', normal_body(lines).decode('ascii')
             except UnicodeDecodeError:
@@ -156,8 +159,7 @@ def _encode_text(string, charset, cte, policy):
             if policy.cte_type == '8bit':
                 return '8bit', normal_body(lines).decode('ascii', 'surrogateescape')
         sniff = embedded_body(lines[:10])
-        sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'),
-                                          policy.max_line_length)
+        sniff_qp = quoprimime.body_encode(sniff.decode('latin-1'), maxlen)
         sniff_base64 = binascii.b2a_base64(sniff)
         # This is a little unfair to qp; it includes lineseps, base64 doesn't.
         if len(sniff_qp) > len(sniff_base64):
@@ -172,9 +174,9 @@ def _encode_text(string, charset, cte, policy):
         data = normal_body(lines).decode('ascii', 'surrogateescape')
     elif cte == 'quoted-printable':
         data = quoprimime.body_encode(normal_body(lines).decode('latin-1'),
-                                      policy.max_line_length)
+                                      maxlen)
     elif cte == 'base64':
-        data = _encode_base64(embedded_body(lines), policy.max_line_length)
+        data = _encode_base64(embedded_body(lines), maxlen)
     else:
         raise ValueError("Unknown content transfer encoding {}".format(cte))
     return cte, data
diff --git a/contrib/tools/python3/Lib/email/feedparser.py b/contrib/tools/python3/Lib/email/feedparser.py
index c2881d9bc52..8e60f1d1181 100644
--- a/contrib/tools/python3/Lib/email/feedparser.py
+++ b/contrib/tools/python3/Lib/email/feedparser.py
@@ -32,11 +32,13 @@ NLCRE = re.compile(r'\r\n|\r|\n')
 NLCRE_bol = re.compile(r'(\r\n|\r|\n)')
 NLCRE_eol = re.compile(r'(\r\n|\r|\n)\Z')
 NLCRE_crack = re.compile(r'(\r\n|\r|\n)')
-# RFC 2822 $3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
+# RFC 5322 section 3.6.8 Optional fields.  ftext is %d33-57 / %d59-126, Any character
 # except controls, SP, and ":".
 headerRE = re.compile(r'^(From |[\041-\071\073-\176]*:|[\t ])')
 EMPTYSTRING = ''
 NL = '\n'
+boundaryendRE = re.compile(
+    r'(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
 
 NeedMoreData = object()
 
@@ -292,7 +294,7 @@ class FeedParser:
             return
         if self._cur.get_content_maintype() == 'message':
             # The message claims to be a message/* type, then what follows is
-            # another RFC 2822 message.
+            # another RFC 5322 message.
             for retval in self._parsegen():
                 if retval is NeedMoreData:
                     yield NeedMoreData
@@ -327,9 +329,10 @@ class FeedParser:
             # this onto the input stream until we've scanned past the
             # preamble.
             separator = '--' + boundary
-            boundaryre = re.compile(
-                '(?P<sep>' + re.escape(separator) +
-                r')(?P<end>--)?(?P<ws>[ \t]*)(?P<linesep>\r\n|\r|\n)?$')
+            def boundarymatch(line):
+                if not line.startswith(separator):
+                    return None
+                return boundaryendRE.match(line, len(separator))
             capturing_preamble = True
             preamble = []
             linesep = False
@@ -341,7 +344,7 @@ class FeedParser:
                     continue
                 if line == '':
                     break
-                mo = boundaryre.match(line)
+                mo = boundarymatch(line)
                 if mo:
                     # If we're looking at the end boundary, we're done with
                     # this multipart.  If there was a newline at the end of
@@ -373,13 +376,13 @@ class FeedParser:
                         if line is NeedMoreData:
                             yield NeedMoreData
                             continue
-                        mo = boundaryre.match(line)
+                        mo = boundarymatch(line)
                         if not mo:
                             self._input.unreadline(line)
                             break
                     # Recurse to parse this subpart; the input stream points
                     # at the subpart's first line.
-                    self._input.push_eof_matcher(boundaryre.match)
+                    self._input.push_eof_matcher(boundarymatch)
                     for retval in self._parsegen():
                         if retval is NeedMoreData:
                             yield NeedMoreData
@@ -501,10 +504,9 @@ class FeedParser:
                     self._input.unreadline(line)
                     return
                 else:
-                    # Weirdly placed unix-from line.  Note this as a defect
-                    # and ignore it.
+                    # Weirdly placed unix-from line.
                     defect = errors.MisplacedEnvelopeHeaderDefect(line)
-                    self._cur.defects.append(defect)
+                    self.policy.handle_defect(self._cur, defect)
                     continue
             # Split the line on the colon separating field name from value.
             # There will always be a colon, because if there wasn't the part of
@@ -516,7 +518,7 @@ class FeedParser:
             # message. Track the error but keep going.
             if i == 0:
                 defect = errors.InvalidHeaderDefect("Missing header name.")
-                self._cur.defects.append(defect)
+                self.policy.handle_defect(self._cur, defect)
                 continue
 
             assert i>0, "_parse_headers fed line with no : and no leading WS"
diff --git a/contrib/tools/python3/Lib/email/generator.py b/contrib/tools/python3/Lib/email/generator.py
index 47b9df8f4e6..a03eb1fbbc9 100644
--- a/contrib/tools/python3/Lib/email/generator.py
+++ b/contrib/tools/python3/Lib/email/generator.py
@@ -22,6 +22,7 @@ NL = '\n'  # XXX: no longer used by the code below.
 NLCRE = re.compile(r'\r\n|\r|\n')
 fcre = re.compile(r'^From ', re.MULTILINE)
 NEWLINE_WITHOUT_FWSP = re.compile(r'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]')
+NEWLINE_WITHOUT_FWSP_BYTES = re.compile(br'\r\n[^ \t]|\r[^ \n\t]|\n[^ \t]')
 
 
 class Generator:
@@ -50,7 +51,7 @@ class Generator:
         expanded to 8 spaces) than maxheaderlen, the header will split as
         defined in the Header class.  Set maxheaderlen to zero to disable
         header wrapping.  The default is 78, as recommended (but not required)
-        by RFC 2822.
+        by RFC 5322 section 2.1.1.
 
         The policy keyword specifies a policy object that controls a number of
         aspects of the generator's operation.  If no policy is specified,
@@ -429,7 +430,16 @@ class BytesGenerator(Generator):
         # This is almost the same as the string version, except for handling
         # strings with 8bit bytes.
         for h, v in msg.raw_items():
-            self._fp.write(self.policy.fold_binary(h, v))
+            folded = self.policy.fold_binary(h, v)
+            if self.policy.verify_generated_headers:
+                linesep = self.policy.linesep.encode()
+                if not folded.endswith(linesep):
+                    raise HeaderWriteError(
+                        f'folded header does not end with {linesep!r}: {folded!r}')
+                if NEWLINE_WITHOUT_FWSP_BYTES.search(folded.removesuffix(linesep)):
+                    raise HeaderWriteError(
+                        f'folded header contains newline: {folded!r}')
+            self._fp.write(folded)
         # A blank line always separates headers from body
         self.write(self._NL)
 
diff --git a/contrib/tools/python3/Lib/email/header.py b/contrib/tools/python3/Lib/email/header.py
index 984851a7d9a..a0aadb97ca6 100644
--- a/contrib/tools/python3/Lib/email/header.py
+++ b/contrib/tools/python3/Lib/email/header.py
@@ -59,16 +59,22 @@ _max_append = email.quoprimime._max_append
 def decode_header(header):
     """Decode a message header value without converting charset.
 
-    Returns a list of (string, charset) pairs containing each of the decoded
-    parts of the header.  Charset is None for non-encoded parts of the header,
-    otherwise a lower-case string containing the name of the character set
-    specified in the encoded string.
+    For historical reasons, this function may return either:
+
+    1. A list of length 1 containing a pair (str, None).
+    2. A list of (bytes, charset) pairs containing each of the decoded
+       parts of the header.  Charset is None for non-encoded parts of the header,
+       otherwise a lower-case string containing the name of the character set
+       specified in the encoded string.
 
     header may be a string that may or may not contain RFC2047 encoded words,
     or it may be a Header object.
 
     An email.errors.HeaderParseError may be raised when certain decoding error
     occurs (e.g. a base64 decoding exception).
+
+    This function exists for backwards compatibility only. For new code, we
+    recommend using email.headerregistry.HeaderRegistry instead.
     """
     # If it is a Header object, we can just return the encoded chunks.
     if hasattr(header, '_chunks'):
@@ -161,6 +167,9 @@ def make_header(decoded_seq, maxlinelen=None, header_name=None,
     This function takes one of those sequence of pairs and returns a Header
     instance.  Optional maxlinelen, header_name, and continuation_ws are as in
     the Header constructor.
+
+    This function exists for backwards compatibility only, and is not
+    recommended for use in new code.
     """
     h = Header(maxlinelen=maxlinelen, header_name=header_name,
                continuation_ws=continuation_ws)
diff --git a/contrib/tools/python3/Lib/email/headerregistry.py b/contrib/tools/python3/Lib/email/headerregistry.py
index 543141dc427..0e8698efc0b 100644
--- a/contrib/tools/python3/Lib/email/headerregistry.py
+++ b/contrib/tools/python3/Lib/email/headerregistry.py
@@ -534,6 +534,18 @@ class MessageIDHeader:
         kwds['defects'].extend(parse_tree.all_defects)
 
 
+class ReferencesHeader:
+
+    max_count = 1
+    value_parser = staticmethod(parser.parse_message_ids)
+
+    @classmethod
+    def parse(cls, value, kwds):
+        kwds['parse_tree'] = parse_tree = cls.value_parser(value)
+        kwds['decoded'] = str(parse_tree)
+        kwds['defects'].extend(parse_tree.all_defects)
+
+
 # The header factory #
 
 _default_header_map = {
@@ -557,6 +569,8 @@ _default_header_map = {
     'content-disposition':          ContentDispositionHeader,
     'content-transfer-encoding':    ContentTransferEncodingHeader,
     'message-id':                   MessageIDHeader,
+    'in-reply-to':                  ReferencesHeader,
+    'references':                   ReferencesHeader,
     }
 
 class HeaderRegistry:
diff --git a/contrib/tools/python3/Lib/email/message.py b/contrib/tools/python3/Lib/email/message.py
index 6b7c3a23777..80f01d66a33 100644
--- a/contrib/tools/python3/Lib/email/message.py
+++ b/contrib/tools/python3/Lib/email/message.py
@@ -74,19 +74,25 @@ def _parseparam(s):
     # RDM This might be a Header, so for now stringify it.
     s = ';' + str(s)
     plist = []
-    while s[:1] == ';':
-        s = s[1:]
-        end = s.find(';')
-        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
-            end = s.find(';', end + 1)
+    start = 0
+    while s.find(';', start) == start:
+        start += 1
+        end = s.find(';', start)
+        ind, diff = start, 0
+        while end > 0:
+            diff += s.count('"', ind, end) - s.count('\\"', ind, end)
+            if diff % 2 == 0:
+                break
+            end, ind = ind, s.find(';', end + 1)
         if end < 0:
             end = len(s)
-        f = s[:end]
-        if '=' in f:
-            i = f.index('=')
-            f = f[:i].strip().lower() + '=' + f[i+1:].strip()
+        i = s.find('=', start, end)
+        if i == -1:
+            f = s[start:end]
+        else:
+            f = s[start:i].rstrip().lower() + '=' + s[i+1:end].lstrip()
         plist.append(f.strip())
-        s = s[end:]
+        start = end
     return plist
 
 
@@ -135,7 +141,7 @@ def _decode_uu(encoded):
 class Message:
     """Basic message object.
 
-    A message object is defined as something that has a bunch of RFC 2822
+    A message object is defined as something that has a bunch of RFC 5322
     headers and a payload.  It may optionally have an envelope header
     (a.k.a. Unix-From or From_ header).  If the message is a container (i.e. a
     multipart or a message/rfc822), then the payload is a list of Message
@@ -313,6 +319,8 @@ class Message:
                 # If it does happen, turn the string into bytes in a way
                 # guaranteed not to fail.
                 bpayload = payload.encode('raw-unicode-escape')
+        else:
+            bpayload = payload
         if cte == 'quoted-printable':
             return quopri.decodestring(bpayload)
         elif cte == 'base64':
@@ -564,7 +572,7 @@ class Message:
 
         msg.add_header('content-disposition', 'attachment', filename='bud.gif')
         msg.add_header('content-disposition', 'attachment',
-                       filename=('utf-8', '', Fußballer.ppt'))
+                       filename=('utf-8', '', 'Fußballer.ppt'))
         msg.add_header('content-disposition', 'attachment',
                        filename='Fußballer.ppt'))
         """
diff --git a/contrib/tools/python3/Lib/email/mime/audio.py b/contrib/tools/python3/Lib/email/mime/audio.py
index 065819b2a21..aa0c4905cbb 100644
--- a/contrib/tools/python3/Lib/email/mime/audio.py
+++ b/contrib/tools/python3/Lib/email/mime/audio.py
@@ -6,7 +6,6 @@
 
 __all__ = ['MIMEAudio']
 
-from io import BytesIO
 from email import encoders
 from email.mime.nonmultipart import MIMENonMultipart
 
@@ -59,10 +58,8 @@ def _what(data):
     # sndhdr.what() had a pretty cruddy interface, unfortunately.  This is why
     # we re-do it here.  It would be easier to reverse engineer the Unix 'file'
     # command and use the standard 'magic' file, as shipped with a modern Unix.
-    hdr = data[:512]
-    fakefile = BytesIO(hdr)
     for testfn in _rules:
-        if res := testfn(hdr, fakefile):
+        if res := testfn(data):
             return res
     else:
         return None
@@ -74,7 +71,7 @@ def rule(rulefunc):
 
 
 @rule
-def _aiff(h, f):
+def _aiff(h):
     if not h.startswith(b'FORM'):
         return None
     if h[8:12] in {b'AIFC', b'AIFF'}:
@@ -84,7 +81,7 @@ def _aiff(h, f):
 
 
 @rule
-def _au(h, f):
+def _au(h):
     if h.startswith(b'.snd'):
         return 'basic'
     else:
@@ -92,7 +89,7 @@ def _au(h, f):
 
 
 @rule
-def _wav(h, f):
+def _wav(h):
     # 'RIFF' <len> 'WAVE' 'fmt ' <len>
     if not h.startswith(b'RIFF') or h[8:12] != b'WAVE' or h[12:16] != b'fmt ':
         return None
diff --git a/contrib/tools/python3/Lib/email/parser.py b/contrib/tools/python3/Lib/email/parser.py
index 06d99b17f2f..e3003118ce1 100644
--- a/contrib/tools/python3/Lib/email/parser.py
+++ b/contrib/tools/python3/Lib/email/parser.py
@@ -2,7 +2,7 @@
 # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter
 # Contact: email-sig@python.org
 
-"""A parser of RFC 2822 and MIME email messages."""
+"""A parser of RFC 5322 and MIME email messages."""
 
 __all__ = ['Parser', 'HeaderParser', 'BytesParser', 'BytesHeaderParser',
            'FeedParser', 'BytesFeedParser']
@@ -15,14 +15,14 @@ from email._policybase import compat32
 
 class Parser:
     def __init__(self, _class=None, *, policy=compat32):
-        """Parser of RFC 2822 and MIME email messages.
+        """Parser of RFC 5322 and MIME email messages.
 
         Creates an in-memory object tree representing the email message, which
         can then be manipulated and turned over to a Generator to return the
         textual representation of the message.
 
-        The string must be formatted as a block of RFC 2822 headers and header
-        continuation lines, optionally preceded by a `Unix-from' header.  The
+        The string must be formatted as a block of RFC 5322 headers and header
+        continuation lines, optionally preceded by a 'Unix-from' header.  The
         header block is terminated either by the end of the string or by a
         blank line.
 
@@ -75,14 +75,14 @@ class HeaderParser(Parser):
 class BytesParser:
 
     def __init__(self, *args, **kw):
-        """Parser of binary RFC 2822 and MIME email messages.
+        """Parser of binary RFC 5322 and MIME email messages.
 
         Creates an in-memory object tree representing the email message, which
         can then be manipulated and turned over to a Generator to return the
         textual representation of the message.
 
-        The input must be formatted as a block of RFC 2822 headers and header
-        continuation lines, optionally preceded by a `Unix-from' header.  The
+        The input must be formatted as a block of RFC 5322 headers and header
+        continuation lines, optionally preceded by a 'Unix-from' header.  The
         header block is terminated either by the end of the input or by a
         blank line.
 
diff --git a/contrib/tools/python3/Lib/email/utils.py b/contrib/tools/python3/Lib/email/utils.py
index e53abc8b840..e4d35f06abc 100644
--- a/contrib/tools/python3/Lib/email/utils.py
+++ b/contrib/tools/python3/Lib/email/utils.py
@@ -25,8 +25,6 @@ __all__ = [
 import os
 import re
 import time
-import random
-import socket
 import datetime
 import urllib.parse
 
@@ -36,9 +34,6 @@ from email._parseaddr import mktime_tz
 
 from email._parseaddr import parsedate, parsedate_tz, _parsedate_tz
 
-# Intrapackage imports
-from email.charset import Charset
-
 COMMASPACE = ', '
 EMPTYSTRING = ''
 UEMPTYSTRING = ''
@@ -95,6 +90,8 @@ def formataddr(pair, charset='utf-8'):
             name.encode('ascii')
         except UnicodeEncodeError:
             if isinstance(charset, str):
+                # lazy import to improve module import time
+                from email.charset import Charset
                 charset = Charset(charset)
             encoded_name = charset.header_encode(name)
             return "%s <%s>" % (encoded_name, address)
@@ -297,6 +294,11 @@ def make_msgid(idstring=None, domain=None):
     portion of the message id after the '@'.  It defaults to the locally
     defined hostname.
     """
+    # Lazy imports to speed up module import time
+    # (no other functions in email.utils need these modules)
+    import random
+    import socket
+
     timeval = int(time.time()*100)
     pid = os.getpid()
     randint = random.getrandbits(64)
@@ -415,8 +417,14 @@ def decode_params(params):
         for name, continuations in rfc2231_params.items():
             value = []
             extended = False
-            # Sort by number
-            continuations.sort()
+            # Sort by number, treating None as 0 if there is no 0,
+            # and ignore it if there is already a 0.
+            has_zero = any(x[0] == 0 for x in continuations)
+            if has_zero:
+                continuations = [x for x in continuations if x[0] is not None]
+            else:
+                continuations = [(x[0] or 0, x[1], x[2]) for x in continuations]
+            continuations.sort(key=lambda x: x[0])
             # And now append all values in numerical order, converting
             # %-encodings for the encoded segments.  If any of the
             # continuation names ends in a *, then the entire string, after