Update Python 3 to 3.12.6

commit_hash:43ed87a61b9efe3a87682fda1f0bff6f7b422cc9
author: shadchin <shadchin@yandex-team.com> 2024-09-17 19:54:19 +0300
committer: shadchin <shadchin@yandex-team.com> 2024-09-17 20:04:48 +0300
commit: ad73802f079a708231d906dd7273a632db735851 (patch)
tree: d8b2b9f0b31430e69b777af21f63b432f4b92cc4 /contrib/tools/python3/Lib/email/utils.py
parent: 7c4e4744ae44dd94daa179458190817615f9e8a1 (diff)
download: ydb-ad73802f079a708231d906dd7273a632db735851.tar.gz
1 files changed, 142 insertions, 9 deletions
diff --git a/contrib/tools/python3/Lib/email/utils.py b/contrib/tools/python3/Lib/email/utils.py
index 1de547a011..e53abc8b84 100644
--- a/contrib/tools/python3/Lib/email/utils.py
+++ b/contrib/tools/python3/Lib/email/utils.py
@@ -48,6 +48,7 @@ TICK = "'"
 specialsre = re.compile(r'[][\\()<>@,:;".]')
 escapesre = re.compile(r'[\\"]')
 
+
 def _has_surrogates(s):
     """Return True if s may contain surrogate-escaped binary data."""
     # This check is based on the fact that unless there are surrogates, utf8
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
     return address
 
 
+def _iter_escaped_chars(addr):
+    pos = 0
+    escape = False
+    for pos, ch in enumerate(addr):
+        if escape:
+            yield (pos, '\\' + ch)
+            escape = False
+        elif ch == '\\':
+            escape = True
+        else:
+            yield (pos, ch)
+    if escape:
+        yield (pos, '\\')
+
+
+def _strip_quoted_realnames(addr):
+    """Strip real names between quotes."""
+    if '"' not in addr:
+        # Fast path
+        return addr
+
+    start = 0
+    open_pos = None
+    result = []
+    for pos, ch in _iter_escaped_chars(addr):
+        if ch == '"':
+            if open_pos is None:
+                open_pos = pos
+            else:
+                if start != open_pos:
+                    result.append(addr[start:open_pos])
+                start = pos + 1
+                open_pos = None
+
+    if start < len(addr):
+        result.append(addr[start:])
+
+    return ''.join(result)
 
-def getaddresses(fieldvalues):
-    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
-    all = COMMASPACE.join(str(v) for v in fieldvalues)
-    a = _AddressList(all)
-    return a.addresslist
+
+supports_strict_parsing = True
+
+def getaddresses(fieldvalues, *, strict=True):
+    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+    its place.
+
+    If strict is true, use a strict parser which rejects malformed inputs.
+    """
+
+    # If strict is true, if the resulting list of parsed addresses is greater
+    # than the number of fieldvalues in the input list, a parsing error has
+    # occurred and consequently a list containing a single empty 2-tuple [('',
+    # '')] is returned in its place. This is done to avoid invalid output.
+    #
+    # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
+    # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
+    # Safe output: [('', '')]
+
+    if not strict:
+        all = COMMASPACE.join(str(v) for v in fieldvalues)
+        a = _AddressList(all)
+        return a.addresslist
+
+    fieldvalues = [str(v) for v in fieldvalues]
+    fieldvalues = _pre_parse_validation(fieldvalues)
+    addr = COMMASPACE.join(fieldvalues)
+    a = _AddressList(addr)
+    result = _post_parse_validation(a.addresslist)
+
+    # Treat output as invalid if the number of addresses is not equal to the
+    # expected number of addresses.
+    n = 0
+    for v in fieldvalues:
+        # When a comma is used in the Real Name part it is not a deliminator.
+        # So strip those out before counting the commas.
+        v = _strip_quoted_realnames(v)
+        # Expected number of addresses: 1 + number of commas
+        n += 1 + v.count(',')
+    if len(result) != n:
+        return [('', '')]
+
+    return result
+
+
+def _check_parenthesis(addr):
+    # Ignore parenthesis in quoted real names.
+    addr = _strip_quoted_realnames(addr)
+
+    opens = 0
+    for pos, ch in _iter_escaped_chars(addr):
+        if ch == '(':
+            opens += 1
+        elif ch == ')':
+            opens -= 1
+            if opens < 0:
+                return False
+    return (opens == 0)
+
+
+def _pre_parse_validation(email_header_fields):
+    accepted_values = []
+    for v in email_header_fields:
+        if not _check_parenthesis(v):
+            v = "('', '')"
+        accepted_values.append(v)
+
+    return accepted_values
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+    accepted_values = []
+    # The parser would have parsed a correctly formatted domain-literal
+    # The existence of an [ after parsing indicates a parsing failure
+    for v in parsed_email_header_tuples:
+        if '[' in v[1]:
+            v = ('', '')
+        accepted_values.append(v)
+
+    return accepted_values
 
 
 def _format_timetuple_and_zone(timetuple, zone):
@@ -205,16 +321,33 @@ def parsedate_to_datetime(data):
             tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
 
 
-def parseaddr(addr):
+def parseaddr(addr, *, strict=True):
     """
     Parse addr into its constituent realname and email address parts.
 
     Return a tuple of realname and email address, unless the parse fails, in
     which case return a 2-tuple of ('', '').
+
+    If strict is True, use a strict parser which rejects malformed inputs.
     """
-    addrs = _AddressList(addr).addresslist
-    if not addrs:
-        return '', ''
+    if not strict:
+        addrs = _AddressList(addr).addresslist
+        if not addrs:
+            return ('', '')
+        return addrs[0]
+
+    if isinstance(addr, list):
+        addr = addr[0]
+
+    if not isinstance(addr, str):
+        return ('', '')
+
+    addr = _pre_parse_validation([addr])[0]
+    addrs = _post_parse_validation(_AddressList(addr).addresslist)
+
+    if not addrs or len(addrs) > 1:
+        return ('', '')
+
     return addrs[0]
author	shadchin <shadchin@yandex-team.com>	2024-09-17 19:54:19 +0300
committer	shadchin <shadchin@yandex-team.com>	2024-09-17 20:04:48 +0300
commit	ad73802f079a708231d906dd7273a632db735851 (patch)
tree	d8b2b9f0b31430e69b777af21f63b432f4b92cc4 /contrib/tools/python3/Lib/email/utils.py
parent	7c4e4744ae44dd94daa179458190817615f9e8a1 (diff)
download	ydb-ad73802f079a708231d906dd7273a632db735851.tar.gz