Update Python 3 to 3.12.6

commit_hash:43ed87a61b9efe3a87682fda1f0bff6f7b422cc9
author: shadchin <shadchin@yandex-team.com> 2024-09-17 19:54:19 +0300
committer: shadchin <shadchin@yandex-team.com> 2024-09-17 20:04:48 +0300
commit: ad73802f079a708231d906dd7273a632db735851 (patch)
tree: d8b2b9f0b31430e69b777af21f63b432f4b92cc4 /contrib/tools/python3/Lib
parent: 7c4e4744ae44dd94daa179458190817615f9e8a1 (diff)
download: ydb-ad73802f079a708231d906dd7273a632db735851.tar.gz
16 files changed, 373 insertions, 178 deletions
diff --git a/contrib/tools/python3/Lib/asyncio/futures.py b/contrib/tools/python3/Lib/asyncio/futures.py
index 97fc4e3fcb..fd486f02c6 100644
--- a/contrib/tools/python3/Lib/asyncio/futures.py
+++ b/contrib/tools/python3/Lib/asyncio/futures.py
@@ -272,9 +272,13 @@ class Future:
             raise exceptions.InvalidStateError(f'{self._state}: {self!r}')
         if isinstance(exception, type):
             exception = exception()
-        if type(exception) is StopIteration:
-            raise TypeError("StopIteration interacts badly with generators "
-                            "and cannot be raised into a Future")
+        if isinstance(exception, StopIteration):
+            new_exc = RuntimeError("StopIteration interacts badly with "
+                                   "generators and cannot be raised into a "
+                                   "Future")
+            new_exc.__cause__ = exception
+            new_exc.__context__ = exception
+            exception = new_exc
         self._exception = exception
         self._exception_tb = exception.__traceback__
         self._state = _FINISHED
diff --git a/contrib/tools/python3/Lib/code.py b/contrib/tools/python3/Lib/code.py
index b4b1ef3b8b..cb7dd44b0a 100644
--- a/contrib/tools/python3/Lib/code.py
+++ b/contrib/tools/python3/Lib/code.py
@@ -105,29 +105,21 @@ class InteractiveInterpreter:
         The output is written by self.write(), below.
 
         """
-        type, value, tb = sys.exc_info()
-        sys.last_exc = value
-        sys.last_type = type
-        sys.last_value = value
-        sys.last_traceback = tb
-        if filename and type is SyntaxError:
-            # Work hard to stuff the correct filename in the exception
-            try:
-                msg, (dummy_filename, lineno, offset, line) = value.args
-            except ValueError:
-                # Not the format we expect; leave it alone
-                pass
-            else:
-                # Stuff in the right filename
-                value = SyntaxError(msg, (filename, lineno, offset, line))
-                sys.last_exc = sys.last_value = value
-        if sys.excepthook is sys.__excepthook__:
-            lines = traceback.format_exception_only(type, value)
-            self.write(''.join(lines))
-        else:
-            # If someone has set sys.excepthook, we let that take precedence
-            # over self.write
-            self._call_excepthook(type, value, tb)
+        try:
+            typ, value, tb = sys.exc_info()
+            if filename and typ is SyntaxError:
+                # Work hard to stuff the correct filename in the exception
+                try:
+                    msg, (dummy_filename, lineno, offset, line) = value.args
+                except ValueError:
+                    # Not the format we expect; leave it alone
+                    pass
+                else:
+                    # Stuff in the right filename
+                    value = SyntaxError(msg, (filename, lineno, offset, line))
+            self._showtraceback(typ, value, None)
+        finally:
+            typ = value = tb = None
 
     def showtraceback(self):
         """Display the exception that just occurred.
@@ -137,32 +129,34 @@ class InteractiveInterpreter:
         The output is written by self.write(), below.
 
         """
-        sys.last_type, sys.last_value, last_tb = ei = sys.exc_info()
-        sys.last_traceback = last_tb
-        sys.last_exc = ei[1]
         try:
-            if sys.excepthook is sys.__excepthook__:
-                lines = traceback.format_exception(ei[0], ei[1], last_tb.tb_next)
-                self.write(''.join(lines))
-            else:
-                # If someone has set sys.excepthook, we let that take precedence
-                # over self.write
-                self._call_excepthook(ei[0], ei[1], last_tb)
+            typ, value, tb = sys.exc_info()
+            self._showtraceback(typ, value, tb.tb_next)
         finally:
-            last_tb = ei = None
+            typ = value = tb = None
 
-    def _call_excepthook(self, typ, value, tb):
-        try:
-            sys.excepthook(typ, value, tb)
-        except SystemExit:
-            raise
-        except BaseException as e:
-            e.__context__ = None
-            print('Error in sys.excepthook:', file=sys.stderr)
-            sys.__excepthook__(type(e), e, e.__traceback__.tb_next)
-            print(file=sys.stderr)
-            print('Original exception was:', file=sys.stderr)
-            sys.__excepthook__(typ, value, tb)
+    def _showtraceback(self, typ, value, tb):
+        sys.last_type = typ
+        sys.last_traceback = tb
+        sys.last_exc = sys.last_value = value = value.with_traceback(tb)
+        if sys.excepthook is sys.__excepthook__:
+            lines = traceback.format_exception(typ, value, tb)
+            self.write(''.join(lines))
+        else:
+            # If someone has set sys.excepthook, we let that take precedence
+            # over self.write
+            try:
+                sys.excepthook(typ, value, tb)
+            except SystemExit:
+                raise
+            except BaseException as e:
+                e.__context__ = None
+                e = e.with_traceback(e.__traceback__.tb_next)
+                print('Error in sys.excepthook:', file=sys.stderr)
+                sys.__excepthook__(type(e), e, e.__traceback__)
+                print(file=sys.stderr)
+                print('Original exception was:', file=sys.stderr)
+                sys.__excepthook__(typ, value, tb)
 
     def write(self, data):
         """Write a string.
diff --git a/contrib/tools/python3/Lib/email/utils.py b/contrib/tools/python3/Lib/email/utils.py
index 1de547a011..e53abc8b84 100644
--- a/contrib/tools/python3/Lib/email/utils.py
+++ b/contrib/tools/python3/Lib/email/utils.py
@@ -48,6 +48,7 @@ TICK = "'"
 specialsre = re.compile(r'[][\\()<>@,:;".]')
 escapesre = re.compile(r'[\\"]')
 
+
 def _has_surrogates(s):
     """Return True if s may contain surrogate-escaped binary data."""
     # This check is based on the fact that unless there are surrogates, utf8
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
     return address
 
 
+def _iter_escaped_chars(addr):
+    pos = 0
+    escape = False
+    for pos, ch in enumerate(addr):
+        if escape:
+            yield (pos, '\\' + ch)
+            escape = False
+        elif ch == '\\':
+            escape = True
+        else:
+            yield (pos, ch)
+    if escape:
+        yield (pos, '\\')
+
+
+def _strip_quoted_realnames(addr):
+    """Strip real names between quotes."""
+    if '"' not in addr:
+        # Fast path
+        return addr
+
+    start = 0
+    open_pos = None
+    result = []
+    for pos, ch in _iter_escaped_chars(addr):
+        if ch == '"':
+            if open_pos is None:
+                open_pos = pos
+            else:
+                if start != open_pos:
+                    result.append(addr[start:open_pos])
+                start = pos + 1
+                open_pos = None
+
+    if start < len(addr):
+        result.append(addr[start:])
+
+    return ''.join(result)
 
-def getaddresses(fieldvalues):
-    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
-    all = COMMASPACE.join(str(v) for v in fieldvalues)
-    a = _AddressList(all)
-    return a.addresslist
+
+supports_strict_parsing = True
+
+def getaddresses(fieldvalues, *, strict=True):
+    """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
+
+    When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
+    its place.
+
+    If strict is true, use a strict parser which rejects malformed inputs.
+    """
+
+    # If strict is true, if the resulting list of parsed addresses is greater
+    # than the number of fieldvalues in the input list, a parsing error has
+    # occurred and consequently a list containing a single empty 2-tuple [('',
+    # '')] is returned in its place. This is done to avoid invalid output.
+    #
+    # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
+    # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
+    # Safe output: [('', '')]
+
+    if not strict:
+        all = COMMASPACE.join(str(v) for v in fieldvalues)
+        a = _AddressList(all)
+        return a.addresslist
+
+    fieldvalues = [str(v) for v in fieldvalues]
+    fieldvalues = _pre_parse_validation(fieldvalues)
+    addr = COMMASPACE.join(fieldvalues)
+    a = _AddressList(addr)
+    result = _post_parse_validation(a.addresslist)
+
+    # Treat output as invalid if the number of addresses is not equal to the
+    # expected number of addresses.
+    n = 0
+    for v in fieldvalues:
+        # When a comma is used in the Real Name part it is not a deliminator.
+        # So strip those out before counting the commas.
+        v = _strip_quoted_realnames(v)
+        # Expected number of addresses: 1 + number of commas
+        n += 1 + v.count(',')
+    if len(result) != n:
+        return [('', '')]
+
+    return result
+
+
+def _check_parenthesis(addr):
+    # Ignore parenthesis in quoted real names.
+    addr = _strip_quoted_realnames(addr)
+
+    opens = 0
+    for pos, ch in _iter_escaped_chars(addr):
+        if ch == '(':
+            opens += 1
+        elif ch == ')':
+            opens -= 1
+            if opens < 0:
+                return False
+    return (opens == 0)
+
+
+def _pre_parse_validation(email_header_fields):
+    accepted_values = []
+    for v in email_header_fields:
+        if not _check_parenthesis(v):
+            v = "('', '')"
+        accepted_values.append(v)
+
+    return accepted_values
+
+
+def _post_parse_validation(parsed_email_header_tuples):
+    accepted_values = []
+    # The parser would have parsed a correctly formatted domain-literal
+    # The existence of an [ after parsing indicates a parsing failure
+    for v in parsed_email_header_tuples:
+        if '[' in v[1]:
+            v = ('', '')
+        accepted_values.append(v)
+
+    return accepted_values
 
 
 def _format_timetuple_and_zone(timetuple, zone):
@@ -205,16 +321,33 @@ def parsedate_to_datetime(data):
             tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
 
 
-def parseaddr(addr):
+def parseaddr(addr, *, strict=True):
     """
     Parse addr into its constituent realname and email address parts.
 
     Return a tuple of realname and email address, unless the parse fails, in
     which case return a 2-tuple of ('', '').
+
+    If strict is True, use a strict parser which rejects malformed inputs.
     """
-    addrs = _AddressList(addr).addresslist
-    if not addrs:
-        return '', ''
+    if not strict:
+        addrs = _AddressList(addr).addresslist
+        if not addrs:
+            return ('', '')
+        return addrs[0]
+
+    if isinstance(addr, list):
+        addr = addr[0]
+
+    if not isinstance(addr, str):
+        return ('', '')
+
+    addr = _pre_parse_validation([addr])[0]
+    addrs = _post_parse_validation(_AddressList(addr).addresslist)
+
+    if not addrs or len(addrs) > 1:
+        return ('', '')
+
     return addrs[0]
 
 
diff --git a/contrib/tools/python3/Lib/http/cookies.py b/contrib/tools/python3/Lib/http/cookies.py
index 351faf428a..6b9ed24ad8 100644
--- a/contrib/tools/python3/Lib/http/cookies.py
+++ b/contrib/tools/python3/Lib/http/cookies.py
@@ -184,8 +184,13 @@ def _quote(str):
         return '"' + str.translate(_Translator) + '"'
 
 
-_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
-_QuotePatt = re.compile(r"[\\].")
+_unquote_sub = re.compile(r'\\(?:([0-3][0-7][0-7])|(.))').sub
+
+def _unquote_replace(m):
+    if m[1]:
+        return chr(int(m[1], 8))
+    else:
+        return m[2]
 
 def _unquote(str):
     # If there aren't any doublequotes,
@@ -205,30 +210,7 @@ def _unquote(str):
     #    \012 --> \n
     #    \"   --> "
     #
-    i = 0
-    n = len(str)
-    res = []
-    while 0 <= i < n:
-        o_match = _OctalPatt.search(str, i)
-        q_match = _QuotePatt.search(str, i)
-        if not o_match and not q_match:              # Neither matched
-            res.append(str[i:])
-            break
-        # else:
-        j = k = -1
-        if o_match:
-            j = o_match.start(0)
-        if q_match:
-            k = q_match.start(0)
-        if q_match and (not o_match or k < j):     # QuotePatt matched
-            res.append(str[i:k])
-            res.append(str[k+1])
-            i = k + 2
-        else:                                      # OctalPatt matched
-            res.append(str[i:j])
-            res.append(chr(int(str[j+1:j+4], 8)))
-            i = j + 4
-    return _nulljoin(res)
+    return _unquote_sub(_unquote_replace, str)
 
 # The _getdate() routine is used to set the expiration time in the cookie's HTTP
 # header.  By default, _getdate() returns the current time in the appropriate
diff --git a/contrib/tools/python3/Lib/logging/handlers.py b/contrib/tools/python3/Lib/logging/handlers.py
index 715bce785c..73757758af 100644
--- a/contrib/tools/python3/Lib/logging/handlers.py
+++ b/contrib/tools/python3/Lib/logging/handlers.py
@@ -190,9 +190,12 @@ class RotatingFileHandler(BaseRotatingHandler):
         if self.stream is None:                 # delay was set...
             self.stream = self._open()
         if self.maxBytes > 0:                   # are we rolling over?
+            pos = self.stream.tell()
+            if not pos:
+                # gh-116263: Never rollover an empty file
+                return False
             msg = "%s\n" % self.format(record)
-            self.stream.seek(0, 2)  #due to non-posix-compliant Windows feature
-            if self.stream.tell() + len(msg) >= self.maxBytes:
+            if pos + len(msg) >= self.maxBytes:
                 # See bpo-45401: Never rollover anything other than regular files
                 if os.path.exists(self.baseFilename) and not os.path.isfile(self.baseFilename):
                     return False
diff --git a/contrib/tools/python3/Lib/pickle.py b/contrib/tools/python3/Lib/pickle.py
index c4d6e65821..01c1a10279 100644
--- a/contrib/tools/python3/Lib/pickle.py
+++ b/contrib/tools/python3/Lib/pickle.py
@@ -397,6 +397,8 @@ def decode_long(data):
     return int.from_bytes(data, byteorder='little', signed=True)
 
 
+_NoValue = object()
+
 # Pickling machinery
 
 class _Pickler:
@@ -1091,11 +1093,16 @@ class _Pickler:
                     (obj, module_name, name))
 
         if self.proto >= 2:
-            code = _extension_registry.get((module_name, name))
-            if code:
-                assert code > 0
+            code = _extension_registry.get((module_name, name), _NoValue)
+            if code is not _NoValue:
                 if code <= 0xff:
-                    write(EXT1 + pack("<B", code))
+                    data = pack("<B", code)
+                    if data == b'\0':
+                        # Should never happen in normal circumstances,
+                        # since the type and the value of the code are
+                        # checked in copyreg.add_extension().
+                        raise RuntimeError("extension code 0 is out of range")
+                    write(EXT1 + data)
                 elif code <= 0xffff:
                     write(EXT2 + pack("<H", code))
                 else:
@@ -1589,9 +1596,8 @@ class _Unpickler:
     dispatch[EXT4[0]] = load_ext4
 
     def get_extension(self, code):
-        nil = []
-        obj = _extension_cache.get(code, nil)
-        if obj is not nil:
+        obj = _extension_cache.get(code, _NoValue)
+        if obj is not _NoValue:
             self.append(obj)
             return
         key = _inverted_registry.get(code)
diff --git a/contrib/tools/python3/Lib/pydoc_data/topics.py b/contrib/tools/python3/Lib/pydoc_data/topics.py
index 33b5834b86..8113889a1f 100644
--- a/contrib/tools/python3/Lib/pydoc_data/topics.py
+++ b/contrib/tools/python3/Lib/pydoc_data/topics.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# Autogenerated by Sphinx on Tue Aug  6 21:02:50 2024
+# Autogenerated by Sphinx on Fri Sep  6 21:00:45 2024
 # as part of the release process.
 topics = {'assert': 'The "assert" statement\n'
            '**********************\n'
@@ -3101,7 +3101,7 @@ topics = {'assert': 'The "assert" statement\n'
              '                       | "None"\n'
              '                       | "True"\n'
              '                       | "False"\n'
-             '                       | signed_number: NUMBER | "-" NUMBER\n'
+             '   signed_number   ::= ["-"] NUMBER\n'
              '\n'
              'The rule "strings" and the token "NUMBER" are defined in the '
              'standard\n'
@@ -8991,16 +8991,22 @@ topics = {'assert': 'The "assert" statement\n'
             'types, operations that compute new values may actually return a\n'
             'reference to any existing object with the same type and value, '
             'while\n'
-            'for mutable objects this is not allowed.  E.g., after "a = 1; b = '
-            '1",\n'
-            '"a" and "b" may or may not refer to the same object with the '
-            'value\n'
-            'one, depending on the implementation, but after "c = []; d = []", '
-            '"c"\n'
-            'and "d" are guaranteed to refer to two different, unique, newly\n'
-            'created empty lists. (Note that "c = d = []" assigns the same '
-            'object\n'
-            'to both "c" and "d".)\n',
+            'for mutable objects this is not allowed. For example, after "a = '
+            '1; b\n'
+            '= 1", *a* and *b* may or may not refer to the same object with '
+            'the\n'
+            'value one, depending on the implementation. This is because "int" '
+            'is\n'
+            'an immutable type, so the reference to "1" can be reused. This\n'
+            'behaviour depends on the implementation used, so should not be '
+            'relied\n'
+            'upon, but is something to be aware of when making use of object\n'
+            'identity tests. However, after "c = []; d = []", *c* and *d* are\n'
+            'guaranteed to refer to two different, unique, newly created '
+            'empty\n'
+            'lists. (Note that "e = f = []" assigns the *same* object to both '
+            '*e*\n'
+            'and *f*.)\n',
  'operator-summary': 'Operator precedence\n'
                      '*******************\n'
                      '\n'
@@ -13075,15 +13081,13 @@ topics = {'assert': 'The "assert" statement\n'
             'greater must be expressed with escapes.\n'
             '\n'
             'Both string and bytes literals may optionally be prefixed with a\n'
-            'letter "\'r\'" or "\'R\'"; such strings are called *raw strings* '
-            'and treat\n'
-            'backslashes as literal characters.  As a result, in string '
-            'literals,\n'
-            '"\'\\U\'" and "\'\\u\'" escapes in raw strings are not treated '
-            'specially.\n'
-            'Given that Python 2.x’s raw unicode literals behave differently '
-            'than\n'
-            'Python 3.x’s the "\'ur\'" syntax is not supported.\n'
+            'letter "\'r\'" or "\'R\'"; such constructs are called *raw '
+            'string\n'
+            'literals* and *raw bytes literals* respectively and treat '
+            'backslashes\n'
+            'as literal characters.  As a result, in raw string literals, '
+            '"\'\\U\'"\n'
+            'and "\'\\u\'" escapes are not treated specially.\n'
             '\n'
             'Added in version 3.3: The "\'rb\'" prefix of raw bytes literals '
             'has been\n'
@@ -13977,8 +13981,7 @@ topics = {'assert': 'The "assert" statement\n'
           'however removing a key and re-inserting it will add it to the end\n'
           'instead of keeping its old place.\n'
           '\n'
-          'Dictionaries are mutable; they can be created by the "{...}" '
-          'notation\n'
+          'Dictionaries are mutable; they can be created by the "{}" notation\n'
           '(see section Dictionary displays).\n'
           '\n'
           'The extension modules "dbm.ndbm" and "dbm.gnu" provide additional\n'
@@ -15963,8 +15966,8 @@ topics = {'assert': 'The "assert" statement\n'
              '|                                | also removes it from '
              '*s*         |                       |\n'
              '+--------------------------------+----------------------------------+-----------------------+\n'
-             '| "s.remove(x)"                  | remove the first item from '
-             '*s*   | (3)                   |\n'
+             '| "s.remove(x)"                  | removes the first item from '
+             '*s*  | (3)                   |\n'
              '|                                | where "s[i]" is equal to '
              '*x*     |                       |\n'
              '+--------------------------------+----------------------------------+-----------------------+\n'
@@ -16428,8 +16431,8 @@ topics = {'assert': 'The "assert" statement\n'
                      '|                                | also removes it from '
                      '*s*         |                       |\n'
                      '+--------------------------------+----------------------------------+-----------------------+\n'
-                     '| "s.remove(x)"                  | remove the first item '
-                     'from *s*   | (3)                   |\n'
+                     '| "s.remove(x)"                  | removes the first '
+                     'item from *s*  | (3)                   |\n'
                      '|                                | where "s[i]" is equal '
                      'to *x*     |                       |\n'
                      '+--------------------------------+----------------------------------+-----------------------+\n'
diff --git a/contrib/tools/python3/Lib/runpy.py b/contrib/tools/python3/Lib/runpy.py
index 42f896c9cd..ef54d3282e 100644
--- a/contrib/tools/python3/Lib/runpy.py
+++ b/contrib/tools/python3/Lib/runpy.py
@@ -247,17 +247,17 @@ def _get_main_module_details(error=ImportError):
         sys.modules[main_name] = saved_main
 
 
-def _get_code_from_file(run_name, fname):
+def _get_code_from_file(fname):
     # Check for a compiled file first
     from pkgutil import read_code
-    decoded_path = os.path.abspath(os.fsdecode(fname))
-    with io.open_code(decoded_path) as f:
+    code_path = os.path.abspath(fname)
+    with io.open_code(code_path) as f:
         code = read_code(f)
     if code is None:
         # That didn't work, so try it as normal source code
-        with io.open_code(decoded_path) as f:
+        with io.open_code(code_path) as f:
             code = compile(f.read(), fname, 'exec')
-    return code, fname
+    return code
 
 def run_path(path_name, init_globals=None, run_name=None):
     """Execute code located at the specified filesystem location.
@@ -279,12 +279,13 @@ def run_path(path_name, init_globals=None, run_name=None):
     pkg_name = run_name.rpartition(".")[0]
     from pkgutil import get_importer
     importer = get_importer(path_name)
+    path_name = os.fsdecode(path_name)
     if isinstance(importer, type(None)):
         # Not a valid sys.path entry, so run the code directly
         # execfile() doesn't help as we want to allow compiled files
-        code, fname = _get_code_from_file(run_name, path_name)
+        code = _get_code_from_file(path_name)
         return _run_module_code(code, init_globals, run_name,
-                                pkg_name=pkg_name, script_name=fname)
+                                pkg_name=pkg_name, script_name=path_name)
     else:
         # Finder is defined for path, so add it to
         # the start of sys.path
diff --git a/contrib/tools/python3/Lib/ssl.py b/contrib/tools/python3/Lib/ssl.py
index 9c1fea6d36..2a115987d9 100644
--- a/contrib/tools/python3/Lib/ssl.py
+++ b/contrib/tools/python3/Lib/ssl.py
@@ -527,18 +527,17 @@ class SSLContext(_SSLContext):
         self._set_alpn_protocols(protos)
 
     def _load_windows_store_certs(self, storename, purpose):
-        certs = bytearray()
         try:
             for cert, encoding, trust in enum_certificates(storename):
                 # CA certs are never PKCS#7 encoded
                 if encoding == "x509_asn":
                     if trust is True or purpose.oid in trust:
-                        certs.extend(cert)
+                        try:
+                            self.load_verify_locations(cadata=cert)
+                        except SSLError as exc:
+                            warnings.warn(f"Bad certificate in Windows certificate store: {exc!s}")
         except PermissionError:
             warnings.warn("unable to enumerate Windows certificate store")
-        if certs:
-            self.load_verify_locations(cadata=certs)
-        return certs
 
     def load_default_certs(self, purpose=Purpose.SERVER_AUTH):
         if not isinstance(purpose, _ASN1Object):
diff --git a/contrib/tools/python3/Lib/tarfile.py b/contrib/tools/python3/Lib/tarfile.py
index e1487e3864..0a0f31eca0 100755
--- a/contrib/tools/python3/Lib/tarfile.py
+++ b/contrib/tools/python3/Lib/tarfile.py
@@ -843,6 +843,9 @@ _NAMED_FILTERS = {
 # Sentinel for replace() defaults, meaning "don't change the attribute"
 _KEEP = object()
 
+# Header length is digits followed by a space.
+_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ")
+
 class TarInfo(object):
     """Informational class which holds the details about an
        archive member given by a tar header block.
@@ -1412,37 +1415,59 @@ class TarInfo(object):
         else:
             pax_headers = tarfile.pax_headers.copy()
 
-        # Check if the pax header contains a hdrcharset field. This tells us
-        # the encoding of the path, linkpath, uname and gname fields. Normally,
-        # these fields are UTF-8 encoded but since POSIX.1-2008 tar
-        # implementations are allowed to store them as raw binary strings if
-        # the translation to UTF-8 fails.
-        match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
-        if match is not None:
-            pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
-
-        # For the time being, we don't care about anything other than "BINARY".
-        # The only other value that is currently allowed by the standard is
-        # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
-        hdrcharset = pax_headers.get("hdrcharset")
-        if hdrcharset == "BINARY":
-            encoding = tarfile.encoding
-        else:
-            encoding = "utf-8"
-
         # Parse pax header information. A record looks like that:
         # "%d %s=%s\n" % (length, keyword, value). length is the size
         # of the complete record including the length field itself and
-        # the newline. keyword and value are both UTF-8 encoded strings.
-        regex = re.compile(br"(\d+) ([^=]+)=")
+        # the newline.
         pos = 0
-        while match := regex.match(buf, pos):
-            length, keyword = match.groups()
-            length = int(length)
-            if length == 0:
+        encoding = None
+        raw_headers = []
+        while len(buf) > pos and buf[pos] != 0x00:
+            if not (match := _header_length_prefix_re.match(buf, pos)):
+                raise InvalidHeaderError("invalid header")
+            try:
+                length = int(match.group(1))
+            except ValueError:
+                raise InvalidHeaderError("invalid header")
+            # Headers must be at least 5 bytes, shortest being '5 x=\n'.
+            # Value is allowed to be empty.
+            if length < 5:
+                raise InvalidHeaderError("invalid header")
+            if pos + length > len(buf):
+                raise InvalidHeaderError("invalid header")
+
+            header_value_end_offset = match.start(1) + length - 1  # Last byte of the header
+            keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]
+            raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")
+
+            # Check the framing of the header. The last character must be '\n' (0x0A)
+            if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:
                 raise InvalidHeaderError("invalid header")
-            value = buf[match.end(2) + 1:match.start(1) + length - 1]
+            raw_headers.append((length, raw_keyword, raw_value))
+
+            # Check if the pax header contains a hdrcharset field. This tells us
+            # the encoding of the path, linkpath, uname and gname fields. Normally,
+            # these fields are UTF-8 encoded but since POSIX.1-2008 tar
+            # implementations are allowed to store them as raw binary strings if
+            # the translation to UTF-8 fails. For the time being, we don't care about
+            # anything other than "BINARY". The only other value that is currently
+            # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
+            # Note that we only follow the initial 'hdrcharset' setting to preserve
+            # the initial behavior of the 'tarfile' module.
+            if raw_keyword == b"hdrcharset" and encoding is None:
+                if raw_value == b"BINARY":
+                    encoding = tarfile.encoding
+                else:  # This branch ensures only the first 'hdrcharset' header is used.
+                    encoding = "utf-8"
 
+            pos += length
+
+        # If no explicit hdrcharset is set, we use UTF-8 as a default.
+        if encoding is None:
+            encoding = "utf-8"
+
+        # After parsing the raw headers we can decode them to text.
+        for length, raw_keyword, raw_value in raw_headers:
             # Normally, we could just use "utf-8" as the encoding and "strict"
             # as the error handler, but we better not take the risk. For
             # example, GNU tar <= 1.23 is known to store filenames it cannot
@@ -1450,17 +1475,16 @@ class TarInfo(object):
             # hdrcharset=BINARY header).
             # We first try the strict standard encoding, and if that fails we
             # fall back on the user's encoding and error handler.
-            keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
+            keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
                     tarfile.errors)
             if keyword in PAX_NAME_FIELDS:
-                value = self._decode_pax_field(value, encoding, tarfile.encoding,
+                value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,
                         tarfile.errors)
             else:
-                value = self._decode_pax_field(value, "utf-8", "utf-8",
+                value = self._decode_pax_field(raw_value, "utf-8", "utf-8",
                         tarfile.errors)
 
             pax_headers[keyword] = value
-            pos += length
 
         # Fetch the next header.
         try:
@@ -1475,7 +1499,7 @@ class TarInfo(object):
 
         elif "GNU.sparse.size" in pax_headers:
             # GNU extended sparse format version 0.0.
-            self._proc_gnusparse_00(next, pax_headers, buf)
+            self._proc_gnusparse_00(next, raw_headers)
 
         elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
             # GNU extended sparse format version 1.0.
@@ -1497,15 +1521,24 @@ class TarInfo(object):
 
         return next
 
-    def _proc_gnusparse_00(self, next, pax_headers, buf):
+    def _proc_gnusparse_00(self, next, raw_headers):
         """Process a GNU tar extended sparse header, version 0.0.
         """
         offsets = []
-        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
-            offsets.append(int(match.group(1)))
         numbytes = []
-        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
-            numbytes.append(int(match.group(1)))
+        for _, keyword, value in raw_headers:
+            if keyword == b"GNU.sparse.offset":
+                try:
+                    offsets.append(int(value.decode()))
+                except ValueError:
+                    raise InvalidHeaderError("invalid header")
+
+            elif keyword == b"GNU.sparse.numbytes":
+                try:
+                    numbytes.append(int(value.decode()))
+                except ValueError:
+                    raise InvalidHeaderError("invalid header")
+
         next.sparse = list(zip(offsets, numbytes))
 
     def _proc_gnusparse_01(self, next, pax_headers):
diff --git a/contrib/tools/python3/Lib/turtle.py b/contrib/tools/python3/Lib/turtle.py
index 811c5dfa49..92ac58f8e6 100644
--- a/contrib/tools/python3/Lib/turtle.py
+++ b/contrib/tools/python3/Lib/turtle.py
@@ -1719,7 +1719,7 @@ class TNavigator(object):
         >>> reset()
         >>> turtle.left(60)
         >>> turtle.forward(100)
-        >>> print turtle.xcor()
+        >>> print(turtle.xcor())
         50.0
         """
         return self._position[0]
@@ -1733,7 +1733,7 @@ class TNavigator(object):
         >>> reset()
         >>> turtle.left(60)
         >>> turtle.forward(100)
-        >>> print turtle.ycor()
+        >>> print(turtle.ycor())
         86.6025403784
         """
         return self._position[1]
@@ -2336,7 +2336,7 @@ class TPen(object):
 
         Example (for a Turtle instance named turtle):
         >>> turtle.hideturtle()
-        >>> print turtle.isvisible():
+        >>> print(turtle.isvisible())
         False
         """
         return self._shown
diff --git a/contrib/tools/python3/Lib/urllib/parse.py b/contrib/tools/python3/Lib/urllib/parse.py
index 3932bb99c7..2481595203 100644
--- a/contrib/tools/python3/Lib/urllib/parse.py
+++ b/contrib/tools/python3/Lib/urllib/parse.py
@@ -525,9 +525,13 @@ def urlunsplit(components):
     empty query; the RFC states that these are equivalent)."""
     scheme, netloc, url, query, fragment, _coerce_result = (
                                           _coerce_args(*components))
-    if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//':
+    if netloc:
         if url and url[:1] != '/': url = '/' + url
-        url = '//' + (netloc or '') + url
+        url = '//' + netloc + url
+    elif url[:2] == '//':
+        url = '//' + url
+    elif scheme and scheme in uses_netloc and (not url or url[:1] == '/'):
+        url = '//' + url
     if scheme:
         url = scheme + ':' + url
     if query:
diff --git a/contrib/tools/python3/Lib/webbrowser.py b/contrib/tools/python3/Lib/webbrowser.py
index ba6711e4ef..13b9e85f9e 100755
--- a/contrib/tools/python3/Lib/webbrowser.py
+++ b/contrib/tools/python3/Lib/webbrowser.py
@@ -30,7 +30,7 @@ def register(name, klass, instance=None, *, preferred=False):
         # Preferred browsers go to the front of the list.
         # Need to match to the default browser returned by xdg-settings, which
         # may be of the form e.g. "firefox.desktop".
-        if preferred or (_os_preferred_browser and name in _os_preferred_browser):
+        if preferred or (_os_preferred_browser and f'{name}.desktop' == _os_preferred_browser):
             _tryorder.insert(0, name)
         else:
             _tryorder.append(name)
@@ -77,6 +77,9 @@ def open(url, new=0, autoraise=True):
     - 1: a new browser window.
     - 2: a new browser page ("tab").
     If possible, autoraise raises the window (the default) or not.
+
+    If opening the browser succeeds, return True.
+    If there is a problem, return False.
     """
     if _tryorder is None:
         with _lock:
diff --git a/contrib/tools/python3/Lib/ya.make b/contrib/tools/python3/Lib/ya.make
index d6607786ff..cf6f637f36 100644
--- a/contrib/tools/python3/Lib/ya.make
+++ b/contrib/tools/python3/Lib/ya.make
@@ -4,9 +4,9 @@ ENABLE(PYBUILD_NO_PY)
 
 PY3_LIBRARY()
 
-VERSION(3.12.5)
+VERSION(3.12.6)
 
-ORIGINAL_SOURCE(https://github.com/python/cpython/archive/v3.12.5.tar.gz)
+ORIGINAL_SOURCE(https://github.com/python/cpython/archive/v3.12.6.tar.gz)
 
 LICENSE(Python-2.0)
 
diff --git a/contrib/tools/python3/Lib/zipfile/_path/__init__.py b/contrib/tools/python3/Lib/zipfile/_path/__init__.py
index 78c413563b..8db5ef18d7 100644
--- a/contrib/tools/python3/Lib/zipfile/_path/__init__.py
+++ b/contrib/tools/python3/Lib/zipfile/_path/__init__.py
@@ -1,3 +1,12 @@
+"""
+A Path-like interface for zipfiles.
+
+This codebase is shared between zipfile.Path in the stdlib
+and zipp in PyPI. See
+https://github.com/python/importlib_metadata/wiki/Development-Methodology
+for more detail.
+"""
+
 import io
 import posixpath
 import zipfile
@@ -34,7 +43,7 @@ def _parents(path):
 def _ancestry(path):
     """
     Given a path with elements separated by
-    posixpath.sep, generate all elements of that path
+    posixpath.sep, generate all elements of that path.
 
     >>> list(_ancestry('b/d'))
     ['b/d', 'b']
@@ -46,9 +55,14 @@ def _ancestry(path):
     ['b']
     >>> list(_ancestry(''))
     []
+
+    Multiple separators are treated like a single.
+
+    >>> list(_ancestry('//b//d///f//'))
+    ['//b//d///f', '//b//d', '//b']
     """
     path = path.rstrip(posixpath.sep)
-    while path and path != posixpath.sep:
+    while path.rstrip(posixpath.sep):
         yield path
         path, tail = posixpath.split(path)
 
@@ -174,7 +188,10 @@ def _extract_text_encoding(encoding=None, *args, **kwargs):
 
 class Path:
     """
-    A pathlib-compatible interface for zip files.
+    A :class:`importlib.resources.abc.Traversable` interface for zip files.
+
+    Implements many of the features users enjoy from
+    :class:`pathlib.Path`.
 
     Consider a zip file with this structure::
 
diff --git a/contrib/tools/python3/Lib/zipfile/_path/glob.py b/contrib/tools/python3/Lib/zipfile/_path/glob.py
index 4a2e665e27..d5213533ad 100644
--- a/contrib/tools/python3/Lib/zipfile/_path/glob.py
+++ b/contrib/tools/python3/Lib/zipfile/_path/glob.py
@@ -2,6 +2,19 @@ import re
 
 
 def translate(pattern):
+    return match_dirs(translate_core(pattern))
+
+
+def match_dirs(pattern):
+    """
+    Ensure that zipfile.Path directory names are matched.
+
+    zipfile.Path directory names always end in a slash.
+    """
+    return rf'{pattern}[/]?'
+
+
+def translate_core(pattern):
     r"""
     Given a glob pattern, produce a regex that matches it.
author	shadchin <shadchin@yandex-team.com>	2024-09-17 19:54:19 +0300
committer	shadchin <shadchin@yandex-team.com>	2024-09-17 20:04:48 +0300
commit	ad73802f079a708231d906dd7273a632db735851 (patch)
tree	d8b2b9f0b31430e69b777af21f63b432f4b92cc4 /contrib/tools/python3/Lib
parent	7c4e4744ae44dd94daa179458190817615f9e8a1 (diff)
download	ydb-ad73802f079a708231d906dd7273a632db735851.tar.gz