diff options
author | shadchin <shadchin@yandex-team.com> | 2024-09-17 19:54:19 +0300 |
---|---|---|
committer | shadchin <shadchin@yandex-team.com> | 2024-09-17 20:04:48 +0300 |
commit | ad73802f079a708231d906dd7273a632db735851 (patch) | |
tree | d8b2b9f0b31430e69b777af21f63b432f4b92cc4 /contrib/tools/python3/Lib | |
parent | 7c4e4744ae44dd94daa179458190817615f9e8a1 (diff) | |
download | ydb-ad73802f079a708231d906dd7273a632db735851.tar.gz |
Update Python 3 to 3.12.6
commit_hash:43ed87a61b9efe3a87682fda1f0bff6f7b422cc9
Diffstat (limited to 'contrib/tools/python3/Lib')
-rw-r--r-- | contrib/tools/python3/Lib/asyncio/futures.py | 10 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/code.py | 86 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/email/utils.py | 151 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/http/cookies.py | 34 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/logging/handlers.py | 7 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/pickle.py | 20 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/pydoc_data/topics.py | 57 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/runpy.py | 15 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/ssl.py | 9 | ||||
-rwxr-xr-x | contrib/tools/python3/Lib/tarfile.py | 103 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/turtle.py | 6 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/urllib/parse.py | 8 | ||||
-rwxr-xr-x | contrib/tools/python3/Lib/webbrowser.py | 5 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/ya.make | 4 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/zipfile/_path/__init__.py | 23 | ||||
-rw-r--r-- | contrib/tools/python3/Lib/zipfile/_path/glob.py | 13 |
16 files changed, 373 insertions, 178 deletions
diff --git a/contrib/tools/python3/Lib/asyncio/futures.py b/contrib/tools/python3/Lib/asyncio/futures.py index 97fc4e3fcb..fd486f02c6 100644 --- a/contrib/tools/python3/Lib/asyncio/futures.py +++ b/contrib/tools/python3/Lib/asyncio/futures.py @@ -272,9 +272,13 @@ class Future: raise exceptions.InvalidStateError(f'{self._state}: {self!r}') if isinstance(exception, type): exception = exception() - if type(exception) is StopIteration: - raise TypeError("StopIteration interacts badly with generators " - "and cannot be raised into a Future") + if isinstance(exception, StopIteration): + new_exc = RuntimeError("StopIteration interacts badly with " + "generators and cannot be raised into a " + "Future") + new_exc.__cause__ = exception + new_exc.__context__ = exception + exception = new_exc self._exception = exception self._exception_tb = exception.__traceback__ self._state = _FINISHED diff --git a/contrib/tools/python3/Lib/code.py b/contrib/tools/python3/Lib/code.py index b4b1ef3b8b..cb7dd44b0a 100644 --- a/contrib/tools/python3/Lib/code.py +++ b/contrib/tools/python3/Lib/code.py @@ -105,29 +105,21 @@ class InteractiveInterpreter: The output is written by self.write(), below. """ - type, value, tb = sys.exc_info() - sys.last_exc = value - sys.last_type = type - sys.last_value = value - sys.last_traceback = tb - if filename and type is SyntaxError: - # Work hard to stuff the correct filename in the exception - try: - msg, (dummy_filename, lineno, offset, line) = value.args - except ValueError: - # Not the format we expect; leave it alone - pass - else: - # Stuff in the right filename - value = SyntaxError(msg, (filename, lineno, offset, line)) - sys.last_exc = sys.last_value = value - if sys.excepthook is sys.__excepthook__: - lines = traceback.format_exception_only(type, value) - self.write(''.join(lines)) - else: - # If someone has set sys.excepthook, we let that take precedence - # over self.write - self._call_excepthook(type, value, tb) + try: + typ, value, tb = sys.exc_info() + if filename and typ is SyntaxError: + # Work hard to stuff the correct filename in the exception + try: + msg, (dummy_filename, lineno, offset, line) = value.args + except ValueError: + # Not the format we expect; leave it alone + pass + else: + # Stuff in the right filename + value = SyntaxError(msg, (filename, lineno, offset, line)) + self._showtraceback(typ, value, None) + finally: + typ = value = tb = None def showtraceback(self): """Display the exception that just occurred. @@ -137,32 +129,34 @@ class InteractiveInterpreter: The output is written by self.write(), below. """ - sys.last_type, sys.last_value, last_tb = ei = sys.exc_info() - sys.last_traceback = last_tb - sys.last_exc = ei[1] try: - if sys.excepthook is sys.__excepthook__: - lines = traceback.format_exception(ei[0], ei[1], last_tb.tb_next) - self.write(''.join(lines)) - else: - # If someone has set sys.excepthook, we let that take precedence - # over self.write - self._call_excepthook(ei[0], ei[1], last_tb) + typ, value, tb = sys.exc_info() + self._showtraceback(typ, value, tb.tb_next) finally: - last_tb = ei = None + typ = value = tb = None - def _call_excepthook(self, typ, value, tb): - try: - sys.excepthook(typ, value, tb) - except SystemExit: - raise - except BaseException as e: - e.__context__ = None - print('Error in sys.excepthook:', file=sys.stderr) - sys.__excepthook__(type(e), e, e.__traceback__.tb_next) - print(file=sys.stderr) - print('Original exception was:', file=sys.stderr) - sys.__excepthook__(typ, value, tb) + def _showtraceback(self, typ, value, tb): + sys.last_type = typ + sys.last_traceback = tb + sys.last_exc = sys.last_value = value = value.with_traceback(tb) + if sys.excepthook is sys.__excepthook__: + lines = traceback.format_exception(typ, value, tb) + self.write(''.join(lines)) + else: + # If someone has set sys.excepthook, we let that take precedence + # over self.write + try: + sys.excepthook(typ, value, tb) + except SystemExit: + raise + except BaseException as e: + e.__context__ = None + e = e.with_traceback(e.__traceback__.tb_next) + print('Error in sys.excepthook:', file=sys.stderr) + sys.__excepthook__(type(e), e, e.__traceback__) + print(file=sys.stderr) + print('Original exception was:', file=sys.stderr) + sys.__excepthook__(typ, value, tb) def write(self, data): """Write a string. diff --git a/contrib/tools/python3/Lib/email/utils.py b/contrib/tools/python3/Lib/email/utils.py index 1de547a011..e53abc8b84 100644 --- a/contrib/tools/python3/Lib/email/utils.py +++ b/contrib/tools/python3/Lib/email/utils.py @@ -48,6 +48,7 @@ TICK = "'" specialsre = re.compile(r'[][\\()<>@,:;".]') escapesre = re.compile(r'[\\"]') + def _has_surrogates(s): """Return True if s may contain surrogate-escaped binary data.""" # This check is based on the fact that unless there are surrogates, utf8 @@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'): return address +def _iter_escaped_chars(addr): + pos = 0 + escape = False + for pos, ch in enumerate(addr): + if escape: + yield (pos, '\\' + ch) + escape = False + elif ch == '\\': + escape = True + else: + yield (pos, ch) + if escape: + yield (pos, '\\') + + +def _strip_quoted_realnames(addr): + """Strip real names between quotes.""" + if '"' not in addr: + # Fast path + return addr + + start = 0 + open_pos = None + result = [] + for pos, ch in _iter_escaped_chars(addr): + if ch == '"': + if open_pos is None: + open_pos = pos + else: + if start != open_pos: + result.append(addr[start:open_pos]) + start = pos + 1 + open_pos = None + + if start < len(addr): + result.append(addr[start:]) + + return ''.join(result) -def getaddresses(fieldvalues): - """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" - all = COMMASPACE.join(str(v) for v in fieldvalues) - a = _AddressList(all) - return a.addresslist + +supports_strict_parsing = True + +def getaddresses(fieldvalues, *, strict=True): + """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. + + When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in + its place. + + If strict is true, use a strict parser which rejects malformed inputs. + """ + + # If strict is true, if the resulting list of parsed addresses is greater + # than the number of fieldvalues in the input list, a parsing error has + # occurred and consequently a list containing a single empty 2-tuple [('', + # '')] is returned in its place. This is done to avoid invalid output. + # + # Malformed input: getaddresses(['alice@example.com <bob@example.com>']) + # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] + # Safe output: [('', '')] + + if not strict: + all = COMMASPACE.join(str(v) for v in fieldvalues) + a = _AddressList(all) + return a.addresslist + + fieldvalues = [str(v) for v in fieldvalues] + fieldvalues = _pre_parse_validation(fieldvalues) + addr = COMMASPACE.join(fieldvalues) + a = _AddressList(addr) + result = _post_parse_validation(a.addresslist) + + # Treat output as invalid if the number of addresses is not equal to the + # expected number of addresses. + n = 0 + for v in fieldvalues: + # When a comma is used in the Real Name part it is not a deliminator. + # So strip those out before counting the commas. + v = _strip_quoted_realnames(v) + # Expected number of addresses: 1 + number of commas + n += 1 + v.count(',') + if len(result) != n: + return [('', '')] + + return result + + +def _check_parenthesis(addr): + # Ignore parenthesis in quoted real names. + addr = _strip_quoted_realnames(addr) + + opens = 0 + for pos, ch in _iter_escaped_chars(addr): + if ch == '(': + opens += 1 + elif ch == ')': + opens -= 1 + if opens < 0: + return False + return (opens == 0) + + +def _pre_parse_validation(email_header_fields): + accepted_values = [] + for v in email_header_fields: + if not _check_parenthesis(v): + v = "('', '')" + accepted_values.append(v) + + return accepted_values + + +def _post_parse_validation(parsed_email_header_tuples): + accepted_values = [] + # The parser would have parsed a correctly formatted domain-literal + # The existence of an [ after parsing indicates a parsing failure + for v in parsed_email_header_tuples: + if '[' in v[1]: + v = ('', '') + accepted_values.append(v) + + return accepted_values def _format_timetuple_and_zone(timetuple, zone): @@ -205,16 +321,33 @@ def parsedate_to_datetime(data): tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) -def parseaddr(addr): +def parseaddr(addr, *, strict=True): """ Parse addr into its constituent realname and email address parts. Return a tuple of realname and email address, unless the parse fails, in which case return a 2-tuple of ('', ''). + + If strict is True, use a strict parser which rejects malformed inputs. """ - addrs = _AddressList(addr).addresslist - if not addrs: - return '', '' + if not strict: + addrs = _AddressList(addr).addresslist + if not addrs: + return ('', '') + return addrs[0] + + if isinstance(addr, list): + addr = addr[0] + + if not isinstance(addr, str): + return ('', '') + + addr = _pre_parse_validation([addr])[0] + addrs = _post_parse_validation(_AddressList(addr).addresslist) + + if not addrs or len(addrs) > 1: + return ('', '') + return addrs[0] diff --git a/contrib/tools/python3/Lib/http/cookies.py b/contrib/tools/python3/Lib/http/cookies.py index 351faf428a..6b9ed24ad8 100644 --- a/contrib/tools/python3/Lib/http/cookies.py +++ b/contrib/tools/python3/Lib/http/cookies.py @@ -184,8 +184,13 @@ def _quote(str): return '"' + str.translate(_Translator) + '"' -_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]") -_QuotePatt = re.compile(r"[\\].") +_unquote_sub = re.compile(r'\\(?:([0-3][0-7][0-7])|(.))').sub + +def _unquote_replace(m): + if m[1]: + return chr(int(m[1], 8)) + else: + return m[2] def _unquote(str): # If there aren't any doublequotes, @@ -205,30 +210,7 @@ def _unquote(str): # \012 --> \n # \" --> " # - i = 0 - n = len(str) - res = [] - while 0 <= i < n: - o_match = _OctalPatt.search(str, i) - q_match = _QuotePatt.search(str, i) - if not o_match and not q_match: # Neither matched - res.append(str[i:]) - break - # else: - j = k = -1 - if o_match: - j = o_match.start(0) - if q_match: - k = q_match.start(0) - if q_match and (not o_match or k < j): # QuotePatt matched - res.append(str[i:k]) - res.append(str[k+1]) - i = k + 2 - else: # OctalPatt matched - res.append(str[i:j]) - res.append(chr(int(str[j+1:j+4], 8))) - i = j + 4 - return _nulljoin(res) + return _unquote_sub(_unquote_replace, str) # The _getdate() routine is used to set the expiration time in the cookie's HTTP # header. By default, _getdate() returns the current time in the appropriate diff --git a/contrib/tools/python3/Lib/logging/handlers.py b/contrib/tools/python3/Lib/logging/handlers.py index 715bce785c..73757758af 100644 --- a/contrib/tools/python3/Lib/logging/handlers.py +++ b/contrib/tools/python3/Lib/logging/handlers.py @@ -190,9 +190,12 @@ class RotatingFileHandler(BaseRotatingHandler): if self.stream is None: # delay was set... self.stream = self._open() if self.maxBytes > 0: # are we rolling over? + pos = self.stream.tell() + if not pos: + # gh-116263: Never rollover an empty file + return False msg = "%s\n" % self.format(record) - self.stream.seek(0, 2) #due to non-posix-compliant Windows feature - if self.stream.tell() + len(msg) >= self.maxBytes: + if pos + len(msg) >= self.maxBytes: # See bpo-45401: Never rollover anything other than regular files if os.path.exists(self.baseFilename) and not os.path.isfile(self.baseFilename): return False diff --git a/contrib/tools/python3/Lib/pickle.py b/contrib/tools/python3/Lib/pickle.py index c4d6e65821..01c1a10279 100644 --- a/contrib/tools/python3/Lib/pickle.py +++ b/contrib/tools/python3/Lib/pickle.py @@ -397,6 +397,8 @@ def decode_long(data): return int.from_bytes(data, byteorder='little', signed=True) +_NoValue = object() + # Pickling machinery class _Pickler: @@ -1091,11 +1093,16 @@ class _Pickler: (obj, module_name, name)) if self.proto >= 2: - code = _extension_registry.get((module_name, name)) - if code: - assert code > 0 + code = _extension_registry.get((module_name, name), _NoValue) + if code is not _NoValue: if code <= 0xff: - write(EXT1 + pack("<B", code)) + data = pack("<B", code) + if data == b'\0': + # Should never happen in normal circumstances, + # since the type and the value of the code are + # checked in copyreg.add_extension(). + raise RuntimeError("extension code 0 is out of range") + write(EXT1 + data) elif code <= 0xffff: write(EXT2 + pack("<H", code)) else: @@ -1589,9 +1596,8 @@ class _Unpickler: dispatch[EXT4[0]] = load_ext4 def get_extension(self, code): - nil = [] - obj = _extension_cache.get(code, nil) - if obj is not nil: + obj = _extension_cache.get(code, _NoValue) + if obj is not _NoValue: self.append(obj) return key = _inverted_registry.get(code) diff --git a/contrib/tools/python3/Lib/pydoc_data/topics.py b/contrib/tools/python3/Lib/pydoc_data/topics.py index 33b5834b86..8113889a1f 100644 --- a/contrib/tools/python3/Lib/pydoc_data/topics.py +++ b/contrib/tools/python3/Lib/pydoc_data/topics.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Autogenerated by Sphinx on Tue Aug 6 21:02:50 2024 +# Autogenerated by Sphinx on Fri Sep 6 21:00:45 2024 # as part of the release process. topics = {'assert': 'The "assert" statement\n' '**********************\n' @@ -3101,7 +3101,7 @@ topics = {'assert': 'The "assert" statement\n' ' | "None"\n' ' | "True"\n' ' | "False"\n' - ' | signed_number: NUMBER | "-" NUMBER\n' + ' signed_number ::= ["-"] NUMBER\n' '\n' 'The rule "strings" and the token "NUMBER" are defined in the ' 'standard\n' @@ -8991,16 +8991,22 @@ topics = {'assert': 'The "assert" statement\n' 'types, operations that compute new values may actually return a\n' 'reference to any existing object with the same type and value, ' 'while\n' - 'for mutable objects this is not allowed. E.g., after "a = 1; b = ' - '1",\n' - '"a" and "b" may or may not refer to the same object with the ' - 'value\n' - 'one, depending on the implementation, but after "c = []; d = []", ' - '"c"\n' - 'and "d" are guaranteed to refer to two different, unique, newly\n' - 'created empty lists. (Note that "c = d = []" assigns the same ' - 'object\n' - 'to both "c" and "d".)\n', + 'for mutable objects this is not allowed. For example, after "a = ' + '1; b\n' + '= 1", *a* and *b* may or may not refer to the same object with ' + 'the\n' + 'value one, depending on the implementation. This is because "int" ' + 'is\n' + 'an immutable type, so the reference to "1" can be reused. This\n' + 'behaviour depends on the implementation used, so should not be ' + 'relied\n' + 'upon, but is something to be aware of when making use of object\n' + 'identity tests. However, after "c = []; d = []", *c* and *d* are\n' + 'guaranteed to refer to two different, unique, newly created ' + 'empty\n' + 'lists. (Note that "e = f = []" assigns the *same* object to both ' + '*e*\n' + 'and *f*.)\n', 'operator-summary': 'Operator precedence\n' '*******************\n' '\n' @@ -13075,15 +13081,13 @@ topics = {'assert': 'The "assert" statement\n' 'greater must be expressed with escapes.\n' '\n' 'Both string and bytes literals may optionally be prefixed with a\n' - 'letter "\'r\'" or "\'R\'"; such strings are called *raw strings* ' - 'and treat\n' - 'backslashes as literal characters. As a result, in string ' - 'literals,\n' - '"\'\\U\'" and "\'\\u\'" escapes in raw strings are not treated ' - 'specially.\n' - 'Given that Python 2.x’s raw unicode literals behave differently ' - 'than\n' - 'Python 3.x’s the "\'ur\'" syntax is not supported.\n' + 'letter "\'r\'" or "\'R\'"; such constructs are called *raw ' + 'string\n' + 'literals* and *raw bytes literals* respectively and treat ' + 'backslashes\n' + 'as literal characters. As a result, in raw string literals, ' + '"\'\\U\'"\n' + 'and "\'\\u\'" escapes are not treated specially.\n' '\n' 'Added in version 3.3: The "\'rb\'" prefix of raw bytes literals ' 'has been\n' @@ -13977,8 +13981,7 @@ topics = {'assert': 'The "assert" statement\n' 'however removing a key and re-inserting it will add it to the end\n' 'instead of keeping its old place.\n' '\n' - 'Dictionaries are mutable; they can be created by the "{...}" ' - 'notation\n' + 'Dictionaries are mutable; they can be created by the "{}" notation\n' '(see section Dictionary displays).\n' '\n' 'The extension modules "dbm.ndbm" and "dbm.gnu" provide additional\n' @@ -15963,8 +15966,8 @@ topics = {'assert': 'The "assert" statement\n' '| | also removes it from ' '*s* | |\n' '+--------------------------------+----------------------------------+-----------------------+\n' - '| "s.remove(x)" | remove the first item from ' - '*s* | (3) |\n' + '| "s.remove(x)" | removes the first item from ' + '*s* | (3) |\n' '| | where "s[i]" is equal to ' '*x* | |\n' '+--------------------------------+----------------------------------+-----------------------+\n' @@ -16428,8 +16431,8 @@ topics = {'assert': 'The "assert" statement\n' '| | also removes it from ' '*s* | |\n' '+--------------------------------+----------------------------------+-----------------------+\n' - '| "s.remove(x)" | remove the first item ' - 'from *s* | (3) |\n' + '| "s.remove(x)" | removes the first ' + 'item from *s* | (3) |\n' '| | where "s[i]" is equal ' 'to *x* | |\n' '+--------------------------------+----------------------------------+-----------------------+\n' diff --git a/contrib/tools/python3/Lib/runpy.py b/contrib/tools/python3/Lib/runpy.py index 42f896c9cd..ef54d3282e 100644 --- a/contrib/tools/python3/Lib/runpy.py +++ b/contrib/tools/python3/Lib/runpy.py @@ -247,17 +247,17 @@ def _get_main_module_details(error=ImportError): sys.modules[main_name] = saved_main -def _get_code_from_file(run_name, fname): +def _get_code_from_file(fname): # Check for a compiled file first from pkgutil import read_code - decoded_path = os.path.abspath(os.fsdecode(fname)) - with io.open_code(decoded_path) as f: + code_path = os.path.abspath(fname) + with io.open_code(code_path) as f: code = read_code(f) if code is None: # That didn't work, so try it as normal source code - with io.open_code(decoded_path) as f: + with io.open_code(code_path) as f: code = compile(f.read(), fname, 'exec') - return code, fname + return code def run_path(path_name, init_globals=None, run_name=None): """Execute code located at the specified filesystem location. @@ -279,12 +279,13 @@ def run_path(path_name, init_globals=None, run_name=None): pkg_name = run_name.rpartition(".")[0] from pkgutil import get_importer importer = get_importer(path_name) + path_name = os.fsdecode(path_name) if isinstance(importer, type(None)): # Not a valid sys.path entry, so run the code directly # execfile() doesn't help as we want to allow compiled files - code, fname = _get_code_from_file(run_name, path_name) + code = _get_code_from_file(path_name) return _run_module_code(code, init_globals, run_name, - pkg_name=pkg_name, script_name=fname) + pkg_name=pkg_name, script_name=path_name) else: # Finder is defined for path, so add it to # the start of sys.path diff --git a/contrib/tools/python3/Lib/ssl.py b/contrib/tools/python3/Lib/ssl.py index 9c1fea6d36..2a115987d9 100644 --- a/contrib/tools/python3/Lib/ssl.py +++ b/contrib/tools/python3/Lib/ssl.py @@ -527,18 +527,17 @@ class SSLContext(_SSLContext): self._set_alpn_protocols(protos) def _load_windows_store_certs(self, storename, purpose): - certs = bytearray() try: for cert, encoding, trust in enum_certificates(storename): # CA certs are never PKCS#7 encoded if encoding == "x509_asn": if trust is True or purpose.oid in trust: - certs.extend(cert) + try: + self.load_verify_locations(cadata=cert) + except SSLError as exc: + warnings.warn(f"Bad certificate in Windows certificate store: {exc!s}") except PermissionError: warnings.warn("unable to enumerate Windows certificate store") - if certs: - self.load_verify_locations(cadata=certs) - return certs def load_default_certs(self, purpose=Purpose.SERVER_AUTH): if not isinstance(purpose, _ASN1Object): diff --git a/contrib/tools/python3/Lib/tarfile.py b/contrib/tools/python3/Lib/tarfile.py index e1487e3864..0a0f31eca0 100755 --- a/contrib/tools/python3/Lib/tarfile.py +++ b/contrib/tools/python3/Lib/tarfile.py @@ -843,6 +843,9 @@ _NAMED_FILTERS = { # Sentinel for replace() defaults, meaning "don't change the attribute" _KEEP = object() +# Header length is digits followed by a space. +_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ") + class TarInfo(object): """Informational class which holds the details about an archive member given by a tar header block. @@ -1412,37 +1415,59 @@ class TarInfo(object): else: pax_headers = tarfile.pax_headers.copy() - # Check if the pax header contains a hdrcharset field. This tells us - # the encoding of the path, linkpath, uname and gname fields. Normally, - # these fields are UTF-8 encoded but since POSIX.1-2008 tar - # implementations are allowed to store them as raw binary strings if - # the translation to UTF-8 fails. - match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) - if match is not None: - pax_headers["hdrcharset"] = match.group(1).decode("utf-8") - - # For the time being, we don't care about anything other than "BINARY". - # The only other value that is currently allowed by the standard is - # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. - hdrcharset = pax_headers.get("hdrcharset") - if hdrcharset == "BINARY": - encoding = tarfile.encoding - else: - encoding = "utf-8" - # Parse pax header information. A record looks like that: # "%d %s=%s\n" % (length, keyword, value). length is the size # of the complete record including the length field itself and - # the newline. keyword and value are both UTF-8 encoded strings. - regex = re.compile(br"(\d+) ([^=]+)=") + # the newline. pos = 0 - while match := regex.match(buf, pos): - length, keyword = match.groups() - length = int(length) - if length == 0: + encoding = None + raw_headers = [] + while len(buf) > pos and buf[pos] != 0x00: + if not (match := _header_length_prefix_re.match(buf, pos)): + raise InvalidHeaderError("invalid header") + try: + length = int(match.group(1)) + except ValueError: + raise InvalidHeaderError("invalid header") + # Headers must be at least 5 bytes, shortest being '5 x=\n'. + # Value is allowed to be empty. + if length < 5: + raise InvalidHeaderError("invalid header") + if pos + length > len(buf): + raise InvalidHeaderError("invalid header") + + header_value_end_offset = match.start(1) + length - 1 # Last byte of the header + keyword_and_value = buf[match.end(1) + 1:header_value_end_offset] + raw_keyword, equals, raw_value = keyword_and_value.partition(b"=") + + # Check the framing of the header. The last character must be '\n' (0x0A) + if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A: raise InvalidHeaderError("invalid header") - value = buf[match.end(2) + 1:match.start(1) + length - 1] + raw_headers.append((length, raw_keyword, raw_value)) + + # Check if the pax header contains a hdrcharset field. This tells us + # the encoding of the path, linkpath, uname and gname fields. Normally, + # these fields are UTF-8 encoded but since POSIX.1-2008 tar + # implementations are allowed to store them as raw binary strings if + # the translation to UTF-8 fails. For the time being, we don't care about + # anything other than "BINARY". The only other value that is currently + # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8. + # Note that we only follow the initial 'hdrcharset' setting to preserve + # the initial behavior of the 'tarfile' module. + if raw_keyword == b"hdrcharset" and encoding is None: + if raw_value == b"BINARY": + encoding = tarfile.encoding + else: # This branch ensures only the first 'hdrcharset' header is used. + encoding = "utf-8" + pos += length + + # If no explicit hdrcharset is set, we use UTF-8 as a default. + if encoding is None: + encoding = "utf-8" + + # After parsing the raw headers we can decode them to text. + for length, raw_keyword, raw_value in raw_headers: # Normally, we could just use "utf-8" as the encoding and "strict" # as the error handler, but we better not take the risk. For # example, GNU tar <= 1.23 is known to store filenames it cannot @@ -1450,17 +1475,16 @@ class TarInfo(object): # hdrcharset=BINARY header). # We first try the strict standard encoding, and if that fails we # fall back on the user's encoding and error handler. - keyword = self._decode_pax_field(keyword, "utf-8", "utf-8", + keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8", tarfile.errors) if keyword in PAX_NAME_FIELDS: - value = self._decode_pax_field(value, encoding, tarfile.encoding, + value = self._decode_pax_field(raw_value, encoding, tarfile.encoding, tarfile.errors) else: - value = self._decode_pax_field(value, "utf-8", "utf-8", + value = self._decode_pax_field(raw_value, "utf-8", "utf-8", tarfile.errors) pax_headers[keyword] = value - pos += length # Fetch the next header. try: @@ -1475,7 +1499,7 @@ class TarInfo(object): elif "GNU.sparse.size" in pax_headers: # GNU extended sparse format version 0.0. - self._proc_gnusparse_00(next, pax_headers, buf) + self._proc_gnusparse_00(next, raw_headers) elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": # GNU extended sparse format version 1.0. @@ -1497,15 +1521,24 @@ class TarInfo(object): return next - def _proc_gnusparse_00(self, next, pax_headers, buf): + def _proc_gnusparse_00(self, next, raw_headers): """Process a GNU tar extended sparse header, version 0.0. """ offsets = [] - for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): - offsets.append(int(match.group(1))) numbytes = [] - for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): - numbytes.append(int(match.group(1))) + for _, keyword, value in raw_headers: + if keyword == b"GNU.sparse.offset": + try: + offsets.append(int(value.decode())) + except ValueError: + raise InvalidHeaderError("invalid header") + + elif keyword == b"GNU.sparse.numbytes": + try: + numbytes.append(int(value.decode())) + except ValueError: + raise InvalidHeaderError("invalid header") + next.sparse = list(zip(offsets, numbytes)) def _proc_gnusparse_01(self, next, pax_headers): diff --git a/contrib/tools/python3/Lib/turtle.py b/contrib/tools/python3/Lib/turtle.py index 811c5dfa49..92ac58f8e6 100644 --- a/contrib/tools/python3/Lib/turtle.py +++ b/contrib/tools/python3/Lib/turtle.py @@ -1719,7 +1719,7 @@ class TNavigator(object): >>> reset() >>> turtle.left(60) >>> turtle.forward(100) - >>> print turtle.xcor() + >>> print(turtle.xcor()) 50.0 """ return self._position[0] @@ -1733,7 +1733,7 @@ class TNavigator(object): >>> reset() >>> turtle.left(60) >>> turtle.forward(100) - >>> print turtle.ycor() + >>> print(turtle.ycor()) 86.6025403784 """ return self._position[1] @@ -2336,7 +2336,7 @@ class TPen(object): Example (for a Turtle instance named turtle): >>> turtle.hideturtle() - >>> print turtle.isvisible(): + >>> print(turtle.isvisible()) False """ return self._shown diff --git a/contrib/tools/python3/Lib/urllib/parse.py b/contrib/tools/python3/Lib/urllib/parse.py index 3932bb99c7..2481595203 100644 --- a/contrib/tools/python3/Lib/urllib/parse.py +++ b/contrib/tools/python3/Lib/urllib/parse.py @@ -525,9 +525,13 @@ def urlunsplit(components): empty query; the RFC states that these are equivalent).""" scheme, netloc, url, query, fragment, _coerce_result = ( _coerce_args(*components)) - if netloc or (scheme and scheme in uses_netloc) or url[:2] == '//': + if netloc: if url and url[:1] != '/': url = '/' + url - url = '//' + (netloc or '') + url + url = '//' + netloc + url + elif url[:2] == '//': + url = '//' + url + elif scheme and scheme in uses_netloc and (not url or url[:1] == '/'): + url = '//' + url if scheme: url = scheme + ':' + url if query: diff --git a/contrib/tools/python3/Lib/webbrowser.py b/contrib/tools/python3/Lib/webbrowser.py index ba6711e4ef..13b9e85f9e 100755 --- a/contrib/tools/python3/Lib/webbrowser.py +++ b/contrib/tools/python3/Lib/webbrowser.py @@ -30,7 +30,7 @@ def register(name, klass, instance=None, *, preferred=False): # Preferred browsers go to the front of the list. # Need to match to the default browser returned by xdg-settings, which # may be of the form e.g. "firefox.desktop". - if preferred or (_os_preferred_browser and name in _os_preferred_browser): + if preferred or (_os_preferred_browser and f'{name}.desktop' == _os_preferred_browser): _tryorder.insert(0, name) else: _tryorder.append(name) @@ -77,6 +77,9 @@ def open(url, new=0, autoraise=True): - 1: a new browser window. - 2: a new browser page ("tab"). If possible, autoraise raises the window (the default) or not. + + If opening the browser succeeds, return True. + If there is a problem, return False. """ if _tryorder is None: with _lock: diff --git a/contrib/tools/python3/Lib/ya.make b/contrib/tools/python3/Lib/ya.make index d6607786ff..cf6f637f36 100644 --- a/contrib/tools/python3/Lib/ya.make +++ b/contrib/tools/python3/Lib/ya.make @@ -4,9 +4,9 @@ ENABLE(PYBUILD_NO_PY) PY3_LIBRARY() -VERSION(3.12.5) +VERSION(3.12.6) -ORIGINAL_SOURCE(https://github.com/python/cpython/archive/v3.12.5.tar.gz) +ORIGINAL_SOURCE(https://github.com/python/cpython/archive/v3.12.6.tar.gz) LICENSE(Python-2.0) diff --git a/contrib/tools/python3/Lib/zipfile/_path/__init__.py b/contrib/tools/python3/Lib/zipfile/_path/__init__.py index 78c413563b..8db5ef18d7 100644 --- a/contrib/tools/python3/Lib/zipfile/_path/__init__.py +++ b/contrib/tools/python3/Lib/zipfile/_path/__init__.py @@ -1,3 +1,12 @@ +""" +A Path-like interface for zipfiles. + +This codebase is shared between zipfile.Path in the stdlib +and zipp in PyPI. See +https://github.com/python/importlib_metadata/wiki/Development-Methodology +for more detail. +""" + import io import posixpath import zipfile @@ -34,7 +43,7 @@ def _parents(path): def _ancestry(path): """ Given a path with elements separated by - posixpath.sep, generate all elements of that path + posixpath.sep, generate all elements of that path. >>> list(_ancestry('b/d')) ['b/d', 'b'] @@ -46,9 +55,14 @@ def _ancestry(path): ['b'] >>> list(_ancestry('')) [] + + Multiple separators are treated like a single. + + >>> list(_ancestry('//b//d///f//')) + ['//b//d///f', '//b//d', '//b'] """ path = path.rstrip(posixpath.sep) - while path and path != posixpath.sep: + while path.rstrip(posixpath.sep): yield path path, tail = posixpath.split(path) @@ -174,7 +188,10 @@ def _extract_text_encoding(encoding=None, *args, **kwargs): class Path: """ - A pathlib-compatible interface for zip files. + A :class:`importlib.resources.abc.Traversable` interface for zip files. + + Implements many of the features users enjoy from + :class:`pathlib.Path`. Consider a zip file with this structure:: diff --git a/contrib/tools/python3/Lib/zipfile/_path/glob.py b/contrib/tools/python3/Lib/zipfile/_path/glob.py index 4a2e665e27..d5213533ad 100644 --- a/contrib/tools/python3/Lib/zipfile/_path/glob.py +++ b/contrib/tools/python3/Lib/zipfile/_path/glob.py @@ -2,6 +2,19 @@ import re def translate(pattern): + return match_dirs(translate_core(pattern)) + + +def match_dirs(pattern): + """ + Ensure that zipfile.Path directory names are matched. + + zipfile.Path directory names always end in a slash. + """ + return rf'{pattern}[/]?' + + +def translate_core(pattern): r""" Given a glob pattern, produce a regex that matches it. |