diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/tools/python3/src/Lib/urllib/parse.py | |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Lib/urllib/parse.py')
-rw-r--r-- | contrib/tools/python3/src/Lib/urllib/parse.py | 394 |
1 files changed, 197 insertions, 197 deletions
diff --git a/contrib/tools/python3/src/Lib/urllib/parse.py b/contrib/tools/python3/src/Lib/urllib/parse.py index b7965fe3d2..f6299398c9 100644 --- a/contrib/tools/python3/src/Lib/urllib/parse.py +++ b/contrib/tools/python3/src/Lib/urllib/parse.py @@ -29,9 +29,9 @@ test_urlparse.py provides a good indicator of parsing behavior. import re import sys -import types +import types import collections -import warnings +import warnings __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", "urlsplit", "urlunsplit", "urlencode", "parse_qs", @@ -78,9 +78,9 @@ scheme_chars = ('abcdefghijklmnopqrstuvwxyz' '0123456789' '+-.') -# Unsafe bytes to be removed per WHATWG spec -_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] - +# Unsafe bytes to be removed per WHATWG spec +_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] + # XXX: Consider replacing with functools.lru_cache MAX_CACHE_SIZE = 20 _parse_cache = {} @@ -171,18 +171,18 @@ class _NetlocResultMixinBase(object): def port(self): port = self._hostinfo[1] if port is not None: - try: - port = int(port, 10) - except ValueError: - message = f'Port could not be cast to integer value as {port!r}' - raise ValueError(message) from None + try: + port = int(port, 10) + except ValueError: + message = f'Port could not be cast to integer value as {port!r}' + raise ValueError(message) from None if not ( 0 <= port <= 65535): raise ValueError("Port out of range 0-65535") return port - __class_getitem__ = classmethod(types.GenericAlias) - + __class_getitem__ = classmethod(types.GenericAlias) + class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): __slots__ = () @@ -295,7 +295,7 @@ by reference to a primary resource and additional identifying information. """ _ParseResultBase.__doc__ = """ -ParseResult(scheme, netloc, path, params, query, fragment) +ParseResult(scheme, netloc, path, params, query, fragment) A 6-tuple that contains components of a parsed URL. """ @@ -372,23 +372,23 @@ del _fix_result_transcoding def urlparse(url, scheme='', allow_fragments=True): """Parse a URL into 6 components: <scheme>://<netloc>/<path>;<params>?<query>#<fragment> - - The result is a named 6-tuple with fields corresponding to the - above. It is either a ParseResult or ParseResultBytes object, - depending on the type of the url parameter. - - The username, password, hostname, and port sub-components of netloc - can also be accessed as attributes of the returned object. - - The scheme argument provides the default value of the scheme - component when no scheme is found in url. - - If allow_fragments is False, no attempt is made to separate the - fragment component from the previous component, which can be either - path or query. - - Note that % escapes are not expanded. - """ + + The result is a named 6-tuple with fields corresponding to the + above. It is either a ParseResult or ParseResultBytes object, + depending on the type of the url parameter. + + The username, password, hostname, and port sub-components of netloc + can also be accessed as attributes of the returned object. + + The scheme argument provides the default value of the scheme + component when no scheme is found in url. + + If allow_fragments is False, no attempt is made to separate the + fragment component from the previous component, which can be either + path or query. + + Note that % escapes are not expanded. + """ url, scheme, _coerce_result = _coerce_args(url, scheme) splitresult = urlsplit(url, scheme, allow_fragments) scheme, netloc, url, query, fragment = splitresult @@ -422,45 +422,45 @@ def _checknetloc(netloc): # looking for characters like \u2100 that expand to 'a/c' # IDNA uses NFKC equivalence, so normalize for this check import unicodedata - n = netloc.replace('@', '') # ignore characters already included - n = n.replace(':', '') # but not the surrounding text - n = n.replace('#', '') - n = n.replace('?', '') - netloc2 = unicodedata.normalize('NFKC', n) - if n == netloc2: + n = netloc.replace('@', '') # ignore characters already included + n = n.replace(':', '') # but not the surrounding text + n = n.replace('#', '') + n = n.replace('?', '') + netloc2 = unicodedata.normalize('NFKC', n) + if n == netloc2: return for c in '/?#@:': if c in netloc2: - raise ValueError("netloc '" + netloc + "' contains invalid " + + raise ValueError("netloc '" + netloc + "' contains invalid " + "characters under NFKC normalization") def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> - - The result is a named 5-tuple with fields corresponding to the - above. It is either a SplitResult or SplitResultBytes object, - depending on the type of the url parameter. - - The username, password, hostname, and port sub-components of netloc - can also be accessed as attributes of the returned object. - - The scheme argument provides the default value of the scheme - component when no scheme is found in url. - - If allow_fragments is False, no attempt is made to separate the - fragment component from the previous component, which can be either - path or query. - - Note that % escapes are not expanded. - """ - + + The result is a named 5-tuple with fields corresponding to the + above. It is either a SplitResult or SplitResultBytes object, + depending on the type of the url parameter. + + The username, password, hostname, and port sub-components of netloc + can also be accessed as attributes of the returned object. + + The scheme argument provides the default value of the scheme + component when no scheme is found in url. + + If allow_fragments is False, no attempt is made to separate the + fragment component from the previous component, which can be either + path or query. + + Note that % escapes are not expanded. + """ + url, scheme, _coerce_result = _coerce_args(url, scheme) - - for b in _UNSAFE_URL_BYTES_TO_REMOVE: - url = url.replace(b, "") - scheme = scheme.replace(b, "") - + + for b in _UNSAFE_URL_BYTES_TO_REMOVE: + url = url.replace(b, "") + scheme = scheme.replace(b, "") + allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments, type(url), type(scheme) cached = _parse_cache.get(key, None) @@ -475,7 +475,7 @@ def urlsplit(url, scheme='', allow_fragments=True): if c not in scheme_chars: break else: - scheme, url = url[:i].lower(), url[i+1:] + scheme, url = url[:i].lower(), url[i+1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) @@ -651,8 +651,8 @@ def unquote(string, encoding='utf-8', errors='replace'): unquote('abc%20def') -> 'abc def'. """ - if isinstance(string, bytes): - return unquote_to_bytes(string).decode(encoding, errors) + if isinstance(string, bytes): + return unquote_to_bytes(string).decode(encoding, errors) if '%' not in string: string.split return string @@ -670,7 +670,7 @@ def unquote(string, encoding='utf-8', errors='replace'): def parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): + encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): """Parse a query given as a string argument. Arguments: @@ -694,15 +694,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, max_num_fields: int. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). - separator: str. The symbol to use for separating the query arguments. - Defaults to &. - + separator: str. The symbol to use for separating the query arguments. + Defaults to &. + Returns a dictionary. """ parsed_result = {} pairs = parse_qsl(qs, keep_blank_values, strict_parsing, encoding=encoding, errors=errors, - max_num_fields=max_num_fields, separator=separator) + max_num_fields=max_num_fields, separator=separator) for name, value in pairs: if name in parsed_result: parsed_result[name].append(value) @@ -712,7 +712,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): + encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): """Parse a query given as a string argument. Arguments: @@ -735,26 +735,26 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, max_num_fields: int. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). - separator: str. The symbol to use for separating the query arguments. - Defaults to &. - + separator: str. The symbol to use for separating the query arguments. + Defaults to &. + Returns a list, as G-d intended. """ qs, _coerce_result = _coerce_args(qs) - separator, _ = _coerce_args(separator) - - if not separator or (not isinstance(separator, (str, bytes))): - raise ValueError("Separator must be of type string or bytes.") + separator, _ = _coerce_args(separator) + if not separator or (not isinstance(separator, (str, bytes))): + raise ValueError("Separator must be of type string or bytes.") + # If max_num_fields is defined then check that the number of fields # is less than max_num_fields. This prevents a memory exhaustion DOS # attack via post bodies with many fields. if max_num_fields is not None: - num_fields = 1 + qs.count(separator) + num_fields = 1 + qs.count(separator) if max_num_fields < num_fields: raise ValueError('Max number of fields exceeded') - pairs = [s1 for s1 in qs.split(separator)] + pairs = [s1 for s1 in qs.split(separator)] r = [] for name_value in pairs: if not name_value and not strict_parsing: @@ -820,32 +820,32 @@ def quote(string, safe='/', encoding=None, errors=None): """quote('abc def') -> 'abc%20def' Each part of a URL, e.g. the path info, the query, etc., has a - different set of reserved characters that must be quoted. The - quote function offers a cautious (not minimal) way to quote a - string for most of these parts. + different set of reserved characters that must be quoted. The + quote function offers a cautious (not minimal) way to quote a + string for most of these parts. - RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists - the following (un)reserved characters. + RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists + the following (un)reserved characters. - unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - reserved = gen-delims / sub-delims - gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" - sub-delims = "!" / "$" / "&" / "'" / "(" / ")" - / "*" / "+" / "," / ";" / "=" + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + reserved = gen-delims / sub-delims + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" - Each of the reserved characters is reserved in some component of a URL, + Each of the reserved characters is reserved in some component of a URL, but not necessarily in all of them. - The quote function %-escapes all characters that are neither in the - unreserved chars ("always safe") nor the additional chars set via the - safe arg. - - The default for the safe arg is '/'. The character is reserved, but in - typical usage the quote function is being called on a path where the - existing slash characters are to be preserved. - + The quote function %-escapes all characters that are neither in the + unreserved chars ("always safe") nor the additional chars set via the + safe arg. + + The default for the safe arg is '/'. The character is reserved, but in + typical usage the quote function is being called on a path where the + existing slash characters are to be preserved. + Python 3.7 updates from using RFC 2396 to RFC 3986 to quote URL strings. - Now, "~" is included in the set of unreserved characters. + Now, "~" is included in the set of unreserved characters. string and safe may be either str or bytes objects. encoding and errors must not be specified if string is a bytes object. @@ -989,14 +989,14 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None, l.append(k + '=' + elt) return '&'.join(l) - + def to_bytes(url): - warnings.warn("urllib.parse.to_bytes() is deprecated as of 3.8", - DeprecationWarning, stacklevel=2) - return _to_bytes(url) - - -def _to_bytes(url): + warnings.warn("urllib.parse.to_bytes() is deprecated as of 3.8", + DeprecationWarning, stacklevel=2) + return _to_bytes(url) + + +def _to_bytes(url): """to_bytes(u"URL") --> 'URL'.""" # Most URL schemes require ASCII. If that changes, the conversion # can be relaxed. @@ -1009,29 +1009,29 @@ def _to_bytes(url): " contains non-ASCII characters") return url - + def unwrap(url): - """Transform a string like '<URL:scheme://host/path>' into 'scheme://host/path'. - - The string is returned unchanged if it's not a wrapped URL. - """ + """Transform a string like '<URL:scheme://host/path>' into 'scheme://host/path'. + + The string is returned unchanged if it's not a wrapped URL. + """ url = str(url).strip() if url[:1] == '<' and url[-1:] == '>': url = url[1:-1].strip() - if url[:4] == 'URL:': - url = url[4:].strip() + if url[:4] == 'URL:': + url = url[4:].strip() return url - -def splittype(url): - warnings.warn("urllib.parse.splittype() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splittype(url) - - + +def splittype(url): + warnings.warn("urllib.parse.splittype() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splittype(url) + + _typeprog = None -def _splittype(url): +def _splittype(url): """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" global _typeprog if _typeprog is None: @@ -1043,16 +1043,16 @@ def _splittype(url): return scheme.lower(), data return None, url - -def splithost(url): - warnings.warn("urllib.parse.splithost() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splithost(url) - - + +def splithost(url): + warnings.warn("urllib.parse.splithost() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splithost(url) + + _hostprog = None -def _splithost(url): +def _splithost(url): """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" global _hostprog if _hostprog is None: @@ -1066,64 +1066,64 @@ def _splithost(url): return host_port, path return None, url - + def splituser(host): - warnings.warn("urllib.parse.splituser() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splituser(host) - - -def _splituser(host): + warnings.warn("urllib.parse.splituser() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splituser(host) + + +def _splituser(host): """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" user, delim, host = host.rpartition('@') return (user if delim else None), host - + def splitpasswd(user): - warnings.warn("urllib.parse.splitpasswd() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splitpasswd(user) - - -def _splitpasswd(user): + warnings.warn("urllib.parse.splitpasswd() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splitpasswd(user) + + +def _splitpasswd(user): """splitpasswd('user:passwd') -> 'user', 'passwd'.""" user, delim, passwd = user.partition(':') return user, (passwd if delim else None) - -def splitport(host): - warnings.warn("urllib.parse.splitport() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splitport(host) - - + +def splitport(host): + warnings.warn("urllib.parse.splitport() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splitport(host) + + # splittag('/path#tag') --> '/path', 'tag' _portprog = None -def _splitport(host): +def _splitport(host): """splitport('host:port') --> 'host', 'port'.""" global _portprog if _portprog is None: - _portprog = re.compile('(.*):([0-9]*)', re.DOTALL) + _portprog = re.compile('(.*):([0-9]*)', re.DOTALL) - match = _portprog.fullmatch(host) + match = _portprog.fullmatch(host) if match: host, port = match.groups() if port: return host, port return host, None - + def splitnport(host, defport=-1): - warnings.warn("urllib.parse.splitnport() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splitnport(host, defport) - - -def _splitnport(host, defport=-1): + warnings.warn("urllib.parse.splitnport() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splitnport(host, defport) + + +def _splitnport(host, defport=-1): """Split host and port, returning numeric port. Return given default port if no ':' found; defaults to -1. Return numerical port if a valid number are found after ':'. @@ -1139,59 +1139,59 @@ def _splitnport(host, defport=-1): return host, nport return host, defport - + def splitquery(url): - warnings.warn("urllib.parse.splitquery() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splitquery(url) - - -def _splitquery(url): + warnings.warn("urllib.parse.splitquery() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splitquery(url) + + +def _splitquery(url): """splitquery('/path?query') --> '/path', 'query'.""" path, delim, query = url.rpartition('?') if delim: return path, query return url, None - + def splittag(url): - warnings.warn("urllib.parse.splittag() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splittag(url) - - -def _splittag(url): + warnings.warn("urllib.parse.splittag() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splittag(url) + + +def _splittag(url): """splittag('/path#tag') --> '/path', 'tag'.""" path, delim, tag = url.rpartition('#') if delim: return path, tag return url, None - + def splitattr(url): - warnings.warn("urllib.parse.splitattr() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splitattr(url) - - -def _splitattr(url): + warnings.warn("urllib.parse.splitattr() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splitattr(url) + + +def _splitattr(url): """splitattr('/path;attr1=value1;attr2=value2;...') -> '/path', ['attr1=value1', 'attr2=value2', ...].""" words = url.split(';') return words[0], words[1:] - + def splitvalue(attr): - warnings.warn("urllib.parse.splitvalue() is deprecated as of 3.8, " - "use urllib.parse.parse_qsl() instead", - DeprecationWarning, stacklevel=2) - return _splitvalue(attr) - - -def _splitvalue(attr): + warnings.warn("urllib.parse.splitvalue() is deprecated as of 3.8, " + "use urllib.parse.parse_qsl() instead", + DeprecationWarning, stacklevel=2) + return _splitvalue(attr) + + +def _splitvalue(attr): """splitvalue('attr=value') --> 'attr', 'value'.""" attr, delim, value = attr.partition('=') return attr, (value if delim else None) |