aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/tools/python3/src/Lib/urllib/parse.py
diff options
context:
space:
mode:
authorshadchin <shadchin@yandex-team.ru>2022-02-10 16:44:30 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:44:30 +0300
commit2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree012bb94d777798f1f56ac1cec429509766d05181 /contrib/tools/python3/src/Lib/urllib/parse.py
parent6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
downloadydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Lib/urllib/parse.py')
-rw-r--r--contrib/tools/python3/src/Lib/urllib/parse.py394
1 files changed, 197 insertions, 197 deletions
diff --git a/contrib/tools/python3/src/Lib/urllib/parse.py b/contrib/tools/python3/src/Lib/urllib/parse.py
index b7965fe3d2..f6299398c9 100644
--- a/contrib/tools/python3/src/Lib/urllib/parse.py
+++ b/contrib/tools/python3/src/Lib/urllib/parse.py
@@ -29,9 +29,9 @@ test_urlparse.py provides a good indicator of parsing behavior.
import re
import sys
-import types
+import types
import collections
-import warnings
+import warnings
__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
"urlsplit", "urlunsplit", "urlencode", "parse_qs",
@@ -78,9 +78,9 @@ scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
'0123456789'
'+-.')
-# Unsafe bytes to be removed per WHATWG spec
-_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
-
+# Unsafe bytes to be removed per WHATWG spec
+_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
+
# XXX: Consider replacing with functools.lru_cache
MAX_CACHE_SIZE = 20
_parse_cache = {}
@@ -171,18 +171,18 @@ class _NetlocResultMixinBase(object):
def port(self):
port = self._hostinfo[1]
if port is not None:
- try:
- port = int(port, 10)
- except ValueError:
- message = f'Port could not be cast to integer value as {port!r}'
- raise ValueError(message) from None
+ try:
+ port = int(port, 10)
+ except ValueError:
+ message = f'Port could not be cast to integer value as {port!r}'
+ raise ValueError(message) from None
if not ( 0 <= port <= 65535):
raise ValueError("Port out of range 0-65535")
return port
- __class_getitem__ = classmethod(types.GenericAlias)
-
+ __class_getitem__ = classmethod(types.GenericAlias)
+
class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
__slots__ = ()
@@ -295,7 +295,7 @@ by reference to a primary resource and additional identifying information.
"""
_ParseResultBase.__doc__ = """
-ParseResult(scheme, netloc, path, params, query, fragment)
+ParseResult(scheme, netloc, path, params, query, fragment)
A 6-tuple that contains components of a parsed URL.
"""
@@ -372,23 +372,23 @@ del _fix_result_transcoding
def urlparse(url, scheme='', allow_fragments=True):
"""Parse a URL into 6 components:
<scheme>://<netloc>/<path>;<params>?<query>#<fragment>
-
- The result is a named 6-tuple with fields corresponding to the
- above. It is either a ParseResult or ParseResultBytes object,
- depending on the type of the url parameter.
-
- The username, password, hostname, and port sub-components of netloc
- can also be accessed as attributes of the returned object.
-
- The scheme argument provides the default value of the scheme
- component when no scheme is found in url.
-
- If allow_fragments is False, no attempt is made to separate the
- fragment component from the previous component, which can be either
- path or query.
-
- Note that % escapes are not expanded.
- """
+
+ The result is a named 6-tuple with fields corresponding to the
+ above. It is either a ParseResult or ParseResultBytes object,
+ depending on the type of the url parameter.
+
+ The username, password, hostname, and port sub-components of netloc
+ can also be accessed as attributes of the returned object.
+
+ The scheme argument provides the default value of the scheme
+ component when no scheme is found in url.
+
+ If allow_fragments is False, no attempt is made to separate the
+ fragment component from the previous component, which can be either
+ path or query.
+
+ Note that % escapes are not expanded.
+ """
url, scheme, _coerce_result = _coerce_args(url, scheme)
splitresult = urlsplit(url, scheme, allow_fragments)
scheme, netloc, url, query, fragment = splitresult
@@ -422,45 +422,45 @@ def _checknetloc(netloc):
# looking for characters like \u2100 that expand to 'a/c'
# IDNA uses NFKC equivalence, so normalize for this check
import unicodedata
- n = netloc.replace('@', '') # ignore characters already included
- n = n.replace(':', '') # but not the surrounding text
- n = n.replace('#', '')
- n = n.replace('?', '')
- netloc2 = unicodedata.normalize('NFKC', n)
- if n == netloc2:
+ n = netloc.replace('@', '') # ignore characters already included
+ n = n.replace(':', '') # but not the surrounding text
+ n = n.replace('#', '')
+ n = n.replace('?', '')
+ netloc2 = unicodedata.normalize('NFKC', n)
+ if n == netloc2:
return
for c in '/?#@:':
if c in netloc2:
- raise ValueError("netloc '" + netloc + "' contains invalid " +
+ raise ValueError("netloc '" + netloc + "' contains invalid " +
"characters under NFKC normalization")
def urlsplit(url, scheme='', allow_fragments=True):
"""Parse a URL into 5 components:
<scheme>://<netloc>/<path>?<query>#<fragment>
-
- The result is a named 5-tuple with fields corresponding to the
- above. It is either a SplitResult or SplitResultBytes object,
- depending on the type of the url parameter.
-
- The username, password, hostname, and port sub-components of netloc
- can also be accessed as attributes of the returned object.
-
- The scheme argument provides the default value of the scheme
- component when no scheme is found in url.
-
- If allow_fragments is False, no attempt is made to separate the
- fragment component from the previous component, which can be either
- path or query.
-
- Note that % escapes are not expanded.
- """
-
+
+ The result is a named 5-tuple with fields corresponding to the
+ above. It is either a SplitResult or SplitResultBytes object,
+ depending on the type of the url parameter.
+
+ The username, password, hostname, and port sub-components of netloc
+ can also be accessed as attributes of the returned object.
+
+ The scheme argument provides the default value of the scheme
+ component when no scheme is found in url.
+
+ If allow_fragments is False, no attempt is made to separate the
+ fragment component from the previous component, which can be either
+ path or query.
+
+ Note that % escapes are not expanded.
+ """
+
url, scheme, _coerce_result = _coerce_args(url, scheme)
-
- for b in _UNSAFE_URL_BYTES_TO_REMOVE:
- url = url.replace(b, "")
- scheme = scheme.replace(b, "")
-
+
+ for b in _UNSAFE_URL_BYTES_TO_REMOVE:
+ url = url.replace(b, "")
+ scheme = scheme.replace(b, "")
+
allow_fragments = bool(allow_fragments)
key = url, scheme, allow_fragments, type(url), type(scheme)
cached = _parse_cache.get(key, None)
@@ -475,7 +475,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
if c not in scheme_chars:
break
else:
- scheme, url = url[:i].lower(), url[i+1:]
+ scheme, url = url[:i].lower(), url[i+1:]
if url[:2] == '//':
netloc, url = _splitnetloc(url, 2)
@@ -651,8 +651,8 @@ def unquote(string, encoding='utf-8', errors='replace'):
unquote('abc%20def') -> 'abc def'.
"""
- if isinstance(string, bytes):
- return unquote_to_bytes(string).decode(encoding, errors)
+ if isinstance(string, bytes):
+ return unquote_to_bytes(string).decode(encoding, errors)
if '%' not in string:
string.split
return string
@@ -670,7 +670,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
+ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
"""Parse a query given as a string argument.
Arguments:
@@ -694,15 +694,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
max_num_fields: int. If set, then throws a ValueError if there
are more than n fields read by parse_qsl().
- separator: str. The symbol to use for separating the query arguments.
- Defaults to &.
-
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
+
Returns a dictionary.
"""
parsed_result = {}
pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
encoding=encoding, errors=errors,
- max_num_fields=max_num_fields, separator=separator)
+ max_num_fields=max_num_fields, separator=separator)
for name, value in pairs:
if name in parsed_result:
parsed_result[name].append(value)
@@ -712,7 +712,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
- encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
+ encoding='utf-8', errors='replace', max_num_fields=None, separator='&'):
"""Parse a query given as a string argument.
Arguments:
@@ -735,26 +735,26 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
max_num_fields: int. If set, then throws a ValueError
if there are more than n fields read by parse_qsl().
- separator: str. The symbol to use for separating the query arguments.
- Defaults to &.
-
+ separator: str. The symbol to use for separating the query arguments.
+ Defaults to &.
+
Returns a list, as G-d intended.
"""
qs, _coerce_result = _coerce_args(qs)
- separator, _ = _coerce_args(separator)
-
- if not separator or (not isinstance(separator, (str, bytes))):
- raise ValueError("Separator must be of type string or bytes.")
+ separator, _ = _coerce_args(separator)
+ if not separator or (not isinstance(separator, (str, bytes))):
+ raise ValueError("Separator must be of type string or bytes.")
+
# If max_num_fields is defined then check that the number of fields
# is less than max_num_fields. This prevents a memory exhaustion DOS
# attack via post bodies with many fields.
if max_num_fields is not None:
- num_fields = 1 + qs.count(separator)
+ num_fields = 1 + qs.count(separator)
if max_num_fields < num_fields:
raise ValueError('Max number of fields exceeded')
- pairs = [s1 for s1 in qs.split(separator)]
+ pairs = [s1 for s1 in qs.split(separator)]
r = []
for name_value in pairs:
if not name_value and not strict_parsing:
@@ -820,32 +820,32 @@ def quote(string, safe='/', encoding=None, errors=None):
"""quote('abc def') -> 'abc%20def'
Each part of a URL, e.g. the path info, the query, etc., has a
- different set of reserved characters that must be quoted. The
- quote function offers a cautious (not minimal) way to quote a
- string for most of these parts.
+ different set of reserved characters that must be quoted. The
+ quote function offers a cautious (not minimal) way to quote a
+ string for most of these parts.
- RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists
- the following (un)reserved characters.
+ RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists
+ the following (un)reserved characters.
- unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
- reserved = gen-delims / sub-delims
- gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
- sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
- / "*" / "+" / "," / ";" / "="
+ unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ reserved = gen-delims / sub-delims
+ gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ / "*" / "+" / "," / ";" / "="
- Each of the reserved characters is reserved in some component of a URL,
+ Each of the reserved characters is reserved in some component of a URL,
but not necessarily in all of them.
- The quote function %-escapes all characters that are neither in the
- unreserved chars ("always safe") nor the additional chars set via the
- safe arg.
-
- The default for the safe arg is '/'. The character is reserved, but in
- typical usage the quote function is being called on a path where the
- existing slash characters are to be preserved.
-
+ The quote function %-escapes all characters that are neither in the
+ unreserved chars ("always safe") nor the additional chars set via the
+ safe arg.
+
+ The default for the safe arg is '/'. The character is reserved, but in
+ typical usage the quote function is being called on a path where the
+ existing slash characters are to be preserved.
+
Python 3.7 updates from using RFC 2396 to RFC 3986 to quote URL strings.
- Now, "~" is included in the set of unreserved characters.
+ Now, "~" is included in the set of unreserved characters.
string and safe may be either str or bytes objects. encoding and errors
must not be specified if string is a bytes object.
@@ -989,14 +989,14 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
l.append(k + '=' + elt)
return '&'.join(l)
-
+
def to_bytes(url):
- warnings.warn("urllib.parse.to_bytes() is deprecated as of 3.8",
- DeprecationWarning, stacklevel=2)
- return _to_bytes(url)
-
-
-def _to_bytes(url):
+ warnings.warn("urllib.parse.to_bytes() is deprecated as of 3.8",
+ DeprecationWarning, stacklevel=2)
+ return _to_bytes(url)
+
+
+def _to_bytes(url):
"""to_bytes(u"URL") --> 'URL'."""
# Most URL schemes require ASCII. If that changes, the conversion
# can be relaxed.
@@ -1009,29 +1009,29 @@ def _to_bytes(url):
" contains non-ASCII characters")
return url
-
+
def unwrap(url):
- """Transform a string like '<URL:scheme://host/path>' into 'scheme://host/path'.
-
- The string is returned unchanged if it's not a wrapped URL.
- """
+ """Transform a string like '<URL:scheme://host/path>' into 'scheme://host/path'.
+
+ The string is returned unchanged if it's not a wrapped URL.
+ """
url = str(url).strip()
if url[:1] == '<' and url[-1:] == '>':
url = url[1:-1].strip()
- if url[:4] == 'URL:':
- url = url[4:].strip()
+ if url[:4] == 'URL:':
+ url = url[4:].strip()
return url
-
-def splittype(url):
- warnings.warn("urllib.parse.splittype() is deprecated as of 3.8, "
- "use urllib.parse.urlparse() instead",
- DeprecationWarning, stacklevel=2)
- return _splittype(url)
-
-
+
+def splittype(url):
+ warnings.warn("urllib.parse.splittype() is deprecated as of 3.8, "
+ "use urllib.parse.urlparse() instead",
+ DeprecationWarning, stacklevel=2)
+ return _splittype(url)
+
+
_typeprog = None
-def _splittype(url):
+def _splittype(url):
"""splittype('type:opaquestring') --> 'type', 'opaquestring'."""
global _typeprog
if _typeprog is None:
@@ -1043,16 +1043,16 @@ def _splittype(url):
return scheme.lower(), data
return None, url
-
-def splithost(url):
- warnings.warn("urllib.parse.splithost() is deprecated as of 3.8, "
- "use urllib.parse.urlparse() instead",
- DeprecationWarning, stacklevel=2)
- return _splithost(url)
-
-
+
+def splithost(url):
+ warnings.warn("urllib.parse.splithost() is deprecated as of 3.8, "
+ "use urllib.parse.urlparse() instead",
+ DeprecationWarning, stacklevel=2)
+ return _splithost(url)
+
+
_hostprog = None
-def _splithost(url):
+def _splithost(url):
"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
global _hostprog
if _hostprog is None:
@@ -1066,64 +1066,64 @@ def _splithost(url):
return host_port, path
return None, url
-
+
def splituser(host):
- warnings.warn("urllib.parse.splituser() is deprecated as of 3.8, "
- "use urllib.parse.urlparse() instead",
- DeprecationWarning, stacklevel=2)
- return _splituser(host)
-
-
-def _splituser(host):
+ warnings.warn("urllib.parse.splituser() is deprecated as of 3.8, "
+ "use urllib.parse.urlparse() instead",
+ DeprecationWarning, stacklevel=2)
+ return _splituser(host)
+
+
+def _splituser(host):
"""splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
user, delim, host = host.rpartition('@')
return (user if delim else None), host
-
+
def splitpasswd(user):
- warnings.warn("urllib.parse.splitpasswd() is deprecated as of 3.8, "
- "use urllib.parse.urlparse() instead",
- DeprecationWarning, stacklevel=2)
- return _splitpasswd(user)
-
-
-def _splitpasswd(user):
+ warnings.warn("urllib.parse.splitpasswd() is deprecated as of 3.8, "
+ "use urllib.parse.urlparse() instead",
+ DeprecationWarning, stacklevel=2)
+ return _splitpasswd(user)
+
+
+def _splitpasswd(user):
"""splitpasswd('user:passwd') -> 'user', 'passwd'."""
user, delim, passwd = user.partition(':')
return user, (passwd if delim else None)
-
-def splitport(host):
- warnings.warn("urllib.parse.splitport() is deprecated as of 3.8, "
- "use urllib.parse.urlparse() instead",
- DeprecationWarning, stacklevel=2)
- return _splitport(host)
-
-
+
+def splitport(host):
+ warnings.warn("urllib.parse.splitport() is deprecated as of 3.8, "
+ "use urllib.parse.urlparse() instead",
+ DeprecationWarning, stacklevel=2)
+ return _splitport(host)
+
+
# splittag('/path#tag') --> '/path', 'tag'
_portprog = None
-def _splitport(host):
+def _splitport(host):
"""splitport('host:port') --> 'host', 'port'."""
global _portprog
if _portprog is None:
- _portprog = re.compile('(.*):([0-9]*)', re.DOTALL)
+ _portprog = re.compile('(.*):([0-9]*)', re.DOTALL)
- match = _portprog.fullmatch(host)
+ match = _portprog.fullmatch(host)
if match:
host, port = match.groups()
if port:
return host, port
return host, None
-
+
def splitnport(host, defport=-1):
- warnings.warn("urllib.parse.splitnport() is deprecated as of 3.8, "
- "use urllib.parse.urlparse() instead",
- DeprecationWarning, stacklevel=2)
- return _splitnport(host, defport)
-
-
-def _splitnport(host, defport=-1):
+ warnings.warn("urllib.parse.splitnport() is deprecated as of 3.8, "
+ "use urllib.parse.urlparse() instead",
+ DeprecationWarning, stacklevel=2)
+ return _splitnport(host, defport)
+
+
+def _splitnport(host, defport=-1):
"""Split host and port, returning numeric port.
Return given default port if no ':' found; defaults to -1.
Return numerical port if a valid number are found after ':'.
@@ -1139,59 +1139,59 @@ def _splitnport(host, defport=-1):
return host, nport
return host, defport
-
+
def splitquery(url):
- warnings.warn("urllib.parse.splitquery() is deprecated as of 3.8, "
- "use urllib.parse.urlparse() instead",
- DeprecationWarning, stacklevel=2)
- return _splitquery(url)
-
-
-def _splitquery(url):
+ warnings.warn("urllib.parse.splitquery() is deprecated as of 3.8, "
+ "use urllib.parse.urlparse() instead",
+ DeprecationWarning, stacklevel=2)
+ return _splitquery(url)
+
+
+def _splitquery(url):
"""splitquery('/path?query') --> '/path', 'query'."""
path, delim, query = url.rpartition('?')
if delim:
return path, query
return url, None
-
+
def splittag(url):
- warnings.warn("urllib.parse.splittag() is deprecated as of 3.8, "
- "use urllib.parse.urlparse() instead",
- DeprecationWarning, stacklevel=2)
- return _splittag(url)
-
-
-def _splittag(url):
+ warnings.warn("urllib.parse.splittag() is deprecated as of 3.8, "
+ "use urllib.parse.urlparse() instead",
+ DeprecationWarning, stacklevel=2)
+ return _splittag(url)
+
+
+def _splittag(url):
"""splittag('/path#tag') --> '/path', 'tag'."""
path, delim, tag = url.rpartition('#')
if delim:
return path, tag
return url, None
-
+
def splitattr(url):
- warnings.warn("urllib.parse.splitattr() is deprecated as of 3.8, "
- "use urllib.parse.urlparse() instead",
- DeprecationWarning, stacklevel=2)
- return _splitattr(url)
-
-
-def _splitattr(url):
+ warnings.warn("urllib.parse.splitattr() is deprecated as of 3.8, "
+ "use urllib.parse.urlparse() instead",
+ DeprecationWarning, stacklevel=2)
+ return _splitattr(url)
+
+
+def _splitattr(url):
"""splitattr('/path;attr1=value1;attr2=value2;...') ->
'/path', ['attr1=value1', 'attr2=value2', ...]."""
words = url.split(';')
return words[0], words[1:]
-
+
def splitvalue(attr):
- warnings.warn("urllib.parse.splitvalue() is deprecated as of 3.8, "
- "use urllib.parse.parse_qsl() instead",
- DeprecationWarning, stacklevel=2)
- return _splitvalue(attr)
-
-
-def _splitvalue(attr):
+ warnings.warn("urllib.parse.splitvalue() is deprecated as of 3.8, "
+ "use urllib.parse.parse_qsl() instead",
+ DeprecationWarning, stacklevel=2)
+ return _splitvalue(attr)
+
+
+def _splitvalue(attr):
"""splitvalue('attr=value') --> 'attr', 'value'."""
attr, delim, value = attr.partition('=')
return attr, (value if delim else None)