diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/tools/python3/src/Lib/urllib | |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/python3/src/Lib/urllib')
-rw-r--r-- | contrib/tools/python3/src/Lib/urllib/parse.py | 394 | ||||
-rw-r--r-- | contrib/tools/python3/src/Lib/urllib/request.py | 292 | ||||
-rw-r--r-- | contrib/tools/python3/src/Lib/urllib/response.py | 8 | ||||
-rw-r--r-- | contrib/tools/python3/src/Lib/urllib/robotparser.py | 40 |
4 files changed, 367 insertions, 367 deletions
diff --git a/contrib/tools/python3/src/Lib/urllib/parse.py b/contrib/tools/python3/src/Lib/urllib/parse.py index b7965fe3d2..f6299398c9 100644 --- a/contrib/tools/python3/src/Lib/urllib/parse.py +++ b/contrib/tools/python3/src/Lib/urllib/parse.py @@ -29,9 +29,9 @@ test_urlparse.py provides a good indicator of parsing behavior. import re import sys -import types +import types import collections -import warnings +import warnings __all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag", "urlsplit", "urlunsplit", "urlencode", "parse_qs", @@ -78,9 +78,9 @@ scheme_chars = ('abcdefghijklmnopqrstuvwxyz' '0123456789' '+-.') -# Unsafe bytes to be removed per WHATWG spec -_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] - +# Unsafe bytes to be removed per WHATWG spec +_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n'] + # XXX: Consider replacing with functools.lru_cache MAX_CACHE_SIZE = 20 _parse_cache = {} @@ -171,18 +171,18 @@ class _NetlocResultMixinBase(object): def port(self): port = self._hostinfo[1] if port is not None: - try: - port = int(port, 10) - except ValueError: - message = f'Port could not be cast to integer value as {port!r}' - raise ValueError(message) from None + try: + port = int(port, 10) + except ValueError: + message = f'Port could not be cast to integer value as {port!r}' + raise ValueError(message) from None if not ( 0 <= port <= 65535): raise ValueError("Port out of range 0-65535") return port - __class_getitem__ = classmethod(types.GenericAlias) - + __class_getitem__ = classmethod(types.GenericAlias) + class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr): __slots__ = () @@ -295,7 +295,7 @@ by reference to a primary resource and additional identifying information. """ _ParseResultBase.__doc__ = """ -ParseResult(scheme, netloc, path, params, query, fragment) +ParseResult(scheme, netloc, path, params, query, fragment) A 6-tuple that contains components of a parsed URL. """ @@ -372,23 +372,23 @@ del _fix_result_transcoding def urlparse(url, scheme='', allow_fragments=True): """Parse a URL into 6 components: <scheme>://<netloc>/<path>;<params>?<query>#<fragment> - - The result is a named 6-tuple with fields corresponding to the - above. It is either a ParseResult or ParseResultBytes object, - depending on the type of the url parameter. - - The username, password, hostname, and port sub-components of netloc - can also be accessed as attributes of the returned object. - - The scheme argument provides the default value of the scheme - component when no scheme is found in url. - - If allow_fragments is False, no attempt is made to separate the - fragment component from the previous component, which can be either - path or query. - - Note that % escapes are not expanded. - """ + + The result is a named 6-tuple with fields corresponding to the + above. It is either a ParseResult or ParseResultBytes object, + depending on the type of the url parameter. + + The username, password, hostname, and port sub-components of netloc + can also be accessed as attributes of the returned object. + + The scheme argument provides the default value of the scheme + component when no scheme is found in url. + + If allow_fragments is False, no attempt is made to separate the + fragment component from the previous component, which can be either + path or query. + + Note that % escapes are not expanded. + """ url, scheme, _coerce_result = _coerce_args(url, scheme) splitresult = urlsplit(url, scheme, allow_fragments) scheme, netloc, url, query, fragment = splitresult @@ -422,45 +422,45 @@ def _checknetloc(netloc): # looking for characters like \u2100 that expand to 'a/c' # IDNA uses NFKC equivalence, so normalize for this check import unicodedata - n = netloc.replace('@', '') # ignore characters already included - n = n.replace(':', '') # but not the surrounding text - n = n.replace('#', '') - n = n.replace('?', '') - netloc2 = unicodedata.normalize('NFKC', n) - if n == netloc2: + n = netloc.replace('@', '') # ignore characters already included + n = n.replace(':', '') # but not the surrounding text + n = n.replace('#', '') + n = n.replace('?', '') + netloc2 = unicodedata.normalize('NFKC', n) + if n == netloc2: return for c in '/?#@:': if c in netloc2: - raise ValueError("netloc '" + netloc + "' contains invalid " + + raise ValueError("netloc '" + netloc + "' contains invalid " + "characters under NFKC normalization") def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 components: <scheme>://<netloc>/<path>?<query>#<fragment> - - The result is a named 5-tuple with fields corresponding to the - above. It is either a SplitResult or SplitResultBytes object, - depending on the type of the url parameter. - - The username, password, hostname, and port sub-components of netloc - can also be accessed as attributes of the returned object. - - The scheme argument provides the default value of the scheme - component when no scheme is found in url. - - If allow_fragments is False, no attempt is made to separate the - fragment component from the previous component, which can be either - path or query. - - Note that % escapes are not expanded. - """ - + + The result is a named 5-tuple with fields corresponding to the + above. It is either a SplitResult or SplitResultBytes object, + depending on the type of the url parameter. + + The username, password, hostname, and port sub-components of netloc + can also be accessed as attributes of the returned object. + + The scheme argument provides the default value of the scheme + component when no scheme is found in url. + + If allow_fragments is False, no attempt is made to separate the + fragment component from the previous component, which can be either + path or query. + + Note that % escapes are not expanded. + """ + url, scheme, _coerce_result = _coerce_args(url, scheme) - - for b in _UNSAFE_URL_BYTES_TO_REMOVE: - url = url.replace(b, "") - scheme = scheme.replace(b, "") - + + for b in _UNSAFE_URL_BYTES_TO_REMOVE: + url = url.replace(b, "") + scheme = scheme.replace(b, "") + allow_fragments = bool(allow_fragments) key = url, scheme, allow_fragments, type(url), type(scheme) cached = _parse_cache.get(key, None) @@ -475,7 +475,7 @@ def urlsplit(url, scheme='', allow_fragments=True): if c not in scheme_chars: break else: - scheme, url = url[:i].lower(), url[i+1:] + scheme, url = url[:i].lower(), url[i+1:] if url[:2] == '//': netloc, url = _splitnetloc(url, 2) @@ -651,8 +651,8 @@ def unquote(string, encoding='utf-8', errors='replace'): unquote('abc%20def') -> 'abc def'. """ - if isinstance(string, bytes): - return unquote_to_bytes(string).decode(encoding, errors) + if isinstance(string, bytes): + return unquote_to_bytes(string).decode(encoding, errors) if '%' not in string: string.split return string @@ -670,7 +670,7 @@ def unquote(string, encoding='utf-8', errors='replace'): def parse_qs(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): + encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): """Parse a query given as a string argument. Arguments: @@ -694,15 +694,15 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, max_num_fields: int. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). - separator: str. The symbol to use for separating the query arguments. - Defaults to &. - + separator: str. The symbol to use for separating the query arguments. + Defaults to &. + Returns a dictionary. """ parsed_result = {} pairs = parse_qsl(qs, keep_blank_values, strict_parsing, encoding=encoding, errors=errors, - max_num_fields=max_num_fields, separator=separator) + max_num_fields=max_num_fields, separator=separator) for name, value in pairs: if name in parsed_result: parsed_result[name].append(value) @@ -712,7 +712,7 @@ def parse_qs(qs, keep_blank_values=False, strict_parsing=False, def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, - encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): + encoding='utf-8', errors='replace', max_num_fields=None, separator='&'): """Parse a query given as a string argument. Arguments: @@ -735,26 +735,26 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False, max_num_fields: int. If set, then throws a ValueError if there are more than n fields read by parse_qsl(). - separator: str. The symbol to use for separating the query arguments. - Defaults to &. - + separator: str. The symbol to use for separating the query arguments. + Defaults to &. + Returns a list, as G-d intended. """ qs, _coerce_result = _coerce_args(qs) - separator, _ = _coerce_args(separator) - - if not separator or (not isinstance(separator, (str, bytes))): - raise ValueError("Separator must be of type string or bytes.") + separator, _ = _coerce_args(separator) + if not separator or (not isinstance(separator, (str, bytes))): + raise ValueError("Separator must be of type string or bytes.") + # If max_num_fields is defined then check that the number of fields # is less than max_num_fields. This prevents a memory exhaustion DOS # attack via post bodies with many fields. if max_num_fields is not None: - num_fields = 1 + qs.count(separator) + num_fields = 1 + qs.count(separator) if max_num_fields < num_fields: raise ValueError('Max number of fields exceeded') - pairs = [s1 for s1 in qs.split(separator)] + pairs = [s1 for s1 in qs.split(separator)] r = [] for name_value in pairs: if not name_value and not strict_parsing: @@ -820,32 +820,32 @@ def quote(string, safe='/', encoding=None, errors=None): """quote('abc def') -> 'abc%20def' Each part of a URL, e.g. the path info, the query, etc., has a - different set of reserved characters that must be quoted. The - quote function offers a cautious (not minimal) way to quote a - string for most of these parts. + different set of reserved characters that must be quoted. The + quote function offers a cautious (not minimal) way to quote a + string for most of these parts. - RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists - the following (un)reserved characters. + RFC 3986 Uniform Resource Identifier (URI): Generic Syntax lists + the following (un)reserved characters. - unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - reserved = gen-delims / sub-delims - gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" - sub-delims = "!" / "$" / "&" / "'" / "(" / ")" - / "*" / "+" / "," / ";" / "=" + unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + reserved = gen-delims / sub-delims + gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" - Each of the reserved characters is reserved in some component of a URL, + Each of the reserved characters is reserved in some component of a URL, but not necessarily in all of them. - The quote function %-escapes all characters that are neither in the - unreserved chars ("always safe") nor the additional chars set via the - safe arg. - - The default for the safe arg is '/'. The character is reserved, but in - typical usage the quote function is being called on a path where the - existing slash characters are to be preserved. - + The quote function %-escapes all characters that are neither in the + unreserved chars ("always safe") nor the additional chars set via the + safe arg. + + The default for the safe arg is '/'. The character is reserved, but in + typical usage the quote function is being called on a path where the + existing slash characters are to be preserved. + Python 3.7 updates from using RFC 2396 to RFC 3986 to quote URL strings. - Now, "~" is included in the set of unreserved characters. + Now, "~" is included in the set of unreserved characters. string and safe may be either str or bytes objects. encoding and errors must not be specified if string is a bytes object. @@ -989,14 +989,14 @@ def urlencode(query, doseq=False, safe='', encoding=None, errors=None, l.append(k + '=' + elt) return '&'.join(l) - + def to_bytes(url): - warnings.warn("urllib.parse.to_bytes() is deprecated as of 3.8", - DeprecationWarning, stacklevel=2) - return _to_bytes(url) - - -def _to_bytes(url): + warnings.warn("urllib.parse.to_bytes() is deprecated as of 3.8", + DeprecationWarning, stacklevel=2) + return _to_bytes(url) + + +def _to_bytes(url): """to_bytes(u"URL") --> 'URL'.""" # Most URL schemes require ASCII. If that changes, the conversion # can be relaxed. @@ -1009,29 +1009,29 @@ def _to_bytes(url): " contains non-ASCII characters") return url - + def unwrap(url): - """Transform a string like '<URL:scheme://host/path>' into 'scheme://host/path'. - - The string is returned unchanged if it's not a wrapped URL. - """ + """Transform a string like '<URL:scheme://host/path>' into 'scheme://host/path'. + + The string is returned unchanged if it's not a wrapped URL. + """ url = str(url).strip() if url[:1] == '<' and url[-1:] == '>': url = url[1:-1].strip() - if url[:4] == 'URL:': - url = url[4:].strip() + if url[:4] == 'URL:': + url = url[4:].strip() return url - -def splittype(url): - warnings.warn("urllib.parse.splittype() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splittype(url) - - + +def splittype(url): + warnings.warn("urllib.parse.splittype() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splittype(url) + + _typeprog = None -def _splittype(url): +def _splittype(url): """splittype('type:opaquestring') --> 'type', 'opaquestring'.""" global _typeprog if _typeprog is None: @@ -1043,16 +1043,16 @@ def _splittype(url): return scheme.lower(), data return None, url - -def splithost(url): - warnings.warn("urllib.parse.splithost() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splithost(url) - - + +def splithost(url): + warnings.warn("urllib.parse.splithost() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splithost(url) + + _hostprog = None -def _splithost(url): +def _splithost(url): """splithost('//host[:port]/path') --> 'host[:port]', '/path'.""" global _hostprog if _hostprog is None: @@ -1066,64 +1066,64 @@ def _splithost(url): return host_port, path return None, url - + def splituser(host): - warnings.warn("urllib.parse.splituser() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splituser(host) - - -def _splituser(host): + warnings.warn("urllib.parse.splituser() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splituser(host) + + +def _splituser(host): """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.""" user, delim, host = host.rpartition('@') return (user if delim else None), host - + def splitpasswd(user): - warnings.warn("urllib.parse.splitpasswd() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splitpasswd(user) - - -def _splitpasswd(user): + warnings.warn("urllib.parse.splitpasswd() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splitpasswd(user) + + +def _splitpasswd(user): """splitpasswd('user:passwd') -> 'user', 'passwd'.""" user, delim, passwd = user.partition(':') return user, (passwd if delim else None) - -def splitport(host): - warnings.warn("urllib.parse.splitport() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splitport(host) - - + +def splitport(host): + warnings.warn("urllib.parse.splitport() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splitport(host) + + # splittag('/path#tag') --> '/path', 'tag' _portprog = None -def _splitport(host): +def _splitport(host): """splitport('host:port') --> 'host', 'port'.""" global _portprog if _portprog is None: - _portprog = re.compile('(.*):([0-9]*)', re.DOTALL) + _portprog = re.compile('(.*):([0-9]*)', re.DOTALL) - match = _portprog.fullmatch(host) + match = _portprog.fullmatch(host) if match: host, port = match.groups() if port: return host, port return host, None - + def splitnport(host, defport=-1): - warnings.warn("urllib.parse.splitnport() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splitnport(host, defport) - - -def _splitnport(host, defport=-1): + warnings.warn("urllib.parse.splitnport() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splitnport(host, defport) + + +def _splitnport(host, defport=-1): """Split host and port, returning numeric port. Return given default port if no ':' found; defaults to -1. Return numerical port if a valid number are found after ':'. @@ -1139,59 +1139,59 @@ def _splitnport(host, defport=-1): return host, nport return host, defport - + def splitquery(url): - warnings.warn("urllib.parse.splitquery() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splitquery(url) - - -def _splitquery(url): + warnings.warn("urllib.parse.splitquery() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splitquery(url) + + +def _splitquery(url): """splitquery('/path?query') --> '/path', 'query'.""" path, delim, query = url.rpartition('?') if delim: return path, query return url, None - + def splittag(url): - warnings.warn("urllib.parse.splittag() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splittag(url) - - -def _splittag(url): + warnings.warn("urllib.parse.splittag() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splittag(url) + + +def _splittag(url): """splittag('/path#tag') --> '/path', 'tag'.""" path, delim, tag = url.rpartition('#') if delim: return path, tag return url, None - + def splitattr(url): - warnings.warn("urllib.parse.splitattr() is deprecated as of 3.8, " - "use urllib.parse.urlparse() instead", - DeprecationWarning, stacklevel=2) - return _splitattr(url) - - -def _splitattr(url): + warnings.warn("urllib.parse.splitattr() is deprecated as of 3.8, " + "use urllib.parse.urlparse() instead", + DeprecationWarning, stacklevel=2) + return _splitattr(url) + + +def _splitattr(url): """splitattr('/path;attr1=value1;attr2=value2;...') -> '/path', ['attr1=value1', 'attr2=value2', ...].""" words = url.split(';') return words[0], words[1:] - + def splitvalue(attr): - warnings.warn("urllib.parse.splitvalue() is deprecated as of 3.8, " - "use urllib.parse.parse_qsl() instead", - DeprecationWarning, stacklevel=2) - return _splitvalue(attr) - - -def _splitvalue(attr): + warnings.warn("urllib.parse.splitvalue() is deprecated as of 3.8, " + "use urllib.parse.parse_qsl() instead", + DeprecationWarning, stacklevel=2) + return _splitvalue(attr) + + +def _splitvalue(attr): """splitvalue('attr=value') --> 'attr', 'value'.""" attr, delim, value = attr.partition('=') return attr, (value if delim else None) diff --git a/contrib/tools/python3/src/Lib/urllib/request.py b/contrib/tools/python3/src/Lib/urllib/request.py index bbdc2254e3..6d4053afd0 100644 --- a/contrib/tools/python3/src/Lib/urllib/request.py +++ b/contrib/tools/python3/src/Lib/urllib/request.py @@ -64,7 +64,7 @@ opener = urllib.request.build_opener(proxy_support, authinfo, # install it urllib.request.install_opener(opener) -f = urllib.request.urlopen('https://www.python.org/') +f = urllib.request.urlopen('https://www.python.org/') """ # XXX issues: @@ -102,8 +102,8 @@ import warnings from urllib.error import URLError, HTTPError, ContentTooShortError from urllib.parse import ( urlparse, urlsplit, urljoin, unwrap, quote, unquote, - _splittype, _splithost, _splitport, _splituser, _splitpasswd, - _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes, + _splittype, _splithost, _splitport, _splituser, _splitpasswd, + _splitattr, _splitquery, _splitvalue, _splittag, _to_bytes, unquote_to_bytes, urlunparse) from urllib.response import addinfourl, addclosehook @@ -164,9 +164,9 @@ def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, The *cadefault* parameter is ignored. - This function always returns an object which can work as a - context manager and has the properties url, headers, and status. - See urllib.response.addinfourl for more detail on these properties. + This function always returns an object which can work as a + context manager and has the properties url, headers, and status. + See urllib.response.addinfourl for more detail on these properties. For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse object slightly modified. In addition to the three new methods above, the @@ -234,7 +234,7 @@ def urlretrieve(url, filename=None, reporthook=None, data=None): Returns a tuple containing the path to the newly created data file as well as the resulting HTTPMessage object. """ - url_type, path = _splittype(url) + url_type, path = _splittype(url) with contextlib.closing(urlopen(url, data)) as fp: headers = fp.info() @@ -342,7 +342,7 @@ class Request: def full_url(self, url): # unwrap('<URL:type://host/path>') --> 'type://host/path' self._full_url = unwrap(url) - self._full_url, self.fragment = _splittag(self._full_url) + self._full_url, self.fragment = _splittag(self._full_url) self._parse() @full_url.deleter @@ -370,10 +370,10 @@ class Request: self.data = None def _parse(self): - self.type, rest = _splittype(self._full_url) + self.type, rest = _splittype(self._full_url) if self.type is None: raise ValueError("unknown url type: %r" % self.full_url) - self.host, self.selector = _splithost(rest) + self.host, self.selector = _splithost(rest) if self.host: self.host = unquote(self.host) @@ -418,7 +418,7 @@ class Request: self.unredirected_hdrs.pop(header_name, None) def header_items(self): - hdrs = {**self.unredirected_hdrs, **self.headers} + hdrs = {**self.unredirected_hdrs, **self.headers} return list(hdrs.items()) class OpenerDirector: @@ -513,7 +513,7 @@ class OpenerDirector: meth = getattr(processor, meth_name) req = meth(req) - sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method()) + sys.audit('urllib.Request', req.full_url, req.data, req.headers, req.get_method()) response = self._open(req, data) # post-process response @@ -760,7 +760,7 @@ def _parse_proxy(proxy): According to RFC 3986, having an authority component means the URL must have two slashes after the scheme. """ - scheme, r_scheme = _splittype(proxy) + scheme, r_scheme = _splittype(proxy) if not r_scheme.startswith("/"): # authority scheme = None @@ -771,17 +771,17 @@ def _parse_proxy(proxy): raise ValueError("proxy URL with no authority: %r" % proxy) # We have an authority, so for RFC 3986-compliant URLs (by ss 3. # and 3.3.), path is empty or starts with '/' - if '@' in r_scheme: - host_separator = r_scheme.find('@') - end = r_scheme.find("/", host_separator) - else: - end = r_scheme.find("/", 2) + if '@' in r_scheme: + host_separator = r_scheme.find('@') + end = r_scheme.find("/", host_separator) + else: + end = r_scheme.find("/", 2) if end == -1: end = None authority = r_scheme[2:end] - userinfo, hostport = _splituser(authority) + userinfo, hostport = _splituser(authority) if userinfo is not None: - user, password = _splitpasswd(userinfo) + user, password = _splitpasswd(userinfo) else: user = password = None return scheme, user, password, hostport @@ -796,7 +796,7 @@ class ProxyHandler(BaseHandler): assert hasattr(proxies, 'keys'), "proxies must be a mapping" self.proxies = proxies for type, url in proxies.items(): - type = type.lower() + type = type.lower() setattr(self, '%s_open' % type, lambda r, proxy=url, type=type, meth=self.proxy_open: meth(r, proxy, type)) @@ -869,7 +869,7 @@ class HTTPPasswordMgr: scheme = None authority = uri path = '/' - host, port = _splitport(authority) + host, port = _splitport(authority) if default_port and port is None and scheme is not None: dport = {"http": 80, "https": 443, @@ -941,15 +941,15 @@ class AbstractBasicAuthHandler: # allow for double- and single-quoted realm values # (single quotes are a violation of the RFC, but appear in the wild) - rx = re.compile('(?:^|,)' # start of the string or ',' - '[ \t]*' # optional whitespaces - '([^ \t,]+)' # scheme like "Basic" - '[ \t]+' # mandatory whitespaces - # realm=xxx - # realm='xxx' - # realm="xxx" - 'realm=(["\']?)([^"\']*)\\2', - re.I) + rx = re.compile('(?:^|,)' # start of the string or ',' + '[ \t]*' # optional whitespaces + '([^ \t,]+)' # scheme like "Basic" + '[ \t]+' # mandatory whitespaces + # realm=xxx + # realm='xxx' + # realm="xxx" + 'realm=(["\']?)([^"\']*)\\2', + re.I) # XXX could pre-emptively send auth info already accepted (RFC 2617, # end of section 2, and section 1.2 immediately after "credentials" @@ -961,52 +961,52 @@ class AbstractBasicAuthHandler: self.passwd = password_mgr self.add_password = self.passwd.add_password - def _parse_realm(self, header): - # parse WWW-Authenticate header: accept multiple challenges per header - found_challenge = False - for mo in AbstractBasicAuthHandler.rx.finditer(header): - scheme, quote, realm = mo.groups() - if quote not in ['"', "'"]: - warnings.warn("Basic Auth Realm was unquoted", - UserWarning, 3) - - yield (scheme, realm) - - found_challenge = True - - if not found_challenge: - if header: - scheme = header.split()[0] - else: - scheme = '' - yield (scheme, None) - + def _parse_realm(self, header): + # parse WWW-Authenticate header: accept multiple challenges per header + found_challenge = False + for mo in AbstractBasicAuthHandler.rx.finditer(header): + scheme, quote, realm = mo.groups() + if quote not in ['"', "'"]: + warnings.warn("Basic Auth Realm was unquoted", + UserWarning, 3) + + yield (scheme, realm) + + found_challenge = True + + if not found_challenge: + if header: + scheme = header.split()[0] + else: + scheme = '' + yield (scheme, None) + def http_error_auth_reqed(self, authreq, host, req, headers): # host may be an authority (without userinfo) or a URL with an # authority - headers = headers.get_all(authreq) - if not headers: - # no header found - return - - unsupported = None - for header in headers: - for scheme, realm in self._parse_realm(header): - if scheme.lower() != 'basic': - unsupported = scheme - continue - - if realm is not None: - # Use the first matching Basic challenge. - # Ignore following challenges even if they use the Basic - # scheme. - return self.retry_http_basic_auth(host, req, realm) - - if unsupported is not None: - raise ValueError("AbstractBasicAuthHandler does not " - "support the following scheme: %r" - % (scheme,)) - + headers = headers.get_all(authreq) + if not headers: + # no header found + return + + unsupported = None + for header in headers: + for scheme, realm in self._parse_realm(header): + if scheme.lower() != 'basic': + unsupported = scheme + continue + + if realm is not None: + # Use the first matching Basic challenge. + # Ignore following challenges even if they use the Basic + # scheme. + return self.retry_http_basic_auth(host, req, realm) + + if unsupported is not None: + raise ValueError("AbstractBasicAuthHandler does not " + "support the following scheme: %r" + % (scheme,)) + def retry_http_basic_auth(self, host, req, realm): user, pw = self.passwd.find_user_password(realm, host) if pw is not None: @@ -1171,11 +1171,11 @@ class AbstractDigestAuthHandler: A2 = "%s:%s" % (req.get_method(), # XXX selector: what about proxies and full urls req.selector) - # NOTE: As per RFC 2617, when server sends "auth,auth-int", the client could use either `auth` - # or `auth-int` to the response back. we use `auth` to send the response back. - if qop is None: - respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) - elif 'auth' in qop.split(','): + # NOTE: As per RFC 2617, when server sends "auth,auth-int", the client could use either `auth` + # or `auth-int` to the response back. we use `auth` to send the response back. + if qop is None: + respdig = KD(H(A1), "%s:%s" % (nonce, H(A2))) + elif 'auth' in qop.split(','): if nonce == self.last_nonce: self.nonce_count += 1 else: @@ -1183,7 +1183,7 @@ class AbstractDigestAuthHandler: self.last_nonce = nonce ncvalue = '%08x' % self.nonce_count cnonce = self.get_cnonce(nonce) - noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', H(A2)) + noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, 'auth', H(A2)) respdig = KD(H(A1), noncebit) else: # XXX handle auth-int. @@ -1291,8 +1291,8 @@ class AbstractHTTPHandler(BaseHandler): sel_host = host if request.has_proxy(): - scheme, sel = _splittype(request.selector) - sel_host, sel_path = _splithost(sel) + scheme, sel = _splittype(request.selector) + sel_host, sel_path = _splithost(sel) if not request.has_header('Host'): request.add_unredirected_header('Host', sel_host) for name, value in self.parent.addheaders: @@ -1508,7 +1508,7 @@ class FileHandler(BaseHandler): 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % (mtype or 'text/plain', size, modified)) if host: - host, port = _splitport(host) + host, port = _splitport(host) if not host or \ (not port and _safe_gethostbyname(host) in self.get_names()): if host: @@ -1533,16 +1533,16 @@ class FTPHandler(BaseHandler): host = req.host if not host: raise URLError('ftp error: no host given') - host, port = _splitport(host) + host, port = _splitport(host) if port is None: port = ftplib.FTP_PORT else: port = int(port) # username/password handling - user, host = _splituser(host) + user, host = _splituser(host) if user: - user, passwd = _splitpasswd(user) + user, passwd = _splitpasswd(user) else: passwd = None host = unquote(host) @@ -1553,7 +1553,7 @@ class FTPHandler(BaseHandler): host = socket.gethostbyname(host) except OSError as msg: raise URLError(msg) - path, attrs = _splitattr(req.selector) + path, attrs = _splitattr(req.selector) dirs = path.split('/') dirs = list(map(unquote, dirs)) dirs, file = dirs[:-1], dirs[-1] @@ -1563,7 +1563,7 @@ class FTPHandler(BaseHandler): fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout) type = file and 'I' or 'D' for attr in attrs: - attr, value = _splitvalue(attr) + attr, value = _splitvalue(attr) if attr.lower() == 'type' and \ value in ('a', 'A', 'i', 'I', 'd', 'D'): type = value.upper() @@ -1757,26 +1757,26 @@ class URLopener: # External interface def open(self, fullurl, data=None): """Use URLopener().open(file) instead of open(file, 'r').""" - fullurl = unwrap(_to_bytes(fullurl)) + fullurl = unwrap(_to_bytes(fullurl)) fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") if self.tempcache and fullurl in self.tempcache: filename, headers = self.tempcache[fullurl] fp = open(filename, 'rb') return addinfourl(fp, headers, fullurl) - urltype, url = _splittype(fullurl) + urltype, url = _splittype(fullurl) if not urltype: urltype = 'file' if urltype in self.proxies: proxy = self.proxies[urltype] - urltype, proxyhost = _splittype(proxy) - host, selector = _splithost(proxyhost) + urltype, proxyhost = _splittype(proxy) + host, selector = _splithost(proxyhost) url = (host, fullurl) # Signal special case to open_*() else: proxy = None name = 'open_' + urltype self.type = urltype name = name.replace('-', '_') - if not hasattr(self, name) or name == 'open_local_file': + if not hasattr(self, name) or name == 'open_local_file': if proxy: return self.open_unknown_proxy(proxy, fullurl, data) else: @@ -1793,29 +1793,29 @@ class URLopener: def open_unknown(self, fullurl, data=None): """Overridable interface to open unknown URL type.""" - type, url = _splittype(fullurl) + type, url = _splittype(fullurl) raise OSError('url error', 'unknown url type', type) def open_unknown_proxy(self, proxy, fullurl, data=None): """Overridable interface to open unknown URL type.""" - type, url = _splittype(fullurl) + type, url = _splittype(fullurl) raise OSError('url error', 'invalid proxy for %s' % type, proxy) # External interface def retrieve(self, url, filename=None, reporthook=None, data=None): """retrieve(url) returns (filename, headers) for a local object or (tempfilename, headers) for a remote object.""" - url = unwrap(_to_bytes(url)) + url = unwrap(_to_bytes(url)) if self.tempcache and url in self.tempcache: return self.tempcache[url] - type, url1 = _splittype(url) + type, url1 = _splittype(url) if filename is None and (not type or type == 'file'): try: fp = self.open_local_file(url1) hdrs = fp.info() fp.close() - return url2pathname(_splithost(url1)[1]), hdrs - except OSError: + return url2pathname(_splithost(url1)[1]), hdrs + except OSError: pass fp = self.open(url, data) try: @@ -1823,10 +1823,10 @@ class URLopener: if filename: tfp = open(filename, 'wb') else: - garbage, path = _splittype(url) - garbage, path = _splithost(path or "") - path, garbage = _splitquery(path or "") - path, garbage = _splitattr(path or "") + garbage, path = _splittype(url) + garbage, path = _splithost(path or "") + path, garbage = _splitquery(path or "") + path, garbage = _splitattr(path or "") suffix = os.path.splitext(path)[1] (fd, filename) = tempfile.mkstemp(suffix) self.__tempfiles.append(filename) @@ -1883,25 +1883,25 @@ class URLopener: user_passwd = None proxy_passwd= None if isinstance(url, str): - host, selector = _splithost(url) + host, selector = _splithost(url) if host: - user_passwd, host = _splituser(host) + user_passwd, host = _splituser(host) host = unquote(host) realhost = host else: host, selector = url # check whether the proxy contains authorization information - proxy_passwd, host = _splituser(host) + proxy_passwd, host = _splituser(host) # now we proceed with the url we want to obtain - urltype, rest = _splittype(selector) + urltype, rest = _splittype(selector) url = rest user_passwd = None if urltype.lower() != 'http': realhost = None else: - realhost, rest = _splithost(rest) + realhost, rest = _splithost(rest) if realhost: - user_passwd, realhost = _splituser(realhost) + user_passwd, realhost = _splituser(realhost) if user_passwd: selector = "%s://%s%s" % (urltype, realhost, rest) if proxy_bypass(realhost): @@ -2007,7 +2007,7 @@ class URLopener: """Use local file.""" import email.utils import mimetypes - host, file = _splithost(url) + host, file = _splithost(url) localname = url2pathname(file) try: stats = os.stat(localname) @@ -2024,7 +2024,7 @@ class URLopener: if file[:1] == '/': urlfile = 'file://' + file return addinfourl(open(localname, 'rb'), headers, urlfile) - host, port = _splitport(host) + host, port = _splitport(host) if (not port and socket.gethostbyname(host) in ((localhost(),) + thishost())): urlfile = file @@ -2040,11 +2040,11 @@ class URLopener: if not isinstance(url, str): raise URLError('ftp error: proxy support for ftp protocol currently not implemented') import mimetypes - host, path = _splithost(url) + host, path = _splithost(url) if not host: raise URLError('ftp error: no host given') - host, port = _splitport(host) - user, host = _splituser(host) - if user: user, passwd = _splitpasswd(user) + host, port = _splitport(host) + user, host = _splituser(host) + if user: user, passwd = _splitpasswd(user) else: passwd = None host = unquote(host) user = unquote(user or '') @@ -2055,7 +2055,7 @@ class URLopener: port = ftplib.FTP_PORT else: port = int(port) - path, attrs = _splitattr(path) + path, attrs = _splitattr(path) path = unquote(path) dirs = path.split('/') dirs, file = dirs[:-1], dirs[-1] @@ -2077,7 +2077,7 @@ class URLopener: if not file: type = 'D' else: type = 'I' for attr in attrs: - attr, value = _splitvalue(attr) + attr, value = _splitvalue(attr) if attr.lower() == 'type' and \ value in ('a', 'A', 'i', 'I', 'd', 'D'): type = value.upper() @@ -2260,11 +2260,11 @@ class FancyURLopener(URLopener): return getattr(self,name)(url, realm, data) def retry_proxy_http_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) + host, selector = _splithost(url) newurl = 'http://' + host + selector proxy = self.proxies['http'] - urltype, proxyhost = _splittype(proxy) - proxyhost, proxyselector = _splithost(proxyhost) + urltype, proxyhost = _splittype(proxy) + proxyhost, proxyselector = _splithost(proxyhost) i = proxyhost.find('@') + 1 proxyhost = proxyhost[i:] user, passwd = self.get_user_passwd(proxyhost, realm, i) @@ -2278,11 +2278,11 @@ class FancyURLopener(URLopener): return self.open(newurl, data) def retry_proxy_https_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) + host, selector = _splithost(url) newurl = 'https://' + host + selector proxy = self.proxies['https'] - urltype, proxyhost = _splittype(proxy) - proxyhost, proxyselector = _splithost(proxyhost) + urltype, proxyhost = _splittype(proxy) + proxyhost, proxyselector = _splithost(proxyhost) i = proxyhost.find('@') + 1 proxyhost = proxyhost[i:] user, passwd = self.get_user_passwd(proxyhost, realm, i) @@ -2296,7 +2296,7 @@ class FancyURLopener(URLopener): return self.open(newurl, data) def retry_http_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) + host, selector = _splithost(url) i = host.find('@') + 1 host = host[i:] user, passwd = self.get_user_passwd(host, realm, i) @@ -2310,7 +2310,7 @@ class FancyURLopener(URLopener): return self.open(newurl, data) def retry_https_basic_auth(self, url, realm, data=None): - host, selector = _splithost(url) + host, selector = _splithost(url) i = host.find('@') + 1 host = host[i:] user, passwd = self.get_user_passwd(host, realm, i) @@ -2527,26 +2527,26 @@ def proxy_bypass_environment(host, proxies=None): try: no_proxy = proxies['no'] except KeyError: - return False + return False # '*' is special case for always bypass if no_proxy == '*': - return True - host = host.lower() + return True + host = host.lower() # strip port off host - hostonly, port = _splitport(host) + hostonly, port = _splitport(host) # check if the host ends with any of the DNS suffixes - for name in no_proxy.split(','): - name = name.strip() + for name in no_proxy.split(','): + name = name.strip() if name: name = name.lstrip('.') # ignore leading dots - name = name.lower() - if hostonly == name or host == name: - return True - name = '.' + name - if hostonly.endswith(name) or host.endswith(name): - return True + name = name.lower() + if hostonly == name or host == name: + return True + name = '.' + name + if hostonly.endswith(name) or host.endswith(name): + return True # otherwise, don't bypass - return False + return False # This code tests an OSX specific data structure but is testable on all @@ -2565,7 +2565,7 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): """ from fnmatch import fnmatch - hostonly, port = _splitport(host) + hostonly, port = _splitport(host) def ip2num(ipAddr): parts = ipAddr.split('.') @@ -2600,11 +2600,11 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): mask = 8 * (m.group(1).count('.') + 1) else: mask = int(mask[1:]) - - if mask < 0 or mask > 32: - # System libraries ignore invalid prefix lengths - continue - + + if mask < 0 or mask > 32: + # System libraries ignore invalid prefix lengths + continue + mask = 32 - mask if (hostIP >> mask) == (base >> mask): @@ -2677,7 +2677,7 @@ elif os.name == 'nt': for p in proxyServer.split(';'): protocol, address = p.split('=', 1) # See if address has a type:// prefix - if not re.match('(?:[^/:]+)://', address): + if not re.match('(?:[^/:]+)://', address): address = '%s://%s' % (protocol, address) proxies[protocol] = address else: @@ -2724,7 +2724,7 @@ elif os.name == 'nt': if not proxyEnable or not proxyOverride: return 0 # try to make a host list from name and IP address. - rawHost, port = _splitport(host) + rawHost, port = _splitport(host) host = [rawHost] try: addr = socket.gethostbyname(rawHost) diff --git a/contrib/tools/python3/src/Lib/urllib/response.py b/contrib/tools/python3/src/Lib/urllib/response.py index 5a2c3cc78c..aab657304c 100644 --- a/contrib/tools/python3/src/Lib/urllib/response.py +++ b/contrib/tools/python3/src/Lib/urllib/response.py @@ -73,10 +73,10 @@ class addinfourl(addinfo): self.url = url self.code = code - @property - def status(self): - return self.code - + @property + def status(self): + return self.code + def getcode(self): return self.code diff --git a/contrib/tools/python3/src/Lib/urllib/robotparser.py b/contrib/tools/python3/src/Lib/urllib/robotparser.py index c58565e394..71f74f4dc0 100644 --- a/contrib/tools/python3/src/Lib/urllib/robotparser.py +++ b/contrib/tools/python3/src/Lib/urllib/robotparser.py @@ -27,7 +27,7 @@ class RobotFileParser: def __init__(self, url=''): self.entries = [] - self.sitemaps = [] + self.sitemaps = [] self.default_entry = None self.disallow_all = False self.allow_all = False @@ -142,12 +142,12 @@ class RobotFileParser: and numbers[1].strip().isdigit()): entry.req_rate = RequestRate(int(numbers[0]), int(numbers[1])) state = 2 - elif line[0] == "sitemap": - # According to http://www.sitemaps.org/protocol.html - # "This directive is independent of the user-agent line, - # so it doesn't matter where you place it in your file." - # Therefore we do not change the state of the parser. - self.sitemaps.append(line[1]) + elif line[0] == "sitemap": + # According to http://www.sitemaps.org/protocol.html + # "This directive is independent of the user-agent line, + # so it doesn't matter where you place it in your file." + # Therefore we do not change the state of the parser. + self.sitemaps.append(line[1]) if state == 2: self._add_entry(entry) @@ -186,9 +186,9 @@ class RobotFileParser: for entry in self.entries: if entry.applies_to(useragent): return entry.delay - if self.default_entry: - return self.default_entry.delay - return None + if self.default_entry: + return self.default_entry.delay + return None def request_rate(self, useragent): if not self.mtime(): @@ -196,20 +196,20 @@ class RobotFileParser: for entry in self.entries: if entry.applies_to(useragent): return entry.req_rate - if self.default_entry: - return self.default_entry.req_rate - return None - - def site_maps(self): - if not self.sitemaps: - return None - return self.sitemaps - + if self.default_entry: + return self.default_entry.req_rate + return None + + def site_maps(self): + if not self.sitemaps: + return None + return self.sitemaps + def __str__(self): entries = self.entries if self.default_entry is not None: entries = entries + [self.default_entry] - return '\n\n'.join(map(str, entries)) + return '\n\n'.join(map(str, entries)) class RuleLine: |