path: root/contrib/python/hyperlink
author     shmel1k <shmel1k@ydb.tech>  2023-11-26 18:16:14 +0300
committer  shmel1k <shmel1k@ydb.tech>  2023-11-26 18:43:30 +0300
commit     b8cf9e88f4c5c64d9406af533d8948deb050d695 (patch)
tree       218eb61fb3c3b96ec08b4d8cdfef383104a87d63 /contrib/python/hyperlink
parent     523f645a83a0ec97a0332dbc3863bb354c92a328 (diff)
download   ydb-b8cf9e88f4c5c64d9406af533d8948deb050d695.tar.gz
add kikimr_configure
Diffstat (limited to 'contrib/python/hyperlink')
-rw-r--r--  contrib/python/hyperlink/py2/.dist-info/METADATA | 38
-rw-r--r--  contrib/python/hyperlink/py2/.dist-info/top_level.txt | 1
-rw-r--r--  contrib/python/hyperlink/py2/LICENSE | 29
-rw-r--r--  contrib/python/hyperlink/py2/README.md | 67
-rw-r--r--  contrib/python/hyperlink/py2/hyperlink/__init__.py | 17
-rw-r--r--  contrib/python/hyperlink/py2/hyperlink/_socket.py | 53
-rw-r--r--  contrib/python/hyperlink/py2/hyperlink/_url.py | 2448
-rw-r--r--  contrib/python/hyperlink/py2/hyperlink/hypothesis.py | 324
-rw-r--r--  contrib/python/hyperlink/py2/hyperlink/idna-tables-properties.csv.gz | bin 0 -> 25555 bytes
-rw-r--r--  contrib/python/hyperlink/py2/hyperlink/py.typed | 1
-rw-r--r--  contrib/python/hyperlink/py2/ya.make | 36
-rw-r--r--  contrib/python/hyperlink/py3/.dist-info/METADATA | 38
-rw-r--r--  contrib/python/hyperlink/py3/.dist-info/top_level.txt | 1
-rw-r--r--  contrib/python/hyperlink/py3/LICENSE | 29
-rw-r--r--  contrib/python/hyperlink/py3/README.md | 67
-rw-r--r--  contrib/python/hyperlink/py3/hyperlink/__init__.py | 17
-rw-r--r--  contrib/python/hyperlink/py3/hyperlink/_socket.py | 53
-rw-r--r--  contrib/python/hyperlink/py3/hyperlink/_url.py | 2448
-rw-r--r--  contrib/python/hyperlink/py3/hyperlink/hypothesis.py | 324
-rw-r--r--  contrib/python/hyperlink/py3/hyperlink/idna-tables-properties.csv.gz | bin 0 -> 25555 bytes
-rw-r--r--  contrib/python/hyperlink/py3/hyperlink/py.typed | 1
-rw-r--r--  contrib/python/hyperlink/py3/ya.make | 35
-rw-r--r--  contrib/python/hyperlink/ya.make | 18
23 files changed, 6045 insertions, 0 deletions
diff --git a/contrib/python/hyperlink/py2/.dist-info/METADATA b/contrib/python/hyperlink/py2/.dist-info/METADATA
new file mode 100644
index 0000000000..fc5922ba87
--- /dev/null
+++ b/contrib/python/hyperlink/py2/.dist-info/METADATA
@@ -0,0 +1,38 @@
+Metadata-Version: 2.1
+Name: hyperlink
+Version: 21.0.0
+Summary: A featureful, immutable, and correct URL for Python.
+Home-page: https://github.com/python-hyper/hyperlink
+Author: Mahmoud Hashemi and Glyph Lefkowitz
+Author-email: mahmoud@hatnote.com
+License: MIT
+Platform: any
+Classifier: Topic :: Utilities
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Software Development :: Libraries
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: License :: OSI Approved :: MIT License
+Requires-Python: >=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*
+Requires-Dist: idna (>=2.5)
+Requires-Dist: typing ; python_version < "3.5"
+
+The humble, but powerful, URL runs everything around us. Chances
+are you've used several just to read this text.
+
+Hyperlink is a featureful, pure-Python implementation of the URL, with
+an emphasis on correctness. MIT licensed.
+
+See the docs at http://hyperlink.readthedocs.io.
+
+
diff --git a/contrib/python/hyperlink/py2/.dist-info/top_level.txt b/contrib/python/hyperlink/py2/.dist-info/top_level.txt
new file mode 100644
index 0000000000..81722ce1d8
--- /dev/null
+++ b/contrib/python/hyperlink/py2/.dist-info/top_level.txt
@@ -0,0 +1 @@
+hyperlink
diff --git a/contrib/python/hyperlink/py2/LICENSE b/contrib/python/hyperlink/py2/LICENSE
new file mode 100644
index 0000000000..a73f882ffb
--- /dev/null
+++ b/contrib/python/hyperlink/py2/LICENSE
@@ -0,0 +1,29 @@
+Copyright (c) 2017
+Glyph Lefkowitz
+Itamar Turner-Trauring
+Jean Paul Calderone
+Adi Roiban
+Amber Hawkie Brown
+Mahmoud Hashemi
+Wilfredo Sanchez Vega
+
+and others that have contributed code to the public domain.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/contrib/python/hyperlink/py2/README.md b/contrib/python/hyperlink/py2/README.md
new file mode 100644
index 0000000000..017f9eb88c
--- /dev/null
+++ b/contrib/python/hyperlink/py2/README.md
@@ -0,0 +1,67 @@
+# Hyperlink
+
+*Cool URLs that don't change.*
+
+<a href="https://hyperlink.readthedocs.io/en/latest/">
+ <img src="https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat" alt="Documentation">
+</a>
+<a href="https://pypi.org/project/hyperlink/">
+ <img src="https://img.shields.io/pypi/v/hyperlink.svg" alt="PyPI">
+</a>
+<a href="http://calver.org">
+ <img src="https://img.shields.io/badge/calver-YY.MINOR.MICRO-22bfda.svg" alt="Calendar Versioning">
+</a>
+<a href="https://pypi.org/project/hyperlink/">
+ <img src="https://img.shields.io/pypi/pyversions/hyperlink.svg" alt="Python Version Compatibility">
+</a>
+<a href="https://https://codecov.io/github/python-hyper/hyperlink?branch=master">
+ <img src="https://codecov.io/github/python-hyper/hyperlink/coverage.svg?branch=master" alt="Code Coverage">
+</a>
+<a href="https://requires.io/github/python-hyper/hyperlink/requirements/?branch=master">
+ <img src="https://requires.io/github/python-hyper/hyperlink/requirements.svg?branch=master" alt="Requirements Status">
+</a>
+
+Hyperlink provides a pure-Python implementation of immutable
+URLs. Based on [RFC 3986][rfc3986] and [3987][rfc3987], the Hyperlink URL
+makes working with both URIs and IRIs easy.
+
+Hyperlink is tested against Python 2.7, 3.4, 3.5, 3.6, 3.7, 3.8, and PyPy.
+
+Full documentation is available on [Read the Docs][docs].
+
+[rfc3986]: https://tools.ietf.org/html/rfc3986
+[rfc3987]: https://tools.ietf.org/html/rfc3987
+[docs]: http://hyperlink.readthedocs.io/en/latest/
+
+## Installation
+
+Hyperlink is a pure-Python package and requires nothing but
+Python. The easiest way to install is with pip:
+
+```
+pip install hyperlink
+```
+
+Then, hyperlink away!
+
+```python
+from hyperlink import URL
+
+url = URL.from_text(u'http://github.com/python-hyper/hyperlink?utm_source=README')
+utm_source = url.get(u'utm_source')
+better_url = url.replace(scheme=u'https', port=443)
+org_url = better_url.click(u'.')
+```
+
+See the full API docs on [Read the Docs][docs].
+
+## More information
+
+Hyperlink would not have been possible without the help of
+[Glyph Lefkowitz](https://glyph.twistedmatrix.com/) and many other
+community members, especially considering that it started as an
+extract from the Twisted networking library. Thanks to them,
+Hyperlink's URL has been production-grade for well over a decade.
+
+Still, should you encounter any issues, do file an issue, or submit a
+pull request.
diff --git a/contrib/python/hyperlink/py2/hyperlink/__init__.py b/contrib/python/hyperlink/py2/hyperlink/__init__.py
new file mode 100644
index 0000000000..f680b01a90
--- /dev/null
+++ b/contrib/python/hyperlink/py2/hyperlink/__init__.py
@@ -0,0 +1,17 @@
+from ._url import (
+ parse,
+ register_scheme,
+ URL,
+ EncodedURL,
+ DecodedURL,
+ URLParseError,
+)
+
+__all__ = (
+ "parse",
+ "register_scheme",
+ "URL",
+ "EncodedURL",
+ "DecodedURL",
+ "URLParseError",
+)
diff --git a/contrib/python/hyperlink/py2/hyperlink/_socket.py b/contrib/python/hyperlink/py2/hyperlink/_socket.py
new file mode 100644
index 0000000000..3bcf89706d
--- /dev/null
+++ b/contrib/python/hyperlink/py2/hyperlink/_socket.py
@@ -0,0 +1,53 @@
+try:
+ from socket import inet_pton
+except ImportError:
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING: # pragma: no cover
+ pass
+ else:
+ # based on https://gist.github.com/nnemkin/4966028
+ # this code only applies on Windows Python 2.7
+ import ctypes
+ import socket
+
+ class SockAddr(ctypes.Structure):
+ _fields_ = [
+ ("sa_family", ctypes.c_short),
+ ("__pad1", ctypes.c_ushort),
+ ("ipv4_addr", ctypes.c_byte * 4),
+ ("ipv6_addr", ctypes.c_byte * 16),
+ ("__pad2", ctypes.c_ulong),
+ ]
+
+ WSAStringToAddressA = ctypes.windll.ws2_32.WSAStringToAddressA
+ WSAAddressToStringA = ctypes.windll.ws2_32.WSAAddressToStringA
+
+ def inet_pton(address_family, ip_string):
+ # type: (int, str) -> bytes
+ addr = SockAddr()
+ ip_string_bytes = ip_string.encode("ascii")
+ addr.sa_family = address_family
+ addr_size = ctypes.c_int(ctypes.sizeof(addr))
+
+ try:
+ attribute, size = {
+ socket.AF_INET: ("ipv4_addr", 4),
+ socket.AF_INET6: ("ipv6_addr", 16),
+ }[address_family]
+ except KeyError:
+ raise socket.error("unknown address family")
+
+ if (
+ WSAStringToAddressA(
+ ip_string_bytes,
+ address_family,
+ None,
+ ctypes.byref(addr),
+ ctypes.byref(addr_size),
+ )
+ != 0
+ ):
+ raise socket.error(ctypes.FormatError())
+
+ return ctypes.string_at(getattr(addr, attribute), size)
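Both the stdlib `inet_pton` and the ctypes fallback above follow the same contract: return the packed binary address on success, raise `socket.error` on malformed input. A minimal usage sketch (illustrative only, not part of the diff; it assumes the vendored package is importable as `hyperlink`):

```python
import socket

from hyperlink._socket import inet_pton

packed = inet_pton(socket.AF_INET6, "::1")
assert len(packed) == 16  # IPv6 addresses pack to 16 bytes

try:
    inet_pton(socket.AF_INET6, "not-an-address")
except socket.error:
    pass  # malformed literals are rejected by either implementation
```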
diff --git a/contrib/python/hyperlink/py2/hyperlink/_url.py b/contrib/python/hyperlink/py2/hyperlink/_url.py
new file mode 100644
index 0000000000..be69baf696
--- /dev/null
+++ b/contrib/python/hyperlink/py2/hyperlink/_url.py
@@ -0,0 +1,2448 @@
+# -*- coding: utf-8 -*-
+u"""Hyperlink provides Pythonic URL parsing, construction, and rendering.
+
+Usage is straightforward::
+
+ >>> import hyperlink
+ >>> url = hyperlink.parse(u'http://github.com/mahmoud/hyperlink?utm_source=docs')
+ >>> url.host
+ u'github.com'
+ >>> secure_url = url.replace(scheme=u'https')
+ >>> secure_url.get('utm_source')[0]
+ u'docs'
+
+Hyperlink's API centers on the :class:`DecodedURL` type, which wraps
+the lower-level :class:`URL`, both of which can be returned by the
+:func:`parse()` convenience function.
+
+""" # noqa: E501
+
+import re
+import sys
+import string
+import socket
+from socket import AF_INET, AF_INET6
+
+try:
+ from socket import AddressFamily
+except ImportError:
+ AddressFamily = int # type: ignore[assignment,misc]
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ Mapping,
+ Optional,
+ Sequence,
+ Text,
+ Tuple,
+ Type,
+ TypeVar,
+ Union,
+ cast,
+)
+from unicodedata import normalize
+from ._socket import inet_pton
+
+try:
+ from collections.abc import Mapping as MappingABC
+except ImportError: # Python 2
+ from collections import Mapping as MappingABC
+
+from idna import encode as idna_encode, decode as idna_decode
+
+
+PY2 = sys.version_info[0] == 2
+try:
+ unichr
+except NameError: # Py3
+ unichr = chr # type: Callable[[int], Text]
+NoneType = type(None) # type: Type[None]
+QueryPairs = Tuple[Tuple[Text, Optional[Text]], ...] # internal representation
+QueryParameters = Union[
+ Mapping[Text, Optional[Text]],
+ QueryPairs,
+ Sequence[Tuple[Text, Optional[Text]]],
+]
+T = TypeVar("T")
+
+
+# from boltons.typeutils
+def make_sentinel(name="_MISSING", var_name=""):
+ # type: (str, str) -> object
+ """Creates and returns a new **instance** of a new class, suitable for
+ usage as a "sentinel", a kind of singleton often used to indicate
+ a value is missing when ``None`` is a valid input.
+
+ Args:
+ name: Name of the Sentinel
+ var_name: Set this name to the name of the variable in its respective
+            module to enable pickle-ability.
+
+ >>> make_sentinel(var_name='_MISSING')
+ _MISSING
+
+ The most common use cases here in boltons are as default values
+ for optional function arguments, partly because of its
+ less-confusing appearance in automatically generated
+ documentation. Sentinels also function well as placeholders in queues
+ and linked lists.
+
+ .. note::
+
+ By design, additional calls to ``make_sentinel`` with the same
+ values will not produce equivalent objects.
+
+ >>> make_sentinel('TEST') == make_sentinel('TEST')
+ False
+ >>> type(make_sentinel('TEST')) == type(make_sentinel('TEST'))
+ False
+ """
+
+ class Sentinel(object):
+ def __init__(self):
+ # type: () -> None
+ self.name = name
+ self.var_name = var_name
+
+ def __repr__(self):
+ # type: () -> str
+ if self.var_name:
+ return self.var_name
+ return "%s(%r)" % (self.__class__.__name__, self.name)
+
+ if var_name:
+ # superclass type hints don't allow str return type, but it is
+ # allowed in the docs, hence the ignore[override] below
+ def __reduce__(self):
+ # type: () -> str
+ return self.var_name
+
+ def __nonzero__(self):
+ # type: () -> bool
+ return False
+
+ __bool__ = __nonzero__
+
+ return Sentinel()
+
+
+_unspecified = _UNSET = make_sentinel("_UNSET") # type: Any
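The `_UNSET` sentinel created above is how the rest of the module tells "argument omitted" apart from an explicit `None`. A minimal sketch of that pattern (illustrative only; `lookup` is a made-up function, not part of the diff):

```python
from hyperlink._url import make_sentinel

_MISSING = make_sentinel(var_name="_MISSING")

def lookup(table, key, default=_MISSING):
    # None is a legitimate stored value, so it cannot double as "no default".
    if key in table:
        return table[key]
    if default is _MISSING:
        raise KeyError(key)
    return default

assert lookup({"a": None}, "a") is None        # stored None comes back as-is
assert lookup({"a": None}, "b", default=7) == 7
```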
+
+
+# RFC 3986 Section 2.3, Unreserved URI Characters
+# https://tools.ietf.org/html/rfc3986#section-2.3
+_UNRESERVED_CHARS = frozenset(
+ "~-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
+)
+
+
+# URL parsing regex (based on RFC 3986 Appendix B, with modifications)
+_URL_RE = re.compile(
+ r"^((?P<scheme>[^:/?#]+):)?"
+ r"((?P<_netloc_sep>//)"
+ r"(?P<authority>[^/?#]*))?"
+ r"(?P<path>[^?#]*)"
+ r"(\?(?P<query>[^#]*))?"
+ r"(#(?P<fragment>.*))?$"
+)
+_SCHEME_RE = re.compile(r"^[a-zA-Z0-9+-.]*$")
+_AUTHORITY_RE = re.compile(
+ r"^(?:(?P<userinfo>[^@/?#]*)@)?"
+ r"(?P<host>"
+ r"(?:\[(?P<ipv6_host>[^[\]/?#]*)\])"
+ r"|(?P<plain_host>[^:/?#[\]]*)"
+ r"|(?P<bad_host>.*?))?"
+ r"(?::(?P<port>.*))?$"
+)
+
+
+_HEX_CHAR_MAP = dict(
+ [
+ ((a + b).encode("ascii"), unichr(int(a + b, 16)).encode("charmap"))
+ for a in string.hexdigits
+ for b in string.hexdigits
+ ]
+)
+_ASCII_RE = re.compile("([\x00-\x7f]+)")
+
+# RFC 3986 section 2.2, Reserved Characters
+# https://tools.ietf.org/html/rfc3986#section-2.2
+_GEN_DELIMS = frozenset(u":/?#[]@")
+_SUB_DELIMS = frozenset(u"!$&'()*+,;=")
+_ALL_DELIMS = _GEN_DELIMS | _SUB_DELIMS
+
+_USERINFO_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS | set(u"%")
+_USERINFO_DELIMS = _ALL_DELIMS - _USERINFO_SAFE
+_PATH_SAFE = _USERINFO_SAFE | set(u":@")
+_PATH_DELIMS = _ALL_DELIMS - _PATH_SAFE
+_SCHEMELESS_PATH_SAFE = _PATH_SAFE - set(":")
+_SCHEMELESS_PATH_DELIMS = _ALL_DELIMS - _SCHEMELESS_PATH_SAFE
+_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u"/?")
+_FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE
+_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u"&")
+_QUERY_VALUE_DELIMS = _ALL_DELIMS - _QUERY_VALUE_SAFE
+_QUERY_KEY_SAFE = _UNRESERVED_CHARS | _QUERY_VALUE_SAFE - set(u"=")
+_QUERY_KEY_DELIMS = _ALL_DELIMS - _QUERY_KEY_SAFE
+
+
+def _make_decode_map(delims, allow_percent=False):
+ # type: (Iterable[Text], bool) -> Mapping[bytes, bytes]
+ ret = dict(_HEX_CHAR_MAP)
+ if not allow_percent:
+ delims = set(delims) | set([u"%"])
+ for delim in delims:
+ _hexord = "{0:02X}".format(ord(delim)).encode("ascii")
+ _hexord_lower = _hexord.lower()
+ ret.pop(_hexord)
+ if _hexord != _hexord_lower:
+ ret.pop(_hexord_lower)
+ return ret
+
+
+def _make_quote_map(safe_chars):
+ # type: (Iterable[Text]) -> Mapping[Union[int, Text], Text]
+ ret = {} # type: Dict[Union[int, Text], Text]
+ # v is included in the dict for py3 mostly, because bytestrings
+ # are iterables of ints, of course!
+ for i, v in zip(range(256), range(256)):
+ c = chr(v)
+ if c in safe_chars:
+ ret[c] = ret[v] = c
+ else:
+ ret[c] = ret[v] = "%{0:02X}".format(i)
+ return ret
+
+
+_USERINFO_PART_QUOTE_MAP = _make_quote_map(_USERINFO_SAFE)
+_USERINFO_DECODE_MAP = _make_decode_map(_USERINFO_DELIMS)
+_PATH_PART_QUOTE_MAP = _make_quote_map(_PATH_SAFE)
+_SCHEMELESS_PATH_PART_QUOTE_MAP = _make_quote_map(_SCHEMELESS_PATH_SAFE)
+_PATH_DECODE_MAP = _make_decode_map(_PATH_DELIMS)
+_QUERY_KEY_QUOTE_MAP = _make_quote_map(_QUERY_KEY_SAFE)
+_QUERY_KEY_DECODE_MAP = _make_decode_map(_QUERY_KEY_DELIMS)
+_QUERY_VALUE_QUOTE_MAP = _make_quote_map(_QUERY_VALUE_SAFE)
+_QUERY_VALUE_DECODE_MAP = _make_decode_map(_QUERY_VALUE_DELIMS)
+_FRAGMENT_QUOTE_MAP = _make_quote_map(_FRAGMENT_SAFE)
+_FRAGMENT_DECODE_MAP = _make_decode_map(_FRAGMENT_DELIMS)
+_UNRESERVED_QUOTE_MAP = _make_quote_map(_UNRESERVED_CHARS)
+_UNRESERVED_DECODE_MAP = dict(
+ [
+ (k, v)
+ for k, v in _HEX_CHAR_MAP.items()
+ if v.decode("ascii", "replace") in _UNRESERVED_CHARS
+ ]
+)
+
+_ROOT_PATHS = frozenset(((), (u"",)))
+
+
+def _encode_reserved(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """A very comprehensive percent encoding for encoding all
+ delimiters. Used for arguments to DecodedURL, where a % means a
+ percent sign, and not the character used by URLs for escaping
+ bytes.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_UNRESERVED_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _UNRESERVED_QUOTE_MAP[t] if t in _UNRESERVED_CHARS else t
+ for t in text
+ ]
+ )
+
+
+def _encode_path_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ "Percent-encode a single segment of a URL path."
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_PATH_PART_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [_PATH_PART_QUOTE_MAP[t] if t in _PATH_DELIMS else t for t in text]
+ )
+
+
+def _encode_schemeless_path_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """Percent-encode the first segment of a URL path for a URL without a
+ scheme specified.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_SCHEMELESS_PATH_PART_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _SCHEMELESS_PATH_PART_QUOTE_MAP[t]
+ if t in _SCHEMELESS_PATH_DELIMS
+ else t
+ for t in text
+ ]
+ )
+
+
+def _encode_path_parts(
+ text_parts, # type: Sequence[Text]
+ rooted=False, # type: bool
+ has_scheme=True, # type: bool
+ has_authority=True, # type: bool
+ maximal=True, # type: bool
+):
+ # type: (...) -> Sequence[Text]
+ """
+ Percent-encode a tuple of path parts into a complete path.
+
+ Setting *maximal* to False percent-encodes only the reserved
+ characters that are syntactically necessary for serialization,
+ preserving any IRI-style textual data.
+
+ Leaving *maximal* set to its default True percent-encodes
+ everything required to convert a portion of an IRI to a portion of
+ a URI.
+
+ RFC 3986 3.3:
+
+ If a URI contains an authority component, then the path component
+ must either be empty or begin with a slash ("/") character. If a URI
+ does not contain an authority component, then the path cannot begin
+ with two slash characters ("//"). In addition, a URI reference
+ (Section 4.1) may be a relative-path reference, in which case the
+ first path segment cannot contain a colon (":") character.
+ """
+ if not text_parts:
+ return ()
+ if rooted:
+ text_parts = (u"",) + tuple(text_parts)
+ # elif has_authority and text_parts:
+ # raise Exception('see rfc above') # TODO: too late to fail like this?
+ encoded_parts = [] # type: List[Text]
+ if has_scheme:
+ encoded_parts = [
+ _encode_path_part(part, maximal=maximal) if part else part
+ for part in text_parts
+ ]
+ else:
+ encoded_parts = [_encode_schemeless_path_part(text_parts[0])]
+ encoded_parts.extend(
+ [
+ _encode_path_part(part, maximal=maximal) if part else part
+ for part in text_parts[1:]
+ ]
+ )
+ return tuple(encoded_parts)
+
+
+def _encode_query_key(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """
+ Percent-encode a single query string key or value.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_QUERY_KEY_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [_QUERY_KEY_QUOTE_MAP[t] if t in _QUERY_KEY_DELIMS else t for t in text]
+ )
+
+
+def _encode_query_value(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """
+ Percent-encode a single query string key or value.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_QUERY_VALUE_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _QUERY_VALUE_QUOTE_MAP[t] if t in _QUERY_VALUE_DELIMS else t
+ for t in text
+ ]
+ )
+
+
+def _encode_fragment_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """Quote the fragment part of the URL. Fragments don't have
+ subdelimiters, so the whole URL fragment can be passed.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_FRAGMENT_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [_FRAGMENT_QUOTE_MAP[t] if t in _FRAGMENT_DELIMS else t for t in text]
+ )
+
+
+def _encode_userinfo_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """Quote special characters in either the username or password
+ section of the URL.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_USERINFO_PART_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _USERINFO_PART_QUOTE_MAP[t] if t in _USERINFO_DELIMS else t
+ for t in text
+ ]
+ )
+
+
+# This port list painstakingly curated by hand searching through
+# https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
+# and
+# https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml
+SCHEME_PORT_MAP = {
+ "acap": 674,
+ "afp": 548,
+ "dict": 2628,
+ "dns": 53,
+ "file": None,
+ "ftp": 21,
+ "git": 9418,
+ "gopher": 70,
+ "http": 80,
+ "https": 443,
+ "imap": 143,
+ "ipp": 631,
+ "ipps": 631,
+ "irc": 194,
+ "ircs": 6697,
+ "ldap": 389,
+ "ldaps": 636,
+ "mms": 1755,
+ "msrp": 2855,
+ "msrps": None,
+ "mtqp": 1038,
+ "nfs": 111,
+ "nntp": 119,
+ "nntps": 563,
+ "pop": 110,
+ "prospero": 1525,
+ "redis": 6379,
+ "rsync": 873,
+ "rtsp": 554,
+ "rtsps": 322,
+ "rtspu": 5005,
+ "sftp": 22,
+ "smb": 445,
+ "snmp": 161,
+ "ssh": 22,
+ "steam": None,
+ "svn": 3690,
+ "telnet": 23,
+ "ventrilo": 3784,
+ "vnc": 5900,
+ "wais": 210,
+ "ws": 80,
+ "wss": 443,
+ "xmpp": None,
+}
+
+# This list of schemes that don't use authorities is also from the link above.
+NO_NETLOC_SCHEMES = set(
+ [
+ "urn",
+ "about",
+ "bitcoin",
+ "blob",
+ "data",
+ "geo",
+ "magnet",
+ "mailto",
+ "news",
+ "pkcs11",
+ "sip",
+ "sips",
+ "tel",
+ ]
+)
+# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
+
+NO_QUERY_PLUS_SCHEMES = set()
+
+
+def register_scheme(
+ text, uses_netloc=True, default_port=None, query_plus_is_space=True
+):
+ # type: (Text, bool, Optional[int], bool) -> None
+ """Registers new scheme information, resulting in correct port and
+ slash behavior from the URL object. There are dozens of standard
+ schemes preregistered, so this function is mostly meant for
+ proprietary internal customizations or stopgaps on missing
+ standards information. If a scheme seems to be missing, please
+ `file an issue`_!
+
+ Args:
+ text: A string representation of the scheme.
+ (the 'http' in 'http://hatnote.com')
+ uses_netloc: Does the scheme support specifying a
+ network host? For instance, "http" does, "mailto" does
+ not. Defaults to True.
+ default_port: The default port, if any, for
+ netloc-using schemes.
+ query_plus_is_space: If true, a "+" in the query string should be
+ decoded as a space by DecodedURL.
+
+ .. _file an issue: https://github.com/mahmoud/hyperlink/issues
+ """
+ text = text.lower()
+ if default_port is not None:
+ try:
+ default_port = int(default_port)
+ except (ValueError, TypeError):
+ raise ValueError(
+ "default_port expected integer or None, not %r"
+ % (default_port,)
+ )
+
+ if uses_netloc is True:
+ SCHEME_PORT_MAP[text] = default_port
+ elif uses_netloc is False:
+ if default_port is not None:
+ raise ValueError(
+ "unexpected default port while specifying"
+ " non-netloc scheme: %r" % default_port
+ )
+ NO_NETLOC_SCHEMES.add(text)
+ else:
+ raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc)
+
+ if not query_plus_is_space:
+ NO_QUERY_PLUS_SCHEMES.add(text)
+
+ return
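A hedged usage sketch of the registration API defined above (the `example-rpc` scheme name and port are invented purely for illustration):

```python
from hyperlink import URL, register_scheme

register_scheme(u"example-rpc", uses_netloc=True, default_port=7777)

url = URL.from_text(u"example-rpc://node01/rpc")
assert url.port == 7777  # the registered default port is filled in
```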
+
+
+def scheme_uses_netloc(scheme, default=None):
+ # type: (Text, Optional[bool]) -> Optional[bool]
+ """Whether or not a URL uses :code:`:` or :code:`://` to separate the
+ scheme from the rest of the URL depends on the scheme's own
+ standard definition. There is no way to infer this behavior
+ from other parts of the URL. A scheme either supports network
+ locations or it does not.
+
+ The URL type's approach to this is to check for explicitly
+ registered schemes, with common schemes like HTTP
+ preregistered. This is the same approach taken by
+ :mod:`urlparse`.
+
+ URL adds two additional heuristics if the scheme as a whole is
+ not registered. First, it attempts to check the subpart of the
+ scheme after the last ``+`` character. This adds intuitive
+ behavior for schemes like ``git+ssh``. Second, if a URL with
+ an unrecognized scheme is loaded, it will maintain the
+ separator it sees.
+ """
+ if not scheme:
+ return False
+ scheme = scheme.lower()
+ if scheme in SCHEME_PORT_MAP:
+ return True
+ if scheme in NO_NETLOC_SCHEMES:
+ return False
+ if scheme.split("+")[-1] in SCHEME_PORT_MAP:
+ return True
+ return default
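The heuristics described in the docstring, shown concretely (a sketch assuming the module is importable as `hyperlink._url`; `example-xyz` is a made-up, unregistered scheme):

```python
from hyperlink._url import scheme_uses_netloc

assert scheme_uses_netloc(u"http") is True         # registered netloc scheme
assert scheme_uses_netloc(u"mailto") is False      # registered non-netloc scheme
assert scheme_uses_netloc(u"git+ssh") is True      # falls back to the "ssh" suffix
assert scheme_uses_netloc(u"example-xyz") is None  # unknown: caller's default wins
```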
+
+
+class URLParseError(ValueError):
+ """Exception inheriting from :exc:`ValueError`, raised when failing to
+ parse a URL. Mostly raised on invalid ports and IPv6 addresses.
+ """
+
+ pass
+
+
+def _optional(argument, default):
+ # type: (Any, Any) -> Any
+ if argument is _UNSET:
+ return default
+ else:
+ return argument
+
+
+def _typecheck(name, value, *types):
+ # type: (Text, T, Type[Any]) -> T
+ """
+ Check that the given *value* is one of the given *types*, or raise an
+ exception describing the problem using *name*.
+ """
+ if not types:
+ raise ValueError("expected one or more types, maybe use _textcheck?")
+ if not isinstance(value, types):
+ raise TypeError(
+ "expected %s for %s, got %r"
+ % (" or ".join([t.__name__ for t in types]), name, value)
+ )
+ return value
+
+
+def _textcheck(name, value, delims=frozenset(), nullable=False):
+ # type: (Text, T, Iterable[Text], bool) -> T
+ if not isinstance(value, Text):
+ if nullable and value is None:
+ # used by query string values
+ return value # type: ignore[unreachable]
+ else:
+ str_name = "unicode" if PY2 else "str"
+ exp = str_name + " or NoneType" if nullable else str_name
+ raise TypeError("expected %s for %s, got %r" % (exp, name, value))
+ if delims and set(value) & set(delims): # TODO: test caching into regexes
+ raise ValueError(
+ "one or more reserved delimiters %s present in %s: %r"
+ % ("".join(delims), name, value)
+ )
+ return value # type: ignore[return-value] # T vs. Text
+
+
+def iter_pairs(iterable):
+ # type: (Iterable[Any]) -> Iterator[Any]
+ """
+ Iterate over the (key, value) pairs in ``iterable``.
+
+ This handles dictionaries sensibly, and falls back to assuming the
+ iterable yields (key, value) pairs. This behaviour is similar to
+ what Python's ``dict()`` constructor does.
+ """
+ if isinstance(iterable, MappingABC):
+ iterable = iterable.items()
+ return iter(iterable)
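A quick illustration of the normalization `iter_pairs` performs (not part of the diff):

```python
from hyperlink._url import iter_pairs

assert list(iter_pairs({u"a": u"1"})) == [(u"a", u"1")]
assert list(iter_pairs([(u"a", u"1"), (u"b", None)])) == [(u"a", u"1"), (u"b", None)]
```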
+
+
+def _decode_unreserved(text, normalize_case=False, encode_stray_percents=False):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_UNRESERVED_DECODE_MAP,
+ )
+
+
+def _decode_userinfo_part(
+ text, normalize_case=False, encode_stray_percents=False
+):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_USERINFO_DECODE_MAP,
+ )
+
+
+def _decode_path_part(text, normalize_case=False, encode_stray_percents=False):
+ # type: (Text, bool, bool) -> Text
+ """
+ >>> _decode_path_part(u'%61%77%2f%7a')
+ u'aw%2fz'
+ >>> _decode_path_part(u'%61%77%2f%7a', normalize_case=True)
+ u'aw%2Fz'
+ """
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_PATH_DECODE_MAP,
+ )
+
+
+def _decode_query_key(text, normalize_case=False, encode_stray_percents=False):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_QUERY_KEY_DECODE_MAP,
+ )
+
+
+def _decode_query_value(
+ text, normalize_case=False, encode_stray_percents=False
+):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_QUERY_VALUE_DECODE_MAP,
+ )
+
+
+def _decode_fragment_part(
+ text, normalize_case=False, encode_stray_percents=False
+):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_FRAGMENT_DECODE_MAP,
+ )
+
+
+def _percent_decode(
+ text, # type: Text
+ normalize_case=False, # type: bool
+ subencoding="utf-8", # type: Text
+ raise_subencoding_exc=False, # type: bool
+ encode_stray_percents=False, # type: bool
+ _decode_map=_HEX_CHAR_MAP, # type: Mapping[bytes, bytes]
+):
+ # type: (...) -> Text
+ """Convert percent-encoded text characters to their normal,
+ human-readable equivalents.
+
+ All characters in the input text must be encodable by
+ *subencoding*. All special characters underlying the values in the
+ percent-encoding must be decodable as *subencoding*. If a
+ non-*subencoding*-valid string is passed, the original text is
+ returned with no changes applied.
+
+ Only called by field-tailored variants, e.g.,
+ :func:`_decode_path_part`, as every percent-encodable part of the
+ URL has characters which should not be percent decoded.
+
+ >>> _percent_decode(u'abc%20def')
+ u'abc def'
+
+ Args:
+ text: Text with percent-encoding present.
+ normalize_case: Whether undecoded percent segments, such as encoded
+ delimiters, should be uppercased, per RFC 3986 Section 2.1.
+ See :func:`_decode_path_part` for an example.
+ subencoding: The name of the encoding underlying the percent-encoding.
+ raise_subencoding_exc: Whether an error in decoding the bytes
+ underlying the percent-decoding should be raised.
+
+ Returns:
+ Text: The percent-decoded version of *text*, decoded by *subencoding*.
+ """
+ try:
+ quoted_bytes = text.encode(subencoding)
+ except UnicodeEncodeError:
+ return text
+
+ bits = quoted_bytes.split(b"%")
+ if len(bits) == 1:
+ return text
+
+ res = [bits[0]]
+ append = res.append
+
+ for item in bits[1:]:
+ hexpair, rest = item[:2], item[2:]
+ try:
+ append(_decode_map[hexpair])
+ append(rest)
+ except KeyError:
+ pair_is_hex = hexpair in _HEX_CHAR_MAP
+ if pair_is_hex or not encode_stray_percents:
+ append(b"%")
+ else:
+ # if it's undecodable, treat as a real percent sign,
+ # which is reserved (because it wasn't in the
+ # context-aware _decode_map passed in), and should
+ # stay in an encoded state.
+ append(b"%25")
+ if normalize_case and pair_is_hex:
+ append(hexpair.upper())
+ append(rest)
+ else:
+ append(item)
+
+ unquoted_bytes = b"".join(res)
+
+ try:
+ return unquoted_bytes.decode(subencoding)
+ except UnicodeDecodeError:
+ if raise_subencoding_exc:
+ raise
+ return text
+
+
+def _decode_host(host):
+ # type: (Text) -> Text
+ """Decode a host from ASCII-encodable text to IDNA-decoded text. If
+ the host text is not ASCII, it is returned unchanged, as it is
+ presumed that it is already IDNA-decoded.
+
+ Some technical details: _decode_host is built on top of the "idna"
+ package, which has some quirks:
+
+ Capital letters are not valid IDNA2008. The idna package will
+ raise an exception like this on capital letters:
+
+ > idna.core.InvalidCodepoint: Codepoint U+004B at position 1 ... not allowed
+
+ However, if a segment of a host (i.e., something in
+ url.host.split('.')) is already ASCII, idna doesn't perform its
+ usual checks. In fact, for capital letters it automatically
+ lowercases them.
+
+ This check and some other functionality can be bypassed by passing
+ uts46=True to idna.encode/decode. This allows a more permissive and
+ convenient interface. So far it seems like the balanced approach.
+
+ Example output (from idna==2.6):
+
+ >> idna.encode(u'mahmöud.io')
+ 'xn--mahmud-zxa.io'
+ >> idna.encode(u'Mahmöud.io')
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 355, in encode
+ result.append(alabel(label))
+ File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 276, in alabel
+ check_label(label)
+ File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 253, in check_label
+ raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
+ idna.core.InvalidCodepoint: Codepoint U+004D at position 1 of u'Mahm\xf6ud' not allowed
+ >> idna.encode(u'Mahmoud.io')
+ 'Mahmoud.io'
+
+ # Similar behavior for decodes below
+ >> idna.decode(u'Mahmoud.io')
+    u'mahmoud.io'
+ >> idna.decode(u'Méhmoud.io', uts46=True)
+ u'm\xe9hmoud.io'
+ """ # noqa: E501
+ if not host:
+ return u""
+ try:
+ host_bytes = host.encode("ascii")
+ except UnicodeEncodeError:
+ host_text = host
+ else:
+ try:
+ host_text = idna_decode(host_bytes, uts46=True)
+ except ValueError:
+ # only reached on "narrow" (UCS-2) Python builds <3.4, see #7
+ # NOTE: not going to raise here, because there's no
+ # ambiguity in the IDNA, and the host is still
+ # technically usable
+ host_text = host
+ return host_text
+
+
+def _resolve_dot_segments(path):
+ # type: (Sequence[Text]) -> Sequence[Text]
+ """Normalize the URL path by resolving segments of '.' and '..'. For
+ more details, see `RFC 3986 section 5.2.4, Remove Dot Segments`_.
+
+ Args:
+ path: sequence of path segments in text form
+
+ Returns:
+ A new sequence of path segments with the '.' and '..' elements removed
+ and resolved.
+
+ .. _RFC 3986 section 5.2.4, Remove Dot Segments: https://tools.ietf.org/html/rfc3986#section-5.2.4
+ """ # noqa: E501
+ segs = [] # type: List[Text]
+
+ for seg in path:
+ if seg == u".":
+ pass
+ elif seg == u"..":
+ if segs:
+ segs.pop()
+ else:
+ segs.append(seg)
+
+ if list(path[-1:]) in ([u"."], [u".."]):
+ segs.append(u"")
+
+ return segs
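A short worked example of the dot-segment resolution above (illustrative only, not part of the diff):

```python
from hyperlink._url import _resolve_dot_segments

assert _resolve_dot_segments([u"a", u"b", u"..", u"c"]) == [u"a", u"c"]
# A trailing "." or ".." leaves an empty final segment, preserving the slash:
assert _resolve_dot_segments([u"a", u"b", u"."]) == [u"a", u"b", u""]
```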
+
+
+def parse_host(host):
+ # type: (Text) -> Tuple[Optional[AddressFamily], Text]
+ """Parse the host into a tuple of ``(family, host)``, where family
+ is the appropriate :mod:`socket` module constant when the host is
+ an IP address. Family is ``None`` when the host is not an IP.
+
+ Will raise :class:`URLParseError` on invalid IPv6 constants.
+
+ Returns:
+ family (socket constant or None), host (string)
+
+ >>> import socket
+ >>> parse_host('googlewebsite.com') == (None, 'googlewebsite.com')
+ True
+ >>> parse_host('::1') == (socket.AF_INET6, '::1')
+ True
+ >>> parse_host('192.168.1.1') == (socket.AF_INET, '192.168.1.1')
+ True
+ """
+ if not host:
+ return None, u""
+
+ if u":" in host:
+ try:
+ inet_pton(AF_INET6, host)
+ except socket.error as se:
+ raise URLParseError("invalid IPv6 host: %r (%r)" % (host, se))
+ except UnicodeEncodeError:
+ pass # TODO: this can't be a real host right?
+ else:
+ family = AF_INET6 # type: Optional[AddressFamily]
+ else:
+ try:
+ inet_pton(AF_INET, host)
+ except (socket.error, UnicodeEncodeError):
+ family = None # not an IP
+ else:
+ family = AF_INET
+
+ return family, host
+
+
+class URL(object):
+    r"""From blogs to billboards, URLs are so common that it's easy to
+ overlook their complexity and power. With hyperlink's
+ :class:`URL` type, working with URLs doesn't have to be hard.
+
+ URLs are made of many parts. Most of these parts are officially
+ named in `RFC 3986`_ and this diagram may prove handy in identifying
+ them::
+
+ foo://user:pass@example.com:8042/over/there?name=ferret#nose
+ \_/ \_______/ \_________/ \__/\_________/ \_________/ \__/
+ | | | | | | |
+ scheme userinfo host port path query fragment
+
+ While :meth:`~URL.from_text` is used for parsing whole URLs, the
+ :class:`URL` constructor builds a URL from the individual
+ components, like so::
+
+ >>> from hyperlink import URL
+ >>> url = URL(scheme=u'https', host=u'example.com', path=[u'hello', u'world'])
+ >>> print(url.to_text())
+ https://example.com/hello/world
+
+ The constructor runs basic type checks. All strings are expected
+ to be text (:class:`str` in Python 3, :class:`unicode` in Python 2). All
+ arguments are optional, defaulting to appropriately empty values. A full
+ list of constructor arguments is below.
+
+ Args:
+ scheme: The text name of the scheme.
+ host: The host portion of the network location
+ port: The port part of the network location. If ``None`` or no port is
+ passed, the port will default to the default port of the scheme, if
+ it is known. See the ``SCHEME_PORT_MAP`` and
+            :func:`register_scheme` for more info.
+ path: A tuple of strings representing the slash-separated parts of the
+ path, each percent-encoded.
+        query: The query parameters, as a dictionary or as a sequence of
+ percent-encoded key-value pairs.
+ fragment: The fragment part of the URL.
+ rooted: A rooted URL is one which indicates an absolute path.
+ This is True on any URL that includes a host, or any relative URL
+ that starts with a slash.
+ userinfo: The username or colon-separated username:password pair.
+ uses_netloc: Indicates whether ``://`` (the "netloc separator") will
+ appear to separate the scheme from the *path* in cases where no
+ host is present.
+ Setting this to ``True`` is a non-spec-compliant affordance for the
+ common practice of having URIs that are *not* URLs (cannot have a
+ 'host' part) but nevertheless use the common ``://`` idiom that
+ most people associate with URLs; e.g. ``message:`` URIs like
+ ``message://message-id`` being equivalent to ``message:message-id``.
+ This may be inferred based on the scheme depending on whether
+ :func:`register_scheme` has been used to register the scheme and
+ should not be passed directly unless you know the scheme works like
+ this and you know it has not been registered.
+
+ All of these parts are also exposed as read-only attributes of :class:`URL`
+ instances, along with several useful methods.
+
+ .. _RFC 3986: https://tools.ietf.org/html/rfc3986
+ .. _RFC 3987: https://tools.ietf.org/html/rfc3987
+ """ # noqa: E501
+
+ def __init__(
+ self,
+ scheme=None, # type: Optional[Text]
+ host=None, # type: Optional[Text]
+ path=(), # type: Iterable[Text]
+ query=(), # type: QueryParameters
+ fragment=u"", # type: Text
+ port=None, # type: Optional[int]
+ rooted=None, # type: Optional[bool]
+ userinfo=u"", # type: Text
+ uses_netloc=None, # type: Optional[bool]
+ ):
+ # type: (...) -> None
+ if host is not None and scheme is None:
+ scheme = u"http" # TODO: why
+ if port is None and scheme is not None:
+ port = SCHEME_PORT_MAP.get(scheme)
+ if host and query and not path:
+ # per RFC 3986 6.2.3, "a URI that uses the generic syntax
+ # for authority with an empty path should be normalized to
+ # a path of '/'."
+ path = (u"",)
+
+ # Now that we're done detecting whether they were passed, we can set
+ # them to their defaults:
+ if scheme is None:
+ scheme = u""
+ if host is None:
+ host = u""
+ if rooted is None:
+ rooted = bool(host)
+
+ # Set attributes.
+ self._scheme = _textcheck("scheme", scheme)
+ if self._scheme:
+ if not _SCHEME_RE.match(self._scheme):
+ raise ValueError(
+ 'invalid scheme: %r. Only alphanumeric, "+",'
+                    ' "-", and "." allowed. Did you mean to call'
+ " %s.from_text()?" % (self._scheme, self.__class__.__name__)
+ )
+
+ _, self._host = parse_host(_textcheck("host", host, "/?#@"))
+ if isinstance(path, Text):
+ raise TypeError(
+ "expected iterable of text for path, not: %r" % (path,)
+ )
+ self._path = tuple(
+ (_textcheck("path segment", segment, "/?#") for segment in path)
+ )
+ self._query = tuple(
+ (
+ _textcheck("query parameter name", k, "&=#"),
+ _textcheck("query parameter value", v, "&#", nullable=True),
+ )
+ for k, v in iter_pairs(query)
+ )
+ self._fragment = _textcheck("fragment", fragment)
+ self._port = _typecheck("port", port, int, NoneType)
+ self._rooted = _typecheck("rooted", rooted, bool)
+ self._userinfo = _textcheck("userinfo", userinfo, "/?#@")
+
+ if uses_netloc is None:
+ uses_netloc = scheme_uses_netloc(self._scheme, uses_netloc)
+ self._uses_netloc = _typecheck(
+ "uses_netloc", uses_netloc, bool, NoneType
+ )
+ will_have_authority = self._host or (
+ self._port and self._port != SCHEME_PORT_MAP.get(scheme)
+ )
+ if will_have_authority:
+ # fixup for rooted consistency; if there's any 'authority'
+ # represented in the textual URL, then the path must be rooted, and
+ # we're definitely using a netloc (there must be a ://).
+ self._rooted = True
+ self._uses_netloc = True
+ if (not self._rooted) and self.path[:1] == (u"",):
+ self._rooted = True
+ self._path = self._path[1:]
+ if not will_have_authority and self._path and not self._rooted:
+ # If, after fixing up the path, there *is* a path and it *isn't*
+ # rooted, then we are definitely not using a netloc; if we did, it
+ # would make the path (erroneously) look like a hostname.
+ self._uses_netloc = False
+
+ def get_decoded_url(self, lazy=False):
+ # type: (bool) -> DecodedURL
+ try:
+ return self._decoded_url
+ except AttributeError:
+ self._decoded_url = DecodedURL(self, lazy=lazy) # type: DecodedURL
+ return self._decoded_url
+
+ @property
+ def scheme(self):
+ # type: () -> Text
+ """The scheme is a string, and the first part of an absolute URL, the
+ part before the first colon, and the part which defines the
+ semantics of the rest of the URL. Examples include "http",
+ "https", "ssh", "file", "mailto", and many others. See
+ :func:`~hyperlink.register_scheme()` for more info.
+ """
+ return self._scheme
+
+ @property
+ def host(self):
+ # type: () -> Text
+ """The host is a string, and the second standard part of an absolute
+ URL. When present, a valid host must be a domain name, or an
+ IP (v4 or v6). It occurs before the first slash, or the second
+ colon, if a :attr:`~hyperlink.URL.port` is provided.
+ """
+ return self._host
+
+ @property
+ def port(self):
+ # type: () -> Optional[int]
+ """The port is an integer that is commonly used in connecting to the
+ :attr:`host`, and almost never appears without it.
+
+ When not present in the original URL, this attribute defaults
+ to the scheme's default port. If the scheme's default port is
+ not known, and the port is not provided, this attribute will
+ be set to None.
+
+ >>> URL.from_text(u'http://example.com/pa/th').port
+ 80
+ >>> URL.from_text(u'foo://example.com/pa/th').port
+ >>> URL.from_text(u'foo://example.com:8042/pa/th').port
+ 8042
+
+ .. note::
+
+            Per the standard, when the port is the same as the scheme's
+ default port, it will be omitted in the text URL.
+ """
+ return self._port
+
+ @property
+ def path(self):
+ # type: () -> Sequence[Text]
+ """A tuple of strings, created by splitting the slash-separated
+ hierarchical path. Started by the first slash after the host,
+ terminated by a "?", which indicates the start of the
+ :attr:`~hyperlink.URL.query` string.
+ """
+ return self._path
+
+ @property
+ def query(self):
+ # type: () -> QueryPairs
+ """Tuple of pairs, created by splitting the ampersand-separated
+ mapping of keys and optional values representing
+ non-hierarchical data used to identify the resource. Keys are
+ always strings. Values are strings when present, or None when
+ missing.
+
+ For more operations on the mapping, see
+ :meth:`~hyperlink.URL.get()`, :meth:`~hyperlink.URL.add()`,
+ :meth:`~hyperlink.URL.set()`, and
+ :meth:`~hyperlink.URL.delete()`.
+ """
+ return self._query
+
+ @property
+ def fragment(self):
+ # type: () -> Text
+ """A string, the last part of the URL, indicated by the first "#"
+ after the :attr:`~hyperlink.URL.path` or
+ :attr:`~hyperlink.URL.query`. Enables indirect identification
+ of a secondary resource, like an anchor within an HTML page.
+ """
+ return self._fragment
+
+ @property
+ def rooted(self):
+ # type: () -> bool
+ """Whether or not the path starts with a forward slash (``/``).
+
+ This is taken from the terminology in the BNF grammar,
+        specifically the "path-rootless" rule, since "absolute path"
+ and "absolute URI" are somewhat ambiguous. :attr:`path` does
+ not contain the implicit prefixed ``"/"`` since that is
+ somewhat awkward to work with.
+ """
+ return self._rooted
+
+ @property
+ def userinfo(self):
+ # type: () -> Text
+ """The colon-separated string forming the username-password
+ combination.
+ """
+ return self._userinfo
+
+ @property
+ def uses_netloc(self):
+ # type: () -> Optional[bool]
+ """
+ Indicates whether ``://`` (the "netloc separator") will appear to
+ separate the scheme from the *path* in cases where no host is present.
+ """
+ return self._uses_netloc
+
+ @property
+ def user(self):
+ # type: () -> Text
+ """
+ The user portion of :attr:`~hyperlink.URL.userinfo`.
+ """
+ return self.userinfo.split(u":")[0]
+
+ def authority(self, with_password=False, **kw):
+ # type: (bool, Any) -> Text
+ """Compute and return the appropriate host/port/userinfo combination.
+
+ >>> url = URL.from_text(u'http://user:pass@localhost:8080/a/b?x=y')
+ >>> url.authority()
+ u'user:@localhost:8080'
+ >>> url.authority(with_password=True)
+ u'user:pass@localhost:8080'
+
+ Args:
+ with_password: Whether the return value of this method include the
+ password in the URL, if it is set.
+ Defaults to False.
+
+ Returns:
+ Text: The authority (network location and user information) portion
+ of the URL.
+ """
+ # first, a bit of twisted compat
+ with_password = kw.pop("includeSecrets", with_password)
+ if kw:
+ raise TypeError("got unexpected keyword arguments: %r" % kw.keys())
+ host = self.host
+ if ":" in host:
+ hostport = ["[" + host + "]"]
+ else:
+ hostport = [self.host]
+ if self.port != SCHEME_PORT_MAP.get(self.scheme):
+ hostport.append(Text(self.port))
+ authority = []
+ if self.userinfo:
+ userinfo = self.userinfo
+ if not with_password and u":" in userinfo:
+ userinfo = userinfo[: userinfo.index(u":") + 1]
+ authority.append(userinfo)
+ authority.append(u":".join(hostport))
+ return u"@".join(authority)
+
+ def __eq__(self, other):
+ # type: (Any) -> bool
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ for attr in [
+ "scheme",
+ "userinfo",
+ "host",
+ "query",
+ "fragment",
+ "port",
+ "uses_netloc",
+ "rooted",
+ ]:
+ if getattr(self, attr) != getattr(other, attr):
+ return False
+ if self.path == other.path or (
+ self.path in _ROOT_PATHS and other.path in _ROOT_PATHS
+ ):
+ return True
+ return False
+
+ def __ne__(self, other):
+ # type: (Any) -> bool
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ return not self.__eq__(other)
+
+ def __hash__(self):
+ # type: () -> int
+ return hash(
+ (
+ self.__class__,
+ self.scheme,
+ self.userinfo,
+ self.host,
+ self.path,
+ self.query,
+ self.fragment,
+ self.port,
+ self.rooted,
+ self.uses_netloc,
+ )
+ )
+
+ @property
+ def absolute(self):
+ # type: () -> bool
+ """Whether or not the URL is "absolute". Absolute URLs are complete
+ enough to resolve to a network resource without being relative
+ to a base URI.
+
+ >>> URL.from_text(u'http://wikipedia.org/').absolute
+ True
+ >>> URL.from_text(u'?a=b&c=d').absolute
+ False
+
+ Absolute URLs must have both a scheme and a host set.
+ """
+ return bool(self.scheme and self.host)
+
+ def replace(
+ self,
+ scheme=_UNSET, # type: Optional[Text]
+ host=_UNSET, # type: Optional[Text]
+ path=_UNSET, # type: Iterable[Text]
+ query=_UNSET, # type: QueryParameters
+ fragment=_UNSET, # type: Text
+ port=_UNSET, # type: Optional[int]
+ rooted=_UNSET, # type: Optional[bool]
+ userinfo=_UNSET, # type: Text
+ uses_netloc=_UNSET, # type: Optional[bool]
+ ):
+ # type: (...) -> URL
+ """:class:`URL` objects are immutable, which means that attributes
+ are designed to be set only once, at construction. Instead of
+ modifying an existing URL, one simply creates a copy with the
+ desired changes.
+
+ If any of the following arguments is omitted, it defaults to
+ the value on the current URL.
+
+ Args:
+ scheme: The text name of the scheme.
+ host: The host portion of the network location.
+ path: A tuple of strings representing the slash-separated parts of
+ the path.
+            query: The query parameters, as a dictionary or as a sequence of
+ key-value pairs.
+ fragment: The fragment part of the URL.
+ port: The port part of the network location.
+ rooted: Whether or not the path begins with a slash.
+ userinfo: The username or colon-separated username:password pair.
+ uses_netloc: Indicates whether ``://`` (the "netloc separator")
+ will appear to separate the scheme from the *path* in cases
+ where no host is present.
+ Setting this to ``True`` is a non-spec-compliant affordance for
+ the common practice of having URIs that are *not* URLs (cannot
+ have a 'host' part) but nevertheless use the common ``://``
+ idiom that most people associate with URLs; e.g. ``message:``
+ URIs like ``message://message-id`` being equivalent to
+ ``message:message-id``.
+ This may be inferred based on the scheme depending on whether
+ :func:`register_scheme` has been used to register the scheme
+ and should not be passed directly unless you know the scheme
+ works like this and you know it has not been registered.
+
+ Returns:
+ URL: A copy of the current :class:`URL`, with new values for
+ parameters passed.
+ """
+ if scheme is not _UNSET and scheme != self.scheme:
+ # when changing schemes, reset the explicit uses_netloc preference
+ # to honor the new scheme.
+ uses_netloc = None
+ return self.__class__(
+ scheme=_optional(scheme, self.scheme),
+ host=_optional(host, self.host),
+ path=_optional(path, self.path),
+ query=_optional(query, self.query),
+ fragment=_optional(fragment, self.fragment),
+ port=_optional(port, self.port),
+ rooted=_optional(rooted, self.rooted),
+ userinfo=_optional(userinfo, self.userinfo),
+ uses_netloc=_optional(uses_netloc, self.uses_netloc),
+ )
+
+ @classmethod
+ def from_text(cls, text):
+ # type: (Text) -> URL
+ """Whereas the :class:`URL` constructor is useful for constructing
+ URLs from parts, :meth:`~URL.from_text` supports parsing whole
+ URLs from their string form::
+
+ >>> URL.from_text(u'http://example.com')
+ URL.from_text(u'http://example.com')
+ >>> URL.from_text(u'?a=b&x=y')
+ URL.from_text(u'?a=b&x=y')
+
+ As you can see above, it's also used as the :func:`repr` of
+ :class:`URL` objects. The natural counterpart to
+ :func:`~URL.to_text()`. This method only accepts *text*, so be
+ sure to decode those bytestrings.
+
+ Args:
+ text: A valid URL string.
+
+ Returns:
+ URL: The structured object version of the parsed string.
+
+ .. note::
+
+ Somewhat unexpectedly, URLs are a far more permissive
+ format than most would assume. Many strings which don't
+ look like URLs are still valid URLs. As a result, this
+ method only raises :class:`URLParseError` on invalid port
+ and IPv6 values in the host portion of the URL.
+ """
+ um = _URL_RE.match(_textcheck("text", text))
+ if um is None:
+ raise URLParseError("could not parse url: %r" % text)
+ gs = um.groupdict()
+
+ au_text = gs["authority"] or u""
+ au_m = _AUTHORITY_RE.match(au_text)
+ if au_m is None:
+ raise URLParseError(
+ "invalid authority %r in url: %r" % (au_text, text)
+ )
+ au_gs = au_m.groupdict()
+ if au_gs["bad_host"]:
+ raise URLParseError(
+ "invalid host %r in url: %r" % (au_gs["bad_host"], text)
+ )
+
+ userinfo = au_gs["userinfo"] or u""
+
+ host = au_gs["ipv6_host"] or au_gs["plain_host"]
+ port = au_gs["port"]
+ if port is not None:
+ try:
+ port = int(port) # type: ignore[assignment] # FIXME, see below
+ except ValueError:
+ if not port: # TODO: excessive?
+ raise URLParseError("port must not be empty: %r" % au_text)
+ raise URLParseError("expected integer for port, not %r" % port)
+
+ scheme = gs["scheme"] or u""
+ fragment = gs["fragment"] or u""
+ uses_netloc = bool(gs["_netloc_sep"])
+
+ if gs["path"]:
+ path = tuple(gs["path"].split(u"/"))
+ if not path[0]:
+ path = path[1:]
+ rooted = True
+ else:
+ rooted = False
+ else:
+ path = ()
+ rooted = bool(au_text)
+ if gs["query"]:
+ query = tuple(
+ (
+ qe.split(u"=", 1) # type: ignore[misc]
+ if u"=" in qe
+ else (qe, None)
+ )
+ for qe in gs["query"].split(u"&")
+ ) # type: QueryPairs
+ else:
+ query = ()
+ return cls(
+ scheme,
+ host,
+ path,
+ query,
+ fragment,
+ port, # type: ignore[arg-type] # FIXME, see above
+ rooted,
+ userinfo,
+ uses_netloc,
+ )
+
+ def normalize(
+ self,
+ scheme=True,
+ host=True,
+ path=True,
+ query=True,
+ fragment=True,
+ userinfo=True,
+ percents=True,
+ ):
+ # type: (bool, bool, bool, bool, bool, bool, bool) -> URL
+ """Return a new URL object with several standard normalizations
+ applied:
+
+ * Decode unreserved characters (`RFC 3986 2.3`_)
+ * Uppercase remaining percent-encoded octets (`RFC 3986 2.1`_)
+ * Convert scheme and host casing to lowercase (`RFC 3986 3.2.2`_)
+ * Resolve any "." and ".." references in the path (`RFC 3986 6.2.2.3`_)
+ * Ensure an ending slash on URLs with an empty path (`RFC 3986 6.2.3`_)
+ * Encode any stray percent signs (`%`) in percent-encoded
+ fields (path, query, fragment, userinfo) (`RFC 3986 2.4`_)
+
+ All are applied by default, but normalizations can be disabled
+ per-part by passing `False` for that part's corresponding
+ name.
+
+ Args:
+ scheme: Convert the scheme to lowercase
+ host: Convert the host to lowercase
+ path: Normalize the path (see above for details)
+ query: Normalize the query string
+ fragment: Normalize the fragment
+ userinfo: Normalize the userinfo
+ percents: Encode isolated percent signs for any percent-encoded
+ fields which are being normalized (defaults to `True`).
+
+ >>> url = URL.from_text(u'Http://example.COM/a/../b/./c%2f?%61%')
+ >>> print(url.normalize().to_text())
+ http://example.com/b/c%2F?a%25
+
+ .. _RFC 3986 3.2.2: https://tools.ietf.org/html/rfc3986#section-3.2.2
+ .. _RFC 3986 2.3: https://tools.ietf.org/html/rfc3986#section-2.3
+ .. _RFC 3986 2.1: https://tools.ietf.org/html/rfc3986#section-2.1
+ .. _RFC 3986 6.2.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.2.3
+ .. _RFC 3986 6.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.3
+ .. _RFC 3986 2.4: https://tools.ietf.org/html/rfc3986#section-2.4
+ """ # noqa: E501
+ kw = {} # type: Dict[str, Any]
+ if scheme:
+ kw["scheme"] = self.scheme.lower()
+ if host:
+ kw["host"] = self.host.lower()
+
+ def _dec_unres(target):
+ # type: (Text) -> Text
+ return _decode_unreserved(
+ target, normalize_case=True, encode_stray_percents=percents
+ )
+
+ if path:
+ if self.path:
+ kw["path"] = [
+ _dec_unres(p) for p in _resolve_dot_segments(self.path)
+ ]
+ else:
+ kw["path"] = (u"",)
+ if query:
+ kw["query"] = [
+ (_dec_unres(k), _dec_unres(v) if v else v)
+ for k, v in self.query
+ ]
+ if fragment:
+ kw["fragment"] = _dec_unres(self.fragment)
+ if userinfo:
+ kw["userinfo"] = u":".join(
+ [_dec_unres(p) for p in self.userinfo.split(":", 1)]
+ )
+
+ return self.replace(**kw)
+
+ def child(self, *segments):
+ # type: (Text) -> URL
+ """Make a new :class:`URL` where the given path segments are a child
+ of this URL, preserving other parts of the URL, including the
+ query string and fragment.
+
+ For example::
+
+ >>> url = URL.from_text(u'http://localhost/a/b?x=y')
+ >>> child_url = url.child(u"c", u"d")
+ >>> child_url.to_text()
+ u'http://localhost/a/b/c/d?x=y'
+
+ Args:
+ segments: Additional parts to be joined and added to the path, like
+ :func:`os.path.join`. Special characters in segments will be
+ percent encoded.
+
+ Returns:
+ URL: A copy of the current URL with the extra path segments.
+ """
+ if not segments:
+ return self
+
+ segments = [ # type: ignore[assignment] # variable is tuple
+ _textcheck("path segment", s) for s in segments
+ ]
+ new_path = tuple(self.path)
+ if self.path and self.path[-1] == u"":
+ new_path = new_path[:-1]
+ new_path += tuple(_encode_path_parts(segments, maximal=False))
+ return self.replace(path=new_path)
+
+ def sibling(self, segment):
+ # type: (Text) -> URL
+ """Make a new :class:`URL` with a single path segment that is a
+ sibling of this URL path.
+
+ Args:
+ segment: A single path segment.
+
+ Returns:
+ URL: A copy of the current URL with the last path segment
+ replaced by *segment*. Special characters such as
+ ``/?#`` will be percent encoded.
+ """
+ _textcheck("path segment", segment)
+ new_path = tuple(self.path)[:-1] + (_encode_path_part(segment),)
+ return self.replace(path=new_path)
+
+ def click(self, href=u""):
+ # type: (Union[Text, URL]) -> URL
+ """Resolve the given URL relative to this URL.
+
+ The resulting URI should match what a web browser would
+ generate if you visited the current URL and clicked on *href*.
+
+ >>> url = URL.from_text(u'http://blog.hatnote.com/')
+ >>> url.click(u'/post/155074058790').to_text()
+ u'http://blog.hatnote.com/post/155074058790'
+ >>> url = URL.from_text(u'http://localhost/a/b/c/')
+ >>> url.click(u'../d/./e').to_text()
+ u'http://localhost/a/b/d/e'
+
+ Args (Text):
+ href: A string representing a clicked URL.
+
+        Returns:
+ A copy of the current URL with navigation logic applied.
+
+ For more information, see `RFC 3986 section 5`_.
+
+ .. _RFC 3986 section 5: https://tools.ietf.org/html/rfc3986#section-5
+ """
+ if href:
+ if isinstance(href, URL):
+ clicked = href
+ else:
+ # TODO: This error message is not completely accurate,
+ # as URL objects are now also valid, but Twisted's
+ # test suite (wrongly) relies on this exact message.
+ _textcheck("relative URL", href)
+ clicked = URL.from_text(href)
+ if clicked.absolute:
+ return clicked
+ else:
+ clicked = self
+
+ query = clicked.query
+ if clicked.scheme and not clicked.rooted:
+ # Schemes with relative paths are not well-defined. RFC 3986 calls
+ # them a "loophole in prior specifications" that should be avoided,
+ # or supported only for backwards compatibility.
+ raise NotImplementedError(
+ "absolute URI with rootless path: %r" % (href,)
+ )
+ else:
+ if clicked.rooted:
+ path = clicked.path
+ elif clicked.path:
+ path = tuple(self.path)[:-1] + tuple(clicked.path)
+ else:
+ path = self.path
+ if not query:
+ query = self.query
+ return self.replace(
+ scheme=clicked.scheme or self.scheme,
+ host=clicked.host or self.host,
+ port=clicked.port or self.port,
+ path=_resolve_dot_segments(path),
+ query=query,
+ fragment=clicked.fragment,
+ )
+
+ def to_uri(self):
+ # type: () -> URL
+ u"""Make a new :class:`URL` instance with all non-ASCII characters
+ appropriately percent-encoded. This is useful to do in preparation
+ for sending a :class:`URL` over a network protocol.
+
+ For example::
+
+ >>> URL.from_text(u'https://ايران.com/foo⇧bar/').to_uri()
+ URL.from_text(u'https://xn--mgba3a4fra.com/foo%E2%87%A7bar/')
+
+ Returns:
+ URL: A new instance with its path segments, query parameters, and
+ hostname encoded, so that they are all in the standard
+ US-ASCII range.
+ """
+ new_userinfo = u":".join(
+ [_encode_userinfo_part(p) for p in self.userinfo.split(":", 1)]
+ )
+ new_path = _encode_path_parts(
+ self.path, has_scheme=bool(self.scheme), rooted=False, maximal=True
+ )
+ new_host = (
+ self.host
+ if not self.host
+ else idna_encode(self.host, uts46=True).decode("ascii")
+ )
+ return self.replace(
+ userinfo=new_userinfo,
+ host=new_host,
+ path=new_path,
+ query=tuple(
+ [
+ (
+ _encode_query_key(k, maximal=True),
+ _encode_query_value(v, maximal=True)
+ if v is not None
+ else None,
+ )
+ for k, v in self.query
+ ]
+ ),
+ fragment=_encode_fragment_part(self.fragment, maximal=True),
+ )
+
+ def to_iri(self):
+ # type: () -> URL
+ u"""Make a new :class:`URL` instance with all but a few reserved
+ characters decoded into human-readable format.
+
+ Percent-encoded Unicode and IDNA-encoded hostnames are
+ decoded, like so::
+
+ >>> url = URL.from_text(u'https://xn--mgba3a4fra.example.com/foo%E2%87%A7bar/')
+ >>> print(url.to_iri().to_text())
+ https://ايران.example.com/foo⇧bar/
+
+ .. note::
+
+ As a general Python issue, "narrow" (UCS-2) builds of
+ Python may not be able to fully decode certain URLs, and
+            in those cases this method will return a best-effort,
+            partially-decoded URL which is still valid. This issue
+            does not affect Python builds 3.4 and later.
+
+ Returns:
+ URL: A new instance with its path segments, query parameters, and
+ hostname decoded for display purposes.
+ """ # noqa: E501
+ new_userinfo = u":".join(
+ [_decode_userinfo_part(p) for p in self.userinfo.split(":", 1)]
+ )
+ host_text = _decode_host(self.host)
+
+ return self.replace(
+ userinfo=new_userinfo,
+ host=host_text,
+ path=[_decode_path_part(segment) for segment in self.path],
+ query=tuple(
+ (
+ _decode_query_key(k),
+ _decode_query_value(v) if v is not None else None,
+ )
+ for k, v in self.query
+ ),
+ fragment=_decode_fragment_part(self.fragment),
+ )
+
+ def to_text(self, with_password=False):
+ # type: (bool) -> Text
+ """Render this URL to its textual representation.
+
+ By default, the URL text will *not* include a password, if one
+ is set. RFC 3986 considers using URLs to represent such
+ sensitive information as deprecated. Quoting from RFC 3986,
+        `section 3.2.1`_:
+
+ "Applications should not render as clear text any data after the
+ first colon (":") character found within a userinfo subcomponent
+ unless the data after the colon is the empty string (indicating no
+ password)."
+
+ Args (bool):
+ with_password: Whether or not to include the password in the URL
+ text. Defaults to False.
+
+ Returns:
+ Text: The serialized textual representation of this URL, such as
+ ``u"http://example.com/some/path?some=query"``.
+
+ The natural counterpart to :class:`URL.from_text()`.
+
+ .. _section 3.2.1: https://tools.ietf.org/html/rfc3986#section-3.2.1
+ """
+ scheme = self.scheme
+ authority = self.authority(with_password)
+ path = "/".join(
+ _encode_path_parts(
+ self.path,
+ rooted=self.rooted,
+ has_scheme=bool(scheme),
+ has_authority=bool(authority),
+ maximal=False,
+ )
+ )
+ query_parts = []
+ for k, v in self.query:
+ if v is None:
+ query_parts.append(_encode_query_key(k, maximal=False))
+ else:
+ query_parts.append(
+ u"=".join(
+ (
+ _encode_query_key(k, maximal=False),
+ _encode_query_value(v, maximal=False),
+ )
+ )
+ )
+ query_string = u"&".join(query_parts)
+
+ fragment = self.fragment
+
+ parts = [] # type: List[Text]
+ _add = parts.append
+ if scheme:
+ _add(scheme)
+ _add(":")
+ if authority:
+ _add("//")
+ _add(authority)
+ elif scheme and path[:2] != "//" and self.uses_netloc:
+ _add("//")
+ if path:
+ if scheme and authority and path[:1] != "/":
+ _add("/") # relpaths with abs authorities auto get '/'
+ _add(path)
+ if query_string:
+ _add("?")
+ _add(query_string)
+ if fragment:
+ _add("#")
+ _add(fragment)
+ return u"".join(parts)
+
+ def __repr__(self):
+ # type: () -> str
+ """Convert this URL to an representation that shows all of its
+ constituent parts, as well as being a valid argument to
+ :func:`eval`.
+ """
+ return "%s.from_text(%r)" % (self.__class__.__name__, self.to_text())
+
+ def _to_bytes(self):
+ # type: () -> bytes
+ """
+ Allows for direct usage of URL objects with libraries like
+ requests, which automatically stringify URL parameters. See
+ issue #49.
+ """
+ return self.to_uri().to_text().encode("ascii")
+
+ if PY2:
+ __str__ = _to_bytes
+ __unicode__ = to_text
+ else:
+ __bytes__ = _to_bytes
+ __str__ = to_text
+
+ # # Begin Twisted Compat Code
+ asURI = to_uri
+ asIRI = to_iri
+
+ @classmethod
+ def fromText(cls, s):
+ # type: (Text) -> URL
+ return cls.from_text(s)
+
+ def asText(self, includeSecrets=False):
+ # type: (bool) -> Text
+ return self.to_text(with_password=includeSecrets)
+
+ def __dir__(self):
+ # type: () -> Sequence[Text]
+ try:
+ ret = object.__dir__(self)
+ except AttributeError:
+ # object.__dir__ == AttributeError # pdw for py2
+ ret = dir(self.__class__) + list(self.__dict__.keys())
+ ret = sorted(set(ret) - set(["fromText", "asURI", "asIRI", "asText"]))
+ return ret
+
+ # # End Twisted Compat Code
+
+ def add(self, name, value=None):
+ # type: (Text, Optional[Text]) -> URL
+ """Make a new :class:`URL` instance with a given query argument,
+ *name*, added to it with the value *value*, like so::
+
+ >>> URL.from_text(u'https://example.com/?x=y').add(u'x')
+ URL.from_text(u'https://example.com/?x=y&x')
+ >>> URL.from_text(u'https://example.com/?x=y').add(u'x', u'z')
+ URL.from_text(u'https://example.com/?x=y&x=z')
+
+ Args:
+ name: The name of the query parameter to add.
+ The part before the ``=``.
+ value: The value of the query parameter to add.
+ The part after the ``=``.
+ Defaults to ``None``, meaning no value.
+
+ Returns:
+ URL: A new :class:`URL` instance with the parameter added.
+ """
+ return self.replace(query=self.query + ((name, value),))
+
+ def set(self, name, value=None):
+ # type: (Text, Optional[Text]) -> URL
+ """Make a new :class:`URL` instance with the query parameter *name*
+        set to *value*. All existing occurrences, if any, are replaced
+        by the single name-value pair.
+
+ >>> URL.from_text(u'https://example.com/?x=y').set(u'x')
+ URL.from_text(u'https://example.com/?x')
+ >>> URL.from_text(u'https://example.com/?x=y').set(u'x', u'z')
+ URL.from_text(u'https://example.com/?x=z')
+
+ Args:
+ name: The name of the query parameter to set.
+ The part before the ``=``.
+ value: The value of the query parameter to set.
+ The part after the ``=``.
+ Defaults to ``None``, meaning no value.
+
+ Returns:
+ URL: A new :class:`URL` instance with the parameter set.
+ """
+ # Preserve the original position of the query key in the list
+ q = [(k, v) for (k, v) in self.query if k != name]
+ idx = next(
+ (i for (i, (k, v)) in enumerate(self.query) if k == name), -1
+ )
+ q[idx:idx] = [(name, value)]
+ return self.replace(query=q)
+
+ def get(self, name):
+ # type: (Text) -> List[Optional[Text]]
+ """Get a list of values for the given query parameter, *name*::
+
+ >>> url = URL.from_text(u'?x=1&x=2')
+ >>> url.get('x')
+ [u'1', u'2']
+ >>> url.get('y')
+ []
+
+ If the given *name* is not set, an empty list is returned. A
+ list is always returned, and this method raises no exceptions.
+
+ Args:
+ name: The name of the query parameter to get.
+
+ Returns:
+ List[Optional[Text]]: A list of all the values associated with the
+ key, in string form.
+ """
+ return [value for (key, value) in self.query if name == key]
+
+ def remove(
+ self,
+ name, # type: Text
+ value=_UNSET, # type: Text
+ limit=None, # type: Optional[int]
+ ):
+ # type: (...) -> URL
+ """Make a new :class:`URL` instance with occurrences of the query
+ parameter *name* removed, or, if *value* is set, parameters
+ matching *name* and *value*. No exception is raised if the
+ parameter is not already set.
+
+ Args:
+ name: The name of the query parameter to remove.
+ value: Optional value to additionally filter on.
+ Setting this removes query parameters which match both name
+ and value.
+ limit: Optional maximum number of parameters to remove.
+
+ Returns:
+ URL: A new :class:`URL` instance with the parameter removed.
+ """
+ if limit is None:
+ if value is _UNSET:
+ nq = [(k, v) for (k, v) in self.query if k != name]
+ else:
+ nq = [
+ (k, v)
+ for (k, v) in self.query
+ if not (k == name and v == value)
+ ]
+ else:
+ nq, removed_count = [], 0
+
+ for k, v in self.query:
+ if (
+ k == name
+ and (value is _UNSET or v == value)
+ and removed_count < limit
+ ):
+ removed_count += 1 # drop it
+ else:
+ nq.append((k, v)) # keep it
+
+ return self.replace(query=nq)
+
+
+EncodedURL = URL # An alias better describing what the URL really is
+
+_EMPTY_URL = URL()
+
+
+def _replace_plus(text):
+ # type: (Text) -> Text
+ return text.replace("+", "%20")
+
+
+def _no_op(text):
+ # type: (Text) -> Text
+ return text
+
+
+class DecodedURL(object):
+ """
+ :class:`DecodedURL` is a type designed to act as a higher-level
+ interface to :class:`URL` and the recommended type for most
+ operations. By analogy, :class:`DecodedURL` is the
+ :class:`unicode` to URL's :class:`bytes`.
+
+ :class:`DecodedURL` automatically handles encoding and decoding
+ all its components, such that all inputs and outputs are in a
+ maximally-decoded state. Note that this means, for some special
+ cases, a URL may not "roundtrip" character-for-character, but this
+ is considered a good tradeoff for the safety of automatic
+ encoding.
+
+ Otherwise, :class:`DecodedURL` has almost exactly the same API as
+ :class:`URL`.
+
+ Where applicable, a UTF-8 encoding is presumed. Be advised that
+    some interactions can raise :exc:`UnicodeEncodeError` and
+    :exc:`UnicodeDecodeError`, just like when working with
+ bytestrings. Examples of such interactions include handling query
+ strings encoding binary data, and paths containing segments with
+ special characters encoded with codecs other than UTF-8.
+
+ Args:
+ url: A :class:`URL` object to wrap.
+        lazy: Set to True to avoid pre-decoding all parts of the URL
+            up front to check for validity.
+            Defaults to False.
+        query_plus_is_space: Whether ``+`` characters in the query string
+            should be treated as spaces when decoding. If unspecified, the
+            default is taken from the scheme.
+
+ .. note::
+
+ The :class:`DecodedURL` initializer takes a :class:`URL` object,
+ not URL components, like :class:`URL`. To programmatically
+ construct a :class:`DecodedURL`, you can use this pattern:
+
+ >>> print(DecodedURL().replace(scheme=u'https',
+ ... host=u'pypi.org', path=(u'projects', u'hyperlink')).to_text())
+ https://pypi.org/projects/hyperlink
+
+ .. versionadded:: 18.0.0
+ """
+
+ def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
+ # type: (URL, bool, Optional[bool]) -> None
+ self._url = url
+ if query_plus_is_space is None:
+ query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES
+ self._query_plus_is_space = query_plus_is_space
+ if not lazy:
+ # cache the following, while triggering any decoding
+ # issues with decodable fields
+ self.host, self.userinfo, self.path, self.query, self.fragment
+ return
+
+ @classmethod
+ def from_text(cls, text, lazy=False, query_plus_is_space=None):
+ # type: (Text, bool, Optional[bool]) -> DecodedURL
+ """\
+ Make a `DecodedURL` instance from any text string containing a URL.
+
+ Args:
+ text: Text containing the URL
+            lazy: Set to True to avoid pre-decoding all parts of the URL
+                up front to check for validity.
+                Defaults to False.
+ """
+ _url = URL.from_text(text)
+ return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space)
+
+ @property
+ def encoded_url(self):
+ # type: () -> URL
+ """Access the underlying :class:`URL` object, which has any special
+ characters encoded.
+ """
+ return self._url
+
+ def to_text(self, with_password=False):
+ # type: (bool) -> Text
+ "Passthrough to :meth:`~hyperlink.URL.to_text()`"
+ return self._url.to_text(with_password)
+
+ def to_uri(self):
+ # type: () -> URL
+ "Passthrough to :meth:`~hyperlink.URL.to_uri()`"
+ return self._url.to_uri()
+
+ def to_iri(self):
+ # type: () -> URL
+ "Passthrough to :meth:`~hyperlink.URL.to_iri()`"
+ return self._url.to_iri()
+
+ def _clone(self, url):
+ # type: (URL) -> DecodedURL
+ return self.__class__(
+ url,
+ # TODO: propagate laziness?
+ query_plus_is_space=self._query_plus_is_space,
+ )
+
+ def click(self, href=u""):
+ # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
+ """Return a new DecodedURL wrapping the result of
+ :meth:`~hyperlink.URL.click()`
+ """
+ if isinstance(href, DecodedURL):
+ href = href._url
+ return self._clone(
+ self._url.click(href=href),
+ )
+
+ def sibling(self, segment):
+ # type: (Text) -> DecodedURL
+ """Automatically encode any reserved characters in *segment* and
+ return a new `DecodedURL` wrapping the result of
+ :meth:`~hyperlink.URL.sibling()`
+ """
+ return self._clone(
+ self._url.sibling(_encode_reserved(segment)),
+ )
+
+ def child(self, *segments):
+ # type: (Text) -> DecodedURL
+ """Automatically encode any reserved characters in *segments* and
+ return a new `DecodedURL` wrapping the result of
+ :meth:`~hyperlink.URL.child()`.
+ """
+ if not segments:
+ return self
+ new_segs = [_encode_reserved(s) for s in segments]
+ return self._clone(self._url.child(*new_segs))
+
+ def normalize(
+ self,
+ scheme=True,
+ host=True,
+ path=True,
+ query=True,
+ fragment=True,
+ userinfo=True,
+ percents=True,
+ ):
+ # type: (bool, bool, bool, bool, bool, bool, bool) -> DecodedURL
+ """Return a new `DecodedURL` wrapping the result of
+ :meth:`~hyperlink.URL.normalize()`
+ """
+ return self._clone(
+ self._url.normalize(
+ scheme, host, path, query, fragment, userinfo, percents
+ )
+ )
+
+ @property
+ def absolute(self):
+ # type: () -> bool
+ return self._url.absolute
+
+ @property
+ def scheme(self):
+ # type: () -> Text
+ return self._url.scheme
+
+ @property
+ def host(self):
+ # type: () -> Text
+ return _decode_host(self._url.host)
+
+ @property
+ def port(self):
+ # type: () -> Optional[int]
+ return self._url.port
+
+ @property
+ def rooted(self):
+ # type: () -> bool
+ return self._url.rooted
+
+ @property
+ def path(self):
+ # type: () -> Sequence[Text]
+ if not hasattr(self, "_path"):
+ self._path = tuple(
+ [
+ _percent_decode(p, raise_subencoding_exc=True)
+ for p in self._url.path
+ ]
+ )
+ return self._path
+
+ @property
+ def query(self):
+ # type: () -> QueryPairs
+ if not hasattr(self, "_query"):
+ if self._query_plus_is_space:
+ predecode = _replace_plus
+ else:
+ predecode = _no_op
+
+ self._query = cast(
+ QueryPairs,
+ tuple(
+ tuple(
+ _percent_decode(
+ predecode(x), raise_subencoding_exc=True
+ )
+ if x is not None
+ else None
+ for x in (k, v)
+ )
+ for k, v in self._url.query
+ ),
+ )
+ return self._query
+
+ @property
+ def fragment(self):
+ # type: () -> Text
+ if not hasattr(self, "_fragment"):
+ frag = self._url.fragment
+ self._fragment = _percent_decode(frag, raise_subencoding_exc=True)
+ return self._fragment
+
+ @property
+ def userinfo(self):
+ # type: () -> Union[Tuple[str], Tuple[str, str]]
+ if not hasattr(self, "_userinfo"):
+ self._userinfo = cast(
+ Union[Tuple[str], Tuple[str, str]],
+ tuple(
+ tuple(
+ _percent_decode(p, raise_subencoding_exc=True)
+ for p in self._url.userinfo.split(":", 1)
+ )
+ ),
+ )
+ return self._userinfo
+
+ @property
+ def user(self):
+ # type: () -> Text
+ return self.userinfo[0]
+
+ @property
+ def uses_netloc(self):
+ # type: () -> Optional[bool]
+ return self._url.uses_netloc
+
+ def replace(
+ self,
+ scheme=_UNSET, # type: Optional[Text]
+ host=_UNSET, # type: Optional[Text]
+ path=_UNSET, # type: Iterable[Text]
+ query=_UNSET, # type: QueryParameters
+ fragment=_UNSET, # type: Text
+ port=_UNSET, # type: Optional[int]
+ rooted=_UNSET, # type: Optional[bool]
+ userinfo=_UNSET, # type: Union[Tuple[str], Tuple[str, str]]
+ uses_netloc=_UNSET, # type: Optional[bool]
+ ):
+ # type: (...) -> DecodedURL
+ """While the signature is the same, this `replace()` differs a little
+        from :meth:`URL.replace`. For instance, it accepts userinfo as a
+        tuple, not as a string, which handles the case of a username
+        containing a ``:``. As with the rest of the methods on
+ DecodedURL, if you pass a reserved character, it will be
+ automatically encoded instead of an error being raised.
+ """
+ if path is not _UNSET:
+ path = tuple(_encode_reserved(p) for p in path)
+ if query is not _UNSET:
+ query = cast(
+ QueryPairs,
+ tuple(
+ tuple(
+ _encode_reserved(x) if x is not None else None
+ for x in (k, v)
+ )
+ for k, v in iter_pairs(query)
+ ),
+ )
+ if userinfo is not _UNSET:
+ if len(userinfo) > 2:
+ raise ValueError(
+ 'userinfo expected sequence of ["user"] or'
+ ' ["user", "password"], got %r' % (userinfo,)
+ )
+ userinfo_text = u":".join([_encode_reserved(p) for p in userinfo])
+ else:
+ userinfo_text = _UNSET
+ new_url = self._url.replace(
+ scheme=scheme,
+ host=host,
+ path=path,
+ query=query,
+ fragment=fragment,
+ port=port,
+ rooted=rooted,
+ userinfo=userinfo_text,
+ uses_netloc=uses_netloc,
+ )
+ return self._clone(url=new_url)
+
+ def get(self, name):
+ # type: (Text) -> List[Optional[Text]]
+ "Get the value of all query parameters whose name matches *name*"
+ return [v for (k, v) in self.query if name == k]
+
+ def add(self, name, value=None):
+ # type: (Text, Optional[Text]) -> DecodedURL
+ """Return a new DecodedURL with the query parameter *name* and *value*
+ added."""
+ return self.replace(query=self.query + ((name, value),))
+
+ def set(self, name, value=None):
+ # type: (Text, Optional[Text]) -> DecodedURL
+ "Return a new DecodedURL with query parameter *name* set to *value*"
+ query = self.query
+ q = [(k, v) for (k, v) in query if k != name]
+ idx = next((i for (i, (k, v)) in enumerate(query) if k == name), -1)
+ q[idx:idx] = [(name, value)]
+ return self.replace(query=q)
+
+ def remove(
+ self,
+ name, # type: Text
+ value=_UNSET, # type: Text
+ limit=None, # type: Optional[int]
+ ):
+ # type: (...) -> DecodedURL
+ """Return a new DecodedURL with query parameter *name* removed.
+
+ Optionally also filter for *value*, as well as cap the number
+ of parameters removed with *limit*.
+ """
+ if limit is None:
+ if value is _UNSET:
+ nq = [(k, v) for (k, v) in self.query if k != name]
+ else:
+ nq = [
+ (k, v)
+ for (k, v) in self.query
+ if not (k == name and v == value)
+ ]
+ else:
+ nq, removed_count = [], 0
+ for k, v in self.query:
+ if (
+ k == name
+ and (value is _UNSET or v == value)
+ and removed_count < limit
+ ):
+ removed_count += 1 # drop it
+ else:
+ nq.append((k, v)) # keep it
+
+ return self.replace(query=nq)
+
+ def __repr__(self):
+ # type: () -> str
+ cn = self.__class__.__name__
+ return "%s(url=%r)" % (cn, self._url)
+
+ def __str__(self):
+ # type: () -> str
+ # TODO: the underlying URL's __str__ needs to change to make
+ # this work as the URL, see #55
+ return str(self._url)
+
+ def __eq__(self, other):
+ # type: (Any) -> bool
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ return self.normalize().to_uri() == other.normalize().to_uri()
+
+ def __ne__(self, other):
+ # type: (Any) -> bool
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ return not self.__eq__(other)
+
+ def __hash__(self):
+ # type: () -> int
+ return hash(
+ (
+ self.__class__,
+ self.scheme,
+ self.userinfo,
+ self.host,
+ self.path,
+ self.query,
+ self.fragment,
+ self.port,
+ self.rooted,
+ self.uses_netloc,
+ )
+ )
+
+ # # Begin Twisted Compat Code
+ asURI = to_uri
+ asIRI = to_iri
+
+ @classmethod
+ def fromText(cls, s, lazy=False):
+ # type: (Text, bool) -> DecodedURL
+ return cls.from_text(s, lazy=lazy)
+
+ def asText(self, includeSecrets=False):
+ # type: (bool) -> Text
+ return self.to_text(with_password=includeSecrets)
+
+ def __dir__(self):
+ # type: () -> Sequence[Text]
+ try:
+ ret = object.__dir__(self)
+ except AttributeError:
+ # object.__dir__ == AttributeError # pdw for py2
+ ret = dir(self.__class__) + list(self.__dict__.keys())
+ ret = sorted(set(ret) - set(["fromText", "asURI", "asIRI", "asText"]))
+ return ret
+
+ # # End Twisted Compat Code
+
+
+def parse(url, decoded=True, lazy=False):
+ # type: (Text, bool, bool) -> Union[URL, DecodedURL]
+ """
+ Automatically turn text into a structured URL object.
+
+ >>> url = parse(u"https://github.com/python-hyper/hyperlink")
+ >>> print(url.to_text())
+ https://github.com/python-hyper/hyperlink
+
+ Args:
+ url: A text string representation of a URL.
+
+ decoded: Whether or not to return a :class:`DecodedURL`,
+ which automatically handles all
+ encoding/decoding/quoting/unquoting for all the various
+ accessors of parts of the URL, or a :class:`URL`,
+ which has the same API, but requires handling of special
+ characters for different parts of the URL.
+
+ lazy: In the case of `decoded=True`, this controls
+ whether the URL is decoded immediately or as accessed. The
+ default, `lazy=False`, checks all encoded parts of the URL
+ for decodability.
+
+ .. versionadded:: 18.0.0
+ """
+ enc_url = EncodedURL.from_text(url)
+ if not decoded:
+ return enc_url
+ dec_url = DecodedURL(enc_url, lazy=lazy)
+ return dec_url
diff --git a/contrib/python/hyperlink/py2/hyperlink/hypothesis.py b/contrib/python/hyperlink/py2/hyperlink/hypothesis.py
new file mode 100644
index 0000000000..45fd9a9956
--- /dev/null
+++ b/contrib/python/hyperlink/py2/hyperlink/hypothesis.py
@@ -0,0 +1,324 @@
+# -*- coding: utf-8 -*-
+"""
+Hypothesis strategies.
+"""
+from __future__ import absolute_import
+
+try:
+ import hypothesis
+
+ del hypothesis
+except ImportError:
+ from typing import Tuple
+
+ __all__ = () # type: Tuple[str, ...]
+else:
+ import io
+ import pkgutil
+ from csv import reader as csv_reader
+ from string import ascii_letters, digits
+ from sys import maxunicode
+ from typing import (
+ Callable,
+ Iterable,
+ List,
+ Optional,
+ Sequence,
+ Text,
+ TypeVar,
+ cast,
+ )
+ from gzip import open as open_gzip
+
+ from . import DecodedURL, EncodedURL
+
+ from hypothesis import assume
+ from hypothesis.strategies import (
+ composite,
+ integers,
+ lists,
+ sampled_from,
+ text,
+ )
+
+ from idna import IDNAError, check_label, encode as idna_encode
+
+ __all__ = (
+ "decoded_urls",
+ "encoded_urls",
+ "hostname_labels",
+ "hostnames",
+ "idna_text",
+ "paths",
+ "port_numbers",
+ )
+
+ T = TypeVar("T")
+ DrawCallable = Callable[[Callable[..., T]], T]
+
+ try:
+ unichr
+ except NameError: # Py3
+ unichr = chr # type: Callable[[int], Text]
+
+ def idna_characters():
+ # type: () -> Text
+ """
+ Returns a string containing IDNA characters.
+ """
+ global _idnaCharacters
+
+ if not _idnaCharacters:
+ result = []
+
+ # Data source "IDNA Derived Properties":
+ # https://www.iana.org/assignments/idna-tables-6.3.0/
+ # idna-tables-6.3.0.xhtml#idna-tables-properties
+            # Load the gzipped CSV that ships alongside this module via
+            # pkgutil, so it also works when packaged as a resource.
+            data = io.BytesIO(
+                pkgutil.get_data(__name__, "idna-tables-properties.csv.gz")
+            )
+            with open_gzip(data) as dataFile:
+                reader = csv_reader(
+                    (line.decode("utf-8") for line in dataFile),
+                    delimiter=",",
+                )
+                next(reader)  # Skip header row
+                for row in reader:
+                    codes, prop, description = row
+
+                    if prop != "PVALID":
+                        # CONTEXTO or CONTEXTJ are also allowed, but they come
+                        # with rules, so we're punting on those here.
+                        # See: https://tools.ietf.org/html/rfc5892
+                        continue
+
+                    startEnd = row[0].split("-", 1)
+                    if len(startEnd) == 1:
+                        # No end of range given; use start
+                        startEnd.append(startEnd[0])
+                    start, end = (int(i, 16) for i in startEnd)
+
+                    for i in range(start, end + 1):
+                        if i > maxunicode:  # Happens using Py2 on Windows
+                            break
+                        result.append(unichr(i))
+
+            _idnaCharacters = u"".join(result)
+
+ return _idnaCharacters
+
+ _idnaCharacters = "" # type: Text
+
+ @composite
+ def idna_text(draw, min_size=1, max_size=None):
+ # type: (DrawCallable, int, Optional[int]) -> Text
+ """
+ A strategy which generates IDNA-encodable text.
+
+ @param min_size: The minimum number of characters in the text.
+ C{None} is treated as C{0}.
+
+ @param max_size: The maximum number of characters in the text.
+ Use C{None} for an unbounded size.
+ """
+ alphabet = idna_characters()
+
+ assert min_size >= 1
+
+ if max_size is not None:
+ assert max_size >= 1
+
+ result = cast(
+ Text,
+ draw(text(min_size=min_size, max_size=max_size, alphabet=alphabet)),
+ )
+
+ # FIXME: There should be a more efficient way to ensure we produce
+ # valid IDNA text.
+ try:
+ idna_encode(result)
+ except IDNAError:
+ assume(False)
+
+ return result
+
+ @composite
+ def port_numbers(draw, allow_zero=False):
+ # type: (DrawCallable, bool) -> int
+ """
+ A strategy which generates port numbers.
+
+ @param allow_zero: Whether to allow port C{0} as a possible value.
+ """
+ if allow_zero:
+ min_value = 0
+ else:
+ min_value = 1
+
+ return cast(int, draw(integers(min_value=min_value, max_value=65535)))
+
+ @composite
+ def hostname_labels(draw, allow_idn=True):
+ # type: (DrawCallable, bool) -> Text
+ """
+ A strategy which generates host name labels.
+
+ @param allow_idn: Whether to allow non-ASCII characters as allowed by
+ internationalized domain names (IDNs).
+ """
+ if allow_idn:
+ label = cast(Text, draw(idna_text(min_size=1, max_size=63)))
+
+ try:
+ label.encode("ascii")
+ except UnicodeEncodeError:
+ # If the label doesn't encode to ASCII, then we need to check
+ # the length of the label after encoding to punycode and adding
+ # the xn-- prefix.
+ while len(label.encode("punycode")) > 63 - len("xn--"):
+ # Rather than bombing out, just trim from the end until it
+ # is short enough, so hypothesis doesn't have to generate
+ # new data.
+ label = label[:-1]
+
+ else:
+ label = cast(
+ Text,
+ draw(
+ text(
+ min_size=1,
+ max_size=63,
+ alphabet=Text(ascii_letters + digits + u"-"),
+ )
+ ),
+ )
+
+ # Filter invalid labels.
+ # It would be better to reliably avoid generation of bogus labels in
+ # the first place, but it's hard...
+ try:
+ check_label(label)
+ except UnicodeError: # pragma: no cover (not always drawn)
+ assume(False)
+
+ return label
+
+ @composite
+ def hostnames(draw, allow_leading_digit=True, allow_idn=True):
+ # type: (DrawCallable, bool, bool) -> Text
+ """
+ A strategy which generates host names.
+
+ @param allow_leading_digit: Whether to allow a leading digit in host
+ names; they were not allowed prior to RFC 1123.
+
+ @param allow_idn: Whether to allow non-ASCII characters as allowed by
+ internationalized domain names (IDNs).
+ """
+ # Draw first label, filtering out labels with leading digits if needed
+ labels = [
+ cast(
+ Text,
+ draw(
+ hostname_labels(allow_idn=allow_idn).filter(
+ lambda l: (
+ True if allow_leading_digit else l[0] not in digits
+ )
+ )
+ ),
+ )
+ ]
+ # Draw remaining labels
+ labels += cast(
+ List[Text],
+ draw(
+ lists(
+ hostname_labels(allow_idn=allow_idn),
+ min_size=1,
+ max_size=4,
+ )
+ ),
+ )
+
+ # Trim off labels until the total host name length fits in 252
+ # characters. This avoids having to filter the data.
+ while sum(len(label) for label in labels) + len(labels) - 1 > 252:
+ labels = labels[:-1]
+
+ return u".".join(labels)
+
+ def path_characters():
+ # type: () -> str
+ """
+ Returns a string containing valid URL path characters.
+ """
+ global _path_characters
+
+ if _path_characters is None:
+
+ def chars():
+ # type: () -> Iterable[Text]
+ for i in range(maxunicode):
+ c = unichr(i)
+
+ # Exclude reserved characters
+ if c in "#/?":
+ continue
+
+ # Exclude anything not UTF-8 compatible
+ try:
+ c.encode("utf-8")
+ except UnicodeEncodeError:
+ continue
+
+ yield c
+
+ _path_characters = "".join(chars())
+
+ return _path_characters
+
+ _path_characters = None # type: Optional[str]
+
+ @composite
+ def paths(draw):
+ # type: (DrawCallable) -> Sequence[Text]
+ return cast(
+ List[Text],
+ draw(
+ lists(text(min_size=1, alphabet=path_characters()), max_size=10)
+ ),
+ )
+
+ @composite
+ def encoded_urls(draw):
+ # type: (DrawCallable) -> EncodedURL
+ """
+ A strategy which generates L{EncodedURL}s.
+ Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
+ protocol-friendly URI.
+ """
+ port = cast(Optional[int], draw(port_numbers(allow_zero=True)))
+ host = cast(Text, draw(hostnames()))
+ path = cast(Sequence[Text], draw(paths()))
+
+ if port == 0:
+ port = None
+
+ return EncodedURL(
+ scheme=cast(Text, draw(sampled_from((u"http", u"https")))),
+ host=host,
+ port=port,
+ path=path,
+ )
+
+ @composite
+ def decoded_urls(draw):
+ # type: (DrawCallable) -> DecodedURL
+ """
+ A strategy which generates L{DecodedURL}s.
+ Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
+ protocol-friendly URI.
+ """
+ return DecodedURL(draw(encoded_urls()))
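+    # A sketch of how these strategies are typically consumed (``given`` is
+    # hypothesis.given; the asserted property is only an illustration):
+    #
+    #   @given(decoded_urls())
+    #   def test_handles_any_url(url):
+    #       assert url.scheme in (u"http", u"https")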
diff --git a/contrib/python/hyperlink/py2/hyperlink/idna-tables-properties.csv.gz b/contrib/python/hyperlink/py2/hyperlink/idna-tables-properties.csv.gz
new file mode 100644
index 0000000000..48e9f06742
--- /dev/null
+++ b/contrib/python/hyperlink/py2/hyperlink/idna-tables-properties.csv.gz
Binary files differ
diff --git a/contrib/python/hyperlink/py2/hyperlink/py.typed b/contrib/python/hyperlink/py2/hyperlink/py.typed
new file mode 100644
index 0000000000..d2dfd5e491
--- /dev/null
+++ b/contrib/python/hyperlink/py2/hyperlink/py.typed
@@ -0,0 +1 @@
+# See: https://www.python.org/dev/peps/pep-0561/
diff --git a/contrib/python/hyperlink/py2/ya.make b/contrib/python/hyperlink/py2/ya.make
new file mode 100644
index 0000000000..5611a958d8
--- /dev/null
+++ b/contrib/python/hyperlink/py2/ya.make
@@ -0,0 +1,36 @@
+# Generated by devtools/yamaker (pypi).
+
+PY2_LIBRARY()
+
+VERSION(21.0.0)
+
+LICENSE(MIT)
+
+PEERDIR(
+ contrib/deprecated/python/typing
+ contrib/python/idna
+)
+
+NO_LINT()
+
+PY_SRCS(
+ TOP_LEVEL
+ hyperlink/__init__.py
+ hyperlink/_socket.py
+ hyperlink/_url.py
+ hyperlink/hypothesis.py
+)
+
+RESOURCE_FILES(
+ PREFIX contrib/python/hyperlink/py2/
+ .dist-info/METADATA
+ .dist-info/top_level.txt
+ hyperlink/idna-tables-properties.csv.gz
+ hyperlink/py.typed
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ tests
+)
diff --git a/contrib/python/hyperlink/py3/.dist-info/METADATA b/contrib/python/hyperlink/py3/.dist-info/METADATA
new file mode 100644
index 0000000000..fc5922ba87
--- /dev/null
+++ b/contrib/python/hyperlink/py3/.dist-info/METADATA
@@ -0,0 +1,38 @@
+Metadata-Version: 2.1
+Name: hyperlink
+Version: 21.0.0
+Summary: A featureful, immutable, and correct URL for Python.
+Home-page: https://github.com/python-hyper/hyperlink
+Author: Mahmoud Hashemi and Glyph Lefkowitz
+Author-email: mahmoud@hatnote.com
+License: MIT
+Platform: any
+Classifier: Topic :: Utilities
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Software Development :: Libraries
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.6
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
+Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: License :: OSI Approved :: MIT License
+Requires-Python: >=2.6, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*
+Requires-Dist: idna (>=2.5)
+Requires-Dist: typing ; python_version < "3.5"
+
+The humble, but powerful, URL runs everything around us. Chances
+are you've used several just to read this text.
+
+Hyperlink is a featureful, pure-Python implementation of the URL, with
+an emphasis on correctness. MIT licensed.
+
+See the docs at http://hyperlink.readthedocs.io.
+
+
diff --git a/contrib/python/hyperlink/py3/.dist-info/top_level.txt b/contrib/python/hyperlink/py3/.dist-info/top_level.txt
new file mode 100644
index 0000000000..81722ce1d8
--- /dev/null
+++ b/contrib/python/hyperlink/py3/.dist-info/top_level.txt
@@ -0,0 +1 @@
+hyperlink
diff --git a/contrib/python/hyperlink/py3/LICENSE b/contrib/python/hyperlink/py3/LICENSE
new file mode 100644
index 0000000000..a73f882ffb
--- /dev/null
+++ b/contrib/python/hyperlink/py3/LICENSE
@@ -0,0 +1,29 @@
+Copyright (c) 2017
+Glyph Lefkowitz
+Itamar Turner-Trauring
+Jean Paul Calderone
+Adi Roiban
+Amber Hawkie Brown
+Mahmoud Hashemi
+Wilfredo Sanchez Vega
+
+and others that have contributed code to the public domain.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/contrib/python/hyperlink/py3/README.md b/contrib/python/hyperlink/py3/README.md
new file mode 100644
index 0000000000..017f9eb88c
--- /dev/null
+++ b/contrib/python/hyperlink/py3/README.md
@@ -0,0 +1,67 @@
+# Hyperlink
+
+*Cool URLs that don't change.*
+
+<a href="https://hyperlink.readthedocs.io/en/latest/">
+ <img src="https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat" alt="Documentation">
+</a>
+<a href="https://pypi.org/project/hyperlink/">
+ <img src="https://img.shields.io/pypi/v/hyperlink.svg" alt="PyPI">
+</a>
+<a href="http://calver.org">
+ <img src="https://img.shields.io/badge/calver-YY.MINOR.MICRO-22bfda.svg" alt="Calendar Versioning">
+</a>
+<a href="https://pypi.org/project/hyperlink/">
+ <img src="https://img.shields.io/pypi/pyversions/hyperlink.svg" alt="Python Version Compatibility">
+</a>
+<a href="https://https://codecov.io/github/python-hyper/hyperlink?branch=master">
+ <img src="https://codecov.io/github/python-hyper/hyperlink/coverage.svg?branch=master" alt="Code Coverage">
+</a>
+<a href="https://requires.io/github/python-hyper/hyperlink/requirements/?branch=master">
+ <img src="https://requires.io/github/python-hyper/hyperlink/requirements.svg?branch=master" alt="Requirements Status">
+</a>
+
+Hyperlink provides a pure-Python implementation of immutable
+URLs. Based on [RFC 3986][rfc3986] and [3987][rfc3987], the Hyperlink URL
+makes working with both URIs and IRIs easy.
+
+Hyperlink is tested against Python 2.7, 3.4, 3.5, 3.6, 3.7, 3.8, and PyPy.
+
+Full documentation is available on [Read the Docs][docs].
+
+[rfc3986]: https://tools.ietf.org/html/rfc3986
+[rfc3987]: https://tools.ietf.org/html/rfc3987
+[docs]: http://hyperlink.readthedocs.io/en/latest/
+
+## Installation
+
+Hyperlink is a pure-Python package and requires nothing but
+Python. The easiest way to install is with pip:
+
+```
+pip install hyperlink
+```
+
+Then, hyperlink away!
+
+```python
+from hyperlink import URL
+
+url = URL.from_text(u'http://github.com/python-hyper/hyperlink?utm_source=README')
+utm_source = url.get(u'utm_source')
+better_url = url.replace(scheme=u'https', port=443)
+org_url = better_url.click(u'.')
+```
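+
+If you prefer fully decoded values, the higher-level `DecodedURL` wraps the
+same operations (a quick sketch; see the docs for the full API):
+
+```python
+from hyperlink import DecodedURL
+
+d_url = DecodedURL.from_text(u'https://example.com/caf%C3%A9?q=a%20b')
+d_url.path       # ('café',)
+d_url.get(u'q')  # ['a b']
+```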
+
+See the full API docs on [Read the Docs][docs].
+
+## More information
+
+Hyperlink would not have been possible without the help of
+[Glyph Lefkowitz](https://glyph.twistedmatrix.com/) and many other
+community members, especially considering that it started as an
+extract from the Twisted networking library. Thanks to them,
+Hyperlink's URL has been production-grade for well over a decade.
+
+Still, should you encounter any issues, do file an issue, or submit a
+pull request.
diff --git a/contrib/python/hyperlink/py3/hyperlink/__init__.py b/contrib/python/hyperlink/py3/hyperlink/__init__.py
new file mode 100644
index 0000000000..f680b01a90
--- /dev/null
+++ b/contrib/python/hyperlink/py3/hyperlink/__init__.py
@@ -0,0 +1,17 @@
+from ._url import (
+ parse,
+ register_scheme,
+ URL,
+ EncodedURL,
+ DecodedURL,
+ URLParseError,
+)
+
+__all__ = (
+ "parse",
+ "register_scheme",
+ "URL",
+ "EncodedURL",
+ "DecodedURL",
+ "URLParseError",
+)
diff --git a/contrib/python/hyperlink/py3/hyperlink/_socket.py b/contrib/python/hyperlink/py3/hyperlink/_socket.py
new file mode 100644
index 0000000000..3bcf89706d
--- /dev/null
+++ b/contrib/python/hyperlink/py3/hyperlink/_socket.py
@@ -0,0 +1,53 @@
+try:
+ from socket import inet_pton
+except ImportError:
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING: # pragma: no cover
+ pass
+ else:
+ # based on https://gist.github.com/nnemkin/4966028
+ # this code only applies on Windows Python 2.7
+ import ctypes
+ import socket
+
+ class SockAddr(ctypes.Structure):
+ _fields_ = [
+ ("sa_family", ctypes.c_short),
+ ("__pad1", ctypes.c_ushort),
+ ("ipv4_addr", ctypes.c_byte * 4),
+ ("ipv6_addr", ctypes.c_byte * 16),
+ ("__pad2", ctypes.c_ulong),
+ ]
+
+ WSAStringToAddressA = ctypes.windll.ws2_32.WSAStringToAddressA
+ WSAAddressToStringA = ctypes.windll.ws2_32.WSAAddressToStringA
+
+ def inet_pton(address_family, ip_string):
+ # type: (int, str) -> bytes
+ addr = SockAddr()
+ ip_string_bytes = ip_string.encode("ascii")
+ addr.sa_family = address_family
+ addr_size = ctypes.c_int(ctypes.sizeof(addr))
+
+ try:
+ attribute, size = {
+ socket.AF_INET: ("ipv4_addr", 4),
+ socket.AF_INET6: ("ipv6_addr", 16),
+ }[address_family]
+ except KeyError:
+ raise socket.error("unknown address family")
+
+ if (
+ WSAStringToAddressA(
+ ip_string_bytes,
+ address_family,
+ None,
+ ctypes.byref(addr),
+ ctypes.byref(addr_size),
+ )
+ != 0
+ ):
+ raise socket.error(ctypes.FormatError())
+
+ return ctypes.string_at(getattr(addr, attribute), size)
diff --git a/contrib/python/hyperlink/py3/hyperlink/_url.py b/contrib/python/hyperlink/py3/hyperlink/_url.py
new file mode 100644
index 0000000000..be69baf696
--- /dev/null
+++ b/contrib/python/hyperlink/py3/hyperlink/_url.py
@@ -0,0 +1,2448 @@
+# -*- coding: utf-8 -*-
+u"""Hyperlink provides Pythonic URL parsing, construction, and rendering.
+
+Usage is straightforward::
+
+ >>> import hyperlink
+ >>> url = hyperlink.parse(u'http://github.com/mahmoud/hyperlink?utm_source=docs')
+ >>> url.host
+ u'github.com'
+ >>> secure_url = url.replace(scheme=u'https')
+ >>> secure_url.get('utm_source')[0]
+ u'docs'
+
+Hyperlink's API centers on the :class:`DecodedURL` type, which wraps
+the lower-level :class:`URL`, both of which can be returned by the
+:func:`parse()` convenience function.
+
+""" # noqa: E501
+
+import re
+import sys
+import string
+import socket
+from socket import AF_INET, AF_INET6
+
+try:
+ from socket import AddressFamily
+except ImportError:
+ AddressFamily = int # type: ignore[assignment,misc]
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ Iterator,
+ List,
+ Mapping,
+ Optional,
+ Sequence,
+ Text,
+ Tuple,
+ Type,
+ TypeVar,
+ Union,
+ cast,
+)
+from unicodedata import normalize
+from ._socket import inet_pton
+
+try:
+ from collections.abc import Mapping as MappingABC
+except ImportError: # Python 2
+ from collections import Mapping as MappingABC
+
+from idna import encode as idna_encode, decode as idna_decode
+
+
+PY2 = sys.version_info[0] == 2
+try:
+ unichr
+except NameError: # Py3
+ unichr = chr # type: Callable[[int], Text]
+NoneType = type(None) # type: Type[None]
+QueryPairs = Tuple[Tuple[Text, Optional[Text]], ...] # internal representation
+QueryParameters = Union[
+ Mapping[Text, Optional[Text]],
+ QueryPairs,
+ Sequence[Tuple[Text, Optional[Text]]],
+]
+T = TypeVar("T")
+
+
+# from boltons.typeutils
+def make_sentinel(name="_MISSING", var_name=""):
+ # type: (str, str) -> object
+ """Creates and returns a new **instance** of a new class, suitable for
+ usage as a "sentinel", a kind of singleton often used to indicate
+ a value is missing when ``None`` is a valid input.
+
+ Args:
+ name: Name of the Sentinel
+ var_name: Set this name to the name of the variable in its respective
+ module enable pickle-ability.
+
+ >>> make_sentinel(var_name='_MISSING')
+ _MISSING
+
+ The most common use cases here in boltons are as default values
+ for optional function arguments, partly because of its
+ less-confusing appearance in automatically generated
+ documentation. Sentinels also function well as placeholders in queues
+ and linked lists.
+
+ .. note::
+
+ By design, additional calls to ``make_sentinel`` with the same
+ values will not produce equivalent objects.
+
+ >>> make_sentinel('TEST') == make_sentinel('TEST')
+ False
+ >>> type(make_sentinel('TEST')) == type(make_sentinel('TEST'))
+ False
+ """
+
+ class Sentinel(object):
+ def __init__(self):
+ # type: () -> None
+ self.name = name
+ self.var_name = var_name
+
+ def __repr__(self):
+ # type: () -> str
+ if self.var_name:
+ return self.var_name
+ return "%s(%r)" % (self.__class__.__name__, self.name)
+
+ if var_name:
+ # superclass type hints don't allow str return type, but it is
+ # allowed in the docs, hence the ignore[override] below
+ def __reduce__(self):
+ # type: () -> str
+ return self.var_name
+
+ def __nonzero__(self):
+ # type: () -> bool
+ return False
+
+ __bool__ = __nonzero__
+
+ return Sentinel()
+
+
+_unspecified = _UNSET = make_sentinel("_UNSET") # type: Any
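+# _UNSET is the "argument was not passed" marker used throughout this module,
+# which keeps None available as a real value (for example, a query value of
+# None means "no '=value' part at all"). A sketch of the pattern:
+#
+#   def replace(self, fragment=_UNSET):
+#       if fragment is _UNSET:
+#           fragment = self.fragment  # keep the current value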
+
+
+# RFC 3986 Section 2.3, Unreserved URI Characters
+# https://tools.ietf.org/html/rfc3986#section-2.3
+_UNRESERVED_CHARS = frozenset(
+ "~-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
+)
+
+
+# URL parsing regex (based on RFC 3986 Appendix B, with modifications)
+_URL_RE = re.compile(
+ r"^((?P<scheme>[^:/?#]+):)?"
+ r"((?P<_netloc_sep>//)"
+ r"(?P<authority>[^/?#]*))?"
+ r"(?P<path>[^?#]*)"
+ r"(\?(?P<query>[^#]*))?"
+ r"(#(?P<fragment>.*))?$"
+)
+_SCHEME_RE = re.compile(r"^[a-zA-Z0-9+\-.]*$")
+_AUTHORITY_RE = re.compile(
+ r"^(?:(?P<userinfo>[^@/?#]*)@)?"
+ r"(?P<host>"
+ r"(?:\[(?P<ipv6_host>[^[\]/?#]*)\])"
+ r"|(?P<plain_host>[^:/?#[\]]*)"
+ r"|(?P<bad_host>.*?))?"
+ r"(?::(?P<port>.*))?$"
+)
+
+
+_HEX_CHAR_MAP = dict(
+ [
+ ((a + b).encode("ascii"), unichr(int(a + b, 16)).encode("charmap"))
+ for a in string.hexdigits
+ for b in string.hexdigits
+ ]
+)
+_ASCII_RE = re.compile("([\x00-\x7f]+)")
+
+# RFC 3986 section 2.2, Reserved Characters
+# https://tools.ietf.org/html/rfc3986#section-2.2
+_GEN_DELIMS = frozenset(u":/?#[]@")
+_SUB_DELIMS = frozenset(u"!$&'()*+,;=")
+_ALL_DELIMS = _GEN_DELIMS | _SUB_DELIMS
+
+_USERINFO_SAFE = _UNRESERVED_CHARS | _SUB_DELIMS | set(u"%")
+_USERINFO_DELIMS = _ALL_DELIMS - _USERINFO_SAFE
+_PATH_SAFE = _USERINFO_SAFE | set(u":@")
+_PATH_DELIMS = _ALL_DELIMS - _PATH_SAFE
+_SCHEMELESS_PATH_SAFE = _PATH_SAFE - set(":")
+_SCHEMELESS_PATH_DELIMS = _ALL_DELIMS - _SCHEMELESS_PATH_SAFE
+_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u"/?")
+_FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE
+_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u"&")
+_QUERY_VALUE_DELIMS = _ALL_DELIMS - _QUERY_VALUE_SAFE
+_QUERY_KEY_SAFE = _UNRESERVED_CHARS | _QUERY_VALUE_SAFE - set(u"=")
+_QUERY_KEY_DELIMS = _ALL_DELIMS - _QUERY_KEY_SAFE
+
+
+def _make_decode_map(delims, allow_percent=False):
+ # type: (Iterable[Text], bool) -> Mapping[bytes, bytes]
+ ret = dict(_HEX_CHAR_MAP)
+ if not allow_percent:
+ delims = set(delims) | set([u"%"])
+ for delim in delims:
+ _hexord = "{0:02X}".format(ord(delim)).encode("ascii")
+ _hexord_lower = _hexord.lower()
+ ret.pop(_hexord)
+ if _hexord != _hexord_lower:
+ ret.pop(_hexord_lower)
+ return ret
+
+
+def _make_quote_map(safe_chars):
+ # type: (Iterable[Text]) -> Mapping[Union[int, Text], Text]
+ ret = {} # type: Dict[Union[int, Text], Text]
+ # v is included in the dict for py3 mostly, because bytestrings
+ # are iterables of ints, of course!
+ for i, v in zip(range(256), range(256)):
+ c = chr(v)
+ if c in safe_chars:
+ ret[c] = ret[v] = c
+ else:
+ ret[c] = ret[v] = "%{0:02X}".format(i)
+ return ret
+
+
+_USERINFO_PART_QUOTE_MAP = _make_quote_map(_USERINFO_SAFE)
+_USERINFO_DECODE_MAP = _make_decode_map(_USERINFO_DELIMS)
+_PATH_PART_QUOTE_MAP = _make_quote_map(_PATH_SAFE)
+_SCHEMELESS_PATH_PART_QUOTE_MAP = _make_quote_map(_SCHEMELESS_PATH_SAFE)
+_PATH_DECODE_MAP = _make_decode_map(_PATH_DELIMS)
+_QUERY_KEY_QUOTE_MAP = _make_quote_map(_QUERY_KEY_SAFE)
+_QUERY_KEY_DECODE_MAP = _make_decode_map(_QUERY_KEY_DELIMS)
+_QUERY_VALUE_QUOTE_MAP = _make_quote_map(_QUERY_VALUE_SAFE)
+_QUERY_VALUE_DECODE_MAP = _make_decode_map(_QUERY_VALUE_DELIMS)
+_FRAGMENT_QUOTE_MAP = _make_quote_map(_FRAGMENT_SAFE)
+_FRAGMENT_DECODE_MAP = _make_decode_map(_FRAGMENT_DELIMS)
+_UNRESERVED_QUOTE_MAP = _make_quote_map(_UNRESERVED_CHARS)
+_UNRESERVED_DECODE_MAP = dict(
+ [
+ (k, v)
+ for k, v in _HEX_CHAR_MAP.items()
+ if v.decode("ascii", "replace") in _UNRESERVED_CHARS
+ ]
+)
+
+_ROOT_PATHS = frozenset(((), (u"",)))
+
+
+def _encode_reserved(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """A very comprehensive percent encoding for encoding all
+ delimiters. Used for arguments to DecodedURL, where a % means a
+ percent sign, and not the character used by URLs for escaping
+ bytes.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_UNRESERVED_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _UNRESERVED_QUOTE_MAP[t] if t in _UNRESERVED_CHARS else t
+ for t in text
+ ]
+ )
+
+
+def _encode_path_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ "Percent-encode a single segment of a URL path."
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_PATH_PART_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [_PATH_PART_QUOTE_MAP[t] if t in _PATH_DELIMS else t for t in text]
+ )
+
+
+def _encode_schemeless_path_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """Percent-encode the first segment of a URL path for a URL without a
+ scheme specified.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_SCHEMELESS_PATH_PART_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _SCHEMELESS_PATH_PART_QUOTE_MAP[t]
+ if t in _SCHEMELESS_PATH_DELIMS
+ else t
+ for t in text
+ ]
+ )
+
+
+def _encode_path_parts(
+ text_parts, # type: Sequence[Text]
+ rooted=False, # type: bool
+ has_scheme=True, # type: bool
+ has_authority=True, # type: bool
+ maximal=True, # type: bool
+):
+ # type: (...) -> Sequence[Text]
+ """
+ Percent-encode a tuple of path parts into a complete path.
+
+ Setting *maximal* to False percent-encodes only the reserved
+ characters that are syntactically necessary for serialization,
+ preserving any IRI-style textual data.
+
+ Leaving *maximal* set to its default True percent-encodes
+ everything required to convert a portion of an IRI to a portion of
+ a URI.
+
+ RFC 3986 3.3:
+
+ If a URI contains an authority component, then the path component
+ must either be empty or begin with a slash ("/") character. If a URI
+ does not contain an authority component, then the path cannot begin
+ with two slash characters ("//"). In addition, a URI reference
+ (Section 4.1) may be a relative-path reference, in which case the
+ first path segment cannot contain a colon (":") character.
+ """
+ if not text_parts:
+ return ()
+ if rooted:
+ text_parts = (u"",) + tuple(text_parts)
+ # elif has_authority and text_parts:
+ # raise Exception('see rfc above') # TODO: too late to fail like this?
+ encoded_parts = [] # type: List[Text]
+ if has_scheme:
+ encoded_parts = [
+ _encode_path_part(part, maximal=maximal) if part else part
+ for part in text_parts
+ ]
+ else:
+ encoded_parts = [_encode_schemeless_path_part(text_parts[0])]
+ encoded_parts.extend(
+ [
+ _encode_path_part(part, maximal=maximal) if part else part
+ for part in text_parts[1:]
+ ]
+ )
+ return tuple(encoded_parts)
+
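+# For instance (illustrative): with the default maximal=True a space becomes
+# "%20", while maximal=False leaves IRI-style text alone and only guards the
+# syntactically significant delimiters:
+#
+#   _encode_path_parts([u"a b"], rooted=True)                 # (u"", u"a%20b")
+#   _encode_path_parts([u"a b"], rooted=True, maximal=False)  # (u"", u"a b")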
+
+def _encode_query_key(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """
+    Percent-encode a single query string key.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_QUERY_KEY_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [_QUERY_KEY_QUOTE_MAP[t] if t in _QUERY_KEY_DELIMS else t for t in text]
+ )
+
+
+def _encode_query_value(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """
+    Percent-encode a single query string value.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_QUERY_VALUE_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _QUERY_VALUE_QUOTE_MAP[t] if t in _QUERY_VALUE_DELIMS else t
+ for t in text
+ ]
+ )
+
+
+def _encode_fragment_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """Quote the fragment part of the URL. Fragments don't have
+ subdelimiters, so the whole URL fragment can be passed.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_FRAGMENT_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [_FRAGMENT_QUOTE_MAP[t] if t in _FRAGMENT_DELIMS else t for t in text]
+ )
+
+
+def _encode_userinfo_part(text, maximal=True):
+ # type: (Text, bool) -> Text
+ """Quote special characters in either the username or password
+ section of the URL.
+ """
+ if maximal:
+ bytestr = normalize("NFC", text).encode("utf8")
+ return u"".join([_USERINFO_PART_QUOTE_MAP[b] for b in bytestr])
+ return u"".join(
+ [
+ _USERINFO_PART_QUOTE_MAP[t] if t in _USERINFO_DELIMS else t
+ for t in text
+ ]
+ )
+
+
+# This port list painstakingly curated by hand searching through
+# https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml
+# and
+# https://www.iana.org/assignments/service-names-port-numbers/service-names-port-numbers.xhtml
+SCHEME_PORT_MAP = {
+ "acap": 674,
+ "afp": 548,
+ "dict": 2628,
+ "dns": 53,
+ "file": None,
+ "ftp": 21,
+ "git": 9418,
+ "gopher": 70,
+ "http": 80,
+ "https": 443,
+ "imap": 143,
+ "ipp": 631,
+ "ipps": 631,
+ "irc": 194,
+ "ircs": 6697,
+ "ldap": 389,
+ "ldaps": 636,
+ "mms": 1755,
+ "msrp": 2855,
+ "msrps": None,
+ "mtqp": 1038,
+ "nfs": 111,
+ "nntp": 119,
+ "nntps": 563,
+ "pop": 110,
+ "prospero": 1525,
+ "redis": 6379,
+ "rsync": 873,
+ "rtsp": 554,
+ "rtsps": 322,
+ "rtspu": 5005,
+ "sftp": 22,
+ "smb": 445,
+ "snmp": 161,
+ "ssh": 22,
+ "steam": None,
+ "svn": 3690,
+ "telnet": 23,
+ "ventrilo": 3784,
+ "vnc": 5900,
+ "wais": 210,
+ "ws": 80,
+ "wss": 443,
+ "xmpp": None,
+}
+
+# This list of schemes that don't use authorities is also from the link above.
+NO_NETLOC_SCHEMES = set(
+ [
+ "urn",
+ "about",
+ "bitcoin",
+ "blob",
+ "data",
+ "geo",
+ "magnet",
+ "mailto",
+ "news",
+ "pkcs11",
+ "sip",
+ "sips",
+ "tel",
+ ]
+)
+# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
+
+NO_QUERY_PLUS_SCHEMES = set()
+
+
+def register_scheme(
+ text, uses_netloc=True, default_port=None, query_plus_is_space=True
+):
+ # type: (Text, bool, Optional[int], bool) -> None
+ """Registers new scheme information, resulting in correct port and
+ slash behavior from the URL object. There are dozens of standard
+ schemes preregistered, so this function is mostly meant for
+ proprietary internal customizations or stopgaps on missing
+ standards information. If a scheme seems to be missing, please
+ `file an issue`_!
+
+ Args:
+ text: A string representation of the scheme.
+ (the 'http' in 'http://hatnote.com')
+ uses_netloc: Does the scheme support specifying a
+ network host? For instance, "http" does, "mailto" does
+ not. Defaults to True.
+ default_port: The default port, if any, for
+ netloc-using schemes.
+ query_plus_is_space: If true, a "+" in the query string should be
+ decoded as a space by DecodedURL.
+
+ .. _file an issue: https://github.com/mahmoud/hyperlink/issues
+ """
+ text = text.lower()
+ if default_port is not None:
+ try:
+ default_port = int(default_port)
+ except (ValueError, TypeError):
+ raise ValueError(
+ "default_port expected integer or None, not %r"
+ % (default_port,)
+ )
+
+ if uses_netloc is True:
+ SCHEME_PORT_MAP[text] = default_port
+ elif uses_netloc is False:
+ if default_port is not None:
+ raise ValueError(
+ "unexpected default port while specifying"
+ " non-netloc scheme: %r" % default_port
+ )
+ NO_NETLOC_SCHEMES.add(text)
+ else:
+ raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc)
+
+ if not query_plus_is_space:
+ NO_QUERY_PLUS_SCHEMES.add(text)
+
+ return
+
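+# Example (a sketch; "example+proto" is a made-up scheme):
+#
+#   register_scheme(u"example+proto", uses_netloc=True, default_port=4433)
+#
+# After this, URLs with that scheme render with the "//" netloc separator and
+# pick up 4433 as their default port when none is given explicitly.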
+
+def scheme_uses_netloc(scheme, default=None):
+ # type: (Text, Optional[bool]) -> Optional[bool]
+ """Whether or not a URL uses :code:`:` or :code:`://` to separate the
+ scheme from the rest of the URL depends on the scheme's own
+ standard definition. There is no way to infer this behavior
+ from other parts of the URL. A scheme either supports network
+ locations or it does not.
+
+ The URL type's approach to this is to check for explicitly
+ registered schemes, with common schemes like HTTP
+ preregistered. This is the same approach taken by
+ :mod:`urlparse`.
+
+ URL adds two additional heuristics if the scheme as a whole is
+ not registered. First, it attempts to check the subpart of the
+ scheme after the last ``+`` character. This adds intuitive
+ behavior for schemes like ``git+ssh``. Second, if a URL with
+ an unrecognized scheme is loaded, it will maintain the
+ separator it sees.
+ """
+ if not scheme:
+ return False
+ scheme = scheme.lower()
+ if scheme in SCHEME_PORT_MAP:
+ return True
+ if scheme in NO_NETLOC_SCHEMES:
+ return False
+ if scheme.split("+")[-1] in SCHEME_PORT_MAP:
+ return True
+ return default
+
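+# For instance:
+#
+#   scheme_uses_netloc(u"http")     # True
+#   scheme_uses_netloc(u"mailto")   # False
+#   scheme_uses_netloc(u"git+ssh")  # True, via the "+"-suffix heuristic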
+
+class URLParseError(ValueError):
+ """Exception inheriting from :exc:`ValueError`, raised when failing to
+ parse a URL. Mostly raised on invalid ports and IPv6 addresses.
+ """
+
+ pass
+
+
+def _optional(argument, default):
+ # type: (Any, Any) -> Any
+ if argument is _UNSET:
+ return default
+ else:
+ return argument
+
+
+def _typecheck(name, value, *types):
+ # type: (Text, T, Type[Any]) -> T
+ """
+ Check that the given *value* is one of the given *types*, or raise an
+ exception describing the problem using *name*.
+ """
+ if not types:
+ raise ValueError("expected one or more types, maybe use _textcheck?")
+ if not isinstance(value, types):
+ raise TypeError(
+ "expected %s for %s, got %r"
+ % (" or ".join([t.__name__ for t in types]), name, value)
+ )
+ return value
+
+
+def _textcheck(name, value, delims=frozenset(), nullable=False):
+ # type: (Text, T, Iterable[Text], bool) -> T
+ if not isinstance(value, Text):
+ if nullable and value is None:
+ # used by query string values
+ return value # type: ignore[unreachable]
+ else:
+ str_name = "unicode" if PY2 else "str"
+ exp = str_name + " or NoneType" if nullable else str_name
+ raise TypeError("expected %s for %s, got %r" % (exp, name, value))
+ if delims and set(value) & set(delims): # TODO: test caching into regexes
+ raise ValueError(
+ "one or more reserved delimiters %s present in %s: %r"
+ % ("".join(delims), name, value)
+ )
+ return value # type: ignore[return-value] # T vs. Text
+
+
+def iter_pairs(iterable):
+ # type: (Iterable[Any]) -> Iterator[Any]
+ """
+ Iterate over the (key, value) pairs in ``iterable``.
+
+ This handles dictionaries sensibly, and falls back to assuming the
+ iterable yields (key, value) pairs. This behaviour is similar to
+ what Python's ``dict()`` constructor does.
+ """
+ if isinstance(iterable, MappingABC):
+ iterable = iterable.items()
+ return iter(iterable)
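+
+# Illustrative behavior of iter_pairs() above:
+#
+#   >>> list(iter_pairs({u"a": u"1"}))
+#   [(u'a', u'1')]
+#   >>> list(iter_pairs([(u"a", u"1"), (u"a", u"2")]))
+#   [(u'a', u'1'), (u'a', u'2')]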
+
+
+def _decode_unreserved(text, normalize_case=False, encode_stray_percents=False):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_UNRESERVED_DECODE_MAP,
+ )
+
+
+def _decode_userinfo_part(
+ text, normalize_case=False, encode_stray_percents=False
+):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_USERINFO_DECODE_MAP,
+ )
+
+
+def _decode_path_part(text, normalize_case=False, encode_stray_percents=False):
+ # type: (Text, bool, bool) -> Text
+ """
+ >>> _decode_path_part(u'%61%77%2f%7a')
+ u'aw%2fz'
+ >>> _decode_path_part(u'%61%77%2f%7a', normalize_case=True)
+ u'aw%2Fz'
+ """
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_PATH_DECODE_MAP,
+ )
+
+
+def _decode_query_key(text, normalize_case=False, encode_stray_percents=False):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_QUERY_KEY_DECODE_MAP,
+ )
+
+
+def _decode_query_value(
+ text, normalize_case=False, encode_stray_percents=False
+):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_QUERY_VALUE_DECODE_MAP,
+ )
+
+
+def _decode_fragment_part(
+ text, normalize_case=False, encode_stray_percents=False
+):
+ # type: (Text, bool, bool) -> Text
+ return _percent_decode(
+ text,
+ normalize_case=normalize_case,
+ encode_stray_percents=encode_stray_percents,
+ _decode_map=_FRAGMENT_DECODE_MAP,
+ )
+
+
+def _percent_decode(
+ text, # type: Text
+ normalize_case=False, # type: bool
+ subencoding="utf-8", # type: Text
+ raise_subencoding_exc=False, # type: bool
+ encode_stray_percents=False, # type: bool
+ _decode_map=_HEX_CHAR_MAP, # type: Mapping[bytes, bytes]
+):
+ # type: (...) -> Text
+ """Convert percent-encoded text characters to their normal,
+ human-readable equivalents.
+
+ All characters in the input text must be encodable by
+ *subencoding*. All special characters underlying the values in the
+ percent-encoding must be decodable as *subencoding*. If a
+ non-*subencoding*-valid string is passed, the original text is
+ returned with no changes applied.
+
+ Only called by field-tailored variants, e.g.,
+ :func:`_decode_path_part`, as every percent-encodable part of the
+ URL has characters which should not be percent decoded.
+
+ >>> _percent_decode(u'abc%20def')
+ u'abc def'
+
+ Args:
+ text: Text with percent-encoding present.
+ normalize_case: Whether undecoded percent segments, such as encoded
+ delimiters, should be uppercased, per RFC 3986 Section 2.1.
+ See :func:`_decode_path_part` for an example.
+ subencoding: The name of the encoding underlying the percent-encoding.
+ raise_subencoding_exc: Whether an error in decoding the bytes
+ underlying the percent-decoding should be raised.
+
+ Returns:
+ Text: The percent-decoded version of *text*, decoded by *subencoding*.
+ """
+ try:
+ quoted_bytes = text.encode(subencoding)
+ except UnicodeEncodeError:
+ return text
+
+ bits = quoted_bytes.split(b"%")
+ if len(bits) == 1:
+ return text
+
+ res = [bits[0]]
+ append = res.append
+
+ for item in bits[1:]:
+ hexpair, rest = item[:2], item[2:]
+ try:
+ append(_decode_map[hexpair])
+ append(rest)
+ except KeyError:
+ pair_is_hex = hexpair in _HEX_CHAR_MAP
+ if pair_is_hex or not encode_stray_percents:
+ append(b"%")
+ else:
+ # if it's undecodable, treat as a real percent sign,
+ # which is reserved (because it wasn't in the
+ # context-aware _decode_map passed in), and should
+ # stay in an encoded state.
+ append(b"%25")
+ if normalize_case and pair_is_hex:
+ append(hexpair.upper())
+ append(rest)
+ else:
+ append(item)
+
+ unquoted_bytes = b"".join(res)
+
+ try:
+ return unquoted_bytes.decode(subencoding)
+ except UnicodeDecodeError:
+ if raise_subencoding_exc:
+ raise
+ return text
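+
+# Illustrative behavior of _percent_decode() above, showing how stray (i.e.,
+# non-hex-escaped) percent signs are handled:
+#
+#   >>> _percent_decode(u"100%")
+#   u'100%'
+#   >>> _percent_decode(u"100%", encode_stray_percents=True)
+#   u'100%25'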
+
+
+def _decode_host(host):
+ # type: (Text) -> Text
+ """Decode a host from ASCII-encodable text to IDNA-decoded text. If
+ the host text is not ASCII, it is returned unchanged, as it is
+ presumed that it is already IDNA-decoded.
+
+ Some technical details: _decode_host is built on top of the "idna"
+ package, which has some quirks:
+
+ Capital letters are not valid IDNA2008. The idna package will
+ raise an exception like this on capital letters:
+
+ > idna.core.InvalidCodepoint: Codepoint U+004B at position 1 ... not allowed
+
+ However, if a segment of a host (i.e., something in
+ url.host.split('.')) is already ASCII, idna doesn't perform its
+ usual checks. In fact, for capital letters it automatically
+ lowercases them.
+
+ This check and some other functionality can be bypassed by passing
+ uts46=True to idna.encode/decode. This allows a more permissive and
+ convenient interface. So far it seems like the balanced approach.
+
+ Example output (from idna==2.6):
+
+ >> idna.encode(u'mahmöud.io')
+ 'xn--mahmud-zxa.io'
+ >> idna.encode(u'Mahmöud.io')
+ Traceback (most recent call last):
+ File "<stdin>", line 1, in <module>
+ File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 355, in encode
+ result.append(alabel(label))
+ File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 276, in alabel
+ check_label(label)
+ File "/home/mahmoud/virtualenvs/hyperlink/local/lib/python2.7/site-packages/idna/core.py", line 253, in check_label
+ raise InvalidCodepoint('Codepoint {0} at position {1} of {2} not allowed'.format(_unot(cp_value), pos+1, repr(label)))
+ idna.core.InvalidCodepoint: Codepoint U+004D at position 1 of u'Mahm\xf6ud' not allowed
+ >> idna.encode(u'Mahmoud.io')
+ 'Mahmoud.io'
+
+ # Similar behavior for decodes below
+ >> idna.decode(u'Mahmoud.io')
+    u'mahmoud.io'
+ >> idna.decode(u'Méhmoud.io', uts46=True)
+ u'm\xe9hmoud.io'
+ """ # noqa: E501
+ if not host:
+ return u""
+ try:
+ host_bytes = host.encode("ascii")
+ except UnicodeEncodeError:
+ host_text = host
+ else:
+ try:
+ host_text = idna_decode(host_bytes, uts46=True)
+ except ValueError:
+ # only reached on "narrow" (UCS-2) Python builds <3.4, see #7
+ # NOTE: not going to raise here, because there's no
+ # ambiguity in the IDNA, and the host is still
+ # technically usable
+ host_text = host
+ return host_text
+
+
+def _resolve_dot_segments(path):
+ # type: (Sequence[Text]) -> Sequence[Text]
+ """Normalize the URL path by resolving segments of '.' and '..'. For
+ more details, see `RFC 3986 section 5.2.4, Remove Dot Segments`_.
+
+ Args:
+ path: sequence of path segments in text form
+
+ Returns:
+ A new sequence of path segments with the '.' and '..' elements removed
+ and resolved.
+
+ .. _RFC 3986 section 5.2.4, Remove Dot Segments: https://tools.ietf.org/html/rfc3986#section-5.2.4
+ """ # noqa: E501
+ segs = [] # type: List[Text]
+
+ for seg in path:
+ if seg == u".":
+ pass
+ elif seg == u"..":
+ if segs:
+ segs.pop()
+ else:
+ segs.append(seg)
+
+ if list(path[-1:]) in ([u"."], [u".."]):
+ segs.append(u"")
+
+ return segs
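+
+# Illustrative behavior of _resolve_dot_segments() above; the trailing empty
+# segment preserves the trailing slash implied by a final "." or "..":
+#
+#   >>> _resolve_dot_segments([u"a", u"b", u"..", u"c", u"."])
+#   [u'a', u'c', u'']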
+
+
+def parse_host(host):
+ # type: (Text) -> Tuple[Optional[AddressFamily], Text]
+ """Parse the host into a tuple of ``(family, host)``, where family
+ is the appropriate :mod:`socket` module constant when the host is
+ an IP address. Family is ``None`` when the host is not an IP.
+
+    Will raise :class:`URLParseError` on invalid IPv6 addresses.
+
+ Returns:
+ family (socket constant or None), host (string)
+
+ >>> import socket
+ >>> parse_host('googlewebsite.com') == (None, 'googlewebsite.com')
+ True
+ >>> parse_host('::1') == (socket.AF_INET6, '::1')
+ True
+ >>> parse_host('192.168.1.1') == (socket.AF_INET, '192.168.1.1')
+ True
+ """
+ if not host:
+ return None, u""
+
+ if u":" in host:
+ try:
+ inet_pton(AF_INET6, host)
+ except socket.error as se:
+ raise URLParseError("invalid IPv6 host: %r (%r)" % (host, se))
+ except UnicodeEncodeError:
+ pass # TODO: this can't be a real host right?
+ else:
+ family = AF_INET6 # type: Optional[AddressFamily]
+ else:
+ try:
+ inet_pton(AF_INET, host)
+ except (socket.error, UnicodeEncodeError):
+ family = None # not an IP
+ else:
+ family = AF_INET
+
+ return family, host
+
+
+class URL(object):
+    r"""From blogs to billboards, URLs are so common that it's easy to
+ overlook their complexity and power. With hyperlink's
+ :class:`URL` type, working with URLs doesn't have to be hard.
+
+ URLs are made of many parts. Most of these parts are officially
+ named in `RFC 3986`_ and this diagram may prove handy in identifying
+ them::
+
+ foo://user:pass@example.com:8042/over/there?name=ferret#nose
+ \_/ \_______/ \_________/ \__/\_________/ \_________/ \__/
+ | | | | | | |
+ scheme userinfo host port path query fragment
+
+ While :meth:`~URL.from_text` is used for parsing whole URLs, the
+ :class:`URL` constructor builds a URL from the individual
+ components, like so::
+
+ >>> from hyperlink import URL
+ >>> url = URL(scheme=u'https', host=u'example.com', path=[u'hello', u'world'])
+ >>> print(url.to_text())
+ https://example.com/hello/world
+
+ The constructor runs basic type checks. All strings are expected
+ to be text (:class:`str` in Python 3, :class:`unicode` in Python 2). All
+ arguments are optional, defaulting to appropriately empty values. A full
+ list of constructor arguments is below.
+
+ Args:
+ scheme: The text name of the scheme.
+ host: The host portion of the network location
+ port: The port part of the network location. If ``None`` or no port is
+ passed, the port will default to the default port of the scheme, if
+ it is known. See the ``SCHEME_PORT_MAP`` and
+            :func:`register_scheme` for more info.
+ path: A tuple of strings representing the slash-separated parts of the
+ path, each percent-encoded.
+        query: The query parameters, as a dictionary or as a sequence of
+ percent-encoded key-value pairs.
+ fragment: The fragment part of the URL.
+ rooted: A rooted URL is one which indicates an absolute path.
+ This is True on any URL that includes a host, or any relative URL
+ that starts with a slash.
+ userinfo: The username or colon-separated username:password pair.
+ uses_netloc: Indicates whether ``://`` (the "netloc separator") will
+ appear to separate the scheme from the *path* in cases where no
+ host is present.
+ Setting this to ``True`` is a non-spec-compliant affordance for the
+ common practice of having URIs that are *not* URLs (cannot have a
+ 'host' part) but nevertheless use the common ``://`` idiom that
+ most people associate with URLs; e.g. ``message:`` URIs like
+ ``message://message-id`` being equivalent to ``message:message-id``.
+ This may be inferred based on the scheme depending on whether
+ :func:`register_scheme` has been used to register the scheme and
+ should not be passed directly unless you know the scheme works like
+ this and you know it has not been registered.
+
+ All of these parts are also exposed as read-only attributes of :class:`URL`
+ instances, along with several useful methods.
+
+ .. _RFC 3986: https://tools.ietf.org/html/rfc3986
+ .. _RFC 3987: https://tools.ietf.org/html/rfc3987
+ """ # noqa: E501
+
+ def __init__(
+ self,
+ scheme=None, # type: Optional[Text]
+ host=None, # type: Optional[Text]
+ path=(), # type: Iterable[Text]
+ query=(), # type: QueryParameters
+ fragment=u"", # type: Text
+ port=None, # type: Optional[int]
+ rooted=None, # type: Optional[bool]
+ userinfo=u"", # type: Text
+ uses_netloc=None, # type: Optional[bool]
+ ):
+ # type: (...) -> None
+ if host is not None and scheme is None:
+ scheme = u"http" # TODO: why
+ if port is None and scheme is not None:
+ port = SCHEME_PORT_MAP.get(scheme)
+ if host and query and not path:
+ # per RFC 3986 6.2.3, "a URI that uses the generic syntax
+ # for authority with an empty path should be normalized to
+ # a path of '/'."
+ path = (u"",)
+
+ # Now that we're done detecting whether they were passed, we can set
+ # them to their defaults:
+ if scheme is None:
+ scheme = u""
+ if host is None:
+ host = u""
+ if rooted is None:
+ rooted = bool(host)
+
+ # Set attributes.
+ self._scheme = _textcheck("scheme", scheme)
+ if self._scheme:
+ if not _SCHEME_RE.match(self._scheme):
+ raise ValueError(
+ 'invalid scheme: %r. Only alphanumeric, "+",'
+                    ' "-", and "." allowed. Did you mean to call'
+ " %s.from_text()?" % (self._scheme, self.__class__.__name__)
+ )
+
+ _, self._host = parse_host(_textcheck("host", host, "/?#@"))
+ if isinstance(path, Text):
+ raise TypeError(
+ "expected iterable of text for path, not: %r" % (path,)
+ )
+ self._path = tuple(
+ (_textcheck("path segment", segment, "/?#") for segment in path)
+ )
+ self._query = tuple(
+ (
+ _textcheck("query parameter name", k, "&=#"),
+ _textcheck("query parameter value", v, "&#", nullable=True),
+ )
+ for k, v in iter_pairs(query)
+ )
+ self._fragment = _textcheck("fragment", fragment)
+ self._port = _typecheck("port", port, int, NoneType)
+ self._rooted = _typecheck("rooted", rooted, bool)
+ self._userinfo = _textcheck("userinfo", userinfo, "/?#@")
+
+ if uses_netloc is None:
+ uses_netloc = scheme_uses_netloc(self._scheme, uses_netloc)
+ self._uses_netloc = _typecheck(
+ "uses_netloc", uses_netloc, bool, NoneType
+ )
+ will_have_authority = self._host or (
+ self._port and self._port != SCHEME_PORT_MAP.get(scheme)
+ )
+ if will_have_authority:
+ # fixup for rooted consistency; if there's any 'authority'
+ # represented in the textual URL, then the path must be rooted, and
+ # we're definitely using a netloc (there must be a ://).
+ self._rooted = True
+ self._uses_netloc = True
+ if (not self._rooted) and self.path[:1] == (u"",):
+ self._rooted = True
+ self._path = self._path[1:]
+ if not will_have_authority and self._path and not self._rooted:
+ # If, after fixing up the path, there *is* a path and it *isn't*
+ # rooted, then we are definitely not using a netloc; if we did, it
+ # would make the path (erroneously) look like a hostname.
+ self._uses_netloc = False
+
+ def get_decoded_url(self, lazy=False):
+ # type: (bool) -> DecodedURL
+ try:
+ return self._decoded_url
+ except AttributeError:
+ self._decoded_url = DecodedURL(self, lazy=lazy) # type: DecodedURL
+ return self._decoded_url
+
+ @property
+ def scheme(self):
+ # type: () -> Text
+ """The scheme is a string, and the first part of an absolute URL, the
+ part before the first colon, and the part which defines the
+ semantics of the rest of the URL. Examples include "http",
+ "https", "ssh", "file", "mailto", and many others. See
+ :func:`~hyperlink.register_scheme()` for more info.
+ """
+ return self._scheme
+
+ @property
+ def host(self):
+ # type: () -> Text
+ """The host is a string, and the second standard part of an absolute
+ URL. When present, a valid host must be a domain name, or an
+ IP (v4 or v6). It occurs before the first slash, or the second
+ colon, if a :attr:`~hyperlink.URL.port` is provided.
+ """
+ return self._host
+
+ @property
+ def port(self):
+ # type: () -> Optional[int]
+ """The port is an integer that is commonly used in connecting to the
+ :attr:`host`, and almost never appears without it.
+
+ When not present in the original URL, this attribute defaults
+ to the scheme's default port. If the scheme's default port is
+ not known, and the port is not provided, this attribute will
+ be set to None.
+
+ >>> URL.from_text(u'http://example.com/pa/th').port
+ 80
+ >>> URL.from_text(u'foo://example.com/pa/th').port
+ >>> URL.from_text(u'foo://example.com:8042/pa/th').port
+ 8042
+
+ .. note::
+
+            Per the standard, when the port is the same as the scheme's
+ default port, it will be omitted in the text URL.
+ """
+ return self._port
+
+ @property
+ def path(self):
+ # type: () -> Sequence[Text]
+ """A tuple of strings, created by splitting the slash-separated
+ hierarchical path. Started by the first slash after the host,
+ terminated by a "?", which indicates the start of the
+ :attr:`~hyperlink.URL.query` string.
+ """
+ return self._path
+
+ @property
+ def query(self):
+ # type: () -> QueryPairs
+ """Tuple of pairs, created by splitting the ampersand-separated
+ mapping of keys and optional values representing
+ non-hierarchical data used to identify the resource. Keys are
+ always strings. Values are strings when present, or None when
+ missing.
+
+ For more operations on the mapping, see
+ :meth:`~hyperlink.URL.get()`, :meth:`~hyperlink.URL.add()`,
+ :meth:`~hyperlink.URL.set()`, and
+ :meth:`~hyperlink.URL.delete()`.
+ """
+ return self._query
+
+ @property
+ def fragment(self):
+ # type: () -> Text
+ """A string, the last part of the URL, indicated by the first "#"
+ after the :attr:`~hyperlink.URL.path` or
+ :attr:`~hyperlink.URL.query`. Enables indirect identification
+ of a secondary resource, like an anchor within an HTML page.
+ """
+ return self._fragment
+
+ @property
+ def rooted(self):
+ # type: () -> bool
+ """Whether or not the path starts with a forward slash (``/``).
+
+ This is taken from the terminology in the BNF grammar,
+        specifically the "path-rootless" rule, since "absolute path"
+ and "absolute URI" are somewhat ambiguous. :attr:`path` does
+ not contain the implicit prefixed ``"/"`` since that is
+ somewhat awkward to work with.
+ """
+ return self._rooted
+
+ @property
+ def userinfo(self):
+ # type: () -> Text
+ """The colon-separated string forming the username-password
+ combination.
+ """
+ return self._userinfo
+
+ @property
+ def uses_netloc(self):
+ # type: () -> Optional[bool]
+ """
+ Indicates whether ``://`` (the "netloc separator") will appear to
+ separate the scheme from the *path* in cases where no host is present.
+ """
+ return self._uses_netloc
+
+ @property
+ def user(self):
+ # type: () -> Text
+ """
+ The user portion of :attr:`~hyperlink.URL.userinfo`.
+ """
+ return self.userinfo.split(u":")[0]
+
+ def authority(self, with_password=False, **kw):
+ # type: (bool, Any) -> Text
+ """Compute and return the appropriate host/port/userinfo combination.
+
+ >>> url = URL.from_text(u'http://user:pass@localhost:8080/a/b?x=y')
+ >>> url.authority()
+ u'user:@localhost:8080'
+ >>> url.authority(with_password=True)
+ u'user:pass@localhost:8080'
+
+ Args:
+            with_password: Whether the return value of this method includes the
+ password in the URL, if it is set.
+ Defaults to False.
+
+ Returns:
+ Text: The authority (network location and user information) portion
+ of the URL.
+ """
+ # first, a bit of twisted compat
+ with_password = kw.pop("includeSecrets", with_password)
+ if kw:
+ raise TypeError("got unexpected keyword arguments: %r" % kw.keys())
+ host = self.host
+ if ":" in host:
+ hostport = ["[" + host + "]"]
+ else:
+ hostport = [self.host]
+ if self.port != SCHEME_PORT_MAP.get(self.scheme):
+ hostport.append(Text(self.port))
+ authority = []
+ if self.userinfo:
+ userinfo = self.userinfo
+ if not with_password and u":" in userinfo:
+ userinfo = userinfo[: userinfo.index(u":") + 1]
+ authority.append(userinfo)
+ authority.append(u":".join(hostport))
+ return u"@".join(authority)
+
+ def __eq__(self, other):
+ # type: (Any) -> bool
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ for attr in [
+ "scheme",
+ "userinfo",
+ "host",
+ "query",
+ "fragment",
+ "port",
+ "uses_netloc",
+ "rooted",
+ ]:
+ if getattr(self, attr) != getattr(other, attr):
+ return False
+ if self.path == other.path or (
+ self.path in _ROOT_PATHS and other.path in _ROOT_PATHS
+ ):
+ return True
+ return False
+
+ def __ne__(self, other):
+ # type: (Any) -> bool
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ return not self.__eq__(other)
+
+ def __hash__(self):
+ # type: () -> int
+ return hash(
+ (
+ self.__class__,
+ self.scheme,
+ self.userinfo,
+ self.host,
+ self.path,
+ self.query,
+ self.fragment,
+ self.port,
+ self.rooted,
+ self.uses_netloc,
+ )
+ )
+
+ @property
+ def absolute(self):
+ # type: () -> bool
+ """Whether or not the URL is "absolute". Absolute URLs are complete
+ enough to resolve to a network resource without being relative
+ to a base URI.
+
+ >>> URL.from_text(u'http://wikipedia.org/').absolute
+ True
+ >>> URL.from_text(u'?a=b&c=d').absolute
+ False
+
+ Absolute URLs must have both a scheme and a host set.
+ """
+ return bool(self.scheme and self.host)
+
+ def replace(
+ self,
+ scheme=_UNSET, # type: Optional[Text]
+ host=_UNSET, # type: Optional[Text]
+ path=_UNSET, # type: Iterable[Text]
+ query=_UNSET, # type: QueryParameters
+ fragment=_UNSET, # type: Text
+ port=_UNSET, # type: Optional[int]
+ rooted=_UNSET, # type: Optional[bool]
+ userinfo=_UNSET, # type: Text
+ uses_netloc=_UNSET, # type: Optional[bool]
+ ):
+ # type: (...) -> URL
+ """:class:`URL` objects are immutable, which means that attributes
+ are designed to be set only once, at construction. Instead of
+ modifying an existing URL, one simply creates a copy with the
+ desired changes.
+
+ If any of the following arguments is omitted, it defaults to
+ the value on the current URL.
+
+ Args:
+ scheme: The text name of the scheme.
+ host: The host portion of the network location.
+ path: A tuple of strings representing the slash-separated parts of
+ the path.
+            query: The query parameters, as a dictionary or as a sequence of
+ key-value pairs.
+ fragment: The fragment part of the URL.
+ port: The port part of the network location.
+ rooted: Whether or not the path begins with a slash.
+ userinfo: The username or colon-separated username:password pair.
+ uses_netloc: Indicates whether ``://`` (the "netloc separator")
+ will appear to separate the scheme from the *path* in cases
+ where no host is present.
+ Setting this to ``True`` is a non-spec-compliant affordance for
+ the common practice of having URIs that are *not* URLs (cannot
+ have a 'host' part) but nevertheless use the common ``://``
+ idiom that most people associate with URLs; e.g. ``message:``
+ URIs like ``message://message-id`` being equivalent to
+ ``message:message-id``.
+ This may be inferred based on the scheme depending on whether
+ :func:`register_scheme` has been used to register the scheme
+ and should not be passed directly unless you know the scheme
+ works like this and you know it has not been registered.
+
+ Returns:
+ URL: A copy of the current :class:`URL`, with new values for
+ parameters passed.
+ """
+ if scheme is not _UNSET and scheme != self.scheme:
+ # when changing schemes, reset the explicit uses_netloc preference
+ # to honor the new scheme.
+ uses_netloc = None
+ return self.__class__(
+ scheme=_optional(scheme, self.scheme),
+ host=_optional(host, self.host),
+ path=_optional(path, self.path),
+ query=_optional(query, self.query),
+ fragment=_optional(fragment, self.fragment),
+ port=_optional(port, self.port),
+ rooted=_optional(rooted, self.rooted),
+ userinfo=_optional(userinfo, self.userinfo),
+ uses_netloc=_optional(uses_netloc, self.uses_netloc),
+ )
+
+ @classmethod
+ def from_text(cls, text):
+ # type: (Text) -> URL
+ """Whereas the :class:`URL` constructor is useful for constructing
+ URLs from parts, :meth:`~URL.from_text` supports parsing whole
+ URLs from their string form::
+
+ >>> URL.from_text(u'http://example.com')
+ URL.from_text(u'http://example.com')
+ >>> URL.from_text(u'?a=b&x=y')
+ URL.from_text(u'?a=b&x=y')
+
+ As you can see above, it's also used as the :func:`repr` of
+ :class:`URL` objects. The natural counterpart to
+ :func:`~URL.to_text()`. This method only accepts *text*, so be
+ sure to decode those bytestrings.
+
+ Args:
+ text: A valid URL string.
+
+ Returns:
+ URL: The structured object version of the parsed string.
+
+ .. note::
+
+ Somewhat unexpectedly, URLs are a far more permissive
+ format than most would assume. Many strings which don't
+ look like URLs are still valid URLs. As a result, this
+ method only raises :class:`URLParseError` on invalid port
+ and IPv6 values in the host portion of the URL.
+ """
+ um = _URL_RE.match(_textcheck("text", text))
+ if um is None:
+ raise URLParseError("could not parse url: %r" % text)
+ gs = um.groupdict()
+
+ au_text = gs["authority"] or u""
+ au_m = _AUTHORITY_RE.match(au_text)
+ if au_m is None:
+ raise URLParseError(
+ "invalid authority %r in url: %r" % (au_text, text)
+ )
+ au_gs = au_m.groupdict()
+ if au_gs["bad_host"]:
+ raise URLParseError(
+ "invalid host %r in url: %r" % (au_gs["bad_host"], text)
+ )
+
+ userinfo = au_gs["userinfo"] or u""
+
+ host = au_gs["ipv6_host"] or au_gs["plain_host"]
+ port = au_gs["port"]
+ if port is not None:
+ try:
+ port = int(port) # type: ignore[assignment] # FIXME, see below
+ except ValueError:
+ if not port: # TODO: excessive?
+ raise URLParseError("port must not be empty: %r" % au_text)
+ raise URLParseError("expected integer for port, not %r" % port)
+
+ scheme = gs["scheme"] or u""
+ fragment = gs["fragment"] or u""
+ uses_netloc = bool(gs["_netloc_sep"])
+
+ if gs["path"]:
+ path = tuple(gs["path"].split(u"/"))
+ if not path[0]:
+ path = path[1:]
+ rooted = True
+ else:
+ rooted = False
+ else:
+ path = ()
+ rooted = bool(au_text)
+ if gs["query"]:
+ query = tuple(
+ (
+ qe.split(u"=", 1) # type: ignore[misc]
+ if u"=" in qe
+ else (qe, None)
+ )
+ for qe in gs["query"].split(u"&")
+ ) # type: QueryPairs
+ else:
+ query = ()
+ return cls(
+ scheme,
+ host,
+ path,
+ query,
+ fragment,
+ port, # type: ignore[arg-type] # FIXME, see above
+ rooted,
+ userinfo,
+ uses_netloc,
+ )
+
+ def normalize(
+ self,
+ scheme=True,
+ host=True,
+ path=True,
+ query=True,
+ fragment=True,
+ userinfo=True,
+ percents=True,
+ ):
+ # type: (bool, bool, bool, bool, bool, bool, bool) -> URL
+ """Return a new URL object with several standard normalizations
+ applied:
+
+ * Decode unreserved characters (`RFC 3986 2.3`_)
+ * Uppercase remaining percent-encoded octets (`RFC 3986 2.1`_)
+ * Convert scheme and host casing to lowercase (`RFC 3986 3.2.2`_)
+ * Resolve any "." and ".." references in the path (`RFC 3986 6.2.2.3`_)
+ * Ensure an ending slash on URLs with an empty path (`RFC 3986 6.2.3`_)
+ * Encode any stray percent signs (`%`) in percent-encoded
+ fields (path, query, fragment, userinfo) (`RFC 3986 2.4`_)
+
+ All are applied by default, but normalizations can be disabled
+ per-part by passing `False` for that part's corresponding
+ name.
+
+ Args:
+ scheme: Convert the scheme to lowercase
+ host: Convert the host to lowercase
+ path: Normalize the path (see above for details)
+ query: Normalize the query string
+ fragment: Normalize the fragment
+ userinfo: Normalize the userinfo
+ percents: Encode isolated percent signs for any percent-encoded
+ fields which are being normalized (defaults to `True`).
+
+ >>> url = URL.from_text(u'Http://example.COM/a/../b/./c%2f?%61%')
+ >>> print(url.normalize().to_text())
+ http://example.com/b/c%2F?a%25
+
+ .. _RFC 3986 3.2.2: https://tools.ietf.org/html/rfc3986#section-3.2.2
+ .. _RFC 3986 2.3: https://tools.ietf.org/html/rfc3986#section-2.3
+ .. _RFC 3986 2.1: https://tools.ietf.org/html/rfc3986#section-2.1
+ .. _RFC 3986 6.2.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.2.3
+ .. _RFC 3986 6.2.3: https://tools.ietf.org/html/rfc3986#section-6.2.3
+ .. _RFC 3986 2.4: https://tools.ietf.org/html/rfc3986#section-2.4
+ """ # noqa: E501
+ kw = {} # type: Dict[str, Any]
+ if scheme:
+ kw["scheme"] = self.scheme.lower()
+ if host:
+ kw["host"] = self.host.lower()
+
+ def _dec_unres(target):
+ # type: (Text) -> Text
+ return _decode_unreserved(
+ target, normalize_case=True, encode_stray_percents=percents
+ )
+
+ if path:
+ if self.path:
+ kw["path"] = [
+ _dec_unres(p) for p in _resolve_dot_segments(self.path)
+ ]
+ else:
+ kw["path"] = (u"",)
+ if query:
+ kw["query"] = [
+ (_dec_unres(k), _dec_unres(v) if v else v)
+ for k, v in self.query
+ ]
+ if fragment:
+ kw["fragment"] = _dec_unres(self.fragment)
+ if userinfo:
+ kw["userinfo"] = u":".join(
+ [_dec_unres(p) for p in self.userinfo.split(":", 1)]
+ )
+
+ return self.replace(**kw)
+
+ def child(self, *segments):
+ # type: (Text) -> URL
+ """Make a new :class:`URL` where the given path segments are a child
+ of this URL, preserving other parts of the URL, including the
+ query string and fragment.
+
+ For example::
+
+ >>> url = URL.from_text(u'http://localhost/a/b?x=y')
+ >>> child_url = url.child(u"c", u"d")
+ >>> child_url.to_text()
+ u'http://localhost/a/b/c/d?x=y'
+
+ Args:
+ segments: Additional parts to be joined and added to the path, like
+ :func:`os.path.join`. Special characters in segments will be
+ percent encoded.
+
+ Returns:
+ URL: A copy of the current URL with the extra path segments.
+ """
+ if not segments:
+ return self
+
+ segments = [ # type: ignore[assignment] # variable is tuple
+ _textcheck("path segment", s) for s in segments
+ ]
+ new_path = tuple(self.path)
+ if self.path and self.path[-1] == u"":
+ new_path = new_path[:-1]
+ new_path += tuple(_encode_path_parts(segments, maximal=False))
+ return self.replace(path=new_path)
+
+ def sibling(self, segment):
+ # type: (Text) -> URL
+ """Make a new :class:`URL` with a single path segment that is a
+ sibling of this URL path.
+
+ Args:
+ segment: A single path segment.
+
+ Returns:
+ URL: A copy of the current URL with the last path segment
+ replaced by *segment*. Special characters such as
+ ``/?#`` will be percent encoded.
+ """
+ _textcheck("path segment", segment)
+ new_path = tuple(self.path)[:-1] + (_encode_path_part(segment),)
+ return self.replace(path=new_path)
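+
+    # Illustrative use of sibling() above:
+    #
+    #   >>> URL.from_text(u"http://example.com/a/b").sibling(u"c").to_text()
+    #   u'http://example.com/a/c'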
+
+ def click(self, href=u""):
+ # type: (Union[Text, URL]) -> URL
+ """Resolve the given URL relative to this URL.
+
+ The resulting URI should match what a web browser would
+ generate if you visited the current URL and clicked on *href*.
+
+ >>> url = URL.from_text(u'http://blog.hatnote.com/')
+ >>> url.click(u'/post/155074058790').to_text()
+ u'http://blog.hatnote.com/post/155074058790'
+ >>> url = URL.from_text(u'http://localhost/a/b/c/')
+ >>> url.click(u'../d/./e').to_text()
+ u'http://localhost/a/b/d/e'
+
+ Args (Text):
+ href: A string representing a clicked URL.
+
+ Return:
+ A copy of the current URL with navigation logic applied.
+
+ For more information, see `RFC 3986 section 5`_.
+
+ .. _RFC 3986 section 5: https://tools.ietf.org/html/rfc3986#section-5
+ """
+ if href:
+ if isinstance(href, URL):
+ clicked = href
+ else:
+ # TODO: This error message is not completely accurate,
+ # as URL objects are now also valid, but Twisted's
+ # test suite (wrongly) relies on this exact message.
+ _textcheck("relative URL", href)
+ clicked = URL.from_text(href)
+ if clicked.absolute:
+ return clicked
+ else:
+ clicked = self
+
+ query = clicked.query
+ if clicked.scheme and not clicked.rooted:
+ # Schemes with relative paths are not well-defined. RFC 3986 calls
+ # them a "loophole in prior specifications" that should be avoided,
+ # or supported only for backwards compatibility.
+ raise NotImplementedError(
+ "absolute URI with rootless path: %r" % (href,)
+ )
+ else:
+ if clicked.rooted:
+ path = clicked.path
+ elif clicked.path:
+ path = tuple(self.path)[:-1] + tuple(clicked.path)
+ else:
+ path = self.path
+ if not query:
+ query = self.query
+ return self.replace(
+ scheme=clicked.scheme or self.scheme,
+ host=clicked.host or self.host,
+ port=clicked.port or self.port,
+ path=_resolve_dot_segments(path),
+ query=query,
+ fragment=clicked.fragment,
+ )
+
+ def to_uri(self):
+ # type: () -> URL
+ u"""Make a new :class:`URL` instance with all non-ASCII characters
+ appropriately percent-encoded. This is useful to do in preparation
+ for sending a :class:`URL` over a network protocol.
+
+ For example::
+
+ >>> URL.from_text(u'https://ايران.com/foo⇧bar/').to_uri()
+ URL.from_text(u'https://xn--mgba3a4fra.com/foo%E2%87%A7bar/')
+
+ Returns:
+ URL: A new instance with its path segments, query parameters, and
+ hostname encoded, so that they are all in the standard
+ US-ASCII range.
+ """
+ new_userinfo = u":".join(
+ [_encode_userinfo_part(p) for p in self.userinfo.split(":", 1)]
+ )
+ new_path = _encode_path_parts(
+ self.path, has_scheme=bool(self.scheme), rooted=False, maximal=True
+ )
+ new_host = (
+ self.host
+ if not self.host
+ else idna_encode(self.host, uts46=True).decode("ascii")
+ )
+ return self.replace(
+ userinfo=new_userinfo,
+ host=new_host,
+ path=new_path,
+ query=tuple(
+ [
+ (
+ _encode_query_key(k, maximal=True),
+ _encode_query_value(v, maximal=True)
+ if v is not None
+ else None,
+ )
+ for k, v in self.query
+ ]
+ ),
+ fragment=_encode_fragment_part(self.fragment, maximal=True),
+ )
+
+ def to_iri(self):
+ # type: () -> URL
+ u"""Make a new :class:`URL` instance with all but a few reserved
+ characters decoded into human-readable format.
+
+ Percent-encoded Unicode and IDNA-encoded hostnames are
+ decoded, like so::
+
+ >>> url = URL.from_text(u'https://xn--mgba3a4fra.example.com/foo%E2%87%A7bar/')
+ >>> print(url.to_iri().to_text())
+ https://ايران.example.com/foo⇧bar/
+
+ .. note::
+
+ As a general Python issue, "narrow" (UCS-2) builds of
+ Python may not be able to fully decode certain URLs, and
+            in those cases, this method will return a best-effort,
+            partially-decoded URL which is still valid. This issue
+            does not affect Python builds 3.4 and later.
+
+ Returns:
+ URL: A new instance with its path segments, query parameters, and
+ hostname decoded for display purposes.
+ """ # noqa: E501
+ new_userinfo = u":".join(
+ [_decode_userinfo_part(p) for p in self.userinfo.split(":", 1)]
+ )
+ host_text = _decode_host(self.host)
+
+ return self.replace(
+ userinfo=new_userinfo,
+ host=host_text,
+ path=[_decode_path_part(segment) for segment in self.path],
+ query=tuple(
+ (
+ _decode_query_key(k),
+ _decode_query_value(v) if v is not None else None,
+ )
+ for k, v in self.query
+ ),
+ fragment=_decode_fragment_part(self.fragment),
+ )
+
+ def to_text(self, with_password=False):
+ # type: (bool) -> Text
+ """Render this URL to its textual representation.
+
+ By default, the URL text will *not* include a password, if one
+ is set. RFC 3986 considers using URLs to represent such
+ sensitive information as deprecated. Quoting from RFC 3986,
+ `section 3.2.1`:
+
+ "Applications should not render as clear text any data after the
+ first colon (":") character found within a userinfo subcomponent
+ unless the data after the colon is the empty string (indicating no
+ password)."
+
+ Args (bool):
+ with_password: Whether or not to include the password in the URL
+ text. Defaults to False.
+
+ Returns:
+ Text: The serialized textual representation of this URL, such as
+ ``u"http://example.com/some/path?some=query"``.
+
+ The natural counterpart to :class:`URL.from_text()`.
+
+ .. _section 3.2.1: https://tools.ietf.org/html/rfc3986#section-3.2.1
+ """
+ scheme = self.scheme
+ authority = self.authority(with_password)
+ path = "/".join(
+ _encode_path_parts(
+ self.path,
+ rooted=self.rooted,
+ has_scheme=bool(scheme),
+ has_authority=bool(authority),
+ maximal=False,
+ )
+ )
+ query_parts = []
+ for k, v in self.query:
+ if v is None:
+ query_parts.append(_encode_query_key(k, maximal=False))
+ else:
+ query_parts.append(
+ u"=".join(
+ (
+ _encode_query_key(k, maximal=False),
+ _encode_query_value(v, maximal=False),
+ )
+ )
+ )
+ query_string = u"&".join(query_parts)
+
+ fragment = self.fragment
+
+ parts = [] # type: List[Text]
+ _add = parts.append
+ if scheme:
+ _add(scheme)
+ _add(":")
+ if authority:
+ _add("//")
+ _add(authority)
+ elif scheme and path[:2] != "//" and self.uses_netloc:
+ _add("//")
+ if path:
+ if scheme and authority and path[:1] != "/":
+ _add("/") # relpaths with abs authorities auto get '/'
+ _add(path)
+ if query_string:
+ _add("?")
+ _add(query_string)
+ if fragment:
+ _add("#")
+ _add(fragment)
+ return u"".join(parts)
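+
+    # Illustrative use of to_text() above, showing the default password
+    # redaction ("secret" is a hypothetical password used for the example):
+    #
+    #   >>> u = URL.from_text(u"http://user:secret@example.com/")
+    #   >>> u.to_text()
+    #   u'http://user:@example.com/'
+    #   >>> u.to_text(with_password=True)
+    #   u'http://user:secret@example.com/'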
+
+ def __repr__(self):
+ # type: () -> str
+        """Convert this URL to a representation that shows all of its
+ constituent parts, as well as being a valid argument to
+ :func:`eval`.
+ """
+ return "%s.from_text(%r)" % (self.__class__.__name__, self.to_text())
+
+ def _to_bytes(self):
+ # type: () -> bytes
+ """
+ Allows for direct usage of URL objects with libraries like
+ requests, which automatically stringify URL parameters. See
+ issue #49.
+ """
+ return self.to_uri().to_text().encode("ascii")
+
+ if PY2:
+ __str__ = _to_bytes
+ __unicode__ = to_text
+ else:
+ __bytes__ = _to_bytes
+ __str__ = to_text
+
+ # # Begin Twisted Compat Code
+ asURI = to_uri
+ asIRI = to_iri
+
+ @classmethod
+ def fromText(cls, s):
+ # type: (Text) -> URL
+ return cls.from_text(s)
+
+ def asText(self, includeSecrets=False):
+ # type: (bool) -> Text
+ return self.to_text(with_password=includeSecrets)
+
+ def __dir__(self):
+ # type: () -> Sequence[Text]
+ try:
+ ret = object.__dir__(self)
+ except AttributeError:
+ # object.__dir__ == AttributeError # pdw for py2
+ ret = dir(self.__class__) + list(self.__dict__.keys())
+ ret = sorted(set(ret) - set(["fromText", "asURI", "asIRI", "asText"]))
+ return ret
+
+ # # End Twisted Compat Code
+
+ def add(self, name, value=None):
+ # type: (Text, Optional[Text]) -> URL
+ """Make a new :class:`URL` instance with a given query argument,
+ *name*, added to it with the value *value*, like so::
+
+ >>> URL.from_text(u'https://example.com/?x=y').add(u'x')
+ URL.from_text(u'https://example.com/?x=y&x')
+ >>> URL.from_text(u'https://example.com/?x=y').add(u'x', u'z')
+ URL.from_text(u'https://example.com/?x=y&x=z')
+
+ Args:
+ name: The name of the query parameter to add.
+ The part before the ``=``.
+ value: The value of the query parameter to add.
+ The part after the ``=``.
+ Defaults to ``None``, meaning no value.
+
+ Returns:
+ URL: A new :class:`URL` instance with the parameter added.
+ """
+ return self.replace(query=self.query + ((name, value),))
+
+ def set(self, name, value=None):
+ # type: (Text, Optional[Text]) -> URL
+ """Make a new :class:`URL` instance with the query parameter *name*
+        set to *value*. All existing occurrences, if any, are replaced
+        by a single name-value pair.
+
+ >>> URL.from_text(u'https://example.com/?x=y').set(u'x')
+ URL.from_text(u'https://example.com/?x')
+ >>> URL.from_text(u'https://example.com/?x=y').set(u'x', u'z')
+ URL.from_text(u'https://example.com/?x=z')
+
+ Args:
+ name: The name of the query parameter to set.
+ The part before the ``=``.
+ value: The value of the query parameter to set.
+ The part after the ``=``.
+ Defaults to ``None``, meaning no value.
+
+ Returns:
+ URL: A new :class:`URL` instance with the parameter set.
+ """
+ # Preserve the original position of the query key in the list
+ q = [(k, v) for (k, v) in self.query if k != name]
+ idx = next(
+ (i for (i, (k, v)) in enumerate(self.query) if k == name), -1
+ )
+ q[idx:idx] = [(name, value)]
+ return self.replace(query=q)
+
+ def get(self, name):
+ # type: (Text) -> List[Optional[Text]]
+ """Get a list of values for the given query parameter, *name*::
+
+ >>> url = URL.from_text(u'?x=1&x=2')
+ >>> url.get('x')
+ [u'1', u'2']
+ >>> url.get('y')
+ []
+
+ If the given *name* is not set, an empty list is returned. A
+ list is always returned, and this method raises no exceptions.
+
+ Args:
+ name: The name of the query parameter to get.
+
+ Returns:
+ List[Optional[Text]]: A list of all the values associated with the
+ key, in string form.
+ """
+ return [value for (key, value) in self.query if name == key]
+
+ def remove(
+ self,
+ name, # type: Text
+ value=_UNSET, # type: Text
+ limit=None, # type: Optional[int]
+ ):
+ # type: (...) -> URL
+ """Make a new :class:`URL` instance with occurrences of the query
+ parameter *name* removed, or, if *value* is set, parameters
+ matching *name* and *value*. No exception is raised if the
+ parameter is not already set.
+
+ Args:
+ name: The name of the query parameter to remove.
+ value: Optional value to additionally filter on.
+ Setting this removes query parameters which match both name
+ and value.
+ limit: Optional maximum number of parameters to remove.
+
+ Returns:
+ URL: A new :class:`URL` instance with the parameter removed.
+ """
+ if limit is None:
+ if value is _UNSET:
+ nq = [(k, v) for (k, v) in self.query if k != name]
+ else:
+ nq = [
+ (k, v)
+ for (k, v) in self.query
+ if not (k == name and v == value)
+ ]
+ else:
+ nq, removed_count = [], 0
+
+ for k, v in self.query:
+ if (
+ k == name
+ and (value is _UNSET or v == value)
+ and removed_count < limit
+ ):
+ removed_count += 1 # drop it
+ else:
+ nq.append((k, v)) # keep it
+
+ return self.replace(query=nq)
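+
+    # Illustrative use of remove() above:
+    #
+    #   >>> u = URL.from_text(u"https://example.com/?a=1&b=2&a=3")
+    #   >>> u.remove(u"a").to_text()
+    #   u'https://example.com/?b=2'
+    #   >>> u.remove(u"a", value=u"1").to_text()
+    #   u'https://example.com/?b=2&a=3'
+    #   >>> u.remove(u"a", limit=1).to_text()
+    #   u'https://example.com/?b=2&a=3'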
+
+
+EncodedURL = URL # An alias better describing what the URL really is
+
+_EMPTY_URL = URL()
+
+
+def _replace_plus(text):
+ # type: (Text) -> Text
+ return text.replace("+", "%20")
+
+
+def _no_op(text):
+ # type: (Text) -> Text
+ return text
+
+
+class DecodedURL(object):
+ """
+ :class:`DecodedURL` is a type designed to act as a higher-level
+ interface to :class:`URL` and the recommended type for most
+ operations. By analogy, :class:`DecodedURL` is the
+ :class:`unicode` to URL's :class:`bytes`.
+
+ :class:`DecodedURL` automatically handles encoding and decoding
+ all its components, such that all inputs and outputs are in a
+ maximally-decoded state. Note that this means, for some special
+ cases, a URL may not "roundtrip" character-for-character, but this
+ is considered a good tradeoff for the safety of automatic
+ encoding.
+
+ Otherwise, :class:`DecodedURL` has almost exactly the same API as
+ :class:`URL`.
+
+ Where applicable, a UTF-8 encoding is presumed. Be advised that
+    some interactions can raise :exc:`UnicodeEncodeError` and
+    :exc:`UnicodeDecodeError` exceptions, just like when working with
+ bytestrings. Examples of such interactions include handling query
+ strings encoding binary data, and paths containing segments with
+ special characters encoded with codecs other than UTF-8.
+
+ Args:
+ url: A :class:`URL` object to wrap.
+        lazy: Set to True to avoid pre-decoding all parts of the URL to
+            check for validity.
+ Defaults to False.
+        query_plus_is_space: Whether ``+`` characters in the query string
+            should be treated as spaces when decoding. If unspecified, the
+            default is taken from the scheme.
+
+ .. note::
+
+ The :class:`DecodedURL` initializer takes a :class:`URL` object,
+ not URL components, like :class:`URL`. To programmatically
+ construct a :class:`DecodedURL`, you can use this pattern:
+
+ >>> print(DecodedURL().replace(scheme=u'https',
+ ... host=u'pypi.org', path=(u'projects', u'hyperlink')).to_text())
+ https://pypi.org/projects/hyperlink
+
+ .. versionadded:: 18.0.0
+ """
+
+ def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
+ # type: (URL, bool, Optional[bool]) -> None
+ self._url = url
+ if query_plus_is_space is None:
+ query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES
+ self._query_plus_is_space = query_plus_is_space
+ if not lazy:
+ # cache the following, while triggering any decoding
+ # issues with decodable fields
+ self.host, self.userinfo, self.path, self.query, self.fragment
+ return
+
+ @classmethod
+ def from_text(cls, text, lazy=False, query_plus_is_space=None):
+ # type: (Text, bool, Optional[bool]) -> DecodedURL
+ """\
+ Make a `DecodedURL` instance from any text string containing a URL.
+
+ Args:
+ text: Text containing the URL
+            lazy: Set to True to avoid pre-decoding all parts of the URL to
+                check for validity.
+                Defaults to False.
+ """
+ _url = URL.from_text(text)
+ return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space)
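+
+    # Illustrative use of from_text() above, showing the automatic decoding
+    # of the path and of "+" in the query string:
+    #
+    #   >>> du = DecodedURL.from_text(u"https://example.com/caf%C3%A9?q=a+b")
+    #   >>> du.path
+    #   (u'café',)
+    #   >>> du.get(u"q")
+    #   [u'a b']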
+
+ @property
+ def encoded_url(self):
+ # type: () -> URL
+ """Access the underlying :class:`URL` object, which has any special
+ characters encoded.
+ """
+ return self._url
+
+ def to_text(self, with_password=False):
+ # type: (bool) -> Text
+ "Passthrough to :meth:`~hyperlink.URL.to_text()`"
+ return self._url.to_text(with_password)
+
+ def to_uri(self):
+ # type: () -> URL
+ "Passthrough to :meth:`~hyperlink.URL.to_uri()`"
+ return self._url.to_uri()
+
+ def to_iri(self):
+ # type: () -> URL
+ "Passthrough to :meth:`~hyperlink.URL.to_iri()`"
+ return self._url.to_iri()
+
+ def _clone(self, url):
+ # type: (URL) -> DecodedURL
+ return self.__class__(
+ url,
+ # TODO: propagate laziness?
+ query_plus_is_space=self._query_plus_is_space,
+ )
+
+ def click(self, href=u""):
+ # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
+ """Return a new DecodedURL wrapping the result of
+ :meth:`~hyperlink.URL.click()`
+ """
+ if isinstance(href, DecodedURL):
+ href = href._url
+ return self._clone(
+ self._url.click(href=href),
+ )
+
+ def sibling(self, segment):
+ # type: (Text) -> DecodedURL
+ """Automatically encode any reserved characters in *segment* and
+ return a new `DecodedURL` wrapping the result of
+ :meth:`~hyperlink.URL.sibling()`
+ """
+ return self._clone(
+ self._url.sibling(_encode_reserved(segment)),
+ )
+
+ def child(self, *segments):
+ # type: (Text) -> DecodedURL
+ """Automatically encode any reserved characters in *segments* and
+ return a new `DecodedURL` wrapping the result of
+ :meth:`~hyperlink.URL.child()`.
+ """
+ if not segments:
+ return self
+ new_segs = [_encode_reserved(s) for s in segments]
+ return self._clone(self._url.child(*new_segs))
+
+ def normalize(
+ self,
+ scheme=True,
+ host=True,
+ path=True,
+ query=True,
+ fragment=True,
+ userinfo=True,
+ percents=True,
+ ):
+ # type: (bool, bool, bool, bool, bool, bool, bool) -> DecodedURL
+ """Return a new `DecodedURL` wrapping the result of
+ :meth:`~hyperlink.URL.normalize()`
+ """
+ return self._clone(
+ self._url.normalize(
+ scheme, host, path, query, fragment, userinfo, percents
+ )
+ )
+
+ @property
+ def absolute(self):
+ # type: () -> bool
+ return self._url.absolute
+
+ @property
+ def scheme(self):
+ # type: () -> Text
+ return self._url.scheme
+
+ @property
+ def host(self):
+ # type: () -> Text
+ return _decode_host(self._url.host)
+
+ @property
+ def port(self):
+ # type: () -> Optional[int]
+ return self._url.port
+
+ @property
+ def rooted(self):
+ # type: () -> bool
+ return self._url.rooted
+
+ @property
+ def path(self):
+ # type: () -> Sequence[Text]
+ if not hasattr(self, "_path"):
+ self._path = tuple(
+ [
+ _percent_decode(p, raise_subencoding_exc=True)
+ for p in self._url.path
+ ]
+ )
+ return self._path
+
+ @property
+ def query(self):
+ # type: () -> QueryPairs
+ if not hasattr(self, "_query"):
+ if self._query_plus_is_space:
+ predecode = _replace_plus
+ else:
+ predecode = _no_op
+
+ self._query = cast(
+ QueryPairs,
+ tuple(
+ tuple(
+ _percent_decode(
+ predecode(x), raise_subencoding_exc=True
+ )
+ if x is not None
+ else None
+ for x in (k, v)
+ )
+ for k, v in self._url.query
+ ),
+ )
+ return self._query
+
+ @property
+ def fragment(self):
+ # type: () -> Text
+ if not hasattr(self, "_fragment"):
+ frag = self._url.fragment
+ self._fragment = _percent_decode(frag, raise_subencoding_exc=True)
+ return self._fragment
+
+ @property
+ def userinfo(self):
+ # type: () -> Union[Tuple[str], Tuple[str, str]]
+ if not hasattr(self, "_userinfo"):
+ self._userinfo = cast(
+ Union[Tuple[str], Tuple[str, str]],
+ tuple(
+ tuple(
+ _percent_decode(p, raise_subencoding_exc=True)
+ for p in self._url.userinfo.split(":", 1)
+ )
+ ),
+ )
+ return self._userinfo
+
+ @property
+ def user(self):
+ # type: () -> Text
+ return self.userinfo[0]
+
+ @property
+ def uses_netloc(self):
+ # type: () -> Optional[bool]
+ return self._url.uses_netloc
+
+ def replace(
+ self,
+ scheme=_UNSET, # type: Optional[Text]
+ host=_UNSET, # type: Optional[Text]
+ path=_UNSET, # type: Iterable[Text]
+ query=_UNSET, # type: QueryParameters
+ fragment=_UNSET, # type: Text
+ port=_UNSET, # type: Optional[int]
+ rooted=_UNSET, # type: Optional[bool]
+ userinfo=_UNSET, # type: Union[Tuple[str], Tuple[str, str]]
+ uses_netloc=_UNSET, # type: Optional[bool]
+ ):
+ # type: (...) -> DecodedURL
+ """While the signature is the same, this `replace()` differs a little
+ from URL.replace. For instance, it accepts userinfo as a
+ tuple, not as a string, handling the case of having a username
+ containing a `:`. As with the rest of the methods on
+ DecodedURL, if you pass a reserved character, it will be
+ automatically encoded instead of an error being raised.
+ """
+ if path is not _UNSET:
+ path = tuple(_encode_reserved(p) for p in path)
+ if query is not _UNSET:
+ query = cast(
+ QueryPairs,
+ tuple(
+ tuple(
+ _encode_reserved(x) if x is not None else None
+ for x in (k, v)
+ )
+ for k, v in iter_pairs(query)
+ ),
+ )
+ if userinfo is not _UNSET:
+ if len(userinfo) > 2:
+ raise ValueError(
+ 'userinfo expected sequence of ["user"] or'
+ ' ["user", "password"], got %r' % (userinfo,)
+ )
+ userinfo_text = u":".join([_encode_reserved(p) for p in userinfo])
+ else:
+ userinfo_text = _UNSET
+ new_url = self._url.replace(
+ scheme=scheme,
+ host=host,
+ path=path,
+ query=query,
+ fragment=fragment,
+ port=port,
+ rooted=rooted,
+ userinfo=userinfo_text,
+ uses_netloc=uses_netloc,
+ )
+ return self._clone(url=new_url)
+
+ def get(self, name):
+ # type: (Text) -> List[Optional[Text]]
+ "Get the value of all query parameters whose name matches *name*"
+ return [v for (k, v) in self.query if name == k]
+
+ def add(self, name, value=None):
+ # type: (Text, Optional[Text]) -> DecodedURL
+ """Return a new DecodedURL with the query parameter *name* and *value*
+ added."""
+ return self.replace(query=self.query + ((name, value),))
+
+ def set(self, name, value=None):
+ # type: (Text, Optional[Text]) -> DecodedURL
+ "Return a new DecodedURL with query parameter *name* set to *value*"
+ query = self.query
+ q = [(k, v) for (k, v) in query if k != name]
+ idx = next((i for (i, (k, v)) in enumerate(query) if k == name), -1)
+ q[idx:idx] = [(name, value)]
+ return self.replace(query=q)
+
+ def remove(
+ self,
+ name, # type: Text
+ value=_UNSET, # type: Text
+ limit=None, # type: Optional[int]
+ ):
+ # type: (...) -> DecodedURL
+ """Return a new DecodedURL with query parameter *name* removed.
+
+ Optionally also filter for *value*, as well as cap the number
+ of parameters removed with *limit*.
+ """
+ if limit is None:
+ if value is _UNSET:
+ nq = [(k, v) for (k, v) in self.query if k != name]
+ else:
+ nq = [
+ (k, v)
+ for (k, v) in self.query
+ if not (k == name and v == value)
+ ]
+ else:
+ nq, removed_count = [], 0
+ for k, v in self.query:
+ if (
+ k == name
+ and (value is _UNSET or v == value)
+ and removed_count < limit
+ ):
+ removed_count += 1 # drop it
+ else:
+ nq.append((k, v)) # keep it
+
+ return self.replace(query=nq)
+
+ def __repr__(self):
+ # type: () -> str
+ cn = self.__class__.__name__
+ return "%s(url=%r)" % (cn, self._url)
+
+ def __str__(self):
+ # type: () -> str
+ # TODO: the underlying URL's __str__ needs to change to make
+ # this work as the URL, see #55
+ return str(self._url)
+
+ def __eq__(self, other):
+ # type: (Any) -> bool
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ return self.normalize().to_uri() == other.normalize().to_uri()
+
+ def __ne__(self, other):
+ # type: (Any) -> bool
+ if not isinstance(other, self.__class__):
+ return NotImplemented
+ return not self.__eq__(other)
+
+ def __hash__(self):
+ # type: () -> int
+ return hash(
+ (
+ self.__class__,
+ self.scheme,
+ self.userinfo,
+ self.host,
+ self.path,
+ self.query,
+ self.fragment,
+ self.port,
+ self.rooted,
+ self.uses_netloc,
+ )
+ )
+
+ # # Begin Twisted Compat Code
+ asURI = to_uri
+ asIRI = to_iri
+
+ @classmethod
+ def fromText(cls, s, lazy=False):
+ # type: (Text, bool) -> DecodedURL
+ return cls.from_text(s, lazy=lazy)
+
+ def asText(self, includeSecrets=False):
+ # type: (bool) -> Text
+ return self.to_text(with_password=includeSecrets)
+
+ def __dir__(self):
+ # type: () -> Sequence[Text]
+ try:
+ ret = object.__dir__(self)
+ except AttributeError:
+ # object.__dir__ == AttributeError # pdw for py2
+ ret = dir(self.__class__) + list(self.__dict__.keys())
+ ret = sorted(set(ret) - set(["fromText", "asURI", "asIRI", "asText"]))
+ return ret
+
+ # # End Twisted Compat Code
+
+
+def parse(url, decoded=True, lazy=False):
+ # type: (Text, bool, bool) -> Union[URL, DecodedURL]
+ """
+ Automatically turn text into a structured URL object.
+
+ >>> url = parse(u"https://github.com/python-hyper/hyperlink")
+ >>> print(url.to_text())
+ https://github.com/python-hyper/hyperlink
+
+ Args:
+ url: A text string representation of a URL.
+
+ decoded: Whether or not to return a :class:`DecodedURL`,
+ which automatically handles all
+ encoding/decoding/quoting/unquoting for all the various
+ accessors of parts of the URL, or a :class:`URL`,
+ which has the same API, but requires handling of special
+ characters for different parts of the URL.
+
+ lazy: In the case of `decoded=True`, this controls
+ whether the URL is decoded immediately or as accessed. The
+ default, `lazy=False`, checks all encoded parts of the URL
+ for decodability.
+
+ .. versionadded:: 18.0.0
+ """
+ enc_url = EncodedURL.from_text(url)
+ if not decoded:
+ return enc_url
+ dec_url = DecodedURL(enc_url, lazy=lazy)
+ return dec_url
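+
+# Illustrative behavior of parse() above, contrasting decoded and encoded
+# results (%E2%98%83 is the percent-encoded snowman character):
+#
+#   >>> parse(u"https://example.com/%E2%98%83").path
+#   (u'☃',)
+#   >>> parse(u"https://example.com/%E2%98%83", decoded=False).path
+#   (u'%E2%98%83',)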
diff --git a/contrib/python/hyperlink/py3/hyperlink/hypothesis.py b/contrib/python/hyperlink/py3/hyperlink/hypothesis.py
new file mode 100644
index 0000000000..45fd9a9956
--- /dev/null
+++ b/contrib/python/hyperlink/py3/hyperlink/hypothesis.py
@@ -0,0 +1,324 @@
+# -*- coding: utf-8 -*-
+"""
+Hypothesis strategies.
+"""
+from __future__ import absolute_import
+
+try:
+ import hypothesis
+
+ del hypothesis
+except ImportError:
+ from typing import Tuple
+
+ __all__ = () # type: Tuple[str, ...]
+else:
+ import io
+ import pkgutil
+ from csv import reader as csv_reader
+ from os.path import dirname, join
+ from string import ascii_letters, digits
+ from sys import maxunicode
+ from typing import (
+ Callable,
+ Iterable,
+ List,
+ Optional,
+ Sequence,
+ Text,
+ TypeVar,
+ cast,
+ )
+ from gzip import open as open_gzip
+
+ from . import DecodedURL, EncodedURL
+
+ from hypothesis import assume
+ from hypothesis.strategies import (
+ composite,
+ integers,
+ lists,
+ sampled_from,
+ text,
+ )
+
+ from idna import IDNAError, check_label, encode as idna_encode
+
+ __all__ = (
+ "decoded_urls",
+ "encoded_urls",
+ "hostname_labels",
+ "hostnames",
+ "idna_text",
+ "paths",
+ "port_numbers",
+ )
+
+ T = TypeVar("T")
+ DrawCallable = Callable[[Callable[..., T]], T]
+
+ try:
+ unichr
+ except NameError: # Py3
+ unichr = chr # type: Callable[[int], Text]
+
+ def idna_characters():
+ # type: () -> Text
+ """
+ Returns a string containing IDNA characters.
+ """
+ global _idnaCharacters
+
+ if not _idnaCharacters:
+ result = []
+
+ # Data source "IDNA Derived Properties":
+ # https://www.iana.org/assignments/idna-tables-6.3.0/
+ # idna-tables-6.3.0.xhtml#idna-tables-properties
+ dataFileName = join(
+ dirname(__file__), "idna-tables-properties.csv.gz"
+ )
+            data = io.BytesIO(
+                pkgutil.get_data(__name__, "idna-tables-properties.csv.gz")
+            )
+ with open_gzip(data) as dataFile:
+ reader = csv_reader(
+ (line.decode("utf-8") for line in dataFile),
+ delimiter=",",
+ )
+ next(reader) # Skip header row
+ for row in reader:
+ codes, prop, description = row
+
+ if prop != "PVALID":
+ # CONTEXTO or CONTEXTJ are also allowed, but they come
+ # with rules, so we're punting on those here.
+ # See: https://tools.ietf.org/html/rfc5892
+ continue
+
+                    startEnd = codes.split("-", 1)
+ if len(startEnd) == 1:
+ # No end of range given; use start
+ startEnd.append(startEnd[0])
+ start, end = (int(i, 16) for i in startEnd)
+
+ for i in range(start, end + 1):
+ if i > maxunicode: # Happens using Py2 on Windows
+ break
+ result.append(unichr(i))
+
+ _idnaCharacters = u"".join(result)
+
+ return _idnaCharacters
+
+ _idnaCharacters = "" # type: Text
+
+ @composite
+ def idna_text(draw, min_size=1, max_size=None):
+ # type: (DrawCallable, int, Optional[int]) -> Text
+ """
+ A strategy which generates IDNA-encodable text.
+
+        @param min_size: The minimum number of characters in the text.
+            Must be at least C{1}.
+
+ @param max_size: The maximum number of characters in the text.
+ Use C{None} for an unbounded size.
+ """
+ alphabet = idna_characters()
+
+ assert min_size >= 1
+
+ if max_size is not None:
+ assert max_size >= 1
+
+ result = cast(
+ Text,
+ draw(text(min_size=min_size, max_size=max_size, alphabet=alphabet)),
+ )
+
+ # FIXME: There should be a more efficient way to ensure we produce
+ # valid IDNA text.
+ try:
+ idna_encode(result)
+ except IDNAError:
+ assume(False)
+
+ return result
+
+ @composite
+ def port_numbers(draw, allow_zero=False):
+ # type: (DrawCallable, bool) -> int
+ """
+ A strategy which generates port numbers.
+
+ @param allow_zero: Whether to allow port C{0} as a possible value.
+ """
+ if allow_zero:
+ min_value = 0
+ else:
+ min_value = 1
+
+ return cast(int, draw(integers(min_value=min_value, max_value=65535)))
+
+ @composite
+ def hostname_labels(draw, allow_idn=True):
+ # type: (DrawCallable, bool) -> Text
+ """
+ A strategy which generates host name labels.
+
+ @param allow_idn: Whether to allow non-ASCII characters as allowed by
+ internationalized domain names (IDNs).
+ """
+ if allow_idn:
+ label = cast(Text, draw(idna_text(min_size=1, max_size=63)))
+
+ try:
+ label.encode("ascii")
+ except UnicodeEncodeError:
+ # If the label doesn't encode to ASCII, then we need to check
+ # the length of the label after encoding to punycode and adding
+ # the xn-- prefix.
+ while len(label.encode("punycode")) > 63 - len("xn--"):
+ # Rather than bombing out, just trim from the end until it
+ # is short enough, so hypothesis doesn't have to generate
+ # new data.
+ label = label[:-1]
+
+ else:
+ label = cast(
+ Text,
+ draw(
+ text(
+ min_size=1,
+ max_size=63,
+ alphabet=Text(ascii_letters + digits + u"-"),
+ )
+ ),
+ )
+
+ # Filter invalid labels.
+ # It would be better to reliably avoid generation of bogus labels in
+ # the first place, but it's hard...
+ try:
+ check_label(label)
+ except UnicodeError: # pragma: no cover (not always drawn)
+ assume(False)
+
+ return label
+
+ @composite
+ def hostnames(draw, allow_leading_digit=True, allow_idn=True):
+ # type: (DrawCallable, bool, bool) -> Text
+ """
+ A strategy which generates host names.
+
+ @param allow_leading_digit: Whether to allow a leading digit in host
+ names; they were not allowed prior to RFC 1123.
+
+ @param allow_idn: Whether to allow non-ASCII characters as allowed by
+ internationalized domain names (IDNs).
+ """
+ # Draw first label, filtering out labels with leading digits if needed
+ labels = [
+ cast(
+ Text,
+ draw(
+ hostname_labels(allow_idn=allow_idn).filter(
+ lambda l: (
+ True if allow_leading_digit else l[0] not in digits
+ )
+ )
+ ),
+ )
+ ]
+ # Draw remaining labels
+ labels += cast(
+ List[Text],
+ draw(
+ lists(
+ hostname_labels(allow_idn=allow_idn),
+ min_size=1,
+ max_size=4,
+ )
+ ),
+ )
+
+ # Trim off labels until the total host name length fits in 252
+ # characters. This avoids having to filter the data.
+ while sum(len(label) for label in labels) + len(labels) - 1 > 252:
+ labels = labels[:-1]
+
+ return u".".join(labels)
+
+ def path_characters():
+ # type: () -> str
+ """
+ Returns a string containing valid URL path characters.
+ """
+ global _path_characters
+
+ if _path_characters is None:
+
+ def chars():
+ # type: () -> Iterable[Text]
+ for i in range(maxunicode):
+ c = unichr(i)
+
+ # Exclude reserved characters
+ if c in "#/?":
+ continue
+
+ # Exclude anything not UTF-8 compatible
+ try:
+ c.encode("utf-8")
+ except UnicodeEncodeError:
+ continue
+
+ yield c
+
+ _path_characters = "".join(chars())
+
+ return _path_characters
+
+ _path_characters = None # type: Optional[str]
+
+ @composite
+ def paths(draw):
+ # type: (DrawCallable) -> Sequence[Text]
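+        """
+        A strategy which generates lists of URL path segments.
+        """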
+ return cast(
+ List[Text],
+ draw(
+ lists(text(min_size=1, alphabet=path_characters()), max_size=10)
+ ),
+ )
+
+ @composite
+ def encoded_urls(draw):
+ # type: (DrawCallable) -> EncodedURL
+ """
+ A strategy which generates L{EncodedURL}s.
+ Call the L{EncodedURL.to_uri} method on each URL to get an HTTP
+ protocol-friendly URI.
+ """
+ port = cast(Optional[int], draw(port_numbers(allow_zero=True)))
+ host = cast(Text, draw(hostnames()))
+ path = cast(Sequence[Text], draw(paths()))
+
+ if port == 0:
+ port = None
+
+ return EncodedURL(
+ scheme=cast(Text, draw(sampled_from((u"http", u"https")))),
+ host=host,
+ port=port,
+ path=path,
+ )
+
+ @composite
+ def decoded_urls(draw):
+ # type: (DrawCallable) -> DecodedURL
+ """
+ A strategy which generates L{DecodedURL}s.
+        Call the L{DecodedURL.to_uri} method on each URL to get an HTTP
+ protocol-friendly URI.
+ """
+ return DecodedURL(draw(encoded_urls()))
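
A sketch of how the strategies above are typically consumed in a test suite,
assuming hypothesis is installed; the test names and the checked properties
are illustrative:

from hypothesis import given

from hyperlink import DecodedURL, EncodedURL
from hyperlink.hypothesis import decoded_urls, encoded_urls


@given(encoded_urls())
def test_encoded_urls_are_plausible(url):
    # type: (EncodedURL) -> None
    # The strategy samples only the http/https schemes and maps port 0
    # to None, so every draw satisfies these checks.
    assert url.scheme in (u"http", u"https")
    assert url.port is None or 1 <= url.port <= 65535


@given(decoded_urls())
def test_decoded_urls_render_to_text(url):
    # type: (DecodedURL) -> None
    # Every generated URL renders back to text with its scheme intact.
    assert url.to_text().startswith((u"http://", u"https://"))
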
diff --git a/contrib/python/hyperlink/py3/hyperlink/idna-tables-properties.csv.gz b/contrib/python/hyperlink/py3/hyperlink/idna-tables-properties.csv.gz
new file mode 100644
index 0000000000..48e9f06742
--- /dev/null
+++ b/contrib/python/hyperlink/py3/hyperlink/idna-tables-properties.csv.gz
Binary files differ
diff --git a/contrib/python/hyperlink/py3/hyperlink/py.typed b/contrib/python/hyperlink/py3/hyperlink/py.typed
new file mode 100644
index 0000000000..d2dfd5e491
--- /dev/null
+++ b/contrib/python/hyperlink/py3/hyperlink/py.typed
@@ -0,0 +1 @@
+# See: https://www.python.org/dev/peps/pep-0561/
diff --git a/contrib/python/hyperlink/py3/ya.make b/contrib/python/hyperlink/py3/ya.make
new file mode 100644
index 0000000000..2ada924679
--- /dev/null
+++ b/contrib/python/hyperlink/py3/ya.make
@@ -0,0 +1,35 @@
+# Generated by devtools/yamaker (pypi).
+
+PY3_LIBRARY()
+
+VERSION(21.0.0)
+
+LICENSE(MIT)
+
+PEERDIR(
+ contrib/python/idna
+)
+
+NO_LINT()
+
+PY_SRCS(
+ TOP_LEVEL
+ hyperlink/__init__.py
+ hyperlink/_socket.py
+ hyperlink/_url.py
+ hyperlink/hypothesis.py
+)
+
+RESOURCE_FILES(
+ PREFIX contrib/python/hyperlink/py3/
+ .dist-info/METADATA
+ .dist-info/top_level.txt
+ hyperlink/idna-tables-properties.csv.gz
+ hyperlink/py.typed
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ tests
+)
diff --git a/contrib/python/hyperlink/ya.make b/contrib/python/hyperlink/ya.make
new file mode 100644
index 0000000000..64a73ff34e
--- /dev/null
+++ b/contrib/python/hyperlink/ya.make
@@ -0,0 +1,18 @@
+PY23_LIBRARY()
+
+LICENSE(Service-Py23-Proxy)
+
+IF (PYTHON2)
+ PEERDIR(contrib/python/hyperlink/py2)
+ELSE()
+ PEERDIR(contrib/python/hyperlink/py3)
+ENDIF()
+
+NO_LINT()
+
+END()
+
+RECURSE(
+ py2
+ py3
+)
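
For reference, a downstream library that wants hyperlink only needs to PEERDIR
this proxy; the IF (PYTHON2) switch above then selects py2/ or py3/
automatically. A hypothetical consumer's ya.make (the module name is a
placeholder):

PY23_LIBRARY()

PEERDIR(
    contrib/python/hyperlink
)

PY_SRCS(
    my_module.py
)

END()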