diff options
author | rekby <rekby@ydb.tech> | 2023-12-14 16:56:50 +0300 |
---|---|---|
committer | rekby <rekby@ydb.tech> | 2023-12-14 18:09:44 +0300 |
commit | b2b2bb5997507072ca64548efe64447dd6395426 (patch) | |
tree | bbfbf77d11f1972c93ae4101fe561fd440d6ad6a /contrib/python/yarl | |
parent | 8b8678a6a4f57c62e348cdad8afd3849011a5f11 (diff) | |
download | ydb-b2b2bb5997507072ca64548efe64447dd6395426.tar.gz |
KIKIMR-19900 switch arcadia to python ydb sdk from contrib
Этот PR создан скриптом - для переключения зависимостей на python ydb sdk с версии внутри ydb на код, приезжающий через контриб.
Код в обеих версиях одинаковый, так что поломок/изменений функционала не ожидается.
На всякий случай посмотрите свои проекты и, если будут возражения, пишите сюда в issues или в тикет KIKIMR-19900.
Если всё ок - шипните, для определённости.
При отсутствии блокеров PR будет перегенерирован и влит с force-мёрджем в четверг, 14 декабря.
Diffstat (limited to 'contrib/python/yarl')
25 files changed, 7298 insertions, 0 deletions
diff --git a/contrib/python/yarl/.dist-info/METADATA b/contrib/python/yarl/.dist-info/METADATA new file mode 100644 index 0000000000..8585ab738d --- /dev/null +++ b/contrib/python/yarl/.dist-info/METADATA @@ -0,0 +1,1010 @@ +Metadata-Version: 2.1 +Name: yarl +Version: 1.9.3 +Summary: Yet another URL library +Home-page: https://github.com/aio-libs/yarl +Author: Andrew Svetlov +Author-email: andrew.svetlov@gmail.com +Maintainer: aiohttp team <team@aiohttp.org> +Maintainer-email: team@aiohttp.org +License: Apache-2.0 +Project-URL: Chat: Matrix, https://matrix.to/#/#aio-libs:matrix.org +Project-URL: Chat: Matrix Space, https://matrix.to/#/#aio-libs-space:matrix.org +Project-URL: CI: GitHub Workflows, https://github.com/aio-libs/yarl/actions?query=branch:master +Project-URL: Code of Conduct, https://github.com/aio-libs/.github/blob/master/CODE_OF_CONDUCT.md +Project-URL: Coverage: codecov, https://codecov.io/github/aio-libs/yarl +Project-URL: Docs: Changelog, https://yarl.aio-libs.org/en/latest/changes/ +Project-URL: Docs: RTD, https://yarl.aio-libs.org +Project-URL: GitHub: issues, https://github.com/aio-libs/yarl/issues +Project-URL: GitHub: repo, https://github.com/aio-libs/yarl +Keywords: cython,cext,yarl +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Programming Language :: Cython +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Requires-Python: >=3.7 
+Description-Content-Type: text/x-rst +License-File: LICENSE +License-File: NOTICE +Requires-Dist: idna >=2.0 +Requires-Dist: multidict >=4.0 +Requires-Dist: typing-extensions >=3.7.4 ; python_version < "3.8" + +yarl +==== + +The module provides handy URL class for URL parsing and changing. + +.. image:: https://github.com/aio-libs/yarl/workflows/CI/badge.svg + :target: https://github.com/aio-libs/yarl/actions?query=workflow%3ACI + :align: right + +.. image:: https://codecov.io/gh/aio-libs/yarl/branch/master/graph/badge.svg + :target: https://codecov.io/gh/aio-libs/yarl + +.. image:: https://badge.fury.io/py/yarl.svg + :target: https://badge.fury.io/py/yarl + + +.. image:: https://readthedocs.org/projects/yarl/badge/?version=latest + :target: https://yarl.aio-libs.org + + +.. image:: https://img.shields.io/pypi/pyversions/yarl.svg + :target: https://pypi.python.org/pypi/yarl + +.. image:: https://img.shields.io/matrix/aio-libs:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat + :target: https://matrix.to/#/%23aio-libs:matrix.org + :alt: Matrix Room — #aio-libs:matrix.org + +.. image:: https://img.shields.io/matrix/aio-libs-space:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs-space%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat + :target: https://matrix.to/#/%23aio-libs-space:matrix.org + :alt: Matrix Space — #aio-libs-space:matrix.org + +Introduction +------------ + +Url is constructed from ``str``: + +.. code-block:: pycon + + >>> from yarl import URL + >>> url = URL('https://www.python.org/~guido?arg=1#frag') + >>> url + URL('https://www.python.org/~guido?arg=1#frag') + +All url parts: *scheme*, *user*, *password*, *host*, *port*, *path*, +*query* and *fragment* are accessible by properties: + +.. 
code-block:: pycon + + >>> url.scheme + 'https' + >>> url.host + 'www.python.org' + >>> url.path + '/~guido' + >>> url.query_string + 'arg=1' + >>> url.query + <MultiDictProxy('arg': '1')> + >>> url.fragment + 'frag' + +All url manipulations produce a new url object: + +.. code-block:: pycon + + >>> url = URL('https://www.python.org') + >>> url / 'foo' / 'bar' + URL('https://www.python.org/foo/bar') + >>> url / 'foo' % {'bar': 'baz'} + URL('https://www.python.org/foo?bar=baz') + +Strings passed to constructor and modification methods are +automatically encoded giving canonical representation as result: + +.. code-block:: pycon + + >>> url = URL('https://www.python.org/путь') + >>> url + URL('https://www.python.org/%D0%BF%D1%83%D1%82%D1%8C') + +Regular properties are *percent-decoded*, use ``raw_`` versions for +getting *encoded* strings: + +.. code-block:: pycon + + >>> url.path + '/путь' + + >>> url.raw_path + '/%D0%BF%D1%83%D1%82%D1%8C' + +Human readable representation of URL is available as ``.human_repr()``: + +.. code-block:: pycon + + >>> url.human_repr() + 'https://www.python.org/путь' + +For full documentation please read https://yarl.aio-libs.org. + + +Installation +------------ + +:: + + $ pip install yarl + +The library is Python 3 only! + +PyPI contains binary wheels for Linux, Windows and MacOS. If you want to install +``yarl`` on another operating system (like *Alpine Linux*, which is not +manylinux-compliant because of the missing glibc and therefore, cannot be +used with our wheels) the tarball will be used to compile the library from +the source code. It requires a C compiler and Python headers installed. + +To skip the compilation you must explicitly opt-in by using a PEP 517 +configuration setting ``--pure-python``, or setting the ``YARL_NO_EXTENSIONS`` +environment variable to a non-empty value, e.g.: + +..
code-block:: console + + $ pip install yarl --config-settings=--pure-python= + +Please note that the pure-Python (uncompiled) version is much slower. However, +PyPy always uses a pure-Python implementation, and, as such, it is unaffected +by this variable. + +Dependencies +------------ + +YARL requires multidict_ library. + + +API documentation +------------------ + +The documentation is located at https://yarl.aio-libs.org. + + +Why isn't boolean supported by the URL query API? +------------------------------------------------- + +There is no standard representation of boolean values. + +Some systems prefer ``true``/``false``, others like ``yes``/``no``, ``on``/``off``, +``Y``/``N``, ``1``/``0``, etc. + +``yarl`` cannot make an unambiguous decision on how to serialize ``bool`` values because +it is specific to how the end-user's application is built and would be different for +different apps. The library doesn't accept booleans in the API; a user should convert +bools into strings using their own preferred translation protocol. + + +Comparison with other URL libraries +------------------------------------ + +* furl (https://pypi.python.org/pypi/furl) + + The library has rich functionality but the ``furl`` object is mutable. + + I'm afraid to pass this object into foreign code: who knows if the + code will modify my url in a terrible way while I just want to send URL + with handy helpers for accessing URL properties. + + ``furl`` has other non-obvious tricky things but the main objection + is mutability. + +* URLObject (https://pypi.python.org/pypi/URLObject) + + URLObject is immutable, that's pretty good. + + Every URL change generates a new URL object. + + But the library doesn't do any decode/encode transformations leaving the + end user to cope with these gory details.
+ + +Source code +----------- + +The project is hosted on GitHub_ + +Please file an issue on the `bug tracker +<https://github.com/aio-libs/yarl/issues>`_ if you have found a bug +or have some suggestion in order to improve the library. + +The library uses `Azure Pipelines <https://dev.azure.com/aio-libs/yarl>`_ for +Continuous Integration. + +Discussion list +--------------- + +*aio-libs* google group: https://groups.google.com/forum/#!forum/aio-libs + +Feel free to post your questions and ideas here. + + +Authors and License +------------------- + +The ``yarl`` package is written by Andrew Svetlov. + +It's *Apache 2* licensed and freely available. + + +.. _GitHub: https://github.com/aio-libs/yarl + +.. _multidict: https://github.com/aio-libs/multidict + +.. + You should *NOT* be adding new change log entries to this file, this + file is managed by towncrier. You *may* edit previous change logs to + fix problems like typo corrections or such. + To add a new change log entry, please see + https://pip.pypa.io/en/latest/development/#adding-a-news-entry + we named the news folder "changes". + + WARNING: Don't drop the next directive! + +.. towncrier release notes start + +1.9.3 (2023-11-20) +================== + +Bug fixes +--------- + +- Stopped dropping trailing slashes in ``yarl.URL.joinpath()`` -- by `@mjpieters <https://github.com/sponsors/mjpieters>`__. (`#862 <https://github.com/aio-libs/yarl/issues/862>`__, `#866 <https://github.com/aio-libs/yarl/issues/866>`__) +- Started accepting string subclasses in ``__truediv__()`` operations (``URL / segment``) -- by `@mjpieters <https://github.com/sponsors/mjpieters>`__. (`#871 <https://github.com/aio-libs/yarl/issues/871>`__, `#884 <https://github.com/aio-libs/yarl/issues/884>`__) +- Fixed the human representation of URLs with square brackets in usernames and passwords -- by `@mjpieters <https://github.com/sponsors/mjpieters>`__. 
(`#876 <https://github.com/aio-libs/yarl/issues/876>`__, `#882 <https://github.com/aio-libs/yarl/issues/882>`__) +- Updated type hints to include ``URL.missing_port()``, ``URL.__bytes__()`` + and the ``encoding`` argument to ``yarl.URL.joinpath()`` + -- by `@mjpieters <https://github.com/sponsors/mjpieters>`__. (`#891 <https://github.com/aio-libs/yarl/issues/891>`__) + + +Packaging updates and notes for downstreams +------------------------------------------- + +- Integrated Cython 3 to enable building *yarl* under Python 3.12 -- by `@mjpieters <https://github.com/sponsors/mjpieters>`__. (`#829 <https://github.com/aio-libs/yarl/issues/829>`__, `#881 <https://github.com/aio-libs/yarl/issues/881>`__) +- Declared modern ``setuptools.build_meta`` as the PEP 517 build + backend in ``pyproject.toml`` explicitly -- by `@webknjaz <https://github.com/sponsors/webknjaz>`__. (`#886 <https://github.com/aio-libs/yarl/issues/886>`__) +- Converted most of the packaging setup into a declarative ``setup.cfg`` + config -- by `@webknjaz <https://github.com/sponsors/webknjaz>`__. (`#890 <https://github.com/aio-libs/yarl/issues/890>`__) +- Replaced the packaging from an old-fashioned ``setup.py`` to an + in-tree PEP 517 build backend -- by `@webknjaz <https://github.com/sponsors/webknjaz>`__. + + Whenever the end-users or downstream packagers need to build ``yarl`` from + source (a Git checkout or an sdist), they may pass a ``config_settings`` + flag ``--pure-python``. If this flag is not set, a C-extension will be built + and included into the distribution. + + Here is how this can be done with ``pip``: + + .. code-block:: console + + $ python -m pip install . --config-settings=--pure-python= + + This will also work with ``-e | --editable``. + + The same can be achieved via ``pypa/build``: + + ..
code-block:: console + + $ python -m build --config-setting=--pure-python= + + Adding ``-w | --wheel`` can force ``pypa/build`` produce a wheel from source + directly, as opposed to building an ``sdist`` and then building from it. (`#893 <https://github.com/aio-libs/yarl/issues/893>`__) +- Declared Python 3.12 supported officially in the distribution package metadata + -- by `@edgarrmondragon <https://github.com/sponsors/edgarrmondragon>`__. (`#942 <https://github.com/aio-libs/yarl/issues/942>`__) + + +Contributor-facing changes +-------------------------- + +- A regression test for no-host URLs was added per `#821 <https://github.com/aio-libs/yarl/issues/821>`__ + and ``3986`` -- by `@kenballus <https://github.com/sponsors/kenballus>`__. (`#821 <https://github.com/aio-libs/yarl/issues/821>`__, `#822 <https://github.com/aio-libs/yarl/issues/822>`__) +- Started testing *yarl* against Python 3.12 in CI -- by `@mjpieters <https://github.com/sponsors/mjpieters>`__. (`#881 <https://github.com/aio-libs/yarl/issues/881>`__) +- All Python 3.12 jobs are now marked as required to pass in CI + -- by `@edgarrmondragon <https://github.com/sponsors/edgarrmondragon>`__. (`#942 <https://github.com/aio-libs/yarl/issues/942>`__) +- MyST is now integrated in Sphinx -- by `@webknjaz <https://github.com/sponsors/webknjaz>`__. + + This allows the contributors to author new documents in Markdown + when they have difficulties with going straight RST. (`#953 <https://github.com/aio-libs/yarl/issues/953>`__) + + +1.9.2 (2023-04-25) +================== + +Bugfixes +-------- + +- Fix regression with ``__truediv__`` and absolute URLs with empty paths causing the raw path to lack the leading ``/``. + (`#854 <https://github.com/aio-libs/yarl/issues/854>`_) + + +1.9.1 (2023-04-21) +================== + +Bugfixes +-------- + +- Marked tests that fail on older Python patch releases (< 3.7.10, < 3.8.8 and < 3.9.2) as expected to fail due to missing a security fix for CVE-2021-23336. 
(`#850 <https://github.com/aio-libs/yarl/issues/850>`_) + + +1.9.0 (2023-04-19) +================== + +This release was never published to PyPI, due to issues with the build process. + +Features +-------- + +- Added ``URL.joinpath(*elements)``, to create a new URL appending multiple path elements. (`#704 <https://github.com/aio-libs/yarl/issues/704>`_) +- Made ``URL.__truediv__()`` return ``NotImplemented`` if called with an + unsupported type — by `@michaeljpeters <https://github.com/sponsors/michaeljpeters>`__. + (`#832 <https://github.com/aio-libs/yarl/issues/832>`_) + + +Bugfixes +-------- + +- Path normalization for absolute URLs no longer raises a ValueError exception + when ``..`` segments would otherwise go beyond the URL path root. + (`#536 <https://github.com/aio-libs/yarl/issues/536>`_) +- Fixed an issue with update_query() not getting rid of the query when argument is None. (`#792 <https://github.com/aio-libs/yarl/issues/792>`_) +- Added some input restrictions on with_port() function to prevent invalid boolean inputs or out of valid port inputs; handled incorrect 0 port representation. (`#793 <https://github.com/aio-libs/yarl/issues/793>`_) +- Made ``yarl.URL.build()`` raise a ``TypeError`` if the ``host`` argument is ``None`` — by `@paulpapacz <https://github.com/sponsors/paulpapacz>`__. (`#808 <https://github.com/aio-libs/yarl/issues/808>`_) +- Fixed an issue with ``update_query()`` getting rid of the query when the argument + is empty but not ``None``. (`#845 <https://github.com/aio-libs/yarl/issues/845>`_) + + +Misc +---- + +- `#220 <https://github.com/aio-libs/yarl/issues/220>`_ + + +1.8.2 (2022-12-03) +================== + +This is the first release that started shipping wheels for Python 3.11. 
+ + +1.8.1 (2022-08-01) +================== + +Misc +---- + +- `#694 <https://github.com/aio-libs/yarl/issues/694>`_, `#699 <https://github.com/aio-libs/yarl/issues/699>`_, `#700 <https://github.com/aio-libs/yarl/issues/700>`_, `#701 <https://github.com/aio-libs/yarl/issues/701>`_, `#702 <https://github.com/aio-libs/yarl/issues/702>`_, `#703 <https://github.com/aio-libs/yarl/issues/703>`_, `#739 <https://github.com/aio-libs/yarl/issues/739>`_ + + +1.8.0 (2022-08-01) +================== + +Features +-------- + +- Added ``URL.raw_suffix``, ``URL.suffix``, ``URL.raw_suffixes``, ``URL.suffixes``, ``URL.with_suffix``. (`#613 <https://github.com/aio-libs/yarl/issues/613>`_) + + +Improved Documentation +---------------------- + +- Fixed broken internal references to ``yarl.URL.human_repr()``. + (`#665 <https://github.com/aio-libs/yarl/issues/665>`_) +- Fixed broken external references to ``multidict:index`` docs. (`#665 <https://github.com/aio-libs/yarl/issues/665>`_) + + +Deprecations and Removals +------------------------- + +- Dropped Python 3.6 support. (`#672 <https://github.com/aio-libs/yarl/issues/672>`_) + + +Misc +---- + +- `#646 <https://github.com/aio-libs/yarl/issues/646>`_, `#699 <https://github.com/aio-libs/yarl/issues/699>`_, `#701 <https://github.com/aio-libs/yarl/issues/701>`_ + + +1.7.2 (2021-11-01) +================== + +Bugfixes +-------- + +- Changed call in ``with_port()`` to stop reencoding parts of the URL that were already encoded. (`#623 <https://github.com/aio-libs/yarl/issues/623>`_) + + +1.7.1 (2021-10-07) +================== + +Bugfixes +-------- + +- Fix 1.7.0 build error + +1.7.0 (2021-10-06) +================== + +Features +-------- + +- Add ``__bytes__()`` magic method so that ``bytes(url)`` will work and use optimal ASCII encoding. + (`#582 <https://github.com/aio-libs/yarl/issues/582>`_) +- Started shipping platform-specific arm64 wheels for Apple Silicon. 
(`#622 <https://github.com/aio-libs/yarl/issues/622>`_) +- Started shipping platform-specific wheels with the ``musl`` tag targeting typical Alpine Linux runtimes. (`#622 <https://github.com/aio-libs/yarl/issues/622>`_) +- Added support for Python 3.10. (`#622 <https://github.com/aio-libs/yarl/issues/622>`_) + + +1.6.3 (2020-11-14) +================== + +Bugfixes +-------- + +- No longer lose characters when decoding incorrect percent-sequences (like ``%e2%82%f8``). All non-decodable percent-sequences are now preserved. + `#517 <https://github.com/aio-libs/yarl/issues/517>`_ +- Provide x86 Windows wheels. + `#535 <https://github.com/aio-libs/yarl/issues/535>`_ + + +---- + + +1.6.2 (2020-10-12) +================== + + +Bugfixes +-------- + +- Provide generated ``.c`` files in TarBall distribution. + `#530 <https://github.com/aio-libs/multidict/issues/530>`_ + +1.6.1 (2020-10-12) +================== + +Features +-------- + +- Provide wheels for ``aarch64``, ``i686``, ``ppc64le``, ``s390x`` architectures on + Linux as well as ``x86_64``. + `#507 <https://github.com/aio-libs/yarl/issues/507>`_ +- Provide wheels for Python 3.9. + `#526 <https://github.com/aio-libs/yarl/issues/526>`_ + +Bugfixes +-------- + +- ``human_repr()`` now always produces valid representation equivalent to the original URL (if the original URL is valid). + `#511 <https://github.com/aio-libs/yarl/issues/511>`_ +- Fixed requoting a single percent followed by a percent-encoded character in the Cython implementation. + `#514 <https://github.com/aio-libs/yarl/issues/514>`_ +- Fix ValueError when decoding ``%`` which is not followed by two hexadecimal digits. + `#516 <https://github.com/aio-libs/yarl/issues/516>`_ +- Fix decoding ``%`` followed by a space and hexadecimal digit. + `#520 <https://github.com/aio-libs/yarl/issues/520>`_ +- Fix annotation of ``with_query()``/``update_query()`` methods for ``key=[val1, val2]`` case.
+ `#528 <https://github.com/aio-libs/yarl/issues/528>`_ + +Removal +------- + +- Drop Python 3.5 support; Python 3.6 is the minimal supported Python version. + + +---- + + +1.6.0 (2020-09-23) +================== + +Features +-------- + +- Allow for int and float subclasses in query, while still denying bool. + `#492 <https://github.com/aio-libs/yarl/issues/492>`_ + + +Bugfixes +-------- + +- Do not requote arguments in ``URL.build()``, ``with_xxx()`` and in ``/`` operator. + `#502 <https://github.com/aio-libs/yarl/issues/502>`_ +- Keep IPv6 brackets in ``origin()``. + `#504 <https://github.com/aio-libs/yarl/issues/504>`_ + + +---- + + +1.5.1 (2020-08-01) +================== + +Bugfixes +-------- + +- Fix including relocated internal ``yarl._quoting_c`` C-extension into published PyPI dists. + `#485 <https://github.com/aio-libs/yarl/issues/485>`_ + + +Misc +---- + +- `#484 <https://github.com/aio-libs/yarl/issues/484>`_ + + +---- + + +1.5.0 (2020-07-26) +================== + +Features +-------- + +- Convert host to lowercase on URL building. + `#386 <https://github.com/aio-libs/yarl/issues/386>`_ +- Allow using ``mod`` operator (``%``) for updating query string (an alias for ``update_query()`` method). + `#435 <https://github.com/aio-libs/yarl/issues/435>`_ +- Allow use of sequences such as ``list`` and ``tuple`` in the values + of a mapping such as ``dict`` to represent that a key has many values:: + + url = URL("http://example.com") + assert url.with_query({"a": [1, 2]}) == URL("http://example.com/?a=1&a=2") + + `#443 <https://github.com/aio-libs/yarl/issues/443>`_ +- Support ``URL.build()`` with scheme and path (creates a relative URL). + `#464 <https://github.com/aio-libs/yarl/issues/464>`_ +- Cache slow IDNA encode/decode calls. 
+ `#476 <https://github.com/aio-libs/yarl/issues/476>`_ +- Add ``@final`` / ``Final`` type hints + `#477 <https://github.com/aio-libs/yarl/issues/477>`_ +- Support URL authority/raw_authority properties and authority argument of ``URL.build()`` method. + `#478 <https://github.com/aio-libs/yarl/issues/478>`_ +- Hide the library implementation details, make the exposed public list very clean. + `#483 <https://github.com/aio-libs/yarl/issues/483>`_ + + +Bugfixes +-------- + +- Fix tests with newer Python (3.7.6, 3.8.1 and 3.9.0+). + `#409 <https://github.com/aio-libs/yarl/issues/409>`_ +- Fix a bug where query component, passed in a form of mapping or sequence, is unquoted in unexpected way. + `#426 <https://github.com/aio-libs/yarl/issues/426>`_ +- Hide ``Query`` and ``QueryVariable`` type aliases in ``__init__.pyi``, now they are prefixed with underscore. + `#431 <https://github.com/aio-libs/yarl/issues/431>`_ +- Keep IPv6 brackets after updating port/user/password. + `#451 <https://github.com/aio-libs/yarl/issues/451>`_ + + +---- + + +1.4.2 (2019-12-05) +================== + +Features +-------- + +- Workaround for missing ``str.isascii()`` in Python 3.6 + `#389 <https://github.com/aio-libs/yarl/issues/389>`_ + + +---- + + +1.4.1 (2019-11-29) +================== + +* Fix regression, make the library work on Python 3.5 and 3.6 again. 
+ +1.4.0 (2019-11-29) +================== + +* Distinguish an empty password in URL from a password not provided at all (#262) + +* Fixed annotations for optional parameters of ``URL.build`` (#309) + +* Use None as default value of ``user`` parameter of ``URL.build`` (#309) + +* Enforce building C Accelerated modules when installing from source tarball, use + ``YARL_NO_EXTENSIONS`` environment variable for falling back to (slower) Pure Python + implementation (#329) + +* Drop Python 3.5 support + +* Fix quoting of plus in path by pure python version (#339) + +* Don't create a new URL if fragment is unchanged (#292) + +* Included in error message the path that produces starting slash forbidden error (#376) + +* Skip slow IDNA encoding for ASCII-only strings (#387) + + +1.3.0 (2018-12-11) +================== + +* Fix annotations for ``query`` parameter (#207) + +* An incoming query sequence can have int variables (the same as for + Mapping type) (#208) + +* Add ``URL.explicit_port`` property (#218) + +* Give a friendlier error when port can't be converted to int (#168) + +* ``bool(URL())`` now returns ``False`` (#272) + +1.2.6 (2018-06-14) +================== + +* Drop Python 3.4 trove classifier (#205) + +1.2.5 (2018-05-23) +================== + +* Fix annotations for ``build`` (#199) + +1.2.4 (2018-05-08) +================== + +* Fix annotations for ``cached_property`` (#195) + +1.2.3 (2018-05-03) +================== + +* Accept ``str`` subclasses in ``URL`` constructor (#190) + +1.2.2 (2018-05-01) +================== + +* Fix build + +1.2.1 (2018-04-30) +================== + +* Pin minimal required Python to 3.5.3 (#189) + +1.2.0 (2018-04-30) +================== + +* Forbid inheritance, replace ``__init__`` with ``__new__`` (#171) + +* Support PEP-561 (provide type hinting marker) (#182) + +1.1.1 (2018-02-17) +================== + +* Fix performance regression: don't encode empty ``netloc`` (#170) + +1.1.0 (2018-01-21) +================== + +* Make pure Python 
quoter consistent with Cython version (#162) + +1.0.0 (2018-01-15) +================== + +* Use fast path if quoted string does not need requoting (#154) + +* Speed up quoting/unquoting by ``_Quoter`` and ``_Unquoter`` classes (#155) + +* Drop ``yarl.quote`` and ``yarl.unquote`` public functions (#155) + +* Add custom string writer, reuse static buffer if available (#157) + Code is 50-80 times faster than Pure Python version (was 4-5 times faster) + +* Don't recode IP zone (#144) + +* Support ``encoded=True`` in ``yarl.URL.build()`` (#158) + +* Fix updating query with multiple keys (#160) + +0.18.0 (2018-01-10) +=================== + +* Fallback to IDNA 2003 if domain name is not IDNA 2008 compatible (#152) + +0.17.0 (2017-12-30) +=================== + +* Use IDNA 2008 for domain name processing (#149) + +0.16.0 (2017-12-07) +=================== + +* Fix raising ``TypeError`` by ``url.query_string()`` after + ``url.with_query({})`` (empty mapping) (#141) + +0.15.0 (2017-11-23) +=================== + +* Add ``raw_path_qs`` attribute (#137) + +0.14.2 (2017-11-14) +=================== + +* Restore ``strict`` parameter as no-op in ``quote`` / ``unquote`` + +0.14.1 (2017-11-13) +=================== + +* Restore ``strict`` parameter as no-op for sake of compatibility with + aiohttp 2.2 + +0.14.0 (2017-11-11) +=================== + +* Drop strict mode (#123) + +* Fix ``"ValueError: Unallowed PCT %"`` when there's a ``"%"`` in the URL (#124) + +0.13.0 (2017-10-01) +=================== + +* Document ``encoded`` parameter (#102) + +* Support relative URLs like ``'?key=value'`` (#100) + +* Unsafe encoding for QS fixed. 
Encode ``;`` character in value parameter (#104) + +* Process passwords without user names (#95) + +0.12.0 (2017-06-26) +=================== + +* Properly support paths without leading slash in ``URL.with_path()`` (#90) + +* Enable type annotation checks + +0.11.0 (2017-06-26) +=================== + +* Normalize path (#86) + +* Clear query and fragment parts in ``.with_path()`` (#85) + +0.10.3 (2017-06-13) +=================== + +* Prevent double URL arguments unquoting (#83) + +0.10.2 (2017-05-05) +=================== + +* Unexpected hash behavior (#75) + + +0.10.1 (2017-05-03) +=================== + +* Unexpected compare behavior (#73) + +* Do not quote or unquote + if not a query string. (#74) + + +0.10.0 (2017-03-14) +=================== + +* Added ``URL.build`` class method (#58) + +* Added ``path_qs`` attribute (#42) + + +0.9.8 (2017-02-16) +================== + +* Do not quote ``:`` in path + + +0.9.7 (2017-02-16) +================== + +* Load from pickle without _cache (#56) + +* Percent-encoded pluses in path variables become spaces (#59) + + +0.9.6 (2017-02-15) +================== + +* Revert backward incompatible change (BaseURL) + + +0.9.5 (2017-02-14) +================== + +* Fix BaseURL rich comparison support + + +0.9.4 (2017-02-14) +================== + +* Use BaseURL + + +0.9.3 (2017-02-14) +================== + +* Added BaseURL + + +0.9.2 (2017-02-08) +================== + +* Remove debug print + + +0.9.1 (2017-02-07) +================== + +* Do not lose tail chars (#45) + + +0.9.0 (2017-02-07) +================== + +* Allow to quote ``%`` in non strict mode (#21) + +* Incorrect parsing of query parameters with %3B (;) inside (#34) + +* Fix core dumps (#41) + +* ``tmpbuf`` - compiling error (#43) + +* Added ``URL.update_path()`` method + +* Added ``URL.update_query()`` method (#47) + + +0.8.1 (2016-12-03) +================== + +* Fix broken aiohttp: revert back ``quote`` / ``unquote``. 
+ + +0.8.0 (2016-12-03) +================== + +* Support more verbose error messages in ``.with_query()`` (#24) + +* Don't percent-encode ``@`` and ``:`` in path (#32) + +* Don't expose ``yarl.quote`` and ``yarl.unquote``, these functions are + part of private API + +0.7.1 (2016-11-18) +================== + +* Accept not only ``str`` but all classes inherited from ``str`` also (#25) + +0.7.0 (2016-11-07) +================== + +* Accept ``int`` as value for ``.with_query()`` + +0.6.0 (2016-11-07) +================== + +* Explicitly use UTF8 encoding in ``setup.py`` (#20) +* Properly unquote non-UTF8 strings (#19) + +0.5.3 (2016-11-02) +================== + +* Don't use ``typing.NamedTuple`` fields but indexes on URL construction + +0.5.2 (2016-11-02) +================== + +* Inline ``_encode`` class method + +0.5.1 (2016-11-02) +================== + +* Make URL construction faster by removing extra classmethod calls + +0.5.0 (2016-11-02) +================== + +* Add Cython optimization for quoting/unquoting +* Provide binary wheels + +0.4.3 (2016-09-29) +================== + +* Fix typing stubs + +0.4.2 (2016-09-29) +================== + +* Expose ``quote()`` and ``unquote()`` as public API + +0.4.1 (2016-09-28) +================== + +* Support empty values in query (``'/path?arg'``) + +0.4.0 (2016-09-27) +================== + +* Introduce ``relative()`` (#16) + +0.3.2 (2016-09-27) +================== + +* Typo fixes #15 + +0.3.1 (2016-09-26) +================== + +* Support sequence of pairs as ``with_query()`` parameter + +0.3.0 (2016-09-26) +================== + +* Introduce ``is_default_port()`` + +0.2.1 (2016-09-26) +================== + +* Raise ValueError for URLs like 'http://:8080/' + +0.2.0 (2016-09-18) +================== + +* Avoid doubling slashes when joining paths (#13) + +* Appending path starting from slash is forbidden (#12) + +0.1.4 (2016-09-09) +================== + +* Add ``kwargs`` support for ``with_query()`` (#10) + +0.1.3 (2016-09-07) 
+================== + +* Document ``with_query()``, ``with_fragment()`` and ``origin()`` + +* Allow ``None`` for ``with_query()`` and ``with_fragment()`` + +0.1.2 (2016-09-07) +================== + +* Fix links, tune docs theme. + +0.1.1 (2016-09-06) +================== + +* Update README, old version used obsolete API + +0.1.0 (2016-09-06) +================== + +* The library was deeply refactored, bytes are gone away but all + accepted strings are encoded if needed. + +0.0.1 (2016-08-30) +================== + +* The first release. diff --git a/contrib/python/yarl/.dist-info/top_level.txt b/contrib/python/yarl/.dist-info/top_level.txt new file mode 100644 index 0000000000..e93e8bddef --- /dev/null +++ b/contrib/python/yarl/.dist-info/top_level.txt @@ -0,0 +1 @@ +yarl diff --git a/contrib/python/yarl/LICENSE b/contrib/python/yarl/LICENSE new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/contrib/python/yarl/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/contrib/python/yarl/NOTICE b/contrib/python/yarl/NOTICE new file mode 100644 index 0000000000..fa53b2b138 --- /dev/null +++ b/contrib/python/yarl/NOTICE @@ -0,0 +1,13 @@ + Copyright 2016-2021, Andrew Svetlov and aio-libs team + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/contrib/python/yarl/README.rst b/contrib/python/yarl/README.rst new file mode 100644 index 0000000000..a1032b206a --- /dev/null +++ b/contrib/python/yarl/README.rst @@ -0,0 +1,209 @@ +yarl +==== + +The module provides handy URL class for URL parsing and changing. + +.. image:: https://github.com/aio-libs/yarl/workflows/CI/badge.svg + :target: https://github.com/aio-libs/yarl/actions?query=workflow%3ACI + :align: right + +.. image:: https://codecov.io/gh/aio-libs/yarl/branch/master/graph/badge.svg + :target: https://codecov.io/gh/aio-libs/yarl + +.. 
image:: https://badge.fury.io/py/yarl.svg + :target: https://badge.fury.io/py/yarl + + +.. image:: https://readthedocs.org/projects/yarl/badge/?version=latest + :target: https://yarl.aio-libs.org + + +.. image:: https://img.shields.io/pypi/pyversions/yarl.svg + :target: https://pypi.python.org/pypi/yarl + +.. image:: https://img.shields.io/matrix/aio-libs:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat + :target: https://matrix.to/#/%23aio-libs:matrix.org + :alt: Matrix Room — #aio-libs:matrix.org + +.. image:: https://img.shields.io/matrix/aio-libs-space:matrix.org?label=Discuss%20on%20Matrix%20at%20%23aio-libs-space%3Amatrix.org&logo=matrix&server_fqdn=matrix.org&style=flat + :target: https://matrix.to/#/%23aio-libs-space:matrix.org + :alt: Matrix Space — #aio-libs-space:matrix.org + +Introduction +------------ + +Url is constructed from ``str``: + +.. code-block:: pycon + + >>> from yarl import URL + >>> url = URL('https://www.python.org/~guido?arg=1#frag') + >>> url + URL('https://www.python.org/~guido?arg=1#frag') + +All url parts: *scheme*, *user*, *password*, *host*, *port*, *path*, +*query* and *fragment* are accessible by properties: + +.. code-block:: pycon + + >>> url.scheme + 'https' + >>> url.host + 'www.python.org' + >>> url.path + '/~guido' + >>> url.query_string + 'arg=1' + >>> url.query + <MultiDictProxy('arg': '1')> + >>> url.fragment + 'frag' + +All url manipulations produce a new url object: + +.. code-block:: pycon + + >>> url = URL('https://www.python.org') + >>> url / 'foo' / 'bar' + URL('https://www.python.org/foo/bar') + >>> url / 'foo' % {'bar': 'baz'} + URL('https://www.python.org/foo?bar=baz') + +Strings passed to constructor and modification methods are +automatically encoded giving canonical representation as result: + +.. 
code-block:: pycon + + >>> url = URL('https://www.python.org/путь') + >>> url + URL('https://www.python.org/%D0%BF%D1%83%D1%82%D1%8C') + +Regular properties are *percent-decoded*, use ``raw_`` versions for +getting *encoded* strings: + +.. code-block:: pycon + + >>> url.path + '/путь' + + >>> url.raw_path + '/%D0%BF%D1%83%D1%82%D1%8C' + +Human readable representation of URL is available as ``.human_repr()``: + +.. code-block:: pycon + + >>> url.human_repr() + 'https://www.python.org/путь' + +For full documentation please read https://yarl.aio-libs.org. + + +Installation +------------ + +:: + + $ pip install yarl + +The library is Python 3 only! + +PyPI contains binary wheels for Linux, Windows and MacOS. If you want to install +``yarl`` on another operating system (like *Alpine Linux*, which is not +manylinux-compliant because of the missing glibc and therefore, cannot be +used with our wheels) the tarball will be used to compile the library from +the source code. It requires a C compiler and Python headers installed. + +To skip the compilation you must explicitly opt-in by using a PEP 517 +configuration setting ``--pure-python``, or setting the ``YARL_NO_EXTENSIONS`` +environment variable to a non-empty value, e.g.: + +.. code-block:: console + + $ pip install yarl --config-settings=--pure-python= + +Please note that the pure-Python (uncompiled) version is much slower. However, +PyPy always uses a pure-Python implementation, and, as such, it is unaffected +by this variable. + +Dependencies +------------ + +YARL requires multidict_ library. + + +API documentation +------------------ + +The documentation is located at https://yarl.aio-libs.org. + + +Why isn't boolean supported by the URL query API? +------------------------------------------------- + +There is no standard representation of boolean values. + +Some systems prefer ``true``/``false``, others like ``yes``/``no``, ``on``/``off``, +``Y``/``N``, ``1``/``0``, etc.
+ +``yarl`` cannot make an unambiguous decision on how to serialize ``bool`` values because +it is specific to how the end-user's application is built and would be different for +different apps. The library doesn't accept booleans in the API; a user should convert +bools into strings using their own preferred translation protocol. + + +Comparison with other URL libraries +------------------------------------ + +* furl (https://pypi.python.org/pypi/furl) + + The library has rich functionality but the ``furl`` object is mutable. + + I'm afraid to pass this object into foreign code: who knows if the + code will modify my url in a terrible way while I just want to send a URL + with handy helpers for accessing URL properties. + + ``furl`` has other non-obvious tricky things but the main objection + is mutability. + +* URLObject (https://pypi.python.org/pypi/URLObject) + + URLObject is immutable, that's pretty good. + + Every URL change generates a new URL object. + + But the library doesn't do any decode/encode transformations leaving the + end user to cope with these gory details. + + +Source code +----------- + +The project is hosted on GitHub_ + +Please file an issue on the `bug tracker +<https://github.com/aio-libs/yarl/issues>`_ if you have found a bug +or have some suggestion in order to improve the library. + +The library uses `Azure Pipelines <https://dev.azure.com/aio-libs/yarl>`_ for +Continuous Integration. + +Discussion list +--------------- + +*aio-libs* google group: https://groups.google.com/forum/#!forum/aio-libs + +Feel free to post your questions and ideas here. + + +Authors and License +------------------- + +The ``yarl`` package is written by Andrew Svetlov. + +It's *Apache 2* licensed and freely available. + + +.. _GitHub: https://github.com/aio-libs/yarl + +..
_multidict: https://github.com/aio-libs/multidict diff --git a/contrib/python/yarl/tests/test_cache.py b/contrib/python/yarl/tests/test_cache.py new file mode 100644 index 0000000000..22141dd085 --- /dev/null +++ b/contrib/python/yarl/tests/test_cache.py @@ -0,0 +1,28 @@ +import yarl + +# Don't check the actual behavior but make sure that calls are allowed + + +def teardown_module(): + yarl.cache_configure() + + +def test_cache_clear() -> None: + yarl.cache_clear() + + +def test_cache_info() -> None: + info = yarl.cache_info() + assert info.keys() == {"idna_encode", "idna_decode"} + + +def test_cache_configure_default() -> None: + yarl.cache_configure() + + +def test_cache_configure_None() -> None: + yarl.cache_configure(idna_encode_size=None, idna_decode_size=None) + + +def test_cache_configure_explicit() -> None: + yarl.cache_configure(idna_encode_size=128, idna_decode_size=128) diff --git a/contrib/python/yarl/tests/test_cached_property.py b/contrib/python/yarl/tests/test_cached_property.py new file mode 100644 index 0000000000..5dcb5ece23 --- /dev/null +++ b/contrib/python/yarl/tests/test_cached_property.py @@ -0,0 +1,45 @@ +import pytest + +from yarl._url import cached_property + + +def test_reify(): + class A: + def __init__(self): + self._cache = {} + + @cached_property + def prop(self): + return 1 + + a = A() + assert 1 == a.prop + + +def test_reify_class(): + class A: + def __init__(self): + self._cache = {} + + @cached_property + def prop(self): + """Docstring.""" + return 1 + + assert isinstance(A.prop, cached_property) + assert "Docstring." 
== A.prop.__doc__ + + +def test_reify_assignment(): + class A: + def __init__(self): + self._cache = {} + + @cached_property + def prop(self): + return 1 + + a = A() + + with pytest.raises(AttributeError): + a.prop = 123 diff --git a/contrib/python/yarl/tests/test_normalize_path.py b/contrib/python/yarl/tests/test_normalize_path.py new file mode 100644 index 0000000000..defc4d8dd7 --- /dev/null +++ b/contrib/python/yarl/tests/test_normalize_path.py @@ -0,0 +1,34 @@ +import pytest + +from yarl import URL + +PATHS = [ + # No dots + ("", ""), + ("/", "/"), + ("//", "//"), + ("///", "///"), + # Single-dot + ("path/to", "path/to"), + ("././path/to", "path/to"), + ("path/./to", "path/to"), + ("path/././to", "path/to"), + ("path/to/.", "path/to/"), + ("path/to/./.", "path/to/"), + # Double-dots + ("../path/to", "path/to"), + ("path/../to", "to"), + ("path/../../to", "to"), + # absolute path root / is maintained; tests based on two + # tests from web-platform-tests project's urltestdata.json + ("/foo/../../../ton", "/ton"), + ("/foo/../../../..bar", "/..bar"), + # Non-ASCII characters + ("μονοπάτι/../../να/ᴜɴɪ/ᴄᴏᴅᴇ", "να/ᴜɴɪ/ᴄᴏᴅᴇ"), + ("μονοπάτι/../../να/𝕦𝕟𝕚/𝕔𝕠𝕕𝕖/.", "να/𝕦𝕟𝕚/𝕔𝕠𝕕𝕖/"), +] + + +@pytest.mark.parametrize("original,expected", PATHS) +def test__normalize_path(original, expected): + assert URL._normalize_path(original) == expected diff --git a/contrib/python/yarl/tests/test_pickle.py b/contrib/python/yarl/tests/test_pickle.py new file mode 100644 index 0000000000..a1f29ab68c --- /dev/null +++ b/contrib/python/yarl/tests/test_pickle.py @@ -0,0 +1,23 @@ +import pickle + +from yarl import URL + +# serialize + + +def test_pickle(): + u1 = URL("test") + hash(u1) + v = pickle.dumps(u1) + u2 = pickle.loads(v) + assert u1._cache + assert not u2._cache + assert hash(u1) == hash(u2) + + +def test_default_style_state(): + u = URL("test") + hash(u) + u.__setstate__((None, {"_val": "test", "_strict": False, "_cache": {"hash": 1}})) + assert not u._cache + assert u._val == 
"test" diff --git a/contrib/python/yarl/tests/test_quoting.py b/contrib/python/yarl/tests/test_quoting.py new file mode 100644 index 0000000000..7ebc0f9b04 --- /dev/null +++ b/contrib/python/yarl/tests/test_quoting.py @@ -0,0 +1,450 @@ +import pytest + +from yarl._quoting import NO_EXTENSIONS +from yarl._quoting_py import _Quoter as _PyQuoter +from yarl._quoting_py import _Unquoter as _PyUnquoter + +if not NO_EXTENSIONS: + from yarl._quoting_c import _Quoter as _CQuoter + from yarl._quoting_c import _Unquoter as _CUnquoter + + @pytest.fixture(params=[_PyQuoter, _CQuoter], ids=["py_quoter", "c_quoter"]) + def quoter(request): + return request.param + + @pytest.fixture(params=[_PyUnquoter, _CUnquoter], ids=["py_unquoter", "c_unquoter"]) + def unquoter(request): + return request.param + +else: + + @pytest.fixture(params=[_PyQuoter], ids=["py_quoter"]) + def quoter(request): + return request.param + + @pytest.fixture(params=[_PyUnquoter], ids=["py_unquoter"]) + def unquoter(request): + return request.param + + +def hexescape(char): + """Escape char as RFC 2396 specifies""" + hex_repr = hex(ord(char))[2:].upper() + if len(hex_repr) == 1: + hex_repr = "0%s" % hex_repr + return "%" + hex_repr + + +def test_quote_not_allowed_non_strict(quoter): + assert quoter()("%HH") == "%25HH" + + +def test_quote_unfinished_tail_percent_non_strict(quoter): + assert quoter()("%") == "%25" + + +def test_quote_unfinished_tail_digit_non_strict(quoter): + assert quoter()("%2") == "%252" + + +def test_quote_unfinished_tail_safe_non_strict(quoter): + assert quoter()("%x") == "%25x" + + +def test_quote_unfinished_tail_unsafe_non_strict(quoter): + assert quoter()("%#") == "%25%23" + + +def test_quote_unfinished_tail_non_ascii_non_strict(quoter): + assert quoter()("%ß") == "%25%C3%9F" + + +def test_quote_unfinished_tail_non_ascii2_non_strict(quoter): + assert quoter()("%€") == "%25%E2%82%AC" + + +def test_quote_unfinished_tail_non_ascii3_non_strict(quoter): + assert quoter()("%🐍") == 
"%25%F0%9F%90%8D" + + +def test_quote_from_bytes(quoter): + assert quoter()("archaeological arcana") == "archaeological%20arcana" + assert quoter()("") == "" + + +def test_quote_ignore_broken_unicode(quoter): + s = quoter()( + "j\u001a\udcf4q\udcda/\udc97g\udcee\udccb\u000ch\udccb" + "\u0018\udce4v\u001b\udce2\udcce\udccecom/y\udccepj\u0016" + ) + + assert s == "j%1Aq%2Fg%0Ch%18v%1Bcom%2Fypj%16" + assert quoter()(s) == s + + +def test_unquote_to_bytes(unquoter): + assert unquoter()("abc%20def") == "abc def" + assert unquoter()("") == "" + + +def test_never_quote(quoter): + # Make sure quote() does not quote letters, digits, and "_,.-~" + do_not_quote = ( + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789" "_.-~" + ) + assert quoter()(do_not_quote) == do_not_quote + assert quoter(qs=True)(do_not_quote) == do_not_quote + + +def test_safe(quoter): + # Test setting 'safe' parameter does what it should do + quote_by_default = "<>" + assert quoter(safe=quote_by_default)(quote_by_default) == quote_by_default + + ret = quoter(safe=quote_by_default, qs=True)(quote_by_default) + assert ret == quote_by_default + + +_SHOULD_QUOTE = [chr(num) for num in range(32)] +_SHOULD_QUOTE.append(r'<>#"{}|\^[]`') +_SHOULD_QUOTE.append(chr(127)) # For 0x7F +SHOULD_QUOTE = "".join(_SHOULD_QUOTE) + + +@pytest.mark.parametrize("char", SHOULD_QUOTE) +def test_default_quoting(char, quoter): + # Make sure all characters that should be quoted are by default sans + # space (separate test for that). + result = quoter()(char) + assert hexescape(char) == result + result = quoter(qs=True)(char) + assert hexescape(char) == result + + +# TODO: should it encode percent? 
+def test_default_quoting_percent(quoter): + result = quoter()("%25") + assert "%25" == result + result = quoter(qs=True)("%25") + assert "%25" == result + result = quoter(requote=False)("%25") + assert "%2525" == result + + +def test_default_quoting_partial(quoter): + partial_quote = "ab[]cd" + expected = "ab%5B%5Dcd" + result = quoter()(partial_quote) + assert expected == result + result = quoter(qs=True)(partial_quote) + assert expected == result + + +def test_quoting_space(quoter): + # Make sure quote() and quote_plus() handle spaces as specified in + # their unique way + result = quoter()(" ") + assert result == hexescape(" ") + result = quoter(qs=True)(" ") + assert result == "+" + + given = "a b cd e f" + expect = given.replace(" ", hexescape(" ")) + result = quoter()(given) + assert expect == result + expect = given.replace(" ", "+") + result = quoter(qs=True)(given) + assert expect == result + + +def test_quoting_plus(quoter): + assert quoter(qs=False)("alpha+beta gamma") == "alpha+beta%20gamma" + assert quoter(qs=True)("alpha+beta gamma") == "alpha%2Bbeta+gamma" + assert quoter(safe="+", qs=True)("alpha+beta gamma") == "alpha+beta+gamma" + + +def test_quote_with_unicode(quoter): + # Characters in Latin-1 range, encoded by default in UTF-8 + given = "\u00a2\u00d8ab\u00ff" + expect = "%C2%A2%C3%98ab%C3%BF" + result = quoter()(given) + assert expect == result + # Characters in BMP, encoded by default in UTF-8 + given = "\u6f22\u5b57" # "Kanji" + expect = "%E6%BC%A2%E5%AD%97" + result = quoter()(given) + assert expect == result + + +def test_quote_plus_with_unicode(quoter): + # Characters in Latin-1 range, encoded by default in UTF-8 + given = "\u00a2\u00d8ab\u00ff" + expect = "%C2%A2%C3%98ab%C3%BF" + result = quoter(qs=True)(given) + assert expect == result + # Characters in BMP, encoded by default in UTF-8 + given = "\u6f22\u5b57" # "Kanji" + expect = "%E6%BC%A2%E5%AD%97" + result = quoter(qs=True)(given) + assert expect == result + + 
+@pytest.mark.parametrize("num", list(range(128))) +def test_unquoting(num, unquoter): + # Make sure unquoting of all ASCII values works + given = hexescape(chr(num)) + expect = chr(num) + result = unquoter()(given) + assert expect == result + if expect not in "+=&;": + result = unquoter(qs=True)(given) + assert expect == result + + +# Expected value should be the same as given. +# See https://url.spec.whatwg.org/#percent-encoded-bytes +@pytest.mark.parametrize( + ("input", "expected"), + [ + ("%", "%"), + ("%2", "%2"), + ("%x", "%x"), + ("%€", "%€"), + ("%2x", "%2x"), + ("%2 ", "%2 "), + ("% 2", "% 2"), + ("%xa", "%xa"), + ("%%", "%%"), + ("%%3f", "%?"), + ("%2%", "%2%"), + ("%2%3f", "%2?"), + ("%x%3f", "%x?"), + ("%€%3f", "%€?"), + ], +) +def test_unquoting_bad_percent_escapes(unquoter, input, expected): + assert unquoter()(input) == expected + + +@pytest.mark.xfail +# FIXME: After conversion to bytes, should not cause UTF-8 decode fail. +# See https://url.spec.whatwg.org/#percent-encoded-bytes +def test_unquoting_invalid_utf8_sequence(unquoter): + with pytest.raises(ValueError): + unquoter()("%AB") + with pytest.raises(ValueError): + unquoter()("%AB%AB") + + +def test_unquoting_mixed_case_percent_escapes(unquoter): + expected = "𝕦" + assert expected == unquoter()("%F0%9D%95%A6") + assert expected == unquoter()("%F0%9d%95%a6") + assert expected == unquoter()("%f0%9D%95%a6") + assert expected == unquoter()("%f0%9d%95%a6") + + +def test_unquoting_parts(unquoter): + # Make sure unquoting works when have non-quoted characters + # interspersed + given = "ab" + hexescape("c") + "d" + expect = "abcd" + result = unquoter()(given) + assert expect == result + result = unquoter(qs=True)(given) + assert expect == result + + +def test_quote_None(quoter): + assert quoter()(None) is None + + +def test_unquote_None(unquoter): + assert unquoter()(None) is None + + +def test_quote_empty_string(quoter): + assert quoter()("") == "" + + +def test_unquote_empty_string(unquoter): + 
assert unquoter()("") == "" + + +def test_quote_bad_types(quoter): + with pytest.raises(TypeError): + quoter()(123) + + +def test_unquote_bad_types(unquoter): + with pytest.raises(TypeError): + unquoter()(123) + + +def test_quote_lowercase(quoter): + assert quoter()("%d1%84") == "%D1%84" + + +def test_quote_unquoted(quoter): + assert quoter()("%41") == "A" + + +def test_quote_space(quoter): + assert quoter()(" ") == "%20" # NULL + + +# test to see if this would work to fix +# coverage on this file. +def test_quote_percent_last_character(quoter): + # % is last character in this case. + assert quoter()("%") == "%25" + + +def test_unquote_unsafe(unquoter): + assert unquoter(unsafe="@")("%40") == "%40" + + +def test_unquote_unsafe2(unquoter): + assert unquoter(unsafe="@")("%40abc") == "%40abc" + + +def test_unquote_unsafe3(unquoter): + assert unquoter(qs=True)("a%2Bb=?%3D%2B%26") == "a%2Bb=?%3D%2B%26" + + +def test_unquote_unsafe4(unquoter): + assert unquoter(unsafe="@")("a@b") == "a%40b" + + +@pytest.mark.parametrize( + ("input", "expected"), + [ + ("%e2%82", "%e2%82"), + ("%e2%82ac", "%e2%82ac"), + ("%e2%82%f8", "%e2%82%f8"), + ("%e2%82%2b", "%e2%82+"), + ("%e2%82%e2%82%ac", "%e2%82€"), + ("%e2%82%e2%82", "%e2%82%e2%82"), + ], +) +def test_unquote_non_utf8(unquoter, input, expected): + assert unquoter()(input) == expected + + +def test_unquote_unsafe_non_utf8(unquoter): + assert unquoter(unsafe="\n")("%e2%82%0a") == "%e2%82%0A" + + +def test_unquote_plus_non_utf8(unquoter): + assert unquoter(qs=True)("%e2%82%2b") == "%e2%82%2B" + + +def test_quote_non_ascii(quoter): + assert quoter()("%F8") == "%F8" + + +def test_quote_non_ascii2(quoter): + assert quoter()("a%F8b") == "a%F8b" + + +def test_quote_percent_percent_encoded(quoter): + assert quoter()("%%3f") == "%25%3F" + + +def test_quote_percent_digit_percent_encoded(quoter): + assert quoter()("%2%3f") == "%252%3F" + + +def test_quote_percent_safe_percent_encoded(quoter): + assert quoter()("%x%3f") == "%25x%3F" + + +def 
test_quote_percent_unsafe_percent_encoded(quoter): + assert quoter()("%#%3f") == "%25%23%3F" + + +def test_quote_percent_non_ascii_percent_encoded(quoter): + assert quoter()("%ß%3f") == "%25%C3%9F%3F" + + +def test_quote_percent_non_ascii2_percent_encoded(quoter): + assert quoter()("%€%3f") == "%25%E2%82%AC%3F" + + +def test_quote_percent_non_ascii3_percent_encoded(quoter): + assert quoter()("%🐍%3f") == "%25%F0%9F%90%8D%3F" + + +class StrLike(str): + pass + + +def test_quote_str_like(quoter): + assert quoter()(StrLike("abc")) == "abc" + + +def test_unquote_str_like(unquoter): + assert unquoter()(StrLike("abc")) == "abc" + + +def test_quote_sub_delims(quoter): + assert quoter()("!$&'()*+,;=") == "!$&'()*+,;=" + + +def test_requote_sub_delims(quoter): + assert quoter()("%21%24%26%27%28%29%2A%2B%2C%3B%3D") == "!$&'()*+,;=" + + +def test_unquoting_plus(unquoter): + assert unquoter(qs=False)("a+b") == "a+b" + + +def test_unquote_plus_to_space(unquoter): + assert unquoter(qs=True)("a+b") == "a b" + + +def test_unquote_plus_to_space_unsafe(unquoter): + assert unquoter(unsafe="+", qs=True)("a+b") == "a+b" + + +def test_quote_qs_with_colon(quoter): + s = quoter(safe="=+&?/:@", qs=True)("next=http%3A//example.com/") + assert s == "next=http://example.com/" + + +def test_quote_protected(quoter): + s = quoter(protected="/")("/path%2fto/three") + assert s == "/path%2Fto/three" + + +def test_quote_fastpath_safe(quoter): + s1 = "/path/to" + s2 = quoter(safe="/")(s1) + assert s1 is s2 + + +def test_quote_fastpath_pct(quoter): + s1 = "abc%A0" + s2 = quoter()(s1) + assert s1 is s2 + + +def test_quote_very_large_string(quoter): + # more than 8 KiB + s = "abcфух%30%0a" * 1024 + assert quoter()(s) == "abc%D1%84%D1%83%D1%850%0A" * 1024 + + +def test_space(quoter): + s = "% A" + assert quoter()(s) == "%25%20A" + + +def test_quoter_path_with_plus(quoter): + s = "/test/x+y%2Bz/:+%2B/" + assert "/test/x+y%2Bz/:+%2B/" == quoter(safe="@:", protected="/+")(s) + + +def 
test_unquoter_path_with_plus(unquoter): + s = "/test/x+y%2Bz/:+%2B/" + assert "/test/x+y+z/:++/" == unquoter(unsafe="+")(s) diff --git a/contrib/python/yarl/tests/test_update_query.py b/contrib/python/yarl/tests/test_update_query.py new file mode 100644 index 0000000000..e47c468341 --- /dev/null +++ b/contrib/python/yarl/tests/test_update_query.py @@ -0,0 +1,366 @@ +import enum + +import pytest +from multidict import MultiDict + +from yarl import URL + +# with_query + + +def test_with_query(): + url = URL("http://example.com") + assert str(url.with_query({"a": "1"})) == "http://example.com/?a=1" + + +def test_update_query(): + url = URL("http://example.com/") + assert str(url.update_query({"a": "1"})) == "http://example.com/?a=1" + assert str(URL("test").update_query(a=1)) == "test?a=1" + + url = URL("http://example.com/?foo=bar") + expected_url = URL("http://example.com/?foo=bar&baz=foo") + + assert url.update_query({"baz": "foo"}) == expected_url + assert url.update_query(baz="foo") == expected_url + assert url.update_query("baz=foo") == expected_url + + +def test_update_query_with_args_and_kwargs(): + url = URL("http://example.com/") + + with pytest.raises(ValueError): + url.update_query("a", foo="bar") + + +def test_update_query_with_multiple_args(): + url = URL("http://example.com/") + + with pytest.raises(ValueError): + url.update_query("a", "b") + + +def test_update_query_with_none_arg(): + url = URL("http://example.com/?foo=bar&baz=foo") + expected_url = URL("http://example.com/") + assert url.update_query(None) == expected_url + + +def test_update_query_with_empty_dict(): + url = URL("http://example.com/?foo=bar&baz=foo") + assert url.update_query({}) == url + + +def test_with_query_list_of_pairs(): + url = URL("http://example.com") + assert str(url.with_query([("a", "1")])) == "http://example.com/?a=1" + + +def test_with_query_list_non_pairs(): + url = URL("http://example.com") + with pytest.raises(ValueError): + url.with_query(["a=1", "b=2", "c=3"]) + + 
+def test_with_query_kwargs(): + url = URL("http://example.com") + q = url.with_query(query="1", query2="1").query + assert q == dict(query="1", query2="1") + + +def test_with_query_kwargs_and_args_are_mutually_exclusive(): + url = URL("http://example.com") + with pytest.raises(ValueError): + url.with_query({"a": "2", "b": "4"}, a="1") + + +def test_with_query_only_single_arg_is_supported(): + url = URL("http://example.com") + u1 = url.with_query(b=3) + u2 = URL("http://example.com/?b=3") + assert u1 == u2 + with pytest.raises(ValueError): + url.with_query("a=1", "a=b") + + +def test_with_query_empty_dict(): + url = URL("http://example.com/?a=b") + new_url = url.with_query({}) + assert new_url.query_string == "" + assert str(new_url) == "http://example.com/" + + +def test_with_query_empty_str(): + url = URL("http://example.com/?a=b") + assert str(url.with_query("")) == "http://example.com/" + + +def test_with_query_empty_value(): + url = URL("http://example.com/") + assert str(url.with_query({"a": ""})) == "http://example.com/?a=" + + +def test_with_query_str(): + url = URL("http://example.com") + assert str(url.with_query("a=1&b=2")) == "http://example.com/?a=1&b=2" + + +def test_with_query_str_non_ascii_and_spaces(): + url = URL("http://example.com") + url2 = url.with_query("a=1 2&b=знач") + assert url2.raw_query_string == "a=1+2&b=%D0%B7%D0%BD%D0%B0%D1%87" + assert url2.query_string == "a=1 2&b=знач" + + +def test_with_query_int(): + url = URL("http://example.com") + assert url.with_query({"a": 1}) == URL("http://example.com/?a=1") + + +def test_with_query_kwargs_int(): + url = URL("http://example.com") + assert url.with_query(b=2) == URL("http://example.com/?b=2") + + +def test_with_query_list_int(): + url = URL("http://example.com") + assert str(url.with_query([("a", 1)])) == "http://example.com/?a=1" + + +@pytest.mark.parametrize( + ("query", "expected"), + [ + pytest.param({"a": []}, "", id="empty list"), + pytest.param({"a": ()}, "", id="empty tuple"), + 
pytest.param({"a": [1]}, "/?a=1", id="single list"), + pytest.param({"a": (1,)}, "/?a=1", id="single tuple"), + pytest.param({"a": [1, 2]}, "/?a=1&a=2", id="list"), + pytest.param({"a": (1, 2)}, "/?a=1&a=2", id="tuple"), + pytest.param({"a[]": [1, 2]}, "/?a%5B%5D=1&a%5B%5D=2", id="key with braces"), + pytest.param({"&": [1, 2]}, "/?%26=1&%26=2", id="quote key"), + pytest.param({"a": ["1", 2]}, "/?a=1&a=2", id="mixed types"), + pytest.param({"&": ["=", 2]}, "/?%26=%3D&%26=2", id="quote key and value"), + pytest.param({"a": 1, "b": [2, 3]}, "/?a=1&b=2&b=3", id="single then list"), + pytest.param({"a": [1, 2], "b": 3}, "/?a=1&a=2&b=3", id="list then single"), + pytest.param({"a": ["1&a=2", 3]}, "/?a=1%26a%3D2&a=3", id="ampersand then int"), + pytest.param({"a": [1, "2&a=3"]}, "/?a=1&a=2%26a%3D3", id="int then ampersand"), + ], +) +def test_with_query_sequence(query, expected): + url = URL("http://example.com") + expected = "http://example.com{expected}".format_map(locals()) + assert str(url.with_query(query)) == expected + + +@pytest.mark.parametrize( + "query", + [ + pytest.param({"a": [[1]]}, id="nested"), + pytest.param([("a", [1, 2])], id="tuple list"), + ], +) +def test_with_query_sequence_invalid_use(query): + url = URL("http://example.com") + with pytest.raises(TypeError, match="Invalid variable type"): + url.with_query(query) + + +class _CStr(str): + pass + + +class _EmptyStrEr: + def __str__(self): + return "" + + +class _CInt(int, _EmptyStrEr): + pass + + +class _CFloat(float, _EmptyStrEr): + pass + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + pytest.param("1", "1", id="str"), + pytest.param(_CStr("1"), "1", id="custom str"), + pytest.param(1, "1", id="int"), + pytest.param(_CInt(1), "1", id="custom int"), + pytest.param(1.1, "1.1", id="float"), + pytest.param(_CFloat(1.1), "1.1", id="custom float"), + ], +) +def test_with_query_valid_type(value, expected): + url = URL("http://example.com") + expected = 
"http://example.com/?a={expected}".format_map(locals()) + assert str(url.with_query({"a": value})) == expected + + +@pytest.mark.parametrize( + ("value", "exc_type"), + [ + pytest.param(True, TypeError, id="bool"), + pytest.param(None, TypeError, id="none"), + pytest.param(float("inf"), ValueError, id="non-finite float"), + pytest.param(float("nan"), ValueError, id="NaN float"), + ], +) +def test_with_query_invalid_type(value, exc_type): + url = URL("http://example.com") + with pytest.raises(exc_type): + url.with_query({"a": value}) + + +@pytest.mark.parametrize( + ("value", "expected"), + [ + pytest.param("1", "1", id="str"), + pytest.param(_CStr("1"), "1", id="custom str"), + pytest.param(1, "1", id="int"), + pytest.param(_CInt(1), "1", id="custom int"), + pytest.param(1.1, "1.1", id="float"), + pytest.param(_CFloat(1.1), "1.1", id="custom float"), + ], +) +def test_with_query_list_valid_type(value, expected): + url = URL("http://example.com") + expected = "http://example.com/?a={expected}".format_map(locals()) + assert str(url.with_query([("a", value)])) == expected + + +@pytest.mark.parametrize( + ("value"), [pytest.param(True, id="bool"), pytest.param(None, id="none")] +) +def test_with_query_list_invalid_type(value): + url = URL("http://example.com") + with pytest.raises(TypeError): + url.with_query([("a", value)]) + + +def test_with_int_enum(): + class IntEnum(int, enum.Enum): + A = 1 + + url = URL("http://example.com/path") + url2 = url.with_query(a=IntEnum.A) + assert str(url2) == "http://example.com/path?a=1" + + +def test_with_float_enum(): + class FloatEnum(float, enum.Enum): + A = 1.1 + + url = URL("http://example.com/path") + url2 = url.with_query(a=FloatEnum.A) + assert str(url2) == "http://example.com/path?a=1.1" + + +def test_with_query_multidict(): + url = URL("http://example.com/path") + q = MultiDict([("a", "b"), ("c", "d")]) + assert str(url.with_query(q)) == "http://example.com/path?a=b&c=d" + + +def 
test_with_multidict_with_spaces_and_non_ascii(): + url = URL("http://example.com") + url2 = url.with_query({"a b": "ю б"}) + assert url2.raw_query_string == "a+b=%D1%8E+%D0%B1" + + +def test_with_query_multidict_with_unsafe(): + url = URL("http://example.com/path") + url2 = url.with_query({"a+b": "?=+&;"}) + assert url2.raw_query_string == "a%2Bb=?%3D%2B%26%3B" + assert url2.query_string == "a%2Bb=?%3D%2B%26%3B" + assert url2.query == {"a+b": "?=+&;"} + + +def test_with_query_None(): + url = URL("http://example.com/path?a=b") + assert url.with_query(None).query_string == "" + + +def test_with_query_bad_type(): + url = URL("http://example.com") + with pytest.raises(TypeError): + url.with_query(123) + + +def test_with_query_bytes(): + url = URL("http://example.com") + with pytest.raises(TypeError): + url.with_query(b"123") + + +def test_with_query_bytearray(): + url = URL("http://example.com") + with pytest.raises(TypeError): + url.with_query(bytearray(b"123")) + + +def test_with_query_memoryview(): + url = URL("http://example.com") + with pytest.raises(TypeError): + url.with_query(memoryview(b"123")) + + +@pytest.mark.parametrize( + ("query", "expected"), + [ + pytest.param([("key", "1;2;3")], "?key=1%3B2%3B3", id="tuple list semicolon"), + pytest.param({"key": "1;2;3"}, "?key=1%3B2%3B3", id="mapping semicolon"), + pytest.param([("key", "1&a=2")], "?key=1%26a%3D2", id="tuple list ampersand"), + pytest.param({"key": "1&a=2"}, "?key=1%26a%3D2", id="mapping ampersand"), + pytest.param([("&", "=")], "?%26=%3D", id="tuple list quote key"), + pytest.param({"&": "="}, "?%26=%3D", id="mapping quote key"), + pytest.param( + [("a[]", "3")], + "?a%5B%5D=3", + id="quote one key braces", + ), + pytest.param( + [("a[]", "3"), ("a[]", "4")], + "?a%5B%5D=3&a%5B%5D=4", + id="quote many key braces", + ), + ], +) +def test_with_query_params(query, expected): + url = URL("http://example.com/get") + url2 = url.with_query(query) + assert str(url2) == ("http://example.com/get" + expected) 
+ + +def test_with_query_only(): + url = URL() + url2 = url.with_query(key="value") + assert str(url2) == "?key=value" + + +def test_with_query_complex_url(): + target_url = "http://example.com/?game=bulls+%26+cows" + url = URL("/redir").with_query({"t": target_url}) + assert url.query["t"] == target_url + + +def test_update_query_multiple_keys(): + url = URL("http://example.com/path?a=1&a=2") + u2 = url.update_query([("a", "3"), ("a", "4")]) + + assert str(u2) == "http://example.com/path?a=3&a=4" + + +# mod operator + + +def test_update_query_with_mod_operator(): + url = URL("http://example.com/") + assert str(url % {"a": "1"}) == "http://example.com/?a=1" + assert str(url % [("a", "1")]) == "http://example.com/?a=1" + assert str(url % "a=1&b=2") == "http://example.com/?a=1&b=2" + assert str(url % {"a": "1"} % {"b": "2"}) == "http://example.com/?a=1&b=2" + assert str(url % {"a": "1"} % {"a": "3", "b": "2"}) == "http://example.com/?a=3&b=2" + assert str(url / "foo" % {"a": "1"}) == "http://example.com/foo?a=1" diff --git a/contrib/python/yarl/tests/test_url.py b/contrib/python/yarl/tests/test_url.py new file mode 100644 index 0000000000..af13d0b5d5 --- /dev/null +++ b/contrib/python/yarl/tests/test_url.py @@ -0,0 +1,1732 @@ +from enum import Enum +from urllib.parse import SplitResult + +import pytest + +from yarl import URL + + +def test_inheritance(): + with pytest.raises(TypeError) as ctx: + + class MyURL(URL): # type: ignore[misc] + pass + + assert ( + "Inheriting a class " + "<class '__tests__.test_url.test_inheritance.<locals>.MyURL'> " + "from URL is forbidden" == str(ctx.value) + ) + + +def test_str_subclass(): + class S(str): + pass + + assert str(URL(S("http://example.com"))) == "http://example.com" + + +def test_is(): + u1 = URL("http://example.com") + u2 = URL(u1) + assert u1 is u2 + + +def test_bool(): + assert URL("http://example.com") + assert not URL() + assert not URL("") + + +def test_absolute_url_without_host(): + with pytest.raises(ValueError): + 
URL("http://:8080/") + + +def test_url_is_not_str(): + url = URL("http://example.com") + assert not isinstance(url, str) + + +def test_str(): + url = URL("http://example.com:8888/path/to?a=1&b=2") + assert str(url) == "http://example.com:8888/path/to?a=1&b=2" + + +def test_repr(): + url = URL("http://example.com") + assert "URL('http://example.com')" == repr(url) + + +def test_origin(): + url = URL("http://user:password@example.com:8888/path/to?a=1&b=2") + assert URL("http://example.com:8888") == url.origin() + + +def test_origin_nonascii(): + url = URL("http://user:password@историк.рф:8888/path/to?a=1&b=2") + assert str(url.origin()) == "http://xn--h1aagokeh.xn--p1ai:8888" + + +def test_origin_ipv6(): + url = URL("http://user:password@[::1]:8888/path/to?a=1&b=2") + assert str(url.origin()) == "http://[::1]:8888" + + +def test_origin_not_absolute_url(): + url = URL("/path/to?a=1&b=2") + with pytest.raises(ValueError): + url.origin() + + +def test_origin_no_scheme(): + url = URL("//user:password@example.com:8888/path/to?a=1&b=2") + with pytest.raises(ValueError): + url.origin() + + +def test_drop_dots(): + u = URL("http://example.com/path/../to") + assert str(u) == "http://example.com/to" + + +def test_abs_cmp(): + assert URL("http://example.com:8888") == URL("http://example.com:8888") + assert URL("http://example.com:8888/") == URL("http://example.com:8888/") + assert URL("http://example.com:8888/") == URL("http://example.com:8888") + assert URL("http://example.com:8888") == URL("http://example.com:8888/") + + +def test_abs_hash(): + url = URL("http://example.com:8888") + url_trailing = URL("http://example.com:8888/") + assert hash(url) == hash(url_trailing) + + +# properties + + +def test_scheme(): + url = URL("http://example.com") + assert "http" == url.scheme + + +def test_raw_user(): + url = URL("http://user@example.com") + assert "user" == url.raw_user + + +def test_raw_user_non_ascii(): + url = URL("http://вася@example.com") + assert 
"%D0%B2%D0%B0%D1%81%D1%8F" == url.raw_user + + +def test_no_user(): + url = URL("http://example.com") + assert url.user is None + + +def test_user_non_ascii(): + url = URL("http://вася@example.com") + assert "вася" == url.user + + +def test_raw_password(): + url = URL("http://user:password@example.com") + assert "password" == url.raw_password + + +def test_raw_password_non_ascii(): + url = URL("http://user:пароль@example.com") + assert "%D0%BF%D0%B0%D1%80%D0%BE%D0%BB%D1%8C" == url.raw_password + + +def test_password_non_ascii(): + url = URL("http://user:пароль@example.com") + assert "пароль" == url.password + + +def test_password_without_user(): + url = URL("http://:password@example.com") + assert url.user is None + assert "password" == url.password + + +def test_user_empty_password(): + url = URL("http://user:@example.com") + assert "user" == url.user + assert "" == url.password + + +def test_raw_host(): + url = URL("http://example.com") + assert "example.com" == url.raw_host + + +def test_raw_host_non_ascii(): + url = URL("http://историк.рф") + assert "xn--h1aagokeh.xn--p1ai" == url.raw_host + + +def test_host_non_ascii(): + url = URL("http://историк.рф") + assert "историк.рф" == url.host + + +def test_localhost(): + url = URL("http://[::1]") + assert "::1" == url.host + + +def test_host_with_underscore(): + url = URL("http://abc_def.com") + assert "abc_def.com" == url.host + + +def test_raw_host_when_port_is_specified(): + url = URL("http://example.com:8888") + assert "example.com" == url.raw_host + + +def test_raw_host_from_str_with_ipv4(): + url = URL("http://127.0.0.1:80") + assert url.raw_host == "127.0.0.1" + + +def test_raw_host_from_str_with_ipv6(): + url = URL("http://[::1]:80") + assert url.raw_host == "::1" + + +def test_authority_full() -> None: + url = URL("http://user:passwd@host.com:8080/path") + assert url.raw_authority == "user:passwd@host.com:8080" + assert url.authority == "user:passwd@host.com:8080" + + +def test_authority_short() -> None: + 
url = URL("http://host.com/path") + assert url.raw_authority == "host.com" + + +def test_authority_full_nonasci() -> None: + url = URL("http://ваня:пароль@айдеко.рф:8080/path") + assert url.raw_authority == ( + "%D0%B2%D0%B0%D0%BD%D1%8F:%D0%BF%D0%B0%D1%80%D0%BE%D0%BB%D1%8C@" + "xn--80aidohy.xn--p1ai:8080" + ) + assert url.authority == "ваня:пароль@айдеко.рф:8080" + + +def test_lowercase(): + url = URL("http://gitHUB.com") + assert url.raw_host == "github.com" + assert url.host == url.raw_host + + +def test_lowercase_nonascii(): + url = URL("http://Айдеко.Рф") + assert url.raw_host == "xn--80aidohy.xn--p1ai" + assert url.host == "айдеко.рф" + + +def test_compressed_ipv6(): + url = URL("http://[1DEC:0:0:0::1]") + assert url.raw_host == "1dec::1" + assert url.host == url.raw_host + + +def test_ipv4_zone(): + # I'm unsure if it is correct. + url = URL("http://1.2.3.4%тест%42:123") + assert url.raw_host == "1.2.3.4%тест%42" + assert url.host == url.raw_host + + +def test_port_for_explicit_port(): + url = URL("http://example.com:8888") + assert 8888 == url.port + + +def test_port_for_implicit_port(): + url = URL("http://example.com") + assert 80 == url.port + + +def test_port_for_relative_url(): + url = URL("/path/to") + assert url.port is None + + +def test_port_for_unknown_scheme(): + url = URL("unknown://example.com") + assert url.port is None + + +def test_explicit_port_for_explicit_port(): + url = URL("http://example.com:8888") + assert 8888 == url.explicit_port + + +def test_explicit_port_for_implicit_port(): + url = URL("http://example.com") + assert url.explicit_port is None + + +def test_explicit_port_for_relative_url(): + url = URL("/path/to") + assert url.explicit_port is None + + +def test_explicit_port_for_unknown_scheme(): + url = URL("unknown://example.com") + assert url.explicit_port is None + + +def test_raw_path_string_empty(): + url = URL("http://example.com") + assert "/" == url.raw_path + + +def test_raw_path(): + url = 
URL("http://example.com/path/to") + assert "/path/to" == url.raw_path + + +def test_raw_path_non_ascii(): + url = URL("http://example.com/путь/сюда") + assert "/%D0%BF%D1%83%D1%82%D1%8C/%D1%81%D1%8E%D0%B4%D0%B0" == url.raw_path + + +def test_path_non_ascii(): + url = URL("http://example.com/путь/сюда") + assert "/путь/сюда" == url.path + + +def test_path_with_spaces(): + url = URL("http://example.com/a b/test") + assert "/a b/test" == url.path + + url = URL("http://example.com/a b") + assert "/a b" == url.path + + +def test_raw_path_for_empty_url(): + url = URL() + assert "" == url.raw_path + + +def test_raw_path_for_colon_and_at(): + url = URL("http://example.com/path:abc@123") + assert url.raw_path == "/path:abc@123" + + +def test_raw_query_string(): + url = URL("http://example.com?a=1&b=2") + assert url.raw_query_string == "a=1&b=2" + + +def test_raw_query_string_non_ascii(): + url = URL("http://example.com?б=в&ю=к") + assert url.raw_query_string == "%D0%B1=%D0%B2&%D1%8E=%D0%BA" + + +def test_query_string_non_ascii(): + url = URL("http://example.com?б=в&ю=к") + assert url.query_string == "б=в&ю=к" + + +def test_path_qs(): + url = URL("http://example.com/") + assert url.path_qs == "/" + url = URL("http://example.com/?б=в&ю=к") + assert url.path_qs == "/?б=в&ю=к" + url = URL("http://example.com/path?б=в&ю=к") + assert url.path_qs == "/path?б=в&ю=к" + + +def test_raw_path_qs(): + url = URL("http://example.com/") + assert url.raw_path_qs == "/" + url = URL("http://example.com/?б=в&ю=к") + assert url.raw_path_qs == "/?%D0%B1=%D0%B2&%D1%8E=%D0%BA" + url = URL("http://example.com/path?б=в&ю=к") + assert url.raw_path_qs == "/path?%D0%B1=%D0%B2&%D1%8E=%D0%BA" + url = URL("http://example.com/путь?a=1&b=2") + assert url.raw_path_qs == "/%D0%BF%D1%83%D1%82%D1%8C?a=1&b=2" + + +def test_query_string_spaces(): + url = URL("http://example.com?a+b=c+d&e=f+g") + assert url.query_string == "a b=c d&e=f g" + + +# raw fragment + + +def test_raw_fragment_empty(): + url = 
URL("http://example.com") + assert "" == url.raw_fragment + + +def test_raw_fragment(): + url = URL("http://example.com/path#anchor") + assert "anchor" == url.raw_fragment + + +def test_raw_fragment_non_ascii(): + url = URL("http://example.com/path#якорь") + assert "%D1%8F%D0%BA%D0%BE%D1%80%D1%8C" == url.raw_fragment + + +def test_raw_fragment_safe(): + url = URL("http://example.com/path#a?b/c:d@e") + assert "a?b/c:d@e" == url.raw_fragment + + +def test_fragment_non_ascii(): + url = URL("http://example.com/path#якорь") + assert "якорь" == url.fragment + + +def test_raw_parts_empty(): + url = URL("http://example.com") + assert ("/",) == url.raw_parts + + +def test_raw_parts(): + url = URL("http://example.com/path/to") + assert ("/", "path", "to") == url.raw_parts + + +def test_raw_parts_without_path(): + url = URL("http://example.com") + assert ("/",) == url.raw_parts + + +def test_raw_path_parts_with_2F_in_path(): + url = URL("http://example.com/path%2Fto/three") + assert ("/", "path%2Fto", "three") == url.raw_parts + + +def test_raw_path_parts_with_2f_in_path(): + url = URL("http://example.com/path%2fto/three") + assert ("/", "path%2Fto", "three") == url.raw_parts + + +def test_raw_parts_for_relative_path(): + url = URL("path/to") + assert ("path", "to") == url.raw_parts + + +def test_raw_parts_for_relative_path_starting_from_slash(): + url = URL("/path/to") + assert ("/", "path", "to") == url.raw_parts + + +def test_raw_parts_for_relative_double_path(): + url = URL("path/to") + assert ("path", "to") == url.raw_parts + + +def test_parts_for_empty_url(): + url = URL() + assert ("",) == url.raw_parts + + +def test_raw_parts_non_ascii(): + url = URL("http://example.com/путь/сюда") + assert ( + "/", + "%D0%BF%D1%83%D1%82%D1%8C", + "%D1%81%D1%8E%D0%B4%D0%B0", + ) == url.raw_parts + + +def test_parts_non_ascii(): + url = URL("http://example.com/путь/сюда") + assert ("/", "путь", "сюда") == url.parts + + +def test_name_for_empty_url(): + url = URL() + assert "" == 
url.raw_name + + +def test_raw_name(): + url = URL("http://example.com/path/to#frag") + assert "to" == url.raw_name + + +def test_raw_name_root(): + url = URL("http://example.com/#frag") + assert "" == url.raw_name + + +def test_raw_name_root2(): + url = URL("http://example.com") + assert "" == url.raw_name + + +def test_raw_name_root3(): + url = URL("http://example.com/") + assert "" == url.raw_name + + +def test_relative_raw_name(): + url = URL("path/to") + assert "to" == url.raw_name + + +def test_relative_raw_name_starting_from_slash(): + url = URL("/path/to") + assert "to" == url.raw_name + + +def test_relative_raw_name_slash(): + url = URL("/") + assert "" == url.raw_name + + +def test_name_non_ascii(): + url = URL("http://example.com/путь") + assert url.name == "путь" + + +def test_suffix_for_empty_url(): + url = URL() + assert "" == url.raw_suffix + + +def test_raw_suffix(): + url = URL("http://example.com/path/to.txt#frag") + assert ".txt" == url.raw_suffix + + +def test_raw_suffix_root(): + url = URL("http://example.com/#frag") + assert "" == url.raw_suffix + + +def test_raw_suffix_root2(): + url = URL("http://example.com") + assert "" == url.raw_suffix + + +def test_raw_suffix_root3(): + url = URL("http://example.com/") + assert "" == url.raw_suffix + + +def test_relative_raw_suffix(): + url = URL("path/to") + assert "" == url.raw_suffix + + +def test_relative_raw_suffix_starting_from_slash(): + url = URL("/path/to") + assert "" == url.raw_suffix + + +def test_relative_raw_suffix_dot(): + url = URL(".") + assert "" == url.raw_suffix + + +def test_suffix_non_ascii(): + url = URL("http://example.com/путь.суффикс") + assert url.suffix == ".суффикс" + + +def test_suffix_with_empty_name(): + url = URL("http://example.com/.hgrc") + assert "" == url.raw_suffix + + +def test_suffix_multi_dot(): + url = URL("http://example.com/doc.tar.gz") + assert ".gz" == url.raw_suffix + + +def test_suffix_with_dot_name(): + url = URL("http://example.com/doc.") + assert "" == 
url.raw_suffix + + +def test_suffixes_for_empty_url(): + url = URL() + assert () == url.raw_suffixes + + +def test_raw_suffixes(): + url = URL("http://example.com/path/to.txt#frag") + assert (".txt",) == url.raw_suffixes + + +def test_raw_suffixes_root(): + url = URL("http://example.com/#frag") + assert () == url.raw_suffixes + + +def test_raw_suffixes_root2(): + url = URL("http://example.com") + assert () == url.raw_suffixes + + +def test_raw_suffixes_root3(): + url = URL("http://example.com/") + assert () == url.raw_suffixes + + +def test_relative_raw_suffixes(): + url = URL("path/to") + assert () == url.raw_suffixes + + +def test_relative_raw_suffixes_starting_from_slash(): + url = URL("/path/to") + assert () == url.raw_suffixes + + +def test_relative_raw_suffixes_dot(): + url = URL(".") + assert () == url.raw_suffixes + + +def test_suffixes_non_ascii(): + url = URL("http://example.com/путь.суффикс") + assert url.suffixes == (".суффикс",) + + +def test_suffixes_with_empty_name(): + url = URL("http://example.com/.hgrc") + assert () == url.raw_suffixes + + +def test_suffixes_multi_dot(): + url = URL("http://example.com/doc.tar.gz") + assert (".tar", ".gz") == url.raw_suffixes + + +def test_suffixes_with_dot_name(): + url = URL("http://example.com/doc.") + assert () == url.raw_suffixes + + +def test_plus_in_path(): + url = URL("http://example.com/test/x+y%2Bz/:+%2B/") + assert "/test/x+y+z/:++/" == url.path + + +def test_nonascii_in_qs(): + url = URL("http://example.com") + url2 = url.with_query({"f\xf8\xf8": "f\xf8\xf8"}) + assert "http://example.com/?f%C3%B8%C3%B8=f%C3%B8%C3%B8" == str(url2) + + +def test_percent_encoded_in_qs(): + url = URL("http://example.com") + url2 = url.with_query({"k%cf%80": "v%cf%80"}) + assert str(url2) == "http://example.com/?k%25cf%2580=v%25cf%2580" + assert url2.raw_query_string == "k%25cf%2580=v%25cf%2580" + assert url2.query_string == "k%cf%80=v%cf%80" + assert url2.query == {"k%cf%80": "v%cf%80"} + + +# modifiers + + +def 
test_parent_raw_path(): + url = URL("http://example.com/path/to") + assert url.parent.raw_path == "/path" + + +def test_parent_raw_parts(): + url = URL("http://example.com/path/to") + assert url.parent.raw_parts == ("/", "path") + + +def test_double_parent_raw_path(): + url = URL("http://example.com/path/to") + assert url.parent.parent.raw_path == "/" + + +def test_empty_parent_raw_path(): + url = URL("http://example.com/") + assert url.parent.parent.raw_path == "/" + + +def test_empty_parent_raw_path2(): + url = URL("http://example.com") + assert url.parent.parent.raw_path == "/" + + +def test_clear_fragment_on_getting_parent(): + url = URL("http://example.com/path/to#frag") + assert URL("http://example.com/path") == url.parent + + +def test_clear_fragment_on_getting_parent_toplevel(): + url = URL("http://example.com/#frag") + assert URL("http://example.com/") == url.parent + + +def test_clear_query_on_getting_parent(): + url = URL("http://example.com/path/to?a=b") + assert URL("http://example.com/path") == url.parent + + +def test_clear_query_on_getting_parent_toplevel(): + url = URL("http://example.com/?a=b") + assert URL("http://example.com/") == url.parent + + +# truediv + + +def test_div_root(): + url = URL("http://example.com") / "path" / "to" + assert str(url) == "http://example.com/path/to" + assert url.raw_path == "/path/to" + + +def test_div_root_with_slash(): + url = URL("http://example.com/") / "path" / "to" + assert str(url) == "http://example.com/path/to" + assert url.raw_path == "/path/to" + + +def test_div(): + url = URL("http://example.com/path") / "to" + assert str(url) == "http://example.com/path/to" + assert url.raw_path == "/path/to" + + +def test_div_with_slash(): + url = URL("http://example.com/path/") / "to" + assert str(url) == "http://example.com/path/to" + assert url.raw_path == "/path/to" + + +def test_div_path_starting_from_slash_is_forbidden(): + url = URL("http://example.com/path/") + with pytest.raises(ValueError): + url / 
"/to/others" + + +class StrEnum(str, Enum): + spam = "ham" + + def __str__(self): + return self.value + + +def test_div_path_srting_subclass(): + url = URL("http://example.com/path/") / StrEnum.spam + assert str(url) == "http://example.com/path/ham" + + +def test_div_bad_type(): + url = URL("http://example.com/path/") + with pytest.raises(TypeError): + url / 3 + + +def test_div_cleanup_query_and_fragment(): + url = URL("http://example.com/path?a=1#frag") + assert str(url / "to") == "http://example.com/path/to" + + +def test_div_for_empty_url(): + url = URL() / "a" + assert url.raw_parts == ("a",) + + +def test_div_for_relative_url(): + url = URL("a") / "b" + assert url.raw_parts == ("a", "b") + + +def test_div_for_relative_url_started_with_slash(): + url = URL("/a") / "b" + assert url.raw_parts == ("/", "a", "b") + + +def test_div_non_ascii(): + url = URL("http://example.com/сюда") + url2 = url / "туда" + assert url2.path == "/сюда/туда" + assert url2.raw_path == "/%D1%81%D1%8E%D0%B4%D0%B0/%D1%82%D1%83%D0%B4%D0%B0" + assert url2.parts == ("/", "сюда", "туда") + assert url2.raw_parts == ( + "/", + "%D1%81%D1%8E%D0%B4%D0%B0", + "%D1%82%D1%83%D0%B4%D0%B0", + ) + + +def test_div_percent_encoded(): + url = URL("http://example.com/path") + url2 = url / "%cf%80" + assert url2.path == "/path/%cf%80" + assert url2.raw_path == "/path/%25cf%2580" + assert url2.parts == ("/", "path", "%cf%80") + assert url2.raw_parts == ("/", "path", "%25cf%2580") + + +def test_div_with_colon_and_at(): + url = URL("http://example.com/base") / "path:abc@123" + assert url.raw_path == "/base/path:abc@123" + + +def test_div_with_dots(): + url = URL("http://example.com/base") / "../path/./to" + assert url.raw_path == "/path/to" + + +# joinpath + + +@pytest.mark.parametrize( + "base,to_join,expected", + [ + pytest.param("", ("path", "to"), "http://example.com/path/to", id="root"), + pytest.param( + "/", ("path", "to"), "http://example.com/path/to", id="root-with-slash" + ), + pytest.param("/path", 
("to",), "http://example.com/path/to", id="path"), + pytest.param( + "/path/", ("to",), "http://example.com/path/to", id="path-with-slash" + ), + pytest.param( + "/path?a=1#frag", + ("to",), + "http://example.com/path/to", + id="cleanup-query-and-fragment", + ), + pytest.param("", ("path/",), "http://example.com/path/", id="trailing-slash"), + pytest.param( + "", ("path/", "to/"), "http://example.com/path/to/", id="duplicate-slash" + ), + pytest.param("", (), "http://example.com", id="empty-segments"), + pytest.param( + "/", ("path/",), "http://example.com/path/", id="base-slash-trailing-slash" + ), + pytest.param( + "/", + ("path/", "to/"), + "http://example.com/path/to/", + id="base-slash-duplicate-slash", + ), + pytest.param("/", (), "http://example.com", id="base-slash-empty-segments"), + ], +) +def test_joinpath(base, to_join, expected): + url = URL(f"http://example.com{base}") + assert str(url.joinpath(*to_join)) == expected + + +@pytest.mark.parametrize( + "url,to_join,expected", + [ + pytest.param(URL(), ("a",), ("a",), id="empty-url"), + pytest.param(URL("a"), ("b",), ("a", "b"), id="relative-path"), + pytest.param(URL("a"), ("b", "", "c"), ("a", "b", "c"), id="empty-element"), + pytest.param(URL("/a"), ("b"), ("/", "a", "b"), id="absolute-path"), + pytest.param(URL(), ("a/",), ("a", ""), id="trailing-slash"), + pytest.param(URL(), ("a/", "b/"), ("a", "b", ""), id="duplicate-slash"), + pytest.param(URL(), (), ("",), id="empty-segments"), + ], +) +def test_joinpath_relative(url, to_join, expected): + assert url.joinpath(*to_join).raw_parts == expected + + +@pytest.mark.parametrize( + "url,to_join,encoded,e_path,e_raw_path,e_parts,e_raw_parts", + [ + pytest.param( + "http://example.com/сюда", + ("туда",), + False, + "/сюда/туда", + "/%D1%81%D1%8E%D0%B4%D0%B0/%D1%82%D1%83%D0%B4%D0%B0", + ("/", "сюда", "туда"), + ("/", "%D1%81%D1%8E%D0%B4%D0%B0", "%D1%82%D1%83%D0%B4%D0%B0"), + id="non-ascii", + ), + pytest.param( + "http://example.com/path", + ("%cf%80",), + 
False, + "/path/%cf%80", + "/path/%25cf%2580", + ("/", "path", "%cf%80"), + ("/", "path", "%25cf%2580"), + id="percent-encoded", + ), + pytest.param( + "http://example.com/path", + ("%cf%80",), + True, + "/path/π", + "/path/%cf%80", + ("/", "path", "π"), + ("/", "path", "%cf%80"), + id="encoded-percent-encoded", + ), + ], +) +def test_joinpath_encoding( + url, to_join, encoded, e_path, e_raw_path, e_parts, e_raw_parts +): + joined = URL(url).joinpath(*to_join, encoded=encoded) + assert joined.path == e_path + assert joined.raw_path == e_raw_path + assert joined.parts == e_parts + assert joined.raw_parts == e_raw_parts + + +@pytest.mark.parametrize( + "to_join,expected", + [ + pytest.param(("path:abc@123",), "/base/path:abc@123", id="with-colon-and-at"), + pytest.param(("..", "path", ".", "to"), "/path/to", id="with-dots"), + ], +) +def test_joinpath_edgecases(to_join, expected): + url = URL("http://example.com/base").joinpath(*to_join) + assert url.raw_path == expected + + +def test_joinpath_path_starting_from_slash_is_forbidden(): + url = URL("http://example.com/path/") + with pytest.raises( + ValueError, match="Appending path .* starting from slash is forbidden" + ): + assert url.joinpath("/to/others") + + +# with_path + + +def test_with_path(): + url = URL("http://example.com") + url2 = url.with_path("/test") + assert str(url2) == "http://example.com/test" + assert url2.raw_path == "/test" + assert url2.path == "/test" + + +def test_with_path_nonascii(): + url = URL("http://example.com") + url2 = url.with_path("/π") + assert str(url2) == "http://example.com/%CF%80" + assert url2.raw_path == "/%CF%80" + assert url2.path == "/π" + + +def test_with_path_percent_encoded(): + url = URL("http://example.com") + url2 = url.with_path("/%cf%80") + assert str(url2) == "http://example.com/%25cf%2580" + assert url2.raw_path == "/%25cf%2580" + assert url2.path == "/%cf%80" + + +def test_with_path_encoded(): + url = URL("http://example.com") + url2 = url.with_path("/test", 
encoded=True) + assert str(url2) == "http://example.com/test" + assert url2.raw_path == "/test" + assert url2.path == "/test" + + +def test_with_path_encoded_nonascii(): + url = URL("http://example.com") + url2 = url.with_path("/π", encoded=True) + assert str(url2) == "http://example.com/π" + assert url2.raw_path == "/π" + assert url2.path == "/π" + + +def test_with_path_encoded_percent_encoded(): + url = URL("http://example.com") + url2 = url.with_path("/%cf%80", encoded=True) + assert str(url2) == "http://example.com/%cf%80" + assert url2.raw_path == "/%cf%80" + assert url2.path == "/π" + + +def test_with_path_dots(): + url = URL("http://example.com") + assert str(url.with_path("/test/.")) == "http://example.com/test/" + + +def test_with_path_relative(): + url = URL("/path") + assert str(url.with_path("/new")) == "/new" + + +def test_with_path_query(): + url = URL("http://example.com?a=b") + assert str(url.with_path("/test")) == "http://example.com/test" + + +def test_with_path_fragment(): + url = URL("http://example.com#frag") + assert str(url.with_path("/test")) == "http://example.com/test" + + +def test_with_path_empty(): + url = URL("http://example.com/test") + assert str(url.with_path("")) == "http://example.com" + + +def test_with_path_leading_slash(): + url = URL("http://example.com") + assert url.with_path("test").path == "/test" + + +# with_fragment + + +def test_with_fragment(): + url = URL("http://example.com") + url2 = url.with_fragment("frag") + assert str(url2) == "http://example.com/#frag" + assert url2.raw_fragment == "frag" + assert url2.fragment == "frag" + + +def test_with_fragment_safe(): + url = URL("http://example.com") + u2 = url.with_fragment("a:b?c@d/e") + assert str(u2) == "http://example.com/#a:b?c@d/e" + + +def test_with_fragment_non_ascii(): + url = URL("http://example.com") + url2 = url.with_fragment("фрагм") + assert url2.raw_fragment == "%D1%84%D1%80%D0%B0%D0%B3%D0%BC" + assert url2.fragment == "фрагм" + + +def 
test_with_fragment_percent_encoded(): + url = URL("http://example.com") + url2 = url.with_fragment("%cf%80") + assert str(url2) == "http://example.com/#%25cf%2580" + assert url2.raw_fragment == "%25cf%2580" + assert url2.fragment == "%cf%80" + + +def test_with_fragment_None(): + url = URL("http://example.com/path#frag") + url2 = url.with_fragment(None) + assert str(url2) == "http://example.com/path" + + +def test_with_fragment_None_matching(): + url = URL("http://example.com/path") + url2 = url.with_fragment(None) + assert url is url2 + + +def test_with_fragment_matching(): + url = URL("http://example.com/path#frag") + url2 = url.with_fragment("frag") + assert url is url2 + + +def test_with_fragment_bad_type(): + url = URL("http://example.com") + with pytest.raises(TypeError): + url.with_fragment(123) + + +# with_name + + +def test_with_name(): + url = URL("http://example.com/a/b") + assert url.raw_parts == ("/", "a", "b") + url2 = url.with_name("c") + assert url2.raw_parts == ("/", "a", "c") + assert url2.parts == ("/", "a", "c") + assert url2.raw_path == "/a/c" + assert url2.path == "/a/c" + + +def test_with_name_for_naked_path(): + url = URL("http://example.com") + url2 = url.with_name("a") + assert url2.raw_parts == ("/", "a") + + +def test_with_name_for_relative_path(): + url = URL("a") + url2 = url.with_name("b") + assert url2.raw_parts == ("b",) + + +def test_with_name_for_relative_path2(): + url = URL("a/b") + url2 = url.with_name("c") + assert url2.raw_parts == ("a", "c") + + +def test_with_name_for_relative_path_starting_from_slash(): + url = URL("/a") + url2 = url.with_name("b") + assert url2.raw_parts == ("/", "b") + + +def test_with_name_for_relative_path_starting_from_slash2(): + url = URL("/a/b") + url2 = url.with_name("c") + assert url2.raw_parts == ("/", "a", "c") + + +def test_with_name_empty(): + url = URL("http://example.com/path/to").with_name("") + assert str(url) == "http://example.com/path/" + + +def test_with_name_non_ascii(): + url = 
URL("http://example.com/path").with_name("путь") + assert url.path == "/путь" + assert url.raw_path == "/%D0%BF%D1%83%D1%82%D1%8C" + assert url.parts == ("/", "путь") + assert url.raw_parts == ("/", "%D0%BF%D1%83%D1%82%D1%8C") + + +def test_with_name_percent_encoded(): + url = URL("http://example.com/path") + url2 = url.with_name("%cf%80") + assert url2.raw_parts == ("/", "%25cf%2580") + assert url2.parts == ("/", "%cf%80") + assert url2.raw_path == "/%25cf%2580" + assert url2.path == "/%cf%80" + + +def test_with_name_with_slash(): + with pytest.raises(ValueError): + URL("http://example.com").with_name("a/b") + + +def test_with_name_non_str(): + with pytest.raises(TypeError): + URL("http://example.com").with_name(123) + + +def test_with_name_within_colon_and_at(): + url = URL("http://example.com/oldpath").with_name("path:abc@123") + assert url.raw_path == "/path:abc@123" + + +def test_with_name_dot(): + with pytest.raises(ValueError): + URL("http://example.com").with_name(".") + + +def test_with_name_double_dot(): + with pytest.raises(ValueError): + URL("http://example.com").with_name("..") + + +# with_suffix + + +def test_with_suffix(): + url = URL("http://example.com/a/b") + assert url.raw_parts == ("/", "a", "b") + url2 = url.with_suffix(".c") + assert url2.raw_parts == ("/", "a", "b.c") + assert url2.parts == ("/", "a", "b.c") + assert url2.raw_path == "/a/b.c" + assert url2.path == "/a/b.c" + + +def test_with_suffix_for_naked_path(): + url = URL("http://example.com") + with pytest.raises(ValueError) as excinfo: + url.with_suffix(".a") + (msg,) = excinfo.value.args + assert msg == f"{url!r} has an empty name" + + +def test_with_suffix_for_relative_path(): + url = URL("a") + url2 = url.with_suffix(".b") + assert url2.raw_parts == ("a.b",) + + +def test_with_suffix_for_relative_path2(): + url = URL("a/b") + url2 = url.with_suffix(".c") + assert url2.raw_parts == ("a", "b.c") + + +def test_with_suffix_for_relative_path_starting_from_slash(): + url = URL("/a") + 
url2 = url.with_suffix(".b") + assert url2.raw_parts == ("/", "a.b") + + +def test_with_suffix_for_relative_path_starting_from_slash2(): + url = URL("/a/b") + url2 = url.with_suffix(".c") + assert url2.raw_parts == ("/", "a", "b.c") + + +def test_with_suffix_empty(): + url = URL("http://example.com/path/to").with_suffix("") + assert str(url) == "http://example.com/path/to" + + +def test_with_suffix_non_ascii(): + url = URL("http://example.com/path").with_suffix(".путь") + assert url.path == "/path.путь" + assert url.raw_path == "/path.%D0%BF%D1%83%D1%82%D1%8C" + assert url.parts == ("/", "path.путь") + assert url.raw_parts == ("/", "path.%D0%BF%D1%83%D1%82%D1%8C") + + +def test_with_suffix_percent_encoded(): + url = URL("http://example.com/path") + url2 = url.with_suffix(".%cf%80") + assert url2.raw_parts == ("/", "path.%25cf%2580") + assert url2.parts == ("/", "path.%cf%80") + assert url2.raw_path == "/path.%25cf%2580" + assert url2.path == "/path.%cf%80" + + +def test_with_suffix_without_dot(): + with pytest.raises(ValueError) as excinfo: + URL("http://example.com/a").with_suffix("b") + (msg,) = excinfo.value.args + assert msg == "Invalid suffix 'b'" + + +def test_with_suffix_non_str(): + with pytest.raises(TypeError) as excinfo: + URL("http://example.com").with_suffix(123) + (msg,) = excinfo.value.args + assert msg == "Invalid suffix type" + + +def test_with_suffix_dot(): + with pytest.raises(ValueError) as excinfo: + URL("http://example.com").with_suffix(".") + (msg,) = excinfo.value.args + assert msg == "Invalid suffix '.'" + + +def test_with_suffix_with_slash(): + with pytest.raises(ValueError) as excinfo: + URL("http://example.com/a").with_suffix("/.b") + (msg,) = excinfo.value.args + assert msg == "Invalid suffix '/.b'" + + +def test_with_suffix_with_slash2(): + with pytest.raises(ValueError) as excinfo: + URL("http://example.com/a").with_suffix(".b/.d") + (msg,) = excinfo.value.args + assert msg == "Slash in name is not allowed" + + +def 
test_with_suffix_replace(): + url = URL("/a.b") + url2 = url.with_suffix(".c") + assert url2.raw_parts == ("/", "a.c") + + +# is_absolute + + +def test_is_absolute_for_relative_url(): + url = URL("/path/to") + assert not url.is_absolute() + + +def test_is_absolute_for_absolute_url(): + url = URL("http://example.com") + assert url.is_absolute() + + +def test_is_non_absolute_for_empty_url(): + url = URL() + assert not url.is_absolute() + + +def test_is_non_absolute_for_empty_url2(): + url = URL("") + assert not url.is_absolute() + + +def test_is_absolute_path_starting_from_double_slash(): + url = URL("//www.python.org") + assert url.is_absolute() + + +# is_default_port + + +def test_is_default_port_for_relative_url(): + url = URL("/path/to") + assert not url.is_default_port() + + +def test_is_default_port_for_absolute_url_without_port(): + url = URL("http://example.com") + assert url.is_default_port() + + +def test_is_default_port_for_absolute_url_with_default_port(): + url = URL("http://example.com:80") + assert url.is_default_port() + + +def test_is_default_port_for_absolute_url_with_nondefault_port(): + url = URL("http://example.com:8080") + assert not url.is_default_port() + + +def test_is_default_port_for_unknown_scheme(): + url = URL("unknown://example.com:8080") + assert not url.is_default_port() + + +# + + +def test_no_scheme(): + url = URL("example.com") + assert url.raw_host is None + assert url.raw_path == "example.com" + assert str(url) == "example.com" + + +def test_no_scheme2(): + url = URL("example.com/a/b") + assert url.raw_host is None + assert url.raw_path == "example.com/a/b" + assert str(url) == "example.com/a/b" + + +def test_from_non_allowed(): + with pytest.raises(TypeError): + URL(1234) + + +def test_from_idna(): + url = URL("http://xn--jxagkqfkduily1i.eu") + assert "http://xn--jxagkqfkduily1i.eu" == str(url) + url = URL("http://xn--einla-pqa.de/") # needs idna 2008 + assert "http://xn--einla-pqa.de/" == str(url) + + +def test_to_idna(): + url 
= URL("http://εμπορικόσήμα.eu") + assert "http://xn--jxagkqfkduily1i.eu" == str(url) + url = URL("http://einlaß.de/") + assert "http://xn--einla-pqa.de/" == str(url) + + +def test_from_ascii_login(): + url = URL("http://" "%D0%B2%D0%B0%D1%81%D1%8F" "@host:1234/") + assert ("http://" "%D0%B2%D0%B0%D1%81%D1%8F" "@host:1234/") == str(url) + + +def test_from_non_ascii_login(): + url = URL("http://вася@host:1234/") + assert ("http://" "%D0%B2%D0%B0%D1%81%D1%8F" "@host:1234/") == str(url) + + +def test_from_ascii_login_and_password(): + url = URL( + "http://" + "%D0%B2%D0%B0%D1%81%D1%8F" + ":%D0%BF%D0%B0%D1%80%D0%BE%D0%BB%D1%8C" + "@host:1234/" + ) + assert ( + "http://" + "%D0%B2%D0%B0%D1%81%D1%8F" + ":%D0%BF%D0%B0%D1%80%D0%BE%D0%BB%D1%8C" + "@host:1234/" + ) == str(url) + + +def test_from_non_ascii_login_and_password(): + url = URL("http://вася:пароль@host:1234/") + assert ( + "http://" + "%D0%B2%D0%B0%D1%81%D1%8F" + ":%D0%BF%D0%B0%D1%80%D0%BE%D0%BB%D1%8C" + "@host:1234/" + ) == str(url) + + +def test_from_ascii_path(): + url = URL("http://example.com/" "%D0%BF%D1%83%D1%82%D1%8C/%D1%82%D1%83%D0%B4%D0%B0") + assert ( + "http://example.com/" "%D0%BF%D1%83%D1%82%D1%8C/%D1%82%D1%83%D0%B4%D0%B0" + ) == str(url) + + +def test_from_ascii_path_lower_case(): + url = URL("http://example.com/" "%d0%bf%d1%83%d1%82%d1%8c/%d1%82%d1%83%d0%b4%d0%b0") + assert ( + "http://example.com/" "%D0%BF%D1%83%D1%82%D1%8C/%D1%82%D1%83%D0%B4%D0%B0" + ) == str(url) + + +def test_from_non_ascii_path(): + url = URL("http://example.com/путь/туда") + assert ( + "http://example.com/" "%D0%BF%D1%83%D1%82%D1%8C/%D1%82%D1%83%D0%B4%D0%B0" + ) == str(url) + + +def test_bytes(): + url = URL("http://example.com/путь/туда") + assert ( + b"http://example.com/%D0%BF%D1%83%D1%82%D1%8C/%D1%82%D1%83%D0%B4%D0%B0" + == bytes(url) + ) + + +def test_from_ascii_query_parts(): + url = URL( + "http://example.com/" + "?%D0%BF%D0%B0%D1%80%D0%B0%D0%BC" + "=%D0%B7%D0%BD%D0%B0%D1%87" + ) + assert ( + "http://example.com/" + 
"?%D0%BF%D0%B0%D1%80%D0%B0%D0%BC" + "=%D0%B7%D0%BD%D0%B0%D1%87" + ) == str(url) + + +def test_from_non_ascii_query_parts(): + url = URL("http://example.com/?парам=знач") + assert ( + "http://example.com/" + "?%D0%BF%D0%B0%D1%80%D0%B0%D0%BC" + "=%D0%B7%D0%BD%D0%B0%D1%87" + ) == str(url) + + +def test_from_non_ascii_query_parts2(): + url = URL("http://example.com/?п=з&ю=б") + assert "http://example.com/?%D0%BF=%D0%B7&%D1%8E=%D0%B1" == str(url) + + +def test_from_ascii_fragment(): + url = URL("http://example.com/" "#%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82") + assert ( + "http://example.com/" "#%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82" + ) == str(url) + + +def test_from_bytes_with_non_ascii_fragment(): + url = URL("http://example.com/#фрагмент") + assert ( + "http://example.com/" "#%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82" + ) == str(url) + + +def test_to_str(): + url = URL("http://εμπορικόσήμα.eu/") + assert "http://xn--jxagkqfkduily1i.eu/" == str(url) + + +def test_to_str_long(): + url = URL( + "https://host-12345678901234567890123456789012345678901234567890" "-name:8888/" + ) + expected = ( + "https://host-" + "12345678901234567890123456789012345678901234567890" + "-name:8888/" + ) + assert expected == str(url) + + +def test_decoding_with_2F_in_path(): + url = URL("http://example.com/path%2Fto") + assert "http://example.com/path%2Fto" == str(url) + assert url == URL(str(url)) + + +def test_decoding_with_26_and_3D_in_query(): + url = URL("http://example.com/?%26=%3D") + assert "http://example.com/?%26=%3D" == str(url) + assert url == URL(str(url)) + + +def test_fragment_only_url(): + url = URL("#frag") + assert str(url) == "#frag" + + +def test_url_from_url(): + url = URL("http://example.com") + assert URL(url) == url + assert URL(url).raw_parts == ("/",) + + +def test_lowercase_scheme(): + url = URL("HTTP://example.com") + assert str(url) == "http://example.com" + + +def test_str_for_empty_url(): + url = URL() + assert "" == str(url) + + 
+def test_parent_for_empty_url(): + url = URL() + assert url is url.parent + + +def test_empty_value_for_query(): + url = URL("http://example.com/path").with_query({"a": ""}) + assert str(url) == "http://example.com/path?a=" + + +def test_none_value_for_query(): + with pytest.raises(TypeError): + URL("http://example.com/path").with_query({"a": None}) + + +def test_decode_pct_in_path(): + url = URL("http://www.python.org/%7Eguido") + assert "http://www.python.org/~guido" == str(url) + + +def test_decode_pct_in_path_lower_case(): + url = URL("http://www.python.org/%7eguido") + assert "http://www.python.org/~guido" == str(url) + + +# join + + +def test_join(): + base = URL("http://www.cwi.nl/%7Eguido/Python.html") + url = URL("FAQ.html") + url2 = base.join(url) + assert str(url2) == "http://www.cwi.nl/~guido/FAQ.html" + + +def test_join_absolute(): + base = URL("http://www.cwi.nl/%7Eguido/Python.html") + url = URL("//www.python.org/%7Eguido") + url2 = base.join(url) + assert str(url2) == "http://www.python.org/~guido" + + +def test_join_non_url(): + base = URL("http://example.com") + with pytest.raises(TypeError): + base.join("path/to") + + +NORMAL = [ + ("g:h", "g:h"), + ("g", "http://a/b/c/g"), + ("./g", "http://a/b/c/g"), + ("g/", "http://a/b/c/g/"), + ("/g", "http://a/g"), + ("//g", "http://g"), + ("?y", "http://a/b/c/d;p?y"), + ("g?y", "http://a/b/c/g?y"), + ("#s", "http://a/b/c/d;p?q#s"), + ("g#s", "http://a/b/c/g#s"), + ("g?y#s", "http://a/b/c/g?y#s"), + (";x", "http://a/b/c/;x"), + ("g;x", "http://a/b/c/g;x"), + ("g;x?y#s", "http://a/b/c/g;x?y#s"), + ("", "http://a/b/c/d;p?q"), + (".", "http://a/b/c/"), + ("./", "http://a/b/c/"), + ("..", "http://a/b/"), + ("../", "http://a/b/"), + ("../g", "http://a/b/g"), + ("../..", "http://a/"), + ("../../", "http://a/"), + ("../../g", "http://a/g"), +] + + +@pytest.mark.parametrize("url,expected", NORMAL) +def test_join_from_rfc_3986_normal(url, expected): + # test case from 
https://tools.ietf.org/html/rfc3986.html#section-5.4 + base = URL("http://a/b/c/d;p?q") + url = URL(url) + expected = URL(expected) + assert base.join(url) == expected + + +ABNORMAL = [ + ("../../../g", "http://a/g"), + ("../../../../g", "http://a/g"), + ("/./g", "http://a/g"), + ("/../g", "http://a/g"), + ("g.", "http://a/b/c/g."), + (".g", "http://a/b/c/.g"), + ("g..", "http://a/b/c/g.."), + ("..g", "http://a/b/c/..g"), + ("./../g", "http://a/b/g"), + ("./g/.", "http://a/b/c/g/"), + ("g/./h", "http://a/b/c/g/h"), + ("g/../h", "http://a/b/c/h"), + ("g;x=1/./y", "http://a/b/c/g;x=1/y"), + ("g;x=1/../y", "http://a/b/c/y"), + ("g?y/./x", "http://a/b/c/g?y/./x"), + ("g?y/../x", "http://a/b/c/g?y/../x"), + ("g#s/./x", "http://a/b/c/g#s/./x"), + ("g#s/../x", "http://a/b/c/g#s/../x"), +] + + +@pytest.mark.parametrize("url,expected", ABNORMAL) +def test_join_from_rfc_3986_abnormal(url, expected): + # test case from https://tools.ietf.org/html/rfc3986.html#section-5.4.2 + base = URL("http://a/b/c/d;p?q") + url = URL(url) + expected = URL(expected) + assert base.join(url) == expected + + +def test_split_result_non_decoded(): + with pytest.raises(ValueError): + URL(SplitResult("http", "example.com", "path", "qs", "frag")) + + +def test_human_repr(): + url = URL("http://вася:пароль@хост.домен:8080/путь/сюда?арг=вал#фраг") + s = url.human_repr() + assert URL(s) == url + assert s == "http://вася:пароль@хост.домен:8080/путь/сюда?арг=вал#фраг" + + +def test_human_repr_defaults(): + url = URL("путь") + s = url.human_repr() + assert s == "путь" + + +def test_human_repr_default_port(): + url = URL("http://вася:пароль@хост.домен/путь/сюда?арг=вал#фраг") + s = url.human_repr() + assert URL(s) == url + assert s == "http://вася:пароль@хост.домен/путь/сюда?арг=вал#фраг" + + +def test_human_repr_ipv6(): + url = URL("http://[::1]:8080/path") + s = url.human_repr() + url2 = URL(s) + assert url2 == url + assert url2.host == "::1" + assert s == "http://[::1]:8080/path" + + +def 
test_human_repr_delimiters(): + url = URL.build( + scheme="http", + user=" !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + password=" !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + host="хост.домен", + port=8080, + path="/ !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + query={ + " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~": " !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" + }, + fragment=" !\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", + ) + s = url.human_repr() + assert URL(s) == url + assert ( + s == "http:// !\"%23$%25&'()*+,-.%2F%3A;<=>%3F%40%5B\\%5D^_`{|}~" + ": !\"%23$%25&'()*+,-.%2F%3A;<=>%3F%40%5B\\%5D^_`{|}~" + "@хост.домен:8080" + "/ !\"%23$%25&'()*+,-./:;<=>%3F@[\\]^_`{|}~" + "? !\"%23$%25%26'()*%2B,-./:%3B<%3D>?@[\\]^_`{|}~" + "= !\"%23$%25%26'()*%2B,-./:%3B<%3D>?@[\\]^_`{|}~" + "# !\"#$%25&'()*+,-./:;<=>?@[\\]^_`{|}~" + ) + + +def test_human_repr_non_printable(): + url = URL.build( + scheme="http", + user="вася\n\xad\u200b", + password="пароль\n\xad\u200b", + host="хост.домен", + port=8080, + path="/путь\n\xad\u200b", + query={"арг\n\xad\u200b": "вал\n\xad\u200b"}, + fragment="фраг\n\xad\u200b", + ) + s = url.human_repr() + assert URL(s) == url + assert ( + s == "http://вася%0A%C2%AD%E2%80%8B:пароль%0A%C2%AD%E2%80%8B" + "@хост.домен:8080" + "/путь%0A%C2%AD%E2%80%8B" + "?арг%0A%C2%AD%E2%80%8B=вал%0A%C2%AD%E2%80%8B" + "#фраг%0A%C2%AD%E2%80%8B" + ) + + +# relative + + +def test_relative(): + url = URL("http://user:pass@example.com:8080/path?a=b#frag") + rel = url.relative() + assert str(rel) == "/path?a=b#frag" + + +def test_relative_is_relative(): + url = URL("http://user:pass@example.com:8080/path?a=b#frag") + rel = url.relative() + assert not rel.is_absolute() + + +def test_relative_abs_parts_are_removed(): + url = URL("http://user:pass@example.com:8080/path?a=b#frag") + rel = url.relative() + assert not rel.scheme + assert not rel.user + assert not rel.password + assert not rel.host + assert not rel.port + + +def test_relative_fails_on_rel_url(): + with pytest.raises(ValueError): + 
URL("/path?a=b#frag").relative() + + +def test_slash_and_question_in_query(): + u = URL("http://example.com/path?http://example.com/p?a#b") + assert u.query_string == "http://example.com/p?a" + + +def test_slash_and_question_in_fragment(): + u = URL("http://example.com/path#http://example.com/p?a") + assert u.fragment == "http://example.com/p?a" + + +def test_requoting(): + u = URL("http://127.0.0.1/?next=http%3A//example.com/") + assert u.raw_query_string == "next=http://example.com/" + assert str(u) == "http://127.0.0.1/?next=http://example.com/" diff --git a/contrib/python/yarl/tests/test_url_build.py b/contrib/python/yarl/tests/test_url_build.py new file mode 100644 index 0000000000..51969fa849 --- /dev/null +++ b/contrib/python/yarl/tests/test_url_build.py @@ -0,0 +1,259 @@ +import pytest + +from yarl import URL + +# build classmethod + + +def test_build_without_arguments(): + u = URL.build() + assert str(u) == "" + + +def test_build_simple(): + u = URL.build(scheme="http", host="127.0.0.1") + assert str(u) == "http://127.0.0.1" + + +def test_build_with_scheme(): + u = URL.build(scheme="blob", path="path") + assert str(u) == "blob:path" + + +def test_build_with_host(): + u = URL.build(host="127.0.0.1") + assert str(u) == "//127.0.0.1" + assert u == URL("//127.0.0.1") + + +def test_build_with_scheme_and_host(): + u = URL.build(scheme="http", host="127.0.0.1") + assert str(u) == "http://127.0.0.1" + assert u == URL("http://127.0.0.1") + + +def test_build_with_port(): + with pytest.raises(ValueError): + URL.build(port=8000) + + u = URL.build(scheme="http", host="127.0.0.1", port=8000) + assert str(u) == "http://127.0.0.1:8000" + + +def test_build_with_user(): + u = URL.build(scheme="http", host="127.0.0.1", user="foo") + assert str(u) == "http://foo@127.0.0.1" + + +def test_build_with_user_password(): + u = URL.build(scheme="http", host="127.0.0.1", user="foo", password="bar") + assert str(u) == "http://foo:bar@127.0.0.1" + + +def 
test_build_with_query_and_query_string(): + with pytest.raises(ValueError): + URL.build( + scheme="http", + host="127.0.0.1", + user="foo", + password="bar", + port=8000, + path="/index.html", + query=dict(arg="value1"), + query_string="arg=value1", + fragment="top", + ) + + +def test_build_with_all(): + u = URL.build( + scheme="http", + host="127.0.0.1", + user="foo", + password="bar", + port=8000, + path="/index.html", + query_string="arg=value1", + fragment="top", + ) + assert str(u) == "http://foo:bar@127.0.0.1:8000/index.html?arg=value1#top" + + +def test_build_with_authority_and_host(): + with pytest.raises(ValueError): + URL.build(authority="host.com", host="example.com") + + +def test_build_with_authority(): + url = URL.build(scheme="http", authority="ваня:bar@host.com:8000", path="path") + assert str(url) == "http://%D0%B2%D0%B0%D0%BD%D1%8F:bar@host.com:8000/path" + + +def test_build_with_authority_without_encoding(): + url = URL.build( + scheme="http", authority="foo:bar@host.com:8000", path="path", encoded=True + ) + assert str(url) == "http://foo:bar@host.com:8000/path" + + +def test_query_str(): + u = URL.build(scheme="http", host="127.0.0.1", path="/", query_string="arg=value1") + assert str(u) == "http://127.0.0.1/?arg=value1" + + +def test_query_dict(): + u = URL.build(scheme="http", host="127.0.0.1", path="/", query=dict(arg="value1")) + + assert str(u) == "http://127.0.0.1/?arg=value1" + + +def test_build_path_quoting(): + u = URL.build( + scheme="http", host="127.0.0.1", path="/файл.jpg", query=dict(arg="Привет") + ) + + assert u == URL("http://127.0.0.1/файл.jpg?arg=Привет") + assert str(u) == ( + "http://127.0.0.1/%D1%84%D0%B0%D0%B9%D0%BB.jpg?" + "arg=%D0%9F%D1%80%D0%B8%D0%B2%D0%B5%D1%82" + ) + + +def test_build_query_quoting(): + u = URL.build(scheme="http", host="127.0.0.1", path="/файл.jpg", query="arg=Привет") + + assert u == URL("http://127.0.0.1/файл.jpg?arg=Привет") + assert str(u) == ( + "http://127.0.0.1/%D1%84%D0%B0%D0%B9%D0%BB.jpg?" 
+ "arg=%D0%9F%D1%80%D0%B8%D0%B2%D0%B5%D1%82" + ) + + +def test_build_query_only(): + u = URL.build(query={"key": "value"}) + + assert str(u) == "?key=value" + + +def test_build_drop_dots(): + u = URL.build(scheme="http", host="example.com", path="/path/../to") + assert str(u) == "http://example.com/to" + + +def test_build_encode(): + u = URL.build( + scheme="http", + host="историк.рф", + path="/путь/файл", + query_string="ключ=знач", + fragment="фраг", + ) + expected = ( + "http://xn--h1aagokeh.xn--p1ai" + "/%D0%BF%D1%83%D1%82%D1%8C/%D1%84%D0%B0%D0%B9%D0%BB" + "?%D0%BA%D0%BB%D1%8E%D1%87=%D0%B7%D0%BD%D0%B0%D1%87" + "#%D1%84%D1%80%D0%B0%D0%B3" + ) + assert str(u) == expected + + +def test_build_already_encoded(): + # resulting URL is invalid but not encoded + u = URL.build( + scheme="http", + host="историк.рф", + path="/путь/файл", + query_string="ключ=знач", + fragment="фраг", + encoded=True, + ) + assert str(u) == "http://историк.рф/путь/файл?ключ=знач#фраг" + + +def test_build_percent_encoded(): + u = URL.build( + scheme="http", + host="%2d.org", + user="u%2d", + password="p%2d", + path="/%2d", + query_string="k%2d=v%2d", + fragment="f%2d", + ) + assert str(u) == "http://u%252d:p%252d@%2d.org/%252d?k%252d=v%252d#f%252d" + assert u.raw_host == "%2d.org" + assert u.host == "%2d.org" + assert u.raw_user == "u%252d" + assert u.user == "u%2d" + assert u.raw_password == "p%252d" + assert u.password == "p%2d" + assert u.raw_authority == "u%252d:p%252d@%2d.org" + assert u.authority == "u%2d:p%2d@%2d.org:80" + assert u.raw_path == "/%252d" + assert u.path == "/%2d" + assert u.query == {"k%2d": "v%2d"} + assert u.raw_query_string == "k%252d=v%252d" + assert u.query_string == "k%2d=v%2d" + assert u.raw_fragment == "f%252d" + assert u.fragment == "f%2d" + + +def test_build_with_authority_percent_encoded(): + u = URL.build(scheme="http", authority="u%2d:p%2d@%2d.org") + assert str(u) == "http://u%252d:p%252d@%2d.org" + assert u.raw_host == "%2d.org" + assert u.host == 
"%2d.org" + assert u.raw_user == "u%252d" + assert u.user == "u%2d" + assert u.raw_password == "p%252d" + assert u.password == "p%2d" + assert u.raw_authority == "u%252d:p%252d@%2d.org" + assert u.authority == "u%2d:p%2d@%2d.org:80" + + +def test_build_with_authority_percent_encoded_already_encoded(): + u = URL.build(scheme="http", authority="u%2d:p%2d@%2d.org", encoded=True) + assert str(u) == "http://u%2d:p%2d@%2d.org" + assert u.raw_host == "%2d.org" + assert u.host == "%2d.org" + assert u.user == "u-" + assert u.raw_user == "u%2d" + assert u.password == "p-" + assert u.raw_password == "p%2d" + assert u.authority == "u-:p-@%2d.org:80" + assert u.raw_authority == "u%2d:p%2d@%2d.org" + + +def test_build_with_authority_with_path_with_leading_slash(): + u = URL.build(scheme="http", host="example.com", path="/path_with_leading_slash") + assert str(u) == "http://example.com/path_with_leading_slash" + + +def test_build_with_authority_with_empty_path(): + u = URL.build(scheme="http", host="example.com", path="") + assert str(u) == "http://example.com" + + +def test_build_with_authority_with_path_without_leading_slash(): + with pytest.raises(ValueError): + URL.build(scheme="http", host="example.com", path="path_without_leading_slash") + + +def test_build_with_none_host(): + with pytest.raises(TypeError, match="NoneType is illegal for.*host"): + URL.build(scheme="http", host=None) + + +def test_build_with_none_path(): + with pytest.raises(TypeError): + URL.build(scheme="http", host="example.com", path=None) + + +def test_build_with_none_query_string(): + with pytest.raises(TypeError): + URL.build(scheme="http", host="example.com", query_string=None) + + +def test_build_with_none_fragment(): + with pytest.raises(TypeError): + URL.build(scheme="http", host="example.com", fragment=None) diff --git a/contrib/python/yarl/tests/test_url_cmp_and_hash.py b/contrib/python/yarl/tests/test_url_cmp_and_hash.py new file mode 100644 index 0000000000..17c42e3566 --- /dev/null +++ 
b/contrib/python/yarl/tests/test_url_cmp_and_hash.py @@ -0,0 +1,88 @@ +from yarl import URL + +# comparison and hashing + + +def test_ne_str(): + url = URL("http://example.com/") + assert url != "http://example.com/" + + +def test_eq(): + url = URL("http://example.com/") + assert url == URL("http://example.com/") + + +def test_hash(): + assert hash(URL("http://example.com/")) == hash(URL("http://example.com/")) + + +def test_hash_double_call(): + url = URL("http://example.com/") + assert hash(url) == hash(url) + + +def test_le_less(): + url1 = URL("http://example1.com/") + url2 = URL("http://example2.com/") + + assert url1 <= url2 + + +def test_le_eq(): + url1 = URL("http://example.com/") + url2 = URL("http://example.com/") + + assert url1 <= url2 + + +def test_le_not_implemented(): + url = URL("http://example1.com/") + + assert url.__le__(123) is NotImplemented + + +def test_lt(): + url1 = URL("http://example1.com/") + url2 = URL("http://example2.com/") + + assert url1 < url2 + + +def test_lt_not_implemented(): + url = URL("http://example1.com/") + + assert url.__lt__(123) is NotImplemented + + +def test_ge_more(): + url1 = URL("http://example1.com/") + url2 = URL("http://example2.com/") + + assert url2 >= url1 + + +def test_ge_eq(): + url1 = URL("http://example.com/") + url2 = URL("http://example.com/") + + assert url2 >= url1 + + +def test_ge_not_implemented(): + url = URL("http://example1.com/") + + assert url.__ge__(123) is NotImplemented + + +def test_gt(): + url1 = URL("http://example1.com/") + url2 = URL("http://example2.com/") + + assert url2 > url1 + + +def test_gt_not_implemented(): + url = URL("http://example1.com/") + + assert url.__gt__(123) is NotImplemented diff --git a/contrib/python/yarl/tests/test_url_parsing.py b/contrib/python/yarl/tests/test_url_parsing.py new file mode 100644 index 0000000000..cc753fcd0c --- /dev/null +++ b/contrib/python/yarl/tests/test_url_parsing.py @@ -0,0 +1,582 @@ +import sys + +import pytest + +from yarl import URL + + 
+class TestScheme: + def test_scheme_path(self): + u = URL("scheme:path") + assert u.scheme == "scheme" + assert u.host is None + assert u.path == "path" + assert u.query_string == "" + assert u.fragment == "" + + def test_scheme_path_other(self): + u = URL("scheme:path:other") + assert u.scheme == "scheme" + assert u.host is None + assert u.path == "path:other" + assert u.query_string == "" + assert u.fragment == "" + + def test_complex_scheme(self): + u = URL("allow+chars-33.:path") + assert u.scheme == "allow+chars-33." + assert u.host is None + assert u.path == "path" + assert u.query_string == "" + assert u.fragment == "" + + def test_scheme_only(self): + u = URL("simple:") + assert u.scheme == "simple" + assert u.host is None + assert u.path == "" + assert u.query_string == "" + assert u.fragment == "" + + def test_no_scheme1(self): + u = URL("google.com:80") + # See: https://bugs.python.org/issue27657 + if ( + sys.version_info[:3] == (3, 7, 6) + or sys.version_info[:3] == (3, 8, 1) + or sys.version_info >= (3, 9, 0) + ): + assert u.scheme == "google.com" + assert u.host is None + assert u.path == "80" + else: + assert u.scheme == "" + assert u.host is None + assert u.path == "google.com:80" + assert u.query_string == "" + assert u.fragment == "" + + def test_no_scheme2(self): + u = URL("google.com:80/root") + assert u.scheme == "google.com" + assert u.host is None + assert u.path == "80/root" + assert u.query_string == "" + assert u.fragment == "" + + def test_not_a_scheme1(self): + u = URL("not_cheme:path") + assert u.scheme == "" + assert u.host is None + assert u.path == "not_cheme:path" + assert u.query_string == "" + assert u.fragment == "" + + def test_not_a_scheme2(self): + u = URL("signals37:book") + assert u.scheme == "signals37" + assert u.host is None + assert u.path == "book" + assert u.query_string == "" + assert u.fragment == "" + + def test_scheme_rel_path1(self): + u = URL(":relative-path") + assert u.scheme == "" + assert u.host is None + 
assert u.path == ":relative-path" + assert u.query_string == "" + assert u.fragment == "" + + def test_scheme_rel_path2(self): + u = URL(":relative/path") + assert u.scheme == "" + assert u.host is None + assert u.path == ":relative/path" + assert u.query_string == "" + assert u.fragment == "" + + def test_scheme_weird(self): + u = URL("://and-this") + assert u.scheme == "" + assert u.host is None + assert u.path == "://and-this" + assert u.query_string == "" + assert u.fragment == "" + + +class TestHost: + def test_canonical(self): + u = URL("scheme://host/path") + assert u.scheme == "scheme" + assert u.host == "host" + assert u.path == "/path" + assert u.query_string == "" + assert u.fragment == "" + + def test_absolute_no_scheme(self): + u = URL("//host/path") + assert u.scheme == "" + assert u.host == "host" + assert u.path == "/path" + assert u.query_string == "" + assert u.fragment == "" + + def test_absolute_no_scheme_complex_host(self): + u = URL("//host+path") + assert u.scheme == "" + assert u.host == "host+path" + assert u.path == "/" + assert u.query_string == "" + assert u.fragment == "" + + def test_absolute_no_scheme_simple_host(self): + u = URL("//host") + assert u.scheme == "" + assert u.host == "host" + assert u.path == "/" + assert u.query_string == "" + assert u.fragment == "" + + def test_weird_host(self): + u = URL("//this+is$also&host!") + assert u.scheme == "" + assert u.host == "this+is$also&host!" 
def _assert_url_parts(raw, **expected):
    """Parse ``raw`` with ``URL`` and check the named attributes in order.

    An expected value of ``None`` is compared by identity; anything else is
    compared with ``==``.  The attribute name is used as the assertion
    message so a failure points at the offending URL part.
    """
    parsed = URL(raw)
    for attr, want in expected.items():
        got = getattr(parsed, attr)
        if want is None:
            assert got is None, attr
        else:
            assert got == want, attr


class TestPort:
    """Parsing of the port component of the authority."""

    def test_canonical(self):
        _assert_url_parts(
            "//host:80/path",
            scheme="", host="host", port=80,
            path="/path", query_string="", fragment="",
        )

    def test_no_path(self):
        _assert_url_parts(
            "//host:80",
            scheme="", host="host", port=80,
            path="/", query_string="", fragment="",
        )

    @pytest.mark.xfail(reason="https://github.com/aio-libs/yarl/issues/821")
    def test_no_host(self):
        _assert_url_parts(
            "//:80",
            scheme="", host="", port=80,
            path="/", query_string="", fragment="",
        )

    def test_double_port(self):
        with pytest.raises(ValueError):
            URL("//h:22:80/")

    def test_bad_port(self):
        with pytest.raises(ValueError):
            URL("//h:no/path")

    def test_another_bad_port(self):
        with pytest.raises(ValueError):
            URL("//h:22:no/path")

    def test_bad_port_again(self):
        with pytest.raises(ValueError):
            URL("//h:-80/path")


class TestUserInfo:
    """Parsing of the ``user[:password]@`` part of the authority."""

    def test_canonical(self):
        _assert_url_parts(
            "sch://user@host/",
            scheme="sch", user="user", host="host",
            path="/", query_string="", fragment="",
        )

    def test_user_pass(self):
        _assert_url_parts(
            "//user:pass@host",
            scheme="", user="user", password="pass", host="host",
            path="/", query_string="", fragment="",
        )

    def test_complex_userinfo(self):
        _assert_url_parts(
            "//user:pas:and:more@host",
            scheme="", user="user", password="pas:and:more", host="host",
            path="/", query_string="", fragment="",
        )

    def test_no_user(self):
        _assert_url_parts(
            "//:pas:@host",
            scheme="", user=None, password="pas:", host="host",
            path="/", query_string="", fragment="",
        )

    def test_weird_user(self):
        _assert_url_parts(
            "//!($&')*+,;=@host",
            scheme="", user="!($&')*+,;=", password=None, host="host",
            path="/", query_string="", fragment="",
        )

    def test_weird_user2(self):
        _assert_url_parts(
            "//user@info@ya.ru",
            scheme="", user="user@info", password=None, host="ya.ru",
            path="/", query_string="", fragment="",
        )

    def test_weird_user3(self):
        _assert_url_parts(
            "//%5Bsome%5D@host",
            scheme="", user="[some]", password=None, host="host",
            path="/", query_string="", fragment="",
        )


class TestQuery_String:
    """Parsing of the query string in relative and absolute URLs."""

    def test_simple(self):
        _assert_url_parts(
            "?query",
            scheme="", user=None, password=None, host=None,
            path="", query_string="query", fragment="",
        )

    def test_scheme_query(self):
        _assert_url_parts(
            "http:?query",
            scheme="http", user=None, password=None, host=None,
            path="", query_string="query", fragment="",
        )

    def test_abs_url_query(self):
        _assert_url_parts(
            "//host?query",
            scheme="", user=None, password=None, host="host",
            path="/", query_string="query", fragment="",
        )

    def test_abs_url_path_query(self):
        _assert_url_parts(
            "//host/path?query",
            scheme="", user=None, password=None, host="host",
            path="/path", query_string="query", fragment="",
        )

    def test_double_question_mark(self):
        _assert_url_parts(
            "//ho?st/path?query",
            scheme="", user=None, password=None, host="ho",
            path="/", query_string="st/path?query", fragment="",
        )

    def test_complex_query(self):
        _assert_url_parts(
            "?a://b:c@d.e/f?g#h",
            scheme="", user=None, password=None, host=None,
            path="", query_string="a://b:c@d.e/f?g", fragment="h",
        )

    def test_query_in_fragment(self):
        _assert_url_parts(
            "#?query",
            scheme="", user=None, password=None, host=None,
            path="", query_string="", fragment="?query",
        )


class TestFragment:
    """Parsing of the fragment in relative and absolute URLs."""

    def test_simple(self):
        _assert_url_parts(
            "#frag",
            scheme="", user=None, password=None, host=None,
            path="", query_string="", fragment="frag",
        )

    def test_scheme_frag(self):
        _assert_url_parts(
            "http:#frag",
            scheme="http", user=None, password=None, host=None,
            path="", query_string="", fragment="frag",
        )

    def test_host_frag(self):
        _assert_url_parts(
            "//host#frag",
            scheme="", user=None, password=None, host="host",
            path="/", query_string="", fragment="frag",
        )

    def test_scheme_path_frag(self):
        _assert_url_parts(
            "//host/path#frag",
            scheme="", user=None, password=None, host="host",
            path="/path", query_string="", fragment="frag",
        )

    def test_scheme_query_frag(self):
        _assert_url_parts(
            "//host?query#frag",
            scheme="", user=None, password=None, host="host",
            path="/", query_string="query", fragment="frag",
        )

    def test_host_frag_query(self):
        _assert_url_parts(
            "//ho#st/path?query",
            scheme="", user=None, password=None, host="ho",
            path="/", query_string="", fragment="st/path?query",
        )

    def test_complex_frag(self):
        _assert_url_parts(
            "#a://b:c@d.e/f?g#h",
            scheme="", user=None, password=None, host=None,
            path="", query_string="", fragment="a://b:c@d.e/f?g#h",
        )


class TestStripEmptyParts:
    """URLs whose user, port, query or fragment delimiters are present but empty."""

    def test_all_empty(self):
        with pytest.raises(ValueError):
            URL("//@:?#")

    def test_path_only(self):
        _assert_url_parts(
            "///path",
            scheme="", user=None, password=None, host=None,
            path="/path", query_string="", fragment="",
        )

    def test_empty_user(self):
        _assert_url_parts(
            "//@host",
            scheme="", user=None, password=None, host="host",
            path="/", query_string="", fragment="",
        )

    def test_empty_port(self):
        _assert_url_parts(
            "//host:",
            scheme="", user=None, password=None, host="host",
            path="/", query_string="", fragment="",
        )

    def test_empty_port_and_path(self):
        _assert_url_parts(
            "//host:/",
            scheme="", user=None, password=None, host="host",
            path="/", query_string="", fragment="",
        )

    def test_empty_path_only(self):
        _assert_url_parts(
            "/",
            scheme="", user=None, password=None, host=None,
            path="/", query_string="", fragment="",
        )

    def test_relative_path_only(self):
        _assert_url_parts(
            "path",
            scheme="", user=None, password=None, host=None,
            path="path", query_string="", fragment="",
        )

    def test_path(self):
        _assert_url_parts(
            "/path",
            scheme="", user=None, password=None, host=None,
            path="/path", query_string="", fragment="",
        )

    def test_empty_query_with_path(self):
        _assert_url_parts(
            "/path?",
            scheme="", user=None, password=None, host=None,
            path="/path", query_string="", fragment="",
        )

    def test_empty_query(self):
        _assert_url_parts(
            "?",
            scheme="", user=None, password=None, host=None,
            path="", query_string="", fragment="",
        )

    def test_empty_query_with_frag(self):
        _assert_url_parts(
            "?#frag",
            scheme="", user=None, password=None, host=None,
            path="", query_string="", fragment="frag",
        )

    def test_path_empty_frag(self):
        _assert_url_parts(
            "/path#",
            scheme="", user=None, password=None, host=None,
            path="/path", query_string="", fragment="",
        )

    def test_empty_path(self):
        _assert_url_parts(
            "#",
            scheme="", user=None, password=None, host=None,
            path="", query_string="", fragment="",
        )
from typing import List, Tuple
from urllib.parse import parse_qs, urlencode

import pytest
from multidict import MultiDict, MultiDictProxy

from yarl import URL

# ========================================
# Basic chars in query values
# ========================================

# Each entry pairs a URL with the multidict its query is expected to parse to.
URLS_WITH_BASIC_QUERY_VALUES: List[Tuple[URL, MultiDict]] = [
    # Empty strings, keys and values
    (URL("http://example.com"), MultiDict()),
    (URL("http://example.com?a="), MultiDict([("a", "")])),
    # ASCII chars
    (URL("http://example.com?a+b=c+d"), MultiDict({"a b": "c d"})),
    (URL("http://example.com?a=1&b=2"), MultiDict([("a", "1"), ("b", "2")])),
    (
        URL("http://example.com?a=1&b=2&a=3"),
        MultiDict([("a", "1"), ("b", "2"), ("a", "3")]),
    ),
    # Non-ASCII BMP chars
    (URL("http://example.com?ключ=знач"), MultiDict({"ключ": "знач"})),
    (URL("http://example.com?foo=ᴜɴɪᴄᴏᴅᴇ"), MultiDict({"foo": "ᴜɴɪᴄᴏᴅᴇ"})),
    # Non-BMP chars
    (URL("http://example.com?bar=𝕦𝕟𝕚𝕔𝕠𝕕𝕖"), MultiDict({"bar": "𝕦𝕟𝕚𝕔𝕠𝕕𝕖"})),
]


@pytest.mark.parametrize(
    "original_url, expected_query",
    URLS_WITH_BASIC_QUERY_VALUES,
)
def test_query_basic_parsing(original_url, expected_query):
    """``URL.query`` is a ``MultiDictProxy`` equal to the expected multidict."""
    parsed_query = original_url.query
    assert isinstance(parsed_query, MultiDictProxy)
    assert parsed_query == expected_query


@pytest.mark.parametrize(
    "original_url, expected_query",
    URLS_WITH_BASIC_QUERY_VALUES,
)
def test_query_basic_update_query(original_url, expected_query):
    """Updating with an empty mapping yields a URL equal to the original."""
    assert original_url.update_query({}) == original_url


def test_query_dont_unqoute_twice():
    """A URL embedded as a query value comes back exactly once-decoded."""
    inner_query = urlencode({"a": "/////"})
    sample_url = "http://base.place?" + inner_query
    outer_query = urlencode({"url": sample_url})

    parsed = URL("http://test_url.aha?" + outer_query)
    assert parsed.query["url"] == sample_url


# ========================================
# Reserved chars in query values
# ========================================

# See https://github.com/python/cpython#87133, which introduced a new
# `separator` keyword argument to `urllib.parse.parse_qs` (among others).
# If the name doesn't exist as a variable in the function bytecode, the
# test is expected to fail.
_SEMICOLON_XFAIL = pytest.mark.xfail(
    condition="separator" not in parse_qs.__code__.co_varnames,
    reason=(
        "Python versions < 3.7.10, < 3.8.8 and < 3.9.2 lack a fix for "
        'CVE-2021-23336 dropping ";" as a valid query parameter separator, '
        "making this test fail."
    ),
    strict=True,
)


# Tuples of (url, expected number of query keys, expected value of key "a").
URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES = [
    # Ampersand
    (URL("http://127.0.0.1/?a=10&b=20"), 2, "10"),
    (URL("http://127.0.0.1/?a=10%26b=20"), 1, "10&b=20"),
    (URL("http://127.0.0.1/?a=10%3Bb=20"), 1, "10;b=20"),
    # Semicolon, which is *not* a query parameter separator as of RFC3986
    (URL("http://127.0.0.1/?a=10;b=20"), 1, "10;b=20"),
    (URL("http://127.0.0.1/?a=10%26b=20"), 1, "10&b=20"),
    (URL("http://127.0.0.1/?a=10%3Bb=20"), 1, "10;b=20"),
]
# Same cases, but the literal-semicolon entry (index 3) carries the xfail
# mark for old Python patch releases.
URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES_W_XFAIL = [
    *URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES[:3],
    pytest.param(
        *URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES[3],
        marks=_SEMICOLON_XFAIL,
    ),
    *URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES[4:],
]


@pytest.mark.parametrize(
    "original_url, expected_query_len, expected_value_a",
    URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES_W_XFAIL,
)
def test_query_separators_from_parsing(
    original_url,
    expected_query_len,
    expected_value_a,
):
    """Parsing splits on ``&`` only and decodes escaped separators in values."""
    parsed_query = original_url.query
    assert len(parsed_query) == expected_query_len
    assert parsed_query["a"] == expected_value_a


@pytest.mark.parametrize(
    "original_url, expected_query_len, expected_value_a",
    URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES_W_XFAIL,
)
def test_query_separators_from_update_query(
    original_url,
    expected_query_len,
    expected_value_a,
):
    """``update_query`` keeps "a" intact and round-trips the same value as "c"."""
    updated_query = original_url.update_query({"c": expected_value_a}).query
    assert updated_query["a"] == expected_value_a
    assert updated_query["c"] == expected_value_a


@pytest.mark.parametrize(
    "original_url, expected_query_len, expected_value_a",
    URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES,
)
def test_query_separators_from_with_query(
    original_url,
    expected_query_len,
    expected_value_a,
):
    """``with_query`` round-trips a value containing reserved characters."""
    replaced = original_url.with_query({"c": expected_value_a})
    assert replaced.query["c"] == expected_value_a


@pytest.mark.parametrize(
    "original_url, expected_query_len, expected_value_a",
    URLS_WITH_RESERVED_CHARS_IN_QUERY_VALUES,
)
def test_query_from_empty_update_query(
    original_url,
    expected_query_len,
    expected_value_a,
):
    """An empty ``update_query`` leaves the values of "a" (and "b") untouched."""
    before = original_url.query
    after = original_url.update_query({}).query

    assert after["a"] == before["a"]

    if "b" in before:
        assert after["b"] == before["b"]
import pytest

from yarl import URL

# with_*


def test_with_scheme():
    assert str(URL("http://example.com").with_scheme("https")) == "https://example.com"


def test_with_scheme_uppercased():
    assert str(URL("http://example.com").with_scheme("HTTPS")) == "https://example.com"


def test_with_scheme_for_relative_url():
    with pytest.raises(ValueError):
        URL("path/to").with_scheme("http")


def test_with_scheme_invalid_type():
    base = URL("http://example.com")
    with pytest.raises(TypeError):
        assert str(base.with_scheme(123))


def test_with_user():
    assert str(URL("http://example.com").with_user("john")) == "http://john@example.com"


def test_with_user_non_ascii():
    changed = URL("http://example.com").with_user("вася")
    assert changed.raw_user == "%D0%B2%D0%B0%D1%81%D1%8F"
    assert changed.user == "вася"
    assert changed.raw_authority == "%D0%B2%D0%B0%D1%81%D1%8F@example.com"
    assert changed.authority == "вася@example.com:80"


def test_with_user_percent_encoded():
    changed = URL("http://example.com").with_user("%cf%80")
    assert changed.raw_user == "%25cf%2580"
    assert changed.user == "%cf%80"
    assert changed.raw_authority == "%25cf%2580@example.com"
    assert changed.authority == "%cf%80@example.com:80"


def test_with_user_for_relative_url():
    with pytest.raises(ValueError):
        URL("path/to").with_user("user")


def test_with_user_invalid_type():
    base = URL("http://example.com:123")
    with pytest.raises(TypeError):
        base.with_user(123)


def test_with_user_None():
    assert str(URL("http://john@example.com").with_user(None)) == "http://example.com"


def test_with_user_ipv6():
    base = URL("http://john:pass@[::1]:8080/")
    assert str(base.with_user(None)) == "http://[::1]:8080/"


def test_with_user_None_when_password_present():
    base = URL("http://john:pass@example.com")
    assert str(base.with_user(None)) == "http://example.com"


def test_with_password():
    base = URL("http://john@example.com")
    assert str(base.with_password("pass")) == "http://john:pass@example.com"


def test_with_password_ipv6():
    base = URL("http://john:pass@[::1]:8080/")
    assert str(base.with_password(None)) == "http://john@[::1]:8080/"


def test_with_password_non_ascii():
    changed = URL("http://john@example.com").with_password("пароль")
    assert changed.raw_password == "%D0%BF%D0%B0%D1%80%D0%BE%D0%BB%D1%8C"
    assert changed.password == "пароль"
    assert (
        changed.raw_authority
        == "john:%D0%BF%D0%B0%D1%80%D0%BE%D0%BB%D1%8C@example.com"
    )
    assert changed.authority == "john:пароль@example.com:80"


def test_with_password_percent_encoded():
    changed = URL("http://john@example.com").with_password("%cf%80")
    assert changed.raw_password == "%25cf%2580"
    assert changed.password == "%cf%80"
    assert changed.raw_authority == "john:%25cf%2580@example.com"
    assert changed.authority == "john:%cf%80@example.com:80"


def test_with_password_non_ascii_with_colon():
    changed = URL("http://john@example.com").with_password("п:а")
    assert changed.raw_password == "%D0%BF%3A%D0%B0"
    assert changed.password == "п:а"


def test_with_password_for_relative_url():
    with pytest.raises(ValueError):
        URL("path/to").with_password("pass")


def test_with_password_None():
    base = URL("http://john:pass@example.com")
    assert str(base.with_password(None)) == "http://john@example.com"


def test_with_password_invalid_type():
    base = URL("http://example.com:123")
    with pytest.raises(TypeError):
        base.with_password(123)


def test_with_password_and_empty_user():
    changed = URL("http://example.com").with_password("pass")
    assert changed.password == "pass"
    assert changed.user is None
    assert str(changed) == "http://:pass@example.com"


def test_from_str_with_host_ipv4():
    changed = URL("http://host:80").with_host("192.168.1.1")
    assert changed.raw_host == "192.168.1.1"


def test_from_str_with_host_ipv6():
    changed = URL("http://host:80").with_host("::1")
    assert changed.raw_host == "::1"


def test_with_host():
    base = URL("http://example.com:123")
    assert str(base.with_host("example.org")) == "http://example.org:123"


def test_with_host_empty():
    base = URL("http://example.com:123")
    with pytest.raises(ValueError):
        base.with_host("")


def test_with_host_non_ascii():
    changed = URL("http://example.com:123").with_host("историк.рф")
    assert changed.raw_host == "xn--h1aagokeh.xn--p1ai"
    assert changed.host == "историк.рф"
    assert changed.raw_authority == "xn--h1aagokeh.xn--p1ai:123"
    assert changed.authority == "историк.рф:123"


def test_with_host_percent_encoded():
    base = URL("http://%25cf%2580%cf%80:%25cf%2580%cf%80@example.com:123")
    changed = base.with_host("%cf%80.org")
    assert changed.raw_host == "%cf%80.org"
    assert changed.host == "%cf%80.org"
    assert (
        changed.raw_authority
        == "%25cf%2580%CF%80:%25cf%2580%CF%80@%cf%80.org:123"
    )
    assert changed.authority == "%cf%80π:%cf%80π@%cf%80.org:123"


def test_with_host_for_relative_url():
    with pytest.raises(ValueError):
        URL("path/to").with_host("example.com")


def test_with_host_invalid_type():
    base = URL("http://example.com:123")
    with pytest.raises(TypeError):
        base.with_host(None)


def test_with_port():
    assert str(URL("http://example.com").with_port(8888)) == "http://example.com:8888"


def test_with_port_with_no_port():
    assert str(URL("http://example.com").with_port(None)) == "http://example.com"


def test_with_port_ipv6():
    assert str(URL("http://[::1]:8080/").with_port(80)) == "http://[::1]:80/"


def test_with_port_keeps_query_and_fragment():
    base = URL("http://example.com/?a=1#frag")
    assert str(base.with_port(8888)) == "http://example.com:8888/?a=1#frag"


def test_with_port_percent_encoded():
    base = URL("http://user%name:pass%word@example.com/")
    assert (
        str(base.with_port(808))
        == "http://user%25name:pass%25word@example.com:808/"
    )


def test_with_port_for_relative_url():
    with pytest.raises(ValueError):
        URL("path/to").with_port(1234)


def test_with_port_invalid_type():
    # Both a string and a bool are rejected as port values.
    with pytest.raises(TypeError):
        URL("http://example.com").with_port("123")
    with pytest.raises(TypeError):
        URL("http://example.com").with_port(True)


def test_with_port_invalid_range():
    with pytest.raises(ValueError):
        URL("http://example.com").with_port(-1)
+ +PY3_LIBRARY() + +VERSION(1.9.3) + +LICENSE(Apache-2.0) + +PEERDIR( + contrib/python/idna + contrib/python/multidict +) + +NO_COMPILER_WARNINGS() + +NO_LINT() + +PY_SRCS( + TOP_LEVEL + yarl/__init__.py + yarl/__init__.pyi + yarl/_quoting.py + yarl/_quoting_c.pyi + yarl/_quoting_py.py + yarl/_url.py + CYTHON_C + yarl/_quoting_c.pyx +) + +RESOURCE_FILES( + PREFIX contrib/python/yarl/ + .dist-info/METADATA + .dist-info/top_level.txt + yarl/py.typed +) + +END() + +RECURSE_FOR_TESTS( + tests +) diff --git a/contrib/python/yarl/yarl/__init__.py b/contrib/python/yarl/yarl/__init__.py new file mode 100644 index 0000000000..f43aecbc92 --- /dev/null +++ b/contrib/python/yarl/yarl/__init__.py @@ -0,0 +1,5 @@ +from ._url import URL, cache_clear, cache_configure, cache_info + +__version__ = "1.9.3" + +__all__ = ("URL", "cache_clear", "cache_configure", "cache_info") diff --git a/contrib/python/yarl/yarl/_quoting.py b/contrib/python/yarl/yarl/_quoting.py new file mode 100644 index 0000000000..8d1c705ff2 --- /dev/null +++ b/contrib/python/yarl/yarl/_quoting.py @@ -0,0 +1,18 @@ +import os +import sys + +__all__ = ("_Quoter", "_Unquoter") + + +NO_EXTENSIONS = bool(os.environ.get("YARL_NO_EXTENSIONS")) # type: bool +if sys.implementation.name != "cpython": + NO_EXTENSIONS = True + + +if not NO_EXTENSIONS: # pragma: no branch + try: + from ._quoting_c import _Quoter, _Unquoter # type: ignore[assignment] + except ImportError: # pragma: no cover + from ._quoting_py import _Quoter, _Unquoter # type: ignore[assignment] +else: + from ._quoting_py import _Quoter, _Unquoter # type: ignore[assignment] diff --git a/contrib/python/yarl/yarl/_quoting_c.pyx b/contrib/python/yarl/yarl/_quoting_c.pyx new file mode 100644 index 0000000000..5335d17365 --- /dev/null +++ b/contrib/python/yarl/yarl/_quoting_c.pyx @@ -0,0 +1,371 @@ +# cython: language_level=3 + +from cpython.exc cimport PyErr_NoMemory +from cpython.mem cimport PyMem_Free, PyMem_Malloc, PyMem_Realloc +from cpython.unicode cimport 
from cpython.exc cimport PyErr_NoMemory
from cpython.mem cimport PyMem_Free, PyMem_Malloc, PyMem_Realloc
from cpython.unicode cimport PyUnicode_DecodeASCII, PyUnicode_DecodeUTF8Stateful
from libc.stdint cimport uint8_t, uint64_t
from libc.string cimport memcpy, memset

from string import ascii_letters, digits


cdef str GEN_DELIMS = ":/?#[]@"
cdef str SUB_DELIMS_WITHOUT_QS = "!$'()*,"
# RFC 3986 sub-delims include "&", not "?" ("?" is a gen-delim).
cdef str SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + '+&=;'
cdef str RESERVED = GEN_DELIMS + SUB_DELIMS
cdef str UNRESERVED = ascii_letters + digits + '-._~'
cdef str ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS
cdef str QS = '+&=;'

DEF BUF_SIZE = 8 * 1024  # 8KiB
# Module-level scratch buffer used by every Writer until it overflows.
cdef char BUFFER[BUF_SIZE]


cdef inline Py_UCS4 _to_hex(uint8_t v):
    # Map a nibble (0..15) to its upper-case hexadecimal digit.
    if v < 10:
        return <Py_UCS4>(v+0x30)  # ord('0') == 0x30
    else:
        return <Py_UCS4>(v+0x41-10)  # ord('A') == 0x41


cdef inline int _from_hex(Py_UCS4 v):
    # Return the value of a hexadecimal digit, or -1 if `v` is not one.
    if '0' <= v <= '9':
        return <int>(v) - 0x30  # ord('0') == 0x30
    elif 'A' <= v <= 'F':
        return <int>(v) - 0x41 + 10  # ord('A') == 0x41
    elif 'a' <= v <= 'f':
        return <int>(v) - 0x61 + 10  # ord('a') == 0x61
    else:
        return -1


cdef inline int _is_lower_hex(Py_UCS4 v):
    return 'a' <= v <= 'f'


cdef inline Py_UCS4 _restore_ch(Py_UCS4 d1, Py_UCS4 d2):
    # Combine two hex digits into one byte value; <Py_UCS4>-1 signals that
    # at least one of them is not a hex digit.
    cdef int digit1 = _from_hex(d1)
    if digit1 < 0:
        return <Py_UCS4>-1
    cdef int digit2 = _from_hex(d2)
    if digit2 < 0:
        return <Py_UCS4>-1
    return <Py_UCS4>(digit1 << 4 | digit2)


# 128-bit bitmaps indexed by ASCII code point.
# ALLOWED_TABLE: characters in ALLOWED.
# ALLOWED_NOTQS_TABLE: ALLOWED plus the QS characters; used when qs is False.
cdef uint8_t ALLOWED_TABLE[16]
cdef uint8_t ALLOWED_NOTQS_TABLE[16]


cdef inline bint bit_at(uint8_t array[], uint64_t ch):
    return array[ch >> 3] & (1 << (ch & 7))


cdef inline void set_bit(uint8_t array[], uint64_t ch):
    array[ch >> 3] |= (1 << (ch & 7))


memset(ALLOWED_TABLE, 0, sizeof(ALLOWED_TABLE))
memset(ALLOWED_NOTQS_TABLE, 0, sizeof(ALLOWED_NOTQS_TABLE))

for i in range(128):
    if chr(i) in ALLOWED:
        set_bit(ALLOWED_TABLE, i)
        set_bit(ALLOWED_NOTQS_TABLE, i)
    if chr(i) in QS:
        set_bit(ALLOWED_NOTQS_TABLE, i)

# ----------------- writer ---------------------------

cdef struct Writer:
    char *buf            # current output buffer (BUFFER or a heap block)
    Py_ssize_t size      # capacity of buf in bytes
    Py_ssize_t pos       # number of bytes written so far
    bint changed         # set once any written byte differs from the input


cdef inline void _init_writer(Writer* writer):
    writer.buf = &BUFFER[0]
    writer.size = BUF_SIZE
    writer.pos = 0
    writer.changed = 0


cdef inline void _release_writer(Writer* writer):
    # Only heap blocks are freed; the static BUFFER is left alone.
    if writer.buf != BUFFER:
        PyMem_Free(writer.buf)


cdef inline int _write_char(Writer* writer, Py_UCS4 ch, bint changed):
    # Append one byte, growing the buffer by BUF_SIZE when it is full.
    # Returns 0 on success, -1 after PyErr_NoMemory() on allocation failure.
    cdef char * buf
    cdef Py_ssize_t size

    if writer.pos == writer.size:
        # reallocate
        size = writer.size + BUF_SIZE
        if writer.buf == BUFFER:
            # First overflow: move from the static buffer to the heap.
            buf = <char*>PyMem_Malloc(size)
            if buf == NULL:
                PyErr_NoMemory()
                return -1
            memcpy(buf, writer.buf, writer.size)
        else:
            buf = <char*>PyMem_Realloc(writer.buf, size)
            if buf == NULL:
                PyErr_NoMemory()
                return -1
        writer.buf = buf
        writer.size = size
    writer.buf[writer.pos] = <char>ch
    writer.pos += 1
    writer.changed |= changed
    return 0


cdef inline int _write_pct(Writer* writer, uint8_t ch, bint changed):
    # Append the byte `ch` as "%XX" with upper-case hex digits.
    if _write_char(writer, '%', changed) < 0:
        return -1
    if _write_char(writer, _to_hex(<uint8_t>ch >> 4), changed) < 0:
        return -1
    return _write_char(writer, _to_hex(<uint8_t>ch & 0x0f), changed)


cdef inline int _write_utf8(Writer* writer, Py_UCS4 symbol):
    # Append the percent-encoded UTF-8 representation of `symbol`.
    cdef uint64_t utf = <uint64_t> symbol

    if utf < 0x80:
        return _write_pct(writer, <uint8_t>utf, True)
    elif utf < 0x800:
        if _write_pct(writer, <uint8_t>(0xc0 | (utf >> 6)), True) < 0:
            return -1
        return _write_pct(writer,  <uint8_t>(0x80 | (utf & 0x3f)), True)
    elif 0xD800 <= utf <= 0xDFFF:
        # surrogate pair, ignored
        return 0
    elif utf < 0x10000:
        if _write_pct(writer, <uint8_t>(0xe0 | (utf >> 12)), True) < 0:
            return -1
        if _write_pct(writer, <uint8_t>(0x80 | ((utf >> 6) & 0x3f)),
                      True) < 0:
            return -1
        return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)
    elif utf > 0x10FFFF:
        # symbol is too large
        return 0
    else:
        if _write_pct(writer,  <uint8_t>(0xf0 | (utf >> 18)), True) < 0:
            return -1
        if _write_pct(writer,  <uint8_t>(0x80 | ((utf >> 12) & 0x3f)),
                      True) < 0:
            return -1
        if _write_pct(writer,  <uint8_t>(0x80 | ((utf >> 6) & 0x3f)),
                      True) < 0:
            return -1
        return _write_pct(writer, <uint8_t>(0x80 | (utf & 0x3f)), True)


# --------------------- end writer --------------------------


cdef class _Quoter:
    """Percent-encode strings.

    Characters in ``safe`` and ``protected`` (ASCII only) are emitted as-is;
    an existing ``%XX`` escape of a protected character is kept escaped.
    With ``qs=True`` a space is written as ``+``.  With ``requote=True``
    existing ``%XX`` escapes are recognised and normalised instead of having
    their ``%`` escaped again.
    """

    cdef bint _qs
    cdef bint _requote

    cdef uint8_t _safe_table[16]
    cdef uint8_t _protected_table[16]

    def __init__(
            self, *, str safe='', str protected='', bint qs=False, bint requote=True,
    ):
        cdef Py_UCS4 ch

        self._qs = qs
        self._requote = requote

        if not self._qs:
            memcpy(self._safe_table,
                   ALLOWED_NOTQS_TABLE,
                   sizeof(self._safe_table))
        else:
            memcpy(self._safe_table,
                   ALLOWED_TABLE,
                   sizeof(self._safe_table))
        for ch in safe:
            if ord(ch) > 127:
                raise ValueError("Only safe symbols with ORD < 128 are allowed")
            set_bit(self._safe_table, ch)

        memset(self._protected_table, 0, sizeof(self._protected_table))
        for ch in protected:
            if ord(ch) > 127:
                raise ValueError("Only safe symbols with ORD < 128 are allowed")
            set_bit(self._safe_table, ch)
            set_bit(self._protected_table, ch)

    def __call__(self, val):
        """Return the quoted form of ``val`` (``None`` passes through)."""
        cdef Writer writer
        if val is None:
            return None
        if type(val) is not str:
            if isinstance(val, str):
                # derived from str
                val = str(val)
            else:
                raise TypeError("Argument should be str")
        _init_writer(&writer)
        try:
            return self._do_quote(<str>val, &writer)
        finally:
            _release_writer(&writer)

    cdef str _do_quote(self, str val, Writer *writer):
        cdef Py_UCS4 ch
        cdef int changed
        # Py_ssize_t (not int) so indices cannot be truncated for very long
        # strings; this also matches _Unquoter._do_unquote.
        cdef Py_ssize_t idx = 0
        cdef Py_ssize_t length = len(val)

        while idx < length:
            ch = val[idx]
            idx += 1
            if ch == '%' and self._requote and idx <= length - 2:
                ch = _restore_ch(val[idx], val[idx + 1])
                if ch != <Py_UCS4>-1:
                    idx += 2
                    if ch < 128:
                        if bit_at(self._protected_table, ch):
                            if _write_pct(writer, ch, True) < 0:
                                raise
                            continue

                        if bit_at(self._safe_table, ch):
                            if _write_char(writer, ch, True) < 0:
                                raise
                            continue

                    # The escape is kept; the output only differs from the
                    # input if a hex digit had to be upper-cased.
                    changed = (_is_lower_hex(val[idx - 2]) or
                               _is_lower_hex(val[idx - 1]))
                    if _write_pct(writer, ch, changed) < 0:
                        raise
                    continue
                else:
                    ch = '%'

            if self._write(writer, ch) < 0:
                raise

        if not writer.changed:
            return val
        else:
            return PyUnicode_DecodeASCII(writer.buf, writer.pos, "strict")

    cdef inline int _write(self, Writer *writer, Py_UCS4 ch):
        if self._qs:
            if ch == ' ':
                return _write_char(writer, '+', True)

        if ch < 128 and bit_at(self._safe_table, ch):
            return _write_char(writer, ch, False)

        return _write_utf8(writer, ch)


cdef class _Unquoter:
    """Decode ``%XX`` escapes.

    Characters listed in ``unsafe`` stay (or become) percent-encoded.  With
    ``qs=True`` a literal ``+`` is turned into a space and decoded ``+=&;``
    are re-quoted.
    """

    cdef str _unsafe
    cdef bint _qs
    cdef _Quoter _quoter
    cdef _Quoter _qs_quoter

    def __init__(self, *, unsafe='', qs=False):
        self._unsafe = unsafe
        self._qs = qs
        self._quoter = _Quoter()
        self._qs_quoter = _Quoter(qs=True)

    def __call__(self, val):
        """Return the unquoted form of ``val`` (``None`` passes through)."""
        if val is None:
            return None
        if type(val) is not str:
            if isinstance(val, str):
                # derived from str
                val = str(val)
            else:
                raise TypeError("Argument should be str")
        return self._do_unquote(<str>val)

    cdef str _do_unquote(self, str val):
        if len(val) == 0:
            return val
        cdef list ret = []
        cdef char buffer[4]           # pending bytes of one UTF-8 sequence
        cdef Py_ssize_t buflen = 0
        cdef Py_ssize_t consumed
        cdef str unquoted
        cdef Py_UCS4 ch = 0
        cdef Py_ssize_t idx = 0
        cdef Py_ssize_t length = len(val)
        cdef Py_ssize_t start_pct

        while idx < length:
            ch = val[idx]
            idx += 1
            if ch == '%' and idx <= length - 2:
                ch = _restore_ch(val[idx], val[idx + 1])
                if ch != <Py_UCS4>-1:
                    idx += 2
                    assert buflen < 4
                    buffer[buflen] = ch
                    buflen += 1
                    try:
                        unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen,
                                                                NULL, &consumed)
                    except UnicodeDecodeError:
                        # The pending bytes are not valid UTF-8: emit their
                        # escapes verbatim and retry with the new byte alone.
                        start_pct = idx - buflen * 3
                        buffer[0] = ch
                        buflen = 1
                        ret.append(val[start_pct : idx - 3])
                        try:
                            unquoted = PyUnicode_DecodeUTF8Stateful(buffer, buflen,
                                                                    NULL, &consumed)
                        except UnicodeDecodeError:
                            buflen = 0
                            ret.append(val[idx - 3 : idx])
                            continue
                    if not unquoted:
                        # Incomplete multi-byte sequence, wait for more bytes.
                        assert consumed == 0
                        continue
                    assert consumed == buflen
                    buflen = 0
                    if self._qs and unquoted in '+=&;':
                        ret.append(self._qs_quoter(unquoted))
                    elif unquoted in self._unsafe:
                        ret.append(self._quoter(unquoted))
                    else:
                        ret.append(unquoted)
                    continue
                else:
                    ch = '%'

            if buflen:
                # A non-escape interrupted a multi-byte sequence: keep the
                # pending escapes exactly as they were written.
                start_pct = idx - 1 - buflen * 3
                ret.append(val[start_pct : idx - 1])
                buflen = 0

            if ch == '+':
                if not self._qs or ch in self._unsafe:
                    ret.append('+')
                else:
                    ret.append(' ')
                continue

            if ch in self._unsafe:
                # Percent-encode the UTF-8 octets of the character so every
                # escape has exactly two hex digits (hex(ord(ch)) produced
                # "%A" for "\n" and code-point escapes for non-ASCII).
                for octet in chr(ord(ch)).encode('utf8', 'ignore'):
                    ret.append('%{:02X}'.format(octet))
                continue

            ret.append(ch)

        if buflen:
            ret.append(val[length - buflen * 3 : length])

        return ''.join(ret)
"""Pure-Python implementations of the URL quoter and unquoter."""

import codecs
import re
from string import ascii_letters, ascii_lowercase, digits
from typing import List, Optional, cast

BASCII_LOWERCASE = ascii_lowercase.encode("ascii")
BPCT_ALLOWED = {f"%{i:02X}".encode("ascii") for i in range(256)}
GEN_DELIMS = ":/?#[]@"
SUB_DELIMS_WITHOUT_QS = "!$'()*,"
SUB_DELIMS = SUB_DELIMS_WITHOUT_QS + "+&=;"
RESERVED = GEN_DELIMS + SUB_DELIMS
UNRESERVED = ascii_letters + digits + "-._~"
ALLOWED = UNRESERVED + SUB_DELIMS_WITHOUT_QS


# Two upper-case hexadecimal digits.  The quoter upper-cases the bytes that
# follow "%" before matching, so only A-F and 0-9 need to be accepted.
_IS_HEX = re.compile(b"[A-F0-9][A-F0-9]")
_IS_HEX_STR = re.compile("[A-Fa-f0-9][A-Fa-f0-9]")

utf8_decoder = codecs.getincrementaldecoder("utf-8")


class _Quoter:
    """Percent-encode strings.

    :param safe: extra characters that are emitted without quoting.
    :param protected: characters that are emitted as-is when literal and
        kept percent-encoded when they arrive as a ``%XX`` escape.
    :param qs: query-string mode; a space becomes ``+`` and ``+&=;`` are
        no longer implicitly safe.
    :param requote: when true, existing ``%XX`` escapes are recognised and
        normalised instead of having their ``%`` escaped again.
    """

    def __init__(
        self,
        *,
        safe: str = "",
        protected: str = "",
        qs: bool = False,
        requote: bool = True,
    ) -> None:
        self._safe = safe
        self._protected = protected
        self._qs = qs
        self._requote = requote

    def __call__(self, val: Optional[str]) -> Optional[str]:
        """Return ``val`` quoted; ``None`` is passed through unchanged.

        :raises TypeError: if ``val`` is neither ``None`` nor a ``str``.
        """
        if val is None:
            return None
        if not isinstance(val, str):
            raise TypeError("Argument should be str")
        if not val:
            return ""
        # Characters that cannot be encoded (lone surrogates) are dropped.
        bval = cast(str, val).encode("utf8", errors="ignore")
        ret = bytearray()
        pct = bytearray()  # bytes of the "%XX" escape currently being read
        safe = self._safe
        safe += ALLOWED
        if not self._qs:
            safe += "+&=;"
        safe += self._protected
        bsafe = safe.encode("ascii")
        idx = 0
        while idx < len(bval):
            ch = bval[idx]
            idx += 1

            if pct:
                if ch in BASCII_LOWERCASE:
                    ch = ch - 32  # convert to uppercase
                pct.append(ch)
                if len(pct) == 3:  # pragma: no branch   # peephole optimizer
                    buf = pct[1:]
                    if not _IS_HEX.match(buf):
                        # Not an escape after all: quote the "%" itself and
                        # re-process the two following bytes as plain input.
                        ret.extend(b"%25")
                        pct.clear()
                        idx -= 2
                        continue

                    # `buf` is guaranteed to be two hex digits here.
                    unquoted = chr(int(buf.decode("ascii"), base=16))

                    if unquoted in self._protected:
                        ret.extend(pct)
                    elif unquoted in safe:
                        ret.append(ord(unquoted))
                    else:
                        ret.extend(pct)
                    pct.clear()

                # special case, if we have only one char after "%"
                elif len(pct) == 2 and idx == len(bval):
                    ret.extend(b"%25")
                    pct.clear()
                    idx -= 1

                continue

            elif ch == ord("%") and self._requote:
                pct.clear()
                pct.append(ch)

                # special case if "%" is last char
                if idx == len(bval):
                    ret.extend(b"%25")

                continue

            if self._qs:
                if ch == ord(" "):
                    ret.append(ord("+"))
                    continue
            if ch in bsafe:
                ret.append(ch)
                continue

            ret.extend((f"%{ch:02X}").encode("ascii"))

        ret2 = ret.decode("ascii")
        if ret2 == val:
            # Hand back the caller's object when nothing changed.
            return val
        return ret2


class _Unquoter:
    """Decode ``%XX`` escapes.

    :param unsafe: characters that must stay (or become) percent-encoded
        in the result.
    :param qs: query-string mode; a literal ``+`` becomes a space and
        decoded ``+=&;`` are re-quoted.
    """

    def __init__(self, *, unsafe: str = "", qs: bool = False) -> None:
        self._unsafe = unsafe
        self._qs = qs
        self._quoter = _Quoter()
        self._qs_quoter = _Quoter(qs=True)

    def __call__(self, val: Optional[str]) -> Optional[str]:
        """Return ``val`` unquoted; ``None`` is passed through unchanged.

        :raises TypeError: if ``val`` is neither ``None`` nor a ``str``.
        """
        if val is None:
            return None
        if not isinstance(val, str):
            raise TypeError("Argument should be str")
        if not val:
            return ""
        decoder = cast(codecs.BufferedIncrementalDecoder, utf8_decoder())
        ret: List[str] = []
        idx = 0
        while idx < len(val):
            ch = val[idx]
            idx += 1
            if ch == "%" and idx <= len(val) - 2:
                pct = val[idx : idx + 2]
                if _IS_HEX_STR.fullmatch(pct):
                    b = bytes([int(pct, base=16)])
                    idx += 2
                    try:
                        unquoted = decoder.decode(b)
                    except UnicodeDecodeError:
                        # The buffered bytes are not valid UTF-8: emit their
                        # escapes verbatim and retry with the new byte alone.
                        start_pct = idx - 3 - len(decoder.buffer) * 3
                        ret.append(val[start_pct : idx - 3])
                        decoder.reset()
                        try:
                            unquoted = decoder.decode(b)
                        except UnicodeDecodeError:
                            ret.append(val[idx - 3 : idx])
                            continue
                    if not unquoted:
                        # Incomplete multi-byte sequence, wait for more bytes.
                        continue
                    if self._qs and unquoted in "+=&;":
                        to_add = self._qs_quoter(unquoted)
                        if to_add is None:  # pragma: no cover
                            raise RuntimeError("Cannot quote None")
                        ret.append(to_add)
                    elif unquoted in self._unsafe:
                        to_add = self._quoter(unquoted)
                        if to_add is None:  # pragma: no cover
                            raise RuntimeError("Cannot quote None")
                        ret.append(to_add)
                    else:
                        ret.append(unquoted)
                    continue

            if decoder.buffer:
                # A non-escape interrupted a multi-byte sequence: keep the
                # pending escapes exactly as they were written.
                start_pct = idx - 1 - len(decoder.buffer) * 3
                ret.append(val[start_pct : idx - 1])
                decoder.reset()

            if ch == "+":
                if not self._qs or ch in self._unsafe:
                    ret.append("+")
                else:
                    ret.append(" ")
                continue

            if ch in self._unsafe:
                # Percent-encode the UTF-8 octets of the character so every
                # escape has exactly two hex digits (hex(ord(ch)) produced
                # "%A" for "\n" and code-point escapes for non-ASCII).
                ret.extend(
                    f"%{octet:02X}"
                    for octet in ch.encode("utf8", errors="ignore")
                )
                continue

            ret.append(ch)

        if decoder.buffer:
            ret.append(val[-len(decoder.buffer) * 3 :])

        ret2 = "".join(ret)
        if ret2 == val:
            # Hand back the caller's object when nothing changed.
            return val
        return ret2
obj.__module__ = "yarl" + return obj + + +class cached_property: + """Use as a class method decorator. It operates almost exactly like + the Python `@property` decorator, but it puts the result of the + method it decorates into the instance dict after the first call, + effectively replacing the function it decorates with an instance + variable. It is, in Python parlance, a data descriptor. + + """ + + def __init__(self, wrapped): + self.wrapped = wrapped + try: + self.__doc__ = wrapped.__doc__ + except AttributeError: # pragma: no cover + self.__doc__ = "" + self.name = wrapped.__name__ + + def __get__(self, inst, owner, _sentinel=sentinel): + if inst is None: + return self + val = inst._cache.get(self.name, _sentinel) + if val is not _sentinel: + return val + val = self.wrapped(inst) + inst._cache[self.name] = val + return val + + def __set__(self, inst, value): + raise AttributeError("cached property is read-only") + + +def _normalize_path_segments(segments): + """Drop '.' and '..' from a sequence of str segments""" + + resolved_path = [] + + for seg in segments: + if seg == "..": + # ignore any .. segments that would otherwise cause an + # IndexError when popped from resolved_path if + # resolving for rfc3986 + with suppress(IndexError): + resolved_path.pop() + elif seg != ".": + resolved_path.append(seg) + + if segments and segments[-1] in (".", ".."): + # do some post-processing here. + # if the last segment was a relative dir, + # then we need to append the trailing '/' + resolved_path.append("") + + return resolved_path + + +@rewrite_module +class URL: + # Don't derive from str + # follow pathlib.Path design + # probably URL will not suffer from pathlib problems: + # it's intended for libraries like aiohttp, + # not to be passed into standard library functions like os.open etc. + + # URL grammar (RFC 3986) + # pct-encoded = "%" HEXDIG HEXDIG + # reserved = gen-delims / sub-delims + # gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + # sub-delims = "!" 
/ "$" / "&" / "'" / "(" / ")" + # / "*" / "+" / "," / ";" / "=" + # unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + # URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + # hier-part = "//" authority path-abempty + # / path-absolute + # / path-rootless + # / path-empty + # scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + # authority = [ userinfo "@" ] host [ ":" port ] + # userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + # host = IP-literal / IPv4address / reg-name + # IP-literal = "[" ( IPv6address / IPvFuture ) "]" + # IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) + # IPv6address = 6( h16 ":" ) ls32 + # / "::" 5( h16 ":" ) ls32 + # / [ h16 ] "::" 4( h16 ":" ) ls32 + # / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 + # / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 + # / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 + # / [ *4( h16 ":" ) h16 ] "::" ls32 + # / [ *5( h16 ":" ) h16 ] "::" h16 + # / [ *6( h16 ":" ) h16 ] "::" + # ls32 = ( h16 ":" h16 ) / IPv4address + # ; least-significant 32 bits of address + # h16 = 1*4HEXDIG + # ; 16 bits of address represented in hexadecimal + # IPv4address = dec-octet "." dec-octet "." dec-octet "." 
dec-octet + # dec-octet = DIGIT ; 0-9 + # / %x31-39 DIGIT ; 10-99 + # / "1" 2DIGIT ; 100-199 + # / "2" %x30-34 DIGIT ; 200-249 + # / "25" %x30-35 ; 250-255 + # reg-name = *( unreserved / pct-encoded / sub-delims ) + # port = *DIGIT + # path = path-abempty ; begins with "/" or is empty + # / path-absolute ; begins with "/" but not "//" + # / path-noscheme ; begins with a non-colon segment + # / path-rootless ; begins with a segment + # / path-empty ; zero characters + # path-abempty = *( "/" segment ) + # path-absolute = "/" [ segment-nz *( "/" segment ) ] + # path-noscheme = segment-nz-nc *( "/" segment ) + # path-rootless = segment-nz *( "/" segment ) + # path-empty = 0<pchar> + # segment = *pchar + # segment-nz = 1*pchar + # segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + # ; non-zero-length segment without any colon ":" + # pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + # query = *( pchar / "/" / "?" ) + # fragment = *( pchar / "/" / "?" ) + # URI-reference = URI / relative-ref + # relative-ref = relative-part [ "?" query ] [ "#" fragment ] + # relative-part = "//" authority path-abempty + # / path-absolute + # / path-noscheme + # / path-empty + # absolute-URI = scheme ":" hier-part [ "?" 
query ] + __slots__ = ("_cache", "_val") + + _QUOTER = _Quoter(requote=False) + _REQUOTER = _Quoter() + _PATH_QUOTER = _Quoter(safe="@:", protected="/+", requote=False) + _PATH_REQUOTER = _Quoter(safe="@:", protected="/+") + _QUERY_QUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True, requote=False) + _QUERY_REQUOTER = _Quoter(safe="?/:@", protected="=+&;", qs=True) + _QUERY_PART_QUOTER = _Quoter(safe="?/:@", qs=True, requote=False) + _FRAGMENT_QUOTER = _Quoter(safe="?/:@", requote=False) + _FRAGMENT_REQUOTER = _Quoter(safe="?/:@") + + _UNQUOTER = _Unquoter() + _PATH_UNQUOTER = _Unquoter(unsafe="+") + _QS_UNQUOTER = _Unquoter(qs=True) + + def __new__(cls, val="", *, encoded=False, strict=None): + if strict is not None: # pragma: no cover + warnings.warn("strict parameter is ignored") + if type(val) is cls: + return val + if type(val) is str: + val = urlsplit(val) + elif type(val) is SplitResult: + if not encoded: + raise ValueError("Cannot apply decoding to SplitResult") + elif isinstance(val, str): + val = urlsplit(str(val)) + else: + raise TypeError("Constructor parameter should be str") + + if not encoded: + if not val[1]: # netloc + netloc = "" + host = "" + else: + host = val.hostname + if host is None: + raise ValueError("Invalid URL: host is required for absolute urls") + + try: + port = val.port + except ValueError as e: + raise ValueError( + "Invalid URL: port can't be converted to integer" + ) from e + + netloc = cls._make_netloc( + val.username, val.password, host, port, encode=True, requote=True + ) + path = cls._PATH_REQUOTER(val[2]) + if netloc: + path = cls._normalize_path(path) + + cls._validate_authority_uri_abs_path(host=host, path=path) + query = cls._QUERY_REQUOTER(val[3]) + fragment = cls._FRAGMENT_REQUOTER(val[4]) + val = SplitResult(val[0], netloc, path, query, fragment) + + self = object.__new__(cls) + self._val = val + self._cache = {} + return self + + @classmethod + def build( + cls, + *, + scheme="", + authority="", + user=None, + 
password=None, + host="", + port=None, + path="", + query=None, + query_string="", + fragment="", + encoded=False, + ): + """Creates and returns a new URL""" + + if authority and (user or password or host or port): + raise ValueError( + 'Can\'t mix "authority" with "user", "password", "host" or "port".' + ) + if port and not host: + raise ValueError('Can\'t build URL with "port" but without "host".') + if query and query_string: + raise ValueError('Only one of "query" or "query_string" should be passed') + if ( + scheme is None + or authority is None + or host is None + or path is None + or query_string is None + or fragment is None + ): + raise TypeError( + 'NoneType is illegal for "scheme", "authority", "host", "path", ' + '"query_string", and "fragment" args, use empty string instead.' + ) + + if authority: + if encoded: + netloc = authority + else: + tmp = SplitResult("", authority, "", "", "") + netloc = cls._make_netloc( + tmp.username, tmp.password, tmp.hostname, tmp.port, encode=True + ) + elif not user and not password and not host and not port: + netloc = "" + else: + netloc = cls._make_netloc( + user, password, host, port, encode=not encoded, encode_host=not encoded + ) + if not encoded: + path = cls._PATH_QUOTER(path) + if netloc: + path = cls._normalize_path(path) + + cls._validate_authority_uri_abs_path(host=host, path=path) + query_string = cls._QUERY_QUOTER(query_string) + fragment = cls._FRAGMENT_QUOTER(fragment) + + url = cls( + SplitResult(scheme, netloc, path, query_string, fragment), encoded=True + ) + + if query: + return url.with_query(query) + else: + return url + + def __init_subclass__(cls): + raise TypeError(f"Inheriting a class {cls!r} from URL is forbidden") + + def __str__(self): + val = self._val + if not val.path and self.is_absolute() and (val.query or val.fragment): + val = val._replace(path="/") + return urlunsplit(val) + + def __repr__(self): + return f"{self.__class__.__name__}('{str(self)}')" + + def __bytes__(self): + return 
str(self).encode("ascii") + + def __eq__(self, other): + if not type(other) is URL: + return NotImplemented + + val1 = self._val + if not val1.path and self.is_absolute(): + val1 = val1._replace(path="/") + + val2 = other._val + if not val2.path and other.is_absolute(): + val2 = val2._replace(path="/") + + return val1 == val2 + + def __hash__(self): + ret = self._cache.get("hash") + if ret is None: + val = self._val + if not val.path and self.is_absolute(): + val = val._replace(path="/") + ret = self._cache["hash"] = hash(val) + return ret + + def __le__(self, other): + if not type(other) is URL: + return NotImplemented + return self._val <= other._val + + def __lt__(self, other): + if not type(other) is URL: + return NotImplemented + return self._val < other._val + + def __ge__(self, other): + if not type(other) is URL: + return NotImplemented + return self._val >= other._val + + def __gt__(self, other): + if not type(other) is URL: + return NotImplemented + return self._val > other._val + + def __truediv__(self, name): + if not isinstance(name, str): + return NotImplemented + return self._make_child((str(name),)) + + def __mod__(self, query): + return self.update_query(query) + + def __bool__(self) -> bool: + return bool( + self._val.netloc or self._val.path or self._val.query or self._val.fragment + ) + + def __getstate__(self): + return (self._val,) + + def __setstate__(self, state): + if state[0] is None and isinstance(state[1], dict): + # default style pickle + self._val = state[1]["_val"] + else: + self._val, *unused = state + self._cache = {} + + def is_absolute(self): + """A check for absolute URLs. + + Return True for absolute ones (having scheme or starting + with //), False otherwise. + + """ + return self.raw_host is not None + + def is_default_port(self): + """A check for default port. + + Return True if port is default for specified scheme, + e.g. 'http://python.org' or 'http://python.org:80', False + otherwise. 
+ + """ + if self.port is None: + return False + default = DEFAULT_PORTS.get(self.scheme) + if default is None: + return False + return self.port == default + + def origin(self): + """Return an URL with scheme, host and port parts only. + + user, password, path, query and fragment are removed. + + """ + # TODO: add a keyword-only option for keeping user/pass maybe? + if not self.is_absolute(): + raise ValueError("URL should be absolute") + if not self._val.scheme: + raise ValueError("URL should have scheme") + v = self._val + netloc = self._make_netloc(None, None, v.hostname, v.port) + val = v._replace(netloc=netloc, path="", query="", fragment="") + return URL(val, encoded=True) + + def relative(self): + """Return a relative part of the URL. + + scheme, user, password, host and port are removed. + + """ + if not self.is_absolute(): + raise ValueError("URL should be absolute") + val = self._val._replace(scheme="", netloc="") + return URL(val, encoded=True) + + @property + def scheme(self): + """Scheme for absolute URLs. + + Empty string for relative URLs or URLs starting with // + + """ + return self._val.scheme + + @property + def raw_authority(self): + """Encoded authority part of URL. + + Empty string for relative URLs. + + """ + return self._val.netloc + + @cached_property + def authority(self): + """Decoded authority part of URL. + + Empty string for relative URLs. + + """ + return self._make_netloc( + self.user, self.password, self.host, self.port, encode_host=False + ) + + @property + def raw_user(self): + """Encoded user part of URL. + + None if user is missing. + + """ + # not .username + ret = self._val.username + if not ret: + return None + return ret + + @cached_property + def user(self): + """Decoded user part of URL. + + None if user is missing. + + """ + return self._UNQUOTER(self.raw_user) + + @property + def raw_password(self): + """Encoded password part of URL. + + None if password is missing. 
+ + """ + return self._val.password + + @cached_property + def password(self): + """Decoded password part of URL. + + None if password is missing. + + """ + return self._UNQUOTER(self.raw_password) + + @property + def raw_host(self): + """Encoded host part of URL. + + None for relative URLs. + + """ + # Use host instead of hostname for sake of shortness + # May add .hostname prop later + return self._val.hostname + + @cached_property + def host(self): + """Decoded host part of URL. + + None for relative URLs. + + """ + raw = self.raw_host + if raw is None: + return None + if "%" in raw: + # Hack for scoped IPv6 addresses like + # fe80::2%Проверка + # presence of '%' sign means only IPv6 address, so idna is useless. + return raw + return _idna_decode(raw) + + @property + def port(self): + """Port part of URL, with scheme-based fallback. + + None for relative URLs or URLs without explicit port and + scheme without default port substitution. + + """ + return self._val.port or DEFAULT_PORTS.get(self._val.scheme) + + @property + def explicit_port(self): + """Port part of URL, without scheme-based fallback. + + None for relative URLs or URLs without explicit port. + + """ + return self._val.port + + @property + def raw_path(self): + """Encoded path of URL. + + / for absolute URLs without path part. + + """ + ret = self._val.path + if not ret and self.is_absolute(): + ret = "/" + return ret + + @cached_property + def path(self): + """Decoded path of URL. + + / for absolute URLs without path part. + + """ + return self._PATH_UNQUOTER(self.raw_path) + + @cached_property + def query(self): + """A MultiDictProxy representing parsed query parameters in decoded + representation. + + Empty value if URL has no query part. + + """ + ret = MultiDict(parse_qsl(self.raw_query_string, keep_blank_values=True)) + return MultiDictProxy(ret) + + @property + def raw_query_string(self): + """Encoded query part of URL. + + Empty string if query is missing. 
+ + """ + return self._val.query + + @cached_property + def query_string(self): + """Decoded query part of URL. + + Empty string if query is missing. + + """ + return self._QS_UNQUOTER(self.raw_query_string) + + @cached_property + def path_qs(self): + """Decoded path of URL with query.""" + if not self.query_string: + return self.path + return f"{self.path}?{self.query_string}" + + @cached_property + def raw_path_qs(self): + """Encoded path of URL with query.""" + if not self.raw_query_string: + return self.raw_path + return f"{self.raw_path}?{self.raw_query_string}" + + @property + def raw_fragment(self): + """Encoded fragment part of URL. + + Empty string if fragment is missing. + + """ + return self._val.fragment + + @cached_property + def fragment(self): + """Decoded fragment part of URL. + + Empty string if fragment is missing. + + """ + return self._UNQUOTER(self.raw_fragment) + + @cached_property + def raw_parts(self): + """A tuple containing encoded *path* parts. + + ('/',) for absolute URLs if *path* is missing. + + """ + path = self._val.path + if self.is_absolute(): + if not path: + parts = ["/"] + else: + parts = ["/"] + path[1:].split("/") + else: + if path.startswith("/"): + parts = ["/"] + path[1:].split("/") + else: + parts = path.split("/") + return tuple(parts) + + @cached_property + def parts(self): + """A tuple containing decoded *path* parts. + + ('/',) for absolute URLs if *path* is missing. + + """ + return tuple(self._UNQUOTER(part) for part in self.raw_parts) + + @cached_property + def parent(self): + """A new URL with last part of path removed and cleaned up query and + fragment. 
+ + """ + path = self.raw_path + if not path or path == "/": + if self.raw_fragment or self.raw_query_string: + return URL(self._val._replace(query="", fragment=""), encoded=True) + return self + parts = path.split("/") + val = self._val._replace(path="/".join(parts[:-1]), query="", fragment="") + return URL(val, encoded=True) + + @cached_property + def raw_name(self): + """The last part of raw_parts.""" + parts = self.raw_parts + if self.is_absolute(): + parts = parts[1:] + if not parts: + return "" + else: + return parts[-1] + else: + return parts[-1] + + @cached_property + def name(self): + """The last part of parts.""" + return self._UNQUOTER(self.raw_name) + + @cached_property + def raw_suffix(self): + name = self.raw_name + i = name.rfind(".") + if 0 < i < len(name) - 1: + return name[i:] + else: + return "" + + @cached_property + def suffix(self): + return self._UNQUOTER(self.raw_suffix) + + @cached_property + def raw_suffixes(self): + name = self.raw_name + if name.endswith("."): + return () + name = name.lstrip(".") + return tuple("." + suffix for suffix in name.split(".")[1:]) + + @cached_property + def suffixes(self): + return tuple(self._UNQUOTER(suffix) for suffix in self.raw_suffixes) + + @staticmethod + def _validate_authority_uri_abs_path(host, path): + """Ensure that path in URL with authority starts with a leading slash. + + Raise ValueError if not. 
+ """ + if len(host) > 0 and len(path) > 0 and not path.startswith("/"): + raise ValueError( + "Path in a URL with authority should start with a slash ('/') if set" + ) + + def _make_child(self, segments, encoded=False): + """add segments to self._val.path, accounting for absolute vs relative paths""" + # keep the trailing slash if the last segment ends with / + parsed = [""] if segments and segments[-1][-1:] == "/" else [] + for seg in reversed(segments): + if not seg: + continue + if seg[0] == "/": + raise ValueError( + f"Appending path {seg!r} starting from slash is forbidden" + ) + seg = seg if encoded else self._PATH_QUOTER(seg) + if "/" in seg: + parsed += ( + sub for sub in reversed(seg.split("/")) if sub and sub != "." + ) + elif seg != ".": + parsed.append(seg) + parsed.reverse() + old_path = self._val.path + if old_path: + parsed = [*old_path.rstrip("/").split("/"), *parsed] + if self.is_absolute(): + parsed = _normalize_path_segments(parsed) + if parsed and parsed[0] != "": + # inject a leading slash when adding a path to an absolute URL + # where there was none before + parsed = ["", *parsed] + new_path = "/".join(parsed) + return URL( + self._val._replace(path=new_path, query="", fragment=""), encoded=True + ) + + @classmethod + def _normalize_path(cls, path): + # Drop '.' and '..' from str path + + prefix = "" + if path.startswith("/"): + # preserve the "/" root element of absolute paths, copying it to the + # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986. 
+ prefix = "/" + path = path[1:] + + segments = path.split("/") + return prefix + "/".join(_normalize_path_segments(segments)) + + @classmethod + def _encode_host(cls, host, human=False): + try: + ip, sep, zone = host.partition("%") + ip = ip_address(ip) + except ValueError: + host = host.lower() + # IDNA encoding is slow, + # skip it for ASCII-only strings + # Don't move the check into _idna_encode() helper + # to reduce the cache size + if human or host.isascii(): + return host + host = _idna_encode(host) + else: + host = ip.compressed + if sep: + host += "%" + zone + if ip.version == 6: + host = "[" + host + "]" + return host + + @classmethod + def _make_netloc( + cls, user, password, host, port, encode=False, encode_host=True, requote=False + ): + quoter = cls._REQUOTER if requote else cls._QUOTER + if encode_host: + ret = cls._encode_host(host) + else: + ret = host + if port is not None: + ret = ret + ":" + str(port) + if password is not None: + if not user: + user = "" + else: + if encode: + user = quoter(user) + if encode: + password = quoter(password) + user = user + ":" + password + elif user and encode: + user = quoter(user) + if user: + ret = user + "@" + ret + return ret + + def with_scheme(self, scheme): + """Return a new URL with scheme replaced.""" + # N.B. doesn't cleanup query/fragment + if not isinstance(scheme, str): + raise TypeError("Invalid scheme type") + if not self.is_absolute(): + raise ValueError("scheme replacement is not allowed for relative URLs") + return URL(self._val._replace(scheme=scheme.lower()), encoded=True) + + def with_user(self, user): + """Return a new URL with user replaced. + + Autoencode user if needed. + + Clear user/password if user is None. + + """ + # N.B. 
doesn't cleanup query/fragment + val = self._val + if user is None: + password = None + elif isinstance(user, str): + user = self._QUOTER(user) + password = val.password + else: + raise TypeError("Invalid user type") + if not self.is_absolute(): + raise ValueError("user replacement is not allowed for relative URLs") + return URL( + self._val._replace( + netloc=self._make_netloc(user, password, val.hostname, val.port) + ), + encoded=True, + ) + + def with_password(self, password): + """Return a new URL with password replaced. + + Autoencode password if needed. + + Clear password if argument is None. + + """ + # N.B. doesn't cleanup query/fragment + if password is None: + pass + elif isinstance(password, str): + password = self._QUOTER(password) + else: + raise TypeError("Invalid password type") + if not self.is_absolute(): + raise ValueError("password replacement is not allowed for relative URLs") + val = self._val + return URL( + self._val._replace( + netloc=self._make_netloc(val.username, password, val.hostname, val.port) + ), + encoded=True, + ) + + def with_host(self, host): + """Return a new URL with host replaced. + + Autoencode host if needed. + + Changing host for relative URLs is not allowed, use .join() + instead. + + """ + # N.B. doesn't cleanup query/fragment + if not isinstance(host, str): + raise TypeError("Invalid host type") + if not self.is_absolute(): + raise ValueError("host replacement is not allowed for relative URLs") + if not host: + raise ValueError("host removing is not allowed") + val = self._val + return URL( + self._val._replace( + netloc=self._make_netloc(val.username, val.password, host, val.port) + ), + encoded=True, + ) + + def with_port(self, port): + """Return a new URL with port replaced. + + Clear port to default if None is passed. + + """ + # N.B. 
doesn't cleanup query/fragment + if port is not None: + if isinstance(port, bool) or not isinstance(port, int): + raise TypeError(f"port should be int or None, got {type(port)}") + if port < 0 or port > 65535: + raise ValueError(f"port must be between 0 and 65535, got {port}") + if not self.is_absolute(): + raise ValueError("port replacement is not allowed for relative URLs") + val = self._val + return URL( + self._val._replace( + netloc=self._make_netloc(val.username, val.password, val.hostname, port) + ), + encoded=True, + ) + + def with_path(self, path, *, encoded=False): + """Return a new URL with path replaced.""" + if not encoded: + path = self._PATH_QUOTER(path) + if self.is_absolute(): + path = self._normalize_path(path) + if len(path) > 0 and path[0] != "/": + path = "/" + path + return URL(self._val._replace(path=path, query="", fragment=""), encoded=True) + + @classmethod + def _query_seq_pairs(cls, quoter, pairs): + for key, val in pairs: + if isinstance(val, (list, tuple)): + for v in val: + yield quoter(key) + "=" + quoter(cls._query_var(v)) + else: + yield quoter(key) + "=" + quoter(cls._query_var(val)) + + @staticmethod + def _query_var(v): + cls = type(v) + if issubclass(cls, str): + return v + if issubclass(cls, float): + if math.isinf(v): + raise ValueError("float('inf') is not supported") + if math.isnan(v): + raise ValueError("float('nan') is not supported") + return str(float(v)) + if issubclass(cls, int) and cls is not bool: + return str(int(v)) + raise TypeError( + "Invalid variable type: value " + "should be str, int or float, got {!r} " + "of type {}".format(v, cls) + ) + + def _get_str_query(self, *args, **kwargs): + if kwargs: + if len(args) > 0: + raise ValueError( + "Either kwargs or single query parameter must be present" + ) + query = kwargs + elif len(args) == 1: + query = args[0] + else: + raise ValueError("Either kwargs or single query parameter must be present") + + if query is None: + query = None + elif isinstance(query, 
Mapping): + quoter = self._QUERY_PART_QUOTER + query = "&".join(self._query_seq_pairs(quoter, query.items())) + elif isinstance(query, str): + query = self._QUERY_QUOTER(query) + elif isinstance(query, (bytes, bytearray, memoryview)): + raise TypeError( + "Invalid query type: bytes, bytearray and memoryview are forbidden" + ) + elif isinstance(query, Sequence): + quoter = self._QUERY_PART_QUOTER + # We don't expect sequence values if we're given a list of pairs + # already; only mappings like builtin `dict` which can't have the + # same key pointing to multiple values are allowed to use + # `_query_seq_pairs`. + query = "&".join( + quoter(k) + "=" + quoter(self._query_var(v)) for k, v in query + ) + else: + raise TypeError( + "Invalid query type: only str, mapping or " + "sequence of (key, value) pairs is allowed" + ) + + return query + + def with_query(self, *args, **kwargs): + """Return a new URL with query part replaced. + + Accepts any Mapping (e.g. dict, multidict.MultiDict instances) + or str, autoencode the argument if needed. + + A sequence of (key, value) pairs is supported as well. + + It also can take an arbitrary number of keyword arguments. + + Clear query if None is passed. + + """ + # N.B. doesn't cleanup query/fragment + + new_query = self._get_str_query(*args, **kwargs) or "" + return URL( + self._val._replace(path=self._val.path, query=new_query), encoded=True + ) + + def update_query(self, *args, **kwargs): + """Return a new URL with query part updated.""" + s = self._get_str_query(*args, **kwargs) + query = None + if s is not None: + new_query = MultiDict(parse_qsl(s, keep_blank_values=True)) + query = MultiDict(self.query) + query.update(new_query) + + return URL( + self._val._replace(query=self._get_str_query(query) or ""), encoded=True + ) + + def with_fragment(self, fragment): + """Return a new URL with fragment replaced. + + Autoencode fragment if needed. + + Clear fragment to default if None is passed. + + """ + # N.B. 
doesn't cleanup query/fragment + if fragment is None: + raw_fragment = "" + elif not isinstance(fragment, str): + raise TypeError("Invalid fragment type") + else: + raw_fragment = self._FRAGMENT_QUOTER(fragment) + if self.raw_fragment == raw_fragment: + return self + return URL(self._val._replace(fragment=raw_fragment), encoded=True) + + def with_name(self, name): + """Return a new URL with name (last part of path) replaced. + + Query and fragment parts are cleaned up. + + Name is encoded if needed. + + """ + # N.B. DOES cleanup query/fragment + if not isinstance(name, str): + raise TypeError("Invalid name type") + if "/" in name: + raise ValueError("Slash in name is not allowed") + name = self._PATH_QUOTER(name) + if name in (".", ".."): + raise ValueError(". and .. values are forbidden") + parts = list(self.raw_parts) + if self.is_absolute(): + if len(parts) == 1: + parts.append(name) + else: + parts[-1] = name + parts[0] = "" # replace leading '/' + else: + parts[-1] = name + if parts[0] == "/": + parts[0] = "" # replace leading '/' + return URL( + self._val._replace(path="/".join(parts), query="", fragment=""), + encoded=True, + ) + + def with_suffix(self, suffix): + """Return a new URL with suffix (file extension of name) replaced. + + Query and fragment parts are cleaned up. + + suffix is encoded if needed. + """ + if not isinstance(suffix, str): + raise TypeError("Invalid suffix type") + if suffix and not suffix.startswith(".") or suffix == ".": + raise ValueError(f"Invalid suffix {suffix!r}") + name = self.raw_name + if not name: + raise ValueError(f"{self!r} has an empty name") + old_suffix = self.raw_suffix + if not old_suffix: + name = name + suffix + else: + name = name[: -len(old_suffix)] + suffix + return self.with_name(name) + + def join(self, url): + """Join URLs + + Construct a full (“absolute”) URL by combining a “base URL” + (self) with another URL (url). 
+ + Informally, this uses components of the base URL, in + particular the addressing scheme, the network location and + (part of) the path, to provide missing components in the + relative URL. + + """ + # See docs for urllib.parse.urljoin + if not isinstance(url, URL): + raise TypeError("url should be URL") + return URL(urljoin(str(self), str(url)), encoded=True) + + def joinpath(self, *other, encoded=False): + """Return a new URL with the elements in other appended to the path.""" + return self._make_child(other, encoded=encoded) + + def human_repr(self): + """Return decoded human readable string for URL representation.""" + user = _human_quote(self.user, "#/:?@[]") + password = _human_quote(self.password, "#/:?@[]") + host = self.host + if host: + host = self._encode_host(self.host, human=True) + path = _human_quote(self.path, "#?") + query_string = "&".join( + "{}={}".format(_human_quote(k, "#&+;="), _human_quote(v, "#&+;=")) + for k, v in self.query.items() + ) + fragment = _human_quote(self.fragment, "") + return urlunsplit( + SplitResult( + self.scheme, + self._make_netloc( + user, + password, + host, + self._val.port, + encode_host=False, + ), + path, + query_string, + fragment, + ) + ) + + +def _human_quote(s, unsafe): + if not s: + return s + for c in "%" + unsafe: + if c in s: + s = s.replace(c, f"%{ord(c):02X}") + if s.isprintable(): + return s + return "".join(c if c.isprintable() else quote(c) for c in s) + + +_MAXCACHE = 256 + + +@functools.lru_cache(_MAXCACHE) +def _idna_decode(raw): + try: + return idna.decode(raw.encode("ascii")) + except UnicodeError: # e.g. 
@rewrite_module
def cache_configure(*, idna_encode_size=_MAXCACHE, idna_decode_size=_MAXCACHE):
    """Rebuild the IDNA encode/decode caches with the requested sizes.

    Both module-level cached functions are replaced by new
    ``functools.lru_cache`` wrappers around their undecorated
    implementations, so the new caches start out empty.

    Both arguments are keyword-only and default to ``_MAXCACHE``; each is
    used as the ``maxsize`` of the corresponding cache.
    """
    global _idna_decode, _idna_encode

    # ``__wrapped__`` is the plain function underneath the current cache.
    plain_encode = _idna_encode.__wrapped__
    plain_decode = _idna_decode.__wrapped__

    _idna_encode = functools.lru_cache(maxsize=idna_encode_size)(plain_encode)
    _idna_decode = functools.lru_cache(maxsize=idna_decode_size)(plain_decode)