diff options
| author | robot-piglet <[email protected]> | 2026-05-28 22:45:19 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2026-05-29 11:45:12 +0300 |
| commit | a73530ac63e9abc09636b5d6c5914a9173c6a0ec (patch) | |
| tree | 4b794f12e28a60db811ae239a3f94089d8067352 /contrib/python | |
| parent | fd0ebc49a4e14d7194daaef90694074180d4ee7f (diff) | |
Intermediate changes
commit_hash:0d80b337d2b5cfb8757783f85a8f0159596a2b6e
Diffstat (limited to 'contrib/python')
| -rw-r--r-- | contrib/python/idna/py3/.dist-info/METADATA | 148 | ||||
| -rw-r--r-- | contrib/python/idna/py3/README.md (renamed from contrib/python/idna/py3/README.rst) | 142 | ||||
| -rw-r--r-- | contrib/python/idna/py3/idna/codec.py | 13 | ||||
| -rw-r--r-- | contrib/python/idna/py3/idna/core.py | 163 | ||||
| -rw-r--r-- | contrib/python/idna/py3/idna/intranges.py | 5 | ||||
| -rw-r--r-- | contrib/python/idna/py3/idna/package_data.py | 2 | ||||
| -rw-r--r-- | contrib/python/idna/py3/idna/uts46data.py | 1 | ||||
| -rw-r--r-- | contrib/python/idna/py3/tests/test_idna.py | 24 | ||||
| -rw-r--r-- | contrib/python/idna/py3/ya.make | 2 |
9 files changed, 243 insertions, 257 deletions
diff --git a/contrib/python/idna/py3/.dist-info/METADATA b/contrib/python/idna/py3/.dist-info/METADATA index b3024f7cbec..3dd388cb837 100644 --- a/contrib/python/idna/py3/.dist-info/METADATA +++ b/contrib/python/idna/py3/.dist-info/METADATA @@ -1,10 +1,10 @@ Metadata-Version: 2.4 Name: idna -Version: 3.14 +Version: 3.15 Summary: Internationalized Domain Names in Applications (IDNA) Author-email: Kim Davies <[email protected]> Requires-Python: >=3.8 -Description-Content-Type: text/x-rst +Description-Content-Type: text/markdown License-Expression: BSD-3-Clause Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers @@ -29,156 +29,147 @@ License-File: LICENSE.md Requires-Dist: ruff >= 0.6.2 ; extra == "all" Requires-Dist: mypy >= 1.11.2 ; extra == "all" Requires-Dist: pytest >= 8.3.2 ; extra == "all" -Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.rst +Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.md Project-URL: Issue tracker, https://github.com/kjd/idna/issues Project-URL: Source, https://github.com/kjd/idna Provides-Extra: all -Internationalized Domain Names in Applications (IDNA) -===================================================== +# Internationalized Domain Names in Applications (IDNA) -Support for `Internationalized Domain Names in -Applications (IDNA) <https://tools.ietf.org/html/rfc5891>`_ -and `Unicode IDNA Compatibility Processing -<https://unicode.org/reports/tr46/>`_. +Support for [Internationalized Domain Names in +Applications (IDNA)](https://tools.ietf.org/html/rfc5891) +and [Unicode IDNA Compatibility Processing](https://unicode.org/reports/tr46/). The latest versions of these standards supplied here provide more comprehensive language coverage and reduce the potential of allowing domains with known security vulnerabilities. This library -is a suitable replacement for the “encodings.idna” +is a suitable replacement for the "encodings.idna" module that comes with the Python standard library, but which only supports an older superseded IDNA specification from 2003. Basic functions are simply executed: -.. code-block:: pycon +```pycon +>>> import idna +>>> idna.encode('ドメイン.テスト') +b'xn--eckwd4c7c.xn--zckzah' +>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) +ドメイン.テスト +``` - >>> import idna - >>> idna.encode('ドメイン.テスト') - b'xn--eckwd4c7c.xn--zckzah' - >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) - ドメイン.テスト - -Installation ------------- +## Installation This package is available for installation from PyPI via the typical mechanisms, such as: -.. code-block:: bash - - $ python3 -m pip install idna +```bash +$ python3 -m pip install idna +``` -Usage ------ +## Usage -For typical usage, the ``encode`` and ``decode`` functions will take a +For typical usage, the `encode` and `decode` functions will take a domain name argument and perform a conversion to ASCII-compatible encoding (known as A-labels), or to Unicode strings (known as U-labels) respectively. -.. code-block:: pycon - - >>> import idna - >>> idna.encode('ドメイン.テスト') - b'xn--eckwd4c7c.xn--zckzah' - >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) - ドメイン.テスト +```pycon +>>> import idna +>>> idna.encode('ドメイン.テスト') +b'xn--eckwd4c7c.xn--zckzah' +>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) +ドメイン.テスト +``` -Conversions can be applied at a per-label basis using the ``ulabel`` or -``alabel`` functions if necessary: +Conversions can be applied at a per-label basis using the `ulabel` or +`alabel` functions if necessary: -.. code-block:: pycon +```pycon +>>> idna.alabel('测试') +b'xn--0zwm56d' +``` - >>> idna.alabel('测试') - b'xn--0zwm56d' +### Compatibility Mapping (UTS #46) -Compatibility Mapping (UTS #46) -+++++++++++++++++++++++++++++++ - -This library provides support for `Unicode IDNA Compatibility -Processing <https://unicode.org/reports/tr46/>`_ which normalizes input from +This library provides support for [Unicode IDNA Compatibility +Processing](https://unicode.org/reports/tr46/) which normalizes input from different potential ways a user may input a domain prior to performing the IDNA -conversion operations. This functionality, known as a -`mapping <https://tools.ietf.org/html/rfc5895>`_, is considered by the +conversion operations. This functionality, known as a +[mapping](https://tools.ietf.org/html/rfc5895), is considered by the specification to be a local user-interface issue distinct from IDNA conversion functionality. -For example, “Königsgäßchen” is not a permissible label as *LATIN +For example, "Königsgäßchen" is not a permissible label as *LATIN CAPITAL LETTER K* is not allowed (nor are capital letters in general). UTS 46 will convert this into lower case prior to applying the IDNA conversion. -.. code-block:: pycon - - >>> import idna - >>> idna.encode('Königsgäßchen') - ... - idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed - >>> idna.encode('Königsgäßchen', uts46=True) - b'xn--knigsgchen-b4a3dun' - >>> print(idna.decode('xn--knigsgchen-b4a3dun')) - königsgäßchen +```pycon +>>> import idna +>>> idna.encode('Königsgäßchen') +... +idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed +>>> idna.encode('Königsgäßchen', uts46=True) +b'xn--knigsgchen-b4a3dun' +>>> print(idna.decode('xn--knigsgchen-b4a3dun')) +königsgäßchen +``` -Exceptions ----------- +## Exceptions All errors raised during the conversion following the specification -should raise an exception derived from the ``idna.IDNAError`` base +should raise an exception derived from the `idna.IDNAError` base class. -More specific exceptions that may be generated as ``idna.IDNABidiError`` +More specific exceptions that may be generated as `idna.IDNABidiError` when the error reflects an illegal combination of left-to-right and -right-to-left characters in a label; ``idna.InvalidCodepoint`` when +right-to-left characters in a label; `idna.InvalidCodepoint` when a specific codepoint is an illegal character in an IDN label (i.e. -INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is +INVALID); and `idna.InvalidCodepointContext` when the codepoint is illegal based on its position in the string (i.e. it is CONTEXTO or CONTEXTJ but the contextual requirements are not satisfied.) -Building and Diagnostics ------------------------- +## Building and Diagnostics The IDNA and UTS 46 functionality relies upon pre-calculated lookup tables for performance. These tables are derived from computing against eligibility criteria in the respective standards using the command-line -script ``tools/idna-data``. +script `tools/idna-data`. This tool will fetch relevant codepoint data from the Unicode repository and perform the required calculations to identify eligibility. There are three main modes: -* ``idna-data make-libdata``. Generates ``idnadata.py`` and - ``uts46data.py``, the pre-calculated lookup tables used for IDNA and +* `idna-data make-libdata`. Generates `idnadata.py` and + `uts46data.py`, the pre-calculated lookup tables used for IDNA and UTS 46 conversions. Implementers who wish to track this library against a different Unicode version may use this tool to manually generate a - different version of the ``idnadata.py`` and ``uts46data.py`` files. + different version of the `idnadata.py` and `uts46data.py` files. -* ``idna-data make-table``. Generate a table of the IDNA disposition +* `idna-data make-table`. Generate a table of the IDNA disposition (e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix - B.1 of RFC 5892 and the pre-computed tables published by `IANA - <https://www.iana.org/>`_. + B.1 of RFC 5892 and the pre-computed tables published by [IANA](https://www.iana.org/). -* ``idna-data U+0061``. Prints debugging output on the various +* `idna-data U+0061`. Prints debugging output on the various properties associated with an individual Unicode codepoint (in this case, U+0061), that are used to assess the IDNA and UTS 46 status of a codepoint. This is helpful in debugging or analysis. -The tool accepts a number of arguments, described using ``idna-data -h``. -Most notably, the ``--version`` argument allows the specification +The tool accepts a number of arguments, described using `idna-data -h`. +Most notably, the `--version` argument allows the specification of the version of Unicode to be used in computing the table data. For -example, ``idna-data --version 9.0.0 make-libdata`` will generate +example, `idna-data --version 9.0.0 make-libdata` will generate library data against Unicode 9.0.0. -Additional Notes ----------------- +## Additional Notes * **Packages**. The latest tagged release version is published in the - `Python Package Index <https://pypi.org/project/idna/>`_. + [Python Package Index](https://pypi.org/project/idna/). * **Version support**. This library supports Python 3.8 and higher. As this library serves as a low-level toolkit for a variety of @@ -190,8 +181,7 @@ Additional Notes * **Testing**. The library has a test suite based on each rule of the IDNA specification, as well as tests that are provided as part of the - Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing - <https://unicode.org/reports/tr46/>`_. + Unicode Technical Standard 46, [Unicode IDNA Compatibility Processing](https://unicode.org/reports/tr46/). * **Emoji**. It is an occasional request to support emoji domains in this library. Encoding of symbols like emoji is expressly prohibited by diff --git a/contrib/python/idna/py3/README.rst b/contrib/python/idna/py3/README.md index 89ada8c4b25..5772219126b 100644 --- a/contrib/python/idna/py3/README.rst +++ b/contrib/python/idna/py3/README.md @@ -1,148 +1,139 @@ -Internationalized Domain Names in Applications (IDNA) -===================================================== +# Internationalized Domain Names in Applications (IDNA) -Support for `Internationalized Domain Names in -Applications (IDNA) <https://tools.ietf.org/html/rfc5891>`_ -and `Unicode IDNA Compatibility Processing -<https://unicode.org/reports/tr46/>`_. +Support for [Internationalized Domain Names in +Applications (IDNA)](https://tools.ietf.org/html/rfc5891) +and [Unicode IDNA Compatibility Processing](https://unicode.org/reports/tr46/). The latest versions of these standards supplied here provide more comprehensive language coverage and reduce the potential of allowing domains with known security vulnerabilities. This library -is a suitable replacement for the “encodings.idna” +is a suitable replacement for the "encodings.idna" module that comes with the Python standard library, but which only supports an older superseded IDNA specification from 2003. Basic functions are simply executed: -.. code-block:: pycon +```pycon +>>> import idna +>>> idna.encode('ドメイン.テスト') +b'xn--eckwd4c7c.xn--zckzah' +>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) +ドメイン.テスト +``` - >>> import idna - >>> idna.encode('ドメイン.テスト') - b'xn--eckwd4c7c.xn--zckzah' - >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) - ドメイン.テスト - -Installation ------------- +## Installation This package is available for installation from PyPI via the typical mechanisms, such as: -.. code-block:: bash - - $ python3 -m pip install idna +```bash +$ python3 -m pip install idna +``` -Usage ------ +## Usage -For typical usage, the ``encode`` and ``decode`` functions will take a +For typical usage, the `encode` and `decode` functions will take a domain name argument and perform a conversion to ASCII-compatible encoding (known as A-labels), or to Unicode strings (known as U-labels) respectively. -.. code-block:: pycon - - >>> import idna - >>> idna.encode('ドメイン.テスト') - b'xn--eckwd4c7c.xn--zckzah' - >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) - ドメイン.テスト +```pycon +>>> import idna +>>> idna.encode('ドメイン.テスト') +b'xn--eckwd4c7c.xn--zckzah' +>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah')) +ドメイン.テスト +``` -Conversions can be applied at a per-label basis using the ``ulabel`` or -``alabel`` functions if necessary: +Conversions can be applied at a per-label basis using the `ulabel` or +`alabel` functions if necessary: -.. code-block:: pycon +```pycon +>>> idna.alabel('测试') +b'xn--0zwm56d' +``` - >>> idna.alabel('测试') - b'xn--0zwm56d' +### Compatibility Mapping (UTS #46) -Compatibility Mapping (UTS #46) -+++++++++++++++++++++++++++++++ - -This library provides support for `Unicode IDNA Compatibility -Processing <https://unicode.org/reports/tr46/>`_ which normalizes input from +This library provides support for [Unicode IDNA Compatibility +Processing](https://unicode.org/reports/tr46/) which normalizes input from different potential ways a user may input a domain prior to performing the IDNA -conversion operations. This functionality, known as a -`mapping <https://tools.ietf.org/html/rfc5895>`_, is considered by the +conversion operations. This functionality, known as a +[mapping](https://tools.ietf.org/html/rfc5895), is considered by the specification to be a local user-interface issue distinct from IDNA conversion functionality. -For example, “Königsgäßchen” is not a permissible label as *LATIN +For example, "Königsgäßchen" is not a permissible label as *LATIN CAPITAL LETTER K* is not allowed (nor are capital letters in general). UTS 46 will convert this into lower case prior to applying the IDNA conversion. -.. code-block:: pycon - - >>> import idna - >>> idna.encode('Königsgäßchen') - ... - idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed - >>> idna.encode('Königsgäßchen', uts46=True) - b'xn--knigsgchen-b4a3dun' - >>> print(idna.decode('xn--knigsgchen-b4a3dun')) - königsgäßchen +```pycon +>>> import idna +>>> idna.encode('Königsgäßchen') +... +idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed +>>> idna.encode('Königsgäßchen', uts46=True) +b'xn--knigsgchen-b4a3dun' +>>> print(idna.decode('xn--knigsgchen-b4a3dun')) +königsgäßchen +``` -Exceptions ----------- +## Exceptions All errors raised during the conversion following the specification -should raise an exception derived from the ``idna.IDNAError`` base +should raise an exception derived from the `idna.IDNAError` base class. -More specific exceptions that may be generated as ``idna.IDNABidiError`` +More specific exceptions that may be generated as `idna.IDNABidiError` when the error reflects an illegal combination of left-to-right and -right-to-left characters in a label; ``idna.InvalidCodepoint`` when +right-to-left characters in a label; `idna.InvalidCodepoint` when a specific codepoint is an illegal character in an IDN label (i.e. -INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is +INVALID); and `idna.InvalidCodepointContext` when the codepoint is illegal based on its position in the string (i.e. it is CONTEXTO or CONTEXTJ but the contextual requirements are not satisfied.) -Building and Diagnostics ------------------------- +## Building and Diagnostics The IDNA and UTS 46 functionality relies upon pre-calculated lookup tables for performance. These tables are derived from computing against eligibility criteria in the respective standards using the command-line -script ``tools/idna-data``. +script `tools/idna-data`. This tool will fetch relevant codepoint data from the Unicode repository and perform the required calculations to identify eligibility. There are three main modes: -* ``idna-data make-libdata``. Generates ``idnadata.py`` and - ``uts46data.py``, the pre-calculated lookup tables used for IDNA and +* `idna-data make-libdata`. Generates `idnadata.py` and + `uts46data.py`, the pre-calculated lookup tables used for IDNA and UTS 46 conversions. Implementers who wish to track this library against a different Unicode version may use this tool to manually generate a - different version of the ``idnadata.py`` and ``uts46data.py`` files. + different version of the `idnadata.py` and `uts46data.py` files. -* ``idna-data make-table``. Generate a table of the IDNA disposition +* `idna-data make-table`. Generate a table of the IDNA disposition (e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix - B.1 of RFC 5892 and the pre-computed tables published by `IANA - <https://www.iana.org/>`_. + B.1 of RFC 5892 and the pre-computed tables published by [IANA](https://www.iana.org/). -* ``idna-data U+0061``. Prints debugging output on the various +* `idna-data U+0061`. Prints debugging output on the various properties associated with an individual Unicode codepoint (in this case, U+0061), that are used to assess the IDNA and UTS 46 status of a codepoint. This is helpful in debugging or analysis. -The tool accepts a number of arguments, described using ``idna-data -h``. -Most notably, the ``--version`` argument allows the specification +The tool accepts a number of arguments, described using `idna-data -h`. +Most notably, the `--version` argument allows the specification of the version of Unicode to be used in computing the table data. For -example, ``idna-data --version 9.0.0 make-libdata`` will generate +example, `idna-data --version 9.0.0 make-libdata` will generate library data against Unicode 9.0.0. -Additional Notes ----------------- +## Additional Notes * **Packages**. The latest tagged release version is published in the - `Python Package Index <https://pypi.org/project/idna/>`_. + [Python Package Index](https://pypi.org/project/idna/). * **Version support**. This library supports Python 3.8 and higher. As this library serves as a low-level toolkit for a variety of @@ -154,8 +145,7 @@ Additional Notes * **Testing**. The library has a test suite based on each rule of the IDNA specification, as well as tests that are provided as part of the - Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing - <https://unicode.org/reports/tr46/>`_. + Unicode Technical Standard 46, [Unicode IDNA Compatibility Processing](https://unicode.org/reports/tr46/). * **Emoji**. It is an occasional request to support emoji domains in this library. Encoding of symbols like emoji is expressly prohibited by diff --git a/contrib/python/idna/py3/idna/codec.py b/contrib/python/idna/py3/idna/codec.py index befba40ce59..280dc3972cb 100644 --- a/contrib/python/idna/py3/idna/codec.py +++ b/contrib/python/idna/py3/idna/codec.py @@ -1,10 +1,7 @@ import codecs -import re from typing import Any, Optional, Tuple -from .core import IDNAError, alabel, decode, encode, ulabel - -_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]") +from .core import IDNAError, _unicode_dots_re, alabel, decode, encode, ulabel class Codec(codecs.Codec): @@ -20,7 +17,7 @@ class Codec(codecs.Codec): def encode(self, data: str, errors: str = "strict") -> Tuple[bytes, int]: # ty: ignore[invalid-method-override] if errors != "strict": - raise IDNAError('Unsupported error handling "{}"'.format(errors)) + raise IDNAError(f'Unsupported error handling "{errors}"') if not data: return b"", 0 @@ -29,7 +26,7 @@ class Codec(codecs.Codec): def decode(self, data: bytes, errors: str = "strict") -> Tuple[str, int]: # ty: ignore[invalid-method-override] if errors != "strict": - raise IDNAError('Unsupported error handling "{}"'.format(errors)) + raise IDNAError(f'Unsupported error handling "{errors}"') if not data: return "", 0 @@ -51,7 +48,7 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]: # ty: ignore[invalid-method-override] if errors != "strict": - raise IDNAError('Unsupported error handling "{}"'.format(errors)) + raise IDNAError(f'Unsupported error handling "{errors}"') if not data: return b"", 0 @@ -94,7 +91,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder): def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]: # ty: ignore[invalid-method-override] if errors != "strict": - raise IDNAError('Unsupported error handling "{}"'.format(errors)) + raise IDNAError(f'Unsupported error handling "{errors}"') if not data: return ("", 0) diff --git a/contrib/python/idna/py3/idna/core.py b/contrib/python/idna/py3/idna/core.py index 1804ccaf758..b6f9442deb3 100644 --- a/contrib/python/idna/py3/idna/core.py +++ b/contrib/python/idna/py3/idna/core.py @@ -12,6 +12,18 @@ _alabel_prefix = b"xn--" _unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]") +# Bidi category sets from RFC 5893, hoisted out of the per-codepoint loop +_bidi_rtl_first = frozenset({"R", "AL"}) +_bidi_rtl_categories = frozenset({"R", "AL", "AN"}) +_bidi_rtl_allowed = frozenset({"R", "AL", "AN", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"}) +_bidi_rtl_valid_ending = frozenset({"R", "AL", "EN", "AN"}) +_bidi_rtl_numeric = frozenset({"AN", "EN"}) +_bidi_ltr_allowed = frozenset({"L", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"}) +_bidi_ltr_valid_ending = frozenset({"L", "EN"}) +_bidi_joiner_l_or_d = frozenset({ord("L"), ord("D")}) +_bidi_joiner_r_or_d = frozenset({ord("R"), ord("D")}) + + class IDNAError(UnicodeError): """Base exception for all IDNA-encoding related problems""" @@ -38,9 +50,8 @@ class InvalidCodepointContext(IDNAError): def _combining_class(cp: int) -> int: v = unicodedata.combining(chr(cp)) - if v == 0: - if not unicodedata.name(chr(cp)): - raise ValueError("Unknown character in unicodedata") + if v == 0 and not unicodedata.name(chr(cp)): + raise ValueError("Unknown character in unicodedata") return v @@ -53,7 +64,7 @@ def _punycode(s: str) -> bytes: def _unot(s: int) -> str: - return "U+{:04X}".format(s) + return f"U+{s:04X}" def valid_label_length(label: Union[bytes, str]) -> bool: @@ -68,25 +79,21 @@ def valid_label_length(label: Union[bytes, str]) -> bool: :returns: ``True`` if the label is within the length limit, otherwise ``False``. """ - if len(label) > 63: - return False - return True + return len(label) <= 63 -def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool: +def valid_string_length(domain: Union[bytes, str], trailing_dot: bool) -> bool: """Check that a full domain name does not exceed the maximum length. Per :rfc:`1035`, a domain name is limited to 253 octets when no trailing dot is present, or 254 octets when one is included. - :param label: The full (possibly multi-label) domain name. - :param trailing_dot: ``True`` if ``label`` includes a trailing ``.``. + :param domain: The full (possibly multi-label) domain name. + :param trailing_dot: ``True`` if ``domain`` includes a trailing ``.``. :returns: ``True`` if the domain is within the length limit, otherwise ``False``. """ - if len(label) > (254 if trailing_dot else 253): - return False - return True + return len(domain) <= (254 if trailing_dot else 253) def check_bidi(label: str, check_ltr: bool = False) -> bool: @@ -111,20 +118,20 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool: direction = unicodedata.bidirectional(cp) if direction == "": # String likely comes from a newer version of Unicode - raise IDNABidiError("Unknown directionality in label {} at position {}".format(repr(label), idx)) - if direction in ["R", "AL", "AN"]: + raise IDNABidiError(f"Unknown directionality in label {repr(label)} at position {idx}") + if direction in _bidi_rtl_categories: bidi_label = True if not bidi_label and not check_ltr: return True # Bidi rule 1 direction = unicodedata.bidirectional(label[0]) - if direction in ["R", "AL"]: + if direction in _bidi_rtl_first: rtl = True elif direction == "L": rtl = False else: - raise IDNABidiError("First codepoint in label {} must be directionality L, R or AL".format(repr(label))) + raise IDNABidiError(f"First codepoint in label {repr(label)} must be directionality L, R or AL") valid_ending = False number_type: Optional[str] = None @@ -133,26 +140,15 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool: if rtl: # Bidi rule 2 - if direction not in [ - "R", - "AL", - "AN", - "EN", - "ES", - "CS", - "ET", - "ON", - "BN", - "NSM", - ]: - raise IDNABidiError("Invalid direction for codepoint at position {} in a right-to-left label".format(idx)) + if direction not in _bidi_rtl_allowed: + raise IDNABidiError(f"Invalid direction for codepoint at position {idx} in a right-to-left label") # Bidi rule 3 - if direction in ["R", "AL", "EN", "AN"]: + if direction in _bidi_rtl_valid_ending: valid_ending = True elif direction != "NSM": valid_ending = False # Bidi rule 4 - if direction in ["AN", "EN"]: + if direction in _bidi_rtl_numeric: if not number_type: number_type = direction else: @@ -160,10 +156,10 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool: raise IDNABidiError("Can not mix numeral types in a right-to-left label") else: # Bidi rule 5 - if direction not in ["L", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"]: - raise IDNABidiError("Invalid direction for codepoint at position {} in a left-to-right label".format(idx)) + if direction not in _bidi_ltr_allowed: + raise IDNABidiError(f"Invalid direction for codepoint at position {idx} in a left-to-right label") # Bidi rule 6 - if direction in ["L", "EN"]: + if direction in _bidi_ltr_valid_ending: valid_ending = True elif direction != "NSM": valid_ending = False @@ -235,16 +231,15 @@ def valid_contextj(label: str, pos: int) -> bool: cp_value = ord(label[pos]) if cp_value == 0x200C: - if pos > 0: - if _combining_class(ord(label[pos - 1])) == _virama_combining_class: - return True + if pos > 0 and _combining_class(ord(label[pos - 1])) == _virama_combining_class: + return True ok = False for i in range(pos - 1, -1, -1): joining_type = idnadata.joining_types().get(ord(label[i])) if joining_type == ord("T"): continue - elif joining_type in [ord("L"), ord("D")]: + elif joining_type in _bidi_joiner_l_or_d: ok = True break else: @@ -258,7 +253,7 @@ def valid_contextj(label: str, pos: int) -> bool: joining_type = idnadata.joining_types().get(ord(label[i])) if joining_type == ord("T"): continue - elif joining_type in [ord("R"), ord("D")]: + elif joining_type in _bidi_joiner_r_or_d: ok = True break else: @@ -266,10 +261,7 @@ def valid_contextj(label: str, pos: int) -> bool: return ok if cp_value == 0x200D: - if pos > 0: - if _combining_class(ord(label[pos - 1])) == _virama_combining_class: - return True - return False + return pos > 0 and _combining_class(ord(label[pos - 1])) == _virama_combining_class else: return False @@ -292,10 +284,7 @@ def valid_contexto(label: str, pos: int, exception: bool = False) -> bool: cp_value = ord(label[pos]) if cp_value == 0x00B7: - if 0 < pos < len(label) - 1: - if ord(label[pos - 1]) == 0x006C and ord(label[pos + 1]) == 0x006C: - return True - return False + return 0 < pos < len(label) - 1 and ord(label[pos - 1]) == 0x006C and ord(label[pos + 1]) == 0x006C elif cp_value == 0x0375: if pos < len(label) - 1 and len(label) > 1: @@ -316,16 +305,10 @@ def valid_contexto(label: str, pos: int, exception: bool = False) -> bool: return False elif 0x660 <= cp_value <= 0x669: - for cp in label: - if 0x6F0 <= ord(cp) <= 0x06F9: - return False - return True + return not any(0x6F0 <= ord(cp) <= 0x06F9 for cp in label) elif 0x6F0 <= cp_value <= 0x6F9: - for cp in label: - if 0x660 <= ord(cp) <= 0x0669: - return False - return True + return not any(0x660 <= ord(cp) <= 0x0669 for cp in label) return False @@ -353,6 +336,11 @@ def check_label(label: Union[str, bytes, bytearray]) -> None: if len(label) == 0: raise IDNAError("Empty Label") + # Reject on domain length rather than label length so support some UTS 46 + # use cases, still reducing processing of label contextual rules + if not valid_string_length(label, trailing_dot=True): + raise IDNAError("Label too long") + check_nfc(label) check_hyphen_ok(label) check_initial_combiner(label) @@ -365,23 +353,19 @@ def check_label(label: Union[str, bytes, bytearray]) -> None: try: if not valid_contextj(label, pos): raise InvalidCodepointContext( - "Joiner {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label)) + f"Joiner {_unot(cp_value)} not allowed at position {pos + 1} in {repr(label)}" ) - except ValueError: + except ValueError as err: raise IDNAError( - "Unknown codepoint adjacent to joiner {} at position {} in {}".format( - _unot(cp_value), pos + 1, repr(label) - ) - ) + f"Unknown codepoint adjacent to joiner {_unot(cp_value)} at position {pos + 1} in {repr(label)}" + ) from err elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTO"]): if not valid_contexto(label, pos): raise InvalidCodepointContext( - "Codepoint {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label)) + f"Codepoint {_unot(cp_value)} not allowed at position {pos + 1} in {repr(label)}" ) else: - raise InvalidCodepoint( - "Codepoint {} at position {} of {} not allowed".format(_unot(cp_value), pos + 1, repr(label)) - ) + raise InvalidCodepoint(f"Codepoint {_unot(cp_value)} at position {pos + 1} of {repr(label)} not allowed") check_bidi(label) @@ -444,7 +428,7 @@ def ulabel(label: Union[str, bytes, bytearray]) -> str: label_bytes = label_bytes[len(_alabel_prefix) :] if not label_bytes: raise IDNAError("Malformed A-label, no Punycode eligible content found") - if label_bytes.decode("ascii")[-1] == "-": + if label_bytes.endswith(b"-"): raise IDNAError("A-label must not end with a hyphen") else: check_label(label_bytes) @@ -452,8 +436,8 @@ def ulabel(label: Union[str, bytes, bytearray]) -> str: try: label = label_bytes.decode("punycode") - except UnicodeError: - raise IDNAError("Invalid A-label") + except UnicodeError as err: + raise IDNAError("Invalid A-label") from err check_label(label) return label @@ -488,18 +472,27 @@ def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False replacement: Optional[str] = None if len(uts46row) == 3: replacement = uts46row[2] # ty: ignore[index-out-of-bounds] - if status == "V" or (status == "D" and not transitional) or (status == "3" and not std3_rules and replacement is None): - output += char - elif replacement is not None and ( + + # UTS #46 §4: V is always valid, D is deviation (kept unless transitional), + # 3 is disallowed-STD3 (kept unmapped if std3_rules is off and no mapping). + keep_as_is = ( + status == "V" or (status == "D" and not transitional) or (status == "3" and not std3_rules and replacement is None) + ) + # M is mapped, 3-with-replacement and transitional D fall through to the + # same replacement output path. + use_replacement = replacement is not None and ( status == "M" or (status == "3" and not std3_rules) or (status == "D" and transitional) - ): + ) + + if keep_as_is: + output += char + elif use_replacement: + assert replacement is not None # narrowed by use_replacement output += replacement elif status == "I": continue else: - raise InvalidCodepoint( - "Codepoint {} not allowed at position {} in {}".format(_unot(code_point), pos + 1, repr(domain)) - ) + raise InvalidCodepoint(f"Codepoint {_unot(code_point)} not allowed at position {pos + 1} in {repr(domain)}") return unicodedata.normalize("NFC", output) @@ -542,8 +535,8 @@ def encode( if not isinstance(s, str): try: s = str(s, "ascii") - except (UnicodeDecodeError, TypeError): - raise IDNAError("should pass a unicode string to the function rather than a byte string.") + except (UnicodeDecodeError, TypeError) as err: + raise IDNAError("should pass a unicode string to the function rather than a byte string.") from err if uts46: s = uts46_remap(s, std3_rules, transitional) @@ -554,10 +547,7 @@ def encode( trailing_dot = False result = [] - if strict: - labels = s.split(".") - else: - labels = _unicode_dots_re.split(s) + labels = s.split(".") if strict else _unicode_dots_re.split(s) if not labels or labels == [""]: raise IDNAError("Empty domain") if labels[-1] == "": @@ -603,8 +593,8 @@ def decode( if not isinstance(s, str): try: s = str(s, "ascii") - except (UnicodeDecodeError, TypeError): - raise IDNAError("Invalid ASCII in A-label") + except (UnicodeDecodeError, TypeError) as err: + raise IDNAError("Invalid ASCII in A-label") from err if uts46: s = uts46_remap(s, std3_rules, False) # Reject inputs that exceed the maximum DNS domain length up-front @@ -613,10 +603,7 @@ def decode( raise IDNAError("Domain too long") trailing_dot = False result = [] - if not strict: - labels = _unicode_dots_re.split(s) - else: - labels = s.split(".") + labels = s.split(".") if strict else _unicode_dots_re.split(s) if not labels or labels == [""]: raise IDNAError("Empty domain") if not labels[-1]: diff --git a/contrib/python/idna/py3/idna/intranges.py b/contrib/python/idna/py3/idna/intranges.py index 7bfaa8d80d7..ea3455bb8da 100644 --- a/contrib/python/idna/py3/idna/intranges.py +++ b/contrib/python/idna/py3/idna/intranges.py @@ -21,9 +21,8 @@ def intranges_from_list(list_: List[int]) -> Tuple[int, ...]: ranges = [] last_write = -1 for i in range(len(sorted_list)): - if i + 1 < len(sorted_list): - if sorted_list[i] == sorted_list[i + 1] - 1: - continue + if i + 1 < len(sorted_list) and sorted_list[i] == sorted_list[i + 1] - 1: + continue current_range = sorted_list[last_write + 1 : i + 1] ranges.append(_encode_range(current_range[0], current_range[-1] + 1)) last_write = i diff --git a/contrib/python/idna/py3/idna/package_data.py b/contrib/python/idna/py3/idna/package_data.py index 2bd6cdb8838..75debb75d6f 100644 --- a/contrib/python/idna/py3/idna/package_data.py +++ b/contrib/python/idna/py3/idna/package_data.py @@ -1 +1 @@ -__version__ = "3.14" +__version__ = "3.15" diff --git a/contrib/python/idna/py3/idna/uts46data.py b/contrib/python/idna/py3/idna/uts46data.py index cc2453eeb43..e13aa90fb86 100644 --- a/contrib/python/idna/py3/idna/uts46data.py +++ b/contrib/python/idna/py3/idna/uts46data.py @@ -1,5 +1,4 @@ # This file is automatically generated by tools/idna-data -# vim: set fileencoding=utf-8 : from typing import Tuple, Union diff --git a/contrib/python/idna/py3/tests/test_idna.py b/contrib/python/idna/py3/tests/test_idna.py index 3d6a25400e2..65eecfd88ee 100644 --- a/contrib/python/idna/py3/tests/test_idna.py +++ b/contrib/python/idna/py3/tests/test_idna.py @@ -95,6 +95,30 @@ class IDNATests(unittest.TestCase): self.assertRaises(idna.IDNAError, idna.decode, payload) self.assertLess(time.perf_counter() - start, 1.0) + def test_oversized_label_rejected_promptly(self): + # The whole-domain cap in encode()/decode() does not cover direct + # callers of alabel/ulabel/check_label, nor the idna2008 + # incremental codec which calls alabel/ulabel per label. Without a + # per-label cap, a single oversized CONTEXTO-heavy label still + # drives validation into quadratic time. + import codecs + import time + + import idna.codec # noqa: F401 (register the idna2008 codec) + + payload = "・" * 8000 + "漢" + start = time.perf_counter() + self.assertRaises(idna.IDNAError, idna.check_label, payload) + self.assertRaises(idna.IDNAError, idna.alabel, payload) + self.assertRaises(idna.IDNAError, idna.ulabel, payload) + self.assertRaises( + idna.IDNAError, + codecs.getincrementalencoder("idna2008")().encode, + payload, + True, + ) + self.assertLess(time.perf_counter() - start, 1.0) + def test_check_bidi(self): la = "\u0061" r = "\u05d0" diff --git a/contrib/python/idna/py3/ya.make b/contrib/python/idna/py3/ya.make index 7efd5bf83ae..6f0b1006c52 100644 --- a/contrib/python/idna/py3/ya.make +++ b/contrib/python/idna/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(3.14) +VERSION(3.15) LICENSE(BSD-3-Clause) |
