summaryrefslogtreecommitdiffstats
path: root/contrib/python
diff options
context:
space:
mode:
authorrobot-piglet <[email protected]>2026-05-28 22:45:19 +0300
committerrobot-piglet <[email protected]>2026-05-29 11:45:12 +0300
commita73530ac63e9abc09636b5d6c5914a9173c6a0ec (patch)
tree4b794f12e28a60db811ae239a3f94089d8067352 /contrib/python
parentfd0ebc49a4e14d7194daaef90694074180d4ee7f (diff)
Intermediate changes
commit_hash:0d80b337d2b5cfb8757783f85a8f0159596a2b6e
Diffstat (limited to 'contrib/python')
-rw-r--r--contrib/python/idna/py3/.dist-info/METADATA148
-rw-r--r--contrib/python/idna/py3/README.md (renamed from contrib/python/idna/py3/README.rst)142
-rw-r--r--contrib/python/idna/py3/idna/codec.py13
-rw-r--r--contrib/python/idna/py3/idna/core.py163
-rw-r--r--contrib/python/idna/py3/idna/intranges.py5
-rw-r--r--contrib/python/idna/py3/idna/package_data.py2
-rw-r--r--contrib/python/idna/py3/idna/uts46data.py1
-rw-r--r--contrib/python/idna/py3/tests/test_idna.py24
-rw-r--r--contrib/python/idna/py3/ya.make2
9 files changed, 243 insertions, 257 deletions
diff --git a/contrib/python/idna/py3/.dist-info/METADATA b/contrib/python/idna/py3/.dist-info/METADATA
index b3024f7cbec..3dd388cb837 100644
--- a/contrib/python/idna/py3/.dist-info/METADATA
+++ b/contrib/python/idna/py3/.dist-info/METADATA
@@ -1,10 +1,10 @@
Metadata-Version: 2.4
Name: idna
-Version: 3.14
+Version: 3.15
Summary: Internationalized Domain Names in Applications (IDNA)
Author-email: Kim Davies <[email protected]>
Requires-Python: >=3.8
-Description-Content-Type: text/x-rst
+Description-Content-Type: text/markdown
License-Expression: BSD-3-Clause
Classifier: Development Status :: 5 - Production/Stable
Classifier: Intended Audience :: Developers
@@ -29,156 +29,147 @@ License-File: LICENSE.md
Requires-Dist: ruff >= 0.6.2 ; extra == "all"
Requires-Dist: mypy >= 1.11.2 ; extra == "all"
Requires-Dist: pytest >= 8.3.2 ; extra == "all"
-Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.rst
+Project-URL: Changelog, https://github.com/kjd/idna/blob/master/HISTORY.md
Project-URL: Issue tracker, https://github.com/kjd/idna/issues
Project-URL: Source, https://github.com/kjd/idna
Provides-Extra: all
-Internationalized Domain Names in Applications (IDNA)
-=====================================================
+# Internationalized Domain Names in Applications (IDNA)
-Support for `Internationalized Domain Names in
-Applications (IDNA) <https://tools.ietf.org/html/rfc5891>`_
-and `Unicode IDNA Compatibility Processing
-<https://unicode.org/reports/tr46/>`_.
+Support for [Internationalized Domain Names in
+Applications (IDNA)](https://tools.ietf.org/html/rfc5891)
+and [Unicode IDNA Compatibility Processing](https://unicode.org/reports/tr46/).
The latest versions of these standards supplied here provide
more comprehensive language coverage and reduce the potential of
allowing domains with known security vulnerabilities. This library
-is a suitable replacement for the “encodings.idna”
+is a suitable replacement for the "encodings.idna"
module that comes with the Python standard library, but which
only supports an older superseded IDNA specification from 2003.
Basic functions are simply executed:
-.. code-block:: pycon
+```pycon
+>>> import idna
+>>> idna.encode('ドメイン.テスト')
+b'xn--eckwd4c7c.xn--zckzah'
+>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
+ドメイン.テスト
+```
- >>> import idna
- >>> idna.encode('ドメイン.テスト')
- b'xn--eckwd4c7c.xn--zckzah'
- >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
- ドメイン.テスト
-
-Installation
-------------
+## Installation
This package is available for installation from PyPI via the
typical mechanisms, such as:
-.. code-block:: bash
-
- $ python3 -m pip install idna
+```bash
+$ python3 -m pip install idna
+```
-Usage
------
+## Usage
-For typical usage, the ``encode`` and ``decode`` functions will take a
+For typical usage, the `encode` and `decode` functions will take a
domain name argument and perform a conversion to ASCII-compatible encoding
(known as A-labels), or to Unicode strings (known as U-labels)
respectively.
-.. code-block:: pycon
-
- >>> import idna
- >>> idna.encode('ドメイン.テスト')
- b'xn--eckwd4c7c.xn--zckzah'
- >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
- ドメイン.テスト
+```pycon
+>>> import idna
+>>> idna.encode('ドメイン.テスト')
+b'xn--eckwd4c7c.xn--zckzah'
+>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
+ドメイン.テスト
+```
-Conversions can be applied at a per-label basis using the ``ulabel`` or
-``alabel`` functions if necessary:
+Conversions can be applied at a per-label basis using the `ulabel` or
+`alabel` functions if necessary:
-.. code-block:: pycon
+```pycon
+>>> idna.alabel('测试')
+b'xn--0zwm56d'
+```
- >>> idna.alabel('测试')
- b'xn--0zwm56d'
+### Compatibility Mapping (UTS #46)
-Compatibility Mapping (UTS #46)
-+++++++++++++++++++++++++++++++
-
-This library provides support for `Unicode IDNA Compatibility
-Processing <https://unicode.org/reports/tr46/>`_ which normalizes input from
+This library provides support for [Unicode IDNA Compatibility
+Processing](https://unicode.org/reports/tr46/) which normalizes input from
different potential ways a user may input a domain prior to performing the IDNA
-conversion operations. This functionality, known as a
-`mapping <https://tools.ietf.org/html/rfc5895>`_, is considered by the
+conversion operations. This functionality, known as a
+[mapping](https://tools.ietf.org/html/rfc5895), is considered by the
specification to be a local user-interface issue distinct from IDNA
conversion functionality.
-For example, “Königsgäßchen” is not a permissible label as *LATIN
+For example, "Königsgäßchen" is not a permissible label as *LATIN
CAPITAL LETTER K* is not allowed (nor are capital letters in general).
UTS 46 will convert this into lower case prior to applying the IDNA
conversion.
-.. code-block:: pycon
-
- >>> import idna
- >>> idna.encode('Königsgäßchen')
- ...
- idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed
- >>> idna.encode('Königsgäßchen', uts46=True)
- b'xn--knigsgchen-b4a3dun'
- >>> print(idna.decode('xn--knigsgchen-b4a3dun'))
- königsgäßchen
+```pycon
+>>> import idna
+>>> idna.encode('Königsgäßchen')
+...
+idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed
+>>> idna.encode('Königsgäßchen', uts46=True)
+b'xn--knigsgchen-b4a3dun'
+>>> print(idna.decode('xn--knigsgchen-b4a3dun'))
+königsgäßchen
+```
-Exceptions
-----------
+## Exceptions
All errors raised during the conversion following the specification
-should raise an exception derived from the ``idna.IDNAError`` base
+should raise an exception derived from the `idna.IDNAError` base
class.
-More specific exceptions that may be generated as ``idna.IDNABidiError``
+More specific exceptions may be generated, such as `idna.IDNABidiError`
when the error reflects an illegal combination of left-to-right and
-right-to-left characters in a label; ``idna.InvalidCodepoint`` when
+right-to-left characters in a label; `idna.InvalidCodepoint` when
a specific codepoint is an illegal character in an IDN label (i.e.
-INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is
+INVALID); and `idna.InvalidCodepointContext` when the codepoint is
illegal based on its position in the string (i.e. it is CONTEXTO or CONTEXTJ
but the contextual requirements are not satisfied.)
-Building and Diagnostics
-------------------------
+## Building and Diagnostics
The IDNA and UTS 46 functionality relies upon pre-calculated lookup
tables for performance. These tables are derived from computing against
eligibility criteria in the respective standards using the command-line
-script ``tools/idna-data``.
+script `tools/idna-data`.
This tool will fetch relevant codepoint data from the Unicode repository
and perform the required calculations to identify eligibility. There are
three main modes:
-* ``idna-data make-libdata``. Generates ``idnadata.py`` and
- ``uts46data.py``, the pre-calculated lookup tables used for IDNA and
+* `idna-data make-libdata`. Generates `idnadata.py` and
+ `uts46data.py`, the pre-calculated lookup tables used for IDNA and
UTS 46 conversions. Implementers who wish to track this library against
a different Unicode version may use this tool to manually generate a
- different version of the ``idnadata.py`` and ``uts46data.py`` files.
+ different version of the `idnadata.py` and `uts46data.py` files.
-* ``idna-data make-table``. Generate a table of the IDNA disposition
+* `idna-data make-table`. Generate a table of the IDNA disposition
(e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix
- B.1 of RFC 5892 and the pre-computed tables published by `IANA
- <https://www.iana.org/>`_.
+ B.1 of RFC 5892 and the pre-computed tables published by [IANA](https://www.iana.org/).
-* ``idna-data U+0061``. Prints debugging output on the various
+* `idna-data U+0061`. Prints debugging output on the various
properties associated with an individual Unicode codepoint (in this
case, U+0061), that are used to assess the IDNA and UTS 46 status of a
codepoint. This is helpful in debugging or analysis.
-The tool accepts a number of arguments, described using ``idna-data -h``.
-Most notably, the ``--version`` argument allows the specification
+The tool accepts a number of arguments, described using `idna-data -h`.
+Most notably, the `--version` argument allows the specification
of the version of Unicode to be used in computing the table data. For
-example, ``idna-data --version 9.0.0 make-libdata`` will generate
+example, `idna-data --version 9.0.0 make-libdata` will generate
library data against Unicode 9.0.0.
-Additional Notes
-----------------
+## Additional Notes
* **Packages**. The latest tagged release version is published in the
- `Python Package Index <https://pypi.org/project/idna/>`_.
+ [Python Package Index](https://pypi.org/project/idna/).
* **Version support**. This library supports Python 3.8 and higher.
As this library serves as a low-level toolkit for a variety of
@@ -190,8 +181,7 @@ Additional Notes
* **Testing**. The library has a test suite based on each rule of the
IDNA specification, as well as tests that are provided as part of the
- Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing
- <https://unicode.org/reports/tr46/>`_.
+ Unicode Technical Standard 46, [Unicode IDNA Compatibility Processing](https://unicode.org/reports/tr46/).
* **Emoji**. It is an occasional request to support emoji domains in
this library. Encoding of symbols like emoji is expressly prohibited by
diff --git a/contrib/python/idna/py3/README.rst b/contrib/python/idna/py3/README.md
index 89ada8c4b25..5772219126b 100644
--- a/contrib/python/idna/py3/README.rst
+++ b/contrib/python/idna/py3/README.md
@@ -1,148 +1,139 @@
-Internationalized Domain Names in Applications (IDNA)
-=====================================================
+# Internationalized Domain Names in Applications (IDNA)
-Support for `Internationalized Domain Names in
-Applications (IDNA) <https://tools.ietf.org/html/rfc5891>`_
-and `Unicode IDNA Compatibility Processing
-<https://unicode.org/reports/tr46/>`_.
+Support for [Internationalized Domain Names in
+Applications (IDNA)](https://tools.ietf.org/html/rfc5891)
+and [Unicode IDNA Compatibility Processing](https://unicode.org/reports/tr46/).
The latest versions of these standards supplied here provide
more comprehensive language coverage and reduce the potential of
allowing domains with known security vulnerabilities. This library
-is a suitable replacement for the “encodings.idna”
+is a suitable replacement for the "encodings.idna"
module that comes with the Python standard library, but which
only supports an older superseded IDNA specification from 2003.
Basic functions are simply executed:
-.. code-block:: pycon
+```pycon
+>>> import idna
+>>> idna.encode('ドメイン.テスト')
+b'xn--eckwd4c7c.xn--zckzah'
+>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
+ドメイン.テスト
+```
- >>> import idna
- >>> idna.encode('ドメイン.テスト')
- b'xn--eckwd4c7c.xn--zckzah'
- >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
- ドメイン.テスト
-
-Installation
-------------
+## Installation
This package is available for installation from PyPI via the
typical mechanisms, such as:
-.. code-block:: bash
-
- $ python3 -m pip install idna
+```bash
+$ python3 -m pip install idna
+```
-Usage
------
+## Usage
-For typical usage, the ``encode`` and ``decode`` functions will take a
+For typical usage, the `encode` and `decode` functions will take a
domain name argument and perform a conversion to ASCII-compatible encoding
(known as A-labels), or to Unicode strings (known as U-labels)
respectively.
-.. code-block:: pycon
-
- >>> import idna
- >>> idna.encode('ドメイン.テスト')
- b'xn--eckwd4c7c.xn--zckzah'
- >>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
- ドメイン.テスト
+```pycon
+>>> import idna
+>>> idna.encode('ドメイン.テスト')
+b'xn--eckwd4c7c.xn--zckzah'
+>>> print(idna.decode('xn--eckwd4c7c.xn--zckzah'))
+ドメイン.テスト
+```
-Conversions can be applied at a per-label basis using the ``ulabel`` or
-``alabel`` functions if necessary:
+Conversions can be applied at a per-label basis using the `ulabel` or
+`alabel` functions if necessary:
-.. code-block:: pycon
+```pycon
+>>> idna.alabel('测试')
+b'xn--0zwm56d'
+```
- >>> idna.alabel('测试')
- b'xn--0zwm56d'
+### Compatibility Mapping (UTS #46)
-Compatibility Mapping (UTS #46)
-+++++++++++++++++++++++++++++++
-
-This library provides support for `Unicode IDNA Compatibility
-Processing <https://unicode.org/reports/tr46/>`_ which normalizes input from
+This library provides support for [Unicode IDNA Compatibility
+Processing](https://unicode.org/reports/tr46/) which normalizes input from
different potential ways a user may input a domain prior to performing the IDNA
-conversion operations. This functionality, known as a
-`mapping <https://tools.ietf.org/html/rfc5895>`_, is considered by the
+conversion operations. This functionality, known as a
+[mapping](https://tools.ietf.org/html/rfc5895), is considered by the
specification to be a local user-interface issue distinct from IDNA
conversion functionality.
-For example, “Königsgäßchen” is not a permissible label as *LATIN
+For example, "Königsgäßchen" is not a permissible label as *LATIN
CAPITAL LETTER K* is not allowed (nor are capital letters in general).
UTS 46 will convert this into lower case prior to applying the IDNA
conversion.
-.. code-block:: pycon
-
- >>> import idna
- >>> idna.encode('Königsgäßchen')
- ...
- idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed
- >>> idna.encode('Königsgäßchen', uts46=True)
- b'xn--knigsgchen-b4a3dun'
- >>> print(idna.decode('xn--knigsgchen-b4a3dun'))
- königsgäßchen
+```pycon
+>>> import idna
+>>> idna.encode('Königsgäßchen')
+...
+idna.core.InvalidCodepoint: Codepoint U+004B at position 1 of 'Königsgäßchen' not allowed
+>>> idna.encode('Königsgäßchen', uts46=True)
+b'xn--knigsgchen-b4a3dun'
+>>> print(idna.decode('xn--knigsgchen-b4a3dun'))
+königsgäßchen
+```
-Exceptions
-----------
+## Exceptions
All errors raised during the conversion following the specification
-should raise an exception derived from the ``idna.IDNAError`` base
+should raise an exception derived from the `idna.IDNAError` base
class.
-More specific exceptions that may be generated as ``idna.IDNABidiError``
+More specific exceptions may be generated, such as `idna.IDNABidiError`
when the error reflects an illegal combination of left-to-right and
-right-to-left characters in a label; ``idna.InvalidCodepoint`` when
+right-to-left characters in a label; `idna.InvalidCodepoint` when
a specific codepoint is an illegal character in an IDN label (i.e.
-INVALID); and ``idna.InvalidCodepointContext`` when the codepoint is
+INVALID); and `idna.InvalidCodepointContext` when the codepoint is
illegal based on its position in the string (i.e. it is CONTEXTO or CONTEXTJ
but the contextual requirements are not satisfied.)
-Building and Diagnostics
-------------------------
+## Building and Diagnostics
The IDNA and UTS 46 functionality relies upon pre-calculated lookup
tables for performance. These tables are derived from computing against
eligibility criteria in the respective standards using the command-line
-script ``tools/idna-data``.
+script `tools/idna-data`.
This tool will fetch relevant codepoint data from the Unicode repository
and perform the required calculations to identify eligibility. There are
three main modes:
-* ``idna-data make-libdata``. Generates ``idnadata.py`` and
- ``uts46data.py``, the pre-calculated lookup tables used for IDNA and
+* `idna-data make-libdata`. Generates `idnadata.py` and
+ `uts46data.py`, the pre-calculated lookup tables used for IDNA and
UTS 46 conversions. Implementers who wish to track this library against
a different Unicode version may use this tool to manually generate a
- different version of the ``idnadata.py`` and ``uts46data.py`` files.
+ different version of the `idnadata.py` and `uts46data.py` files.
-* ``idna-data make-table``. Generate a table of the IDNA disposition
+* `idna-data make-table`. Generate a table of the IDNA disposition
(e.g. PVALID, CONTEXTJ, CONTEXTO) in the format found in Appendix
- B.1 of RFC 5892 and the pre-computed tables published by `IANA
- <https://www.iana.org/>`_.
+ B.1 of RFC 5892 and the pre-computed tables published by [IANA](https://www.iana.org/).
-* ``idna-data U+0061``. Prints debugging output on the various
+* `idna-data U+0061`. Prints debugging output on the various
properties associated with an individual Unicode codepoint (in this
case, U+0061), that are used to assess the IDNA and UTS 46 status of a
codepoint. This is helpful in debugging or analysis.
-The tool accepts a number of arguments, described using ``idna-data -h``.
-Most notably, the ``--version`` argument allows the specification
+The tool accepts a number of arguments, described using `idna-data -h`.
+Most notably, the `--version` argument allows the specification
of the version of Unicode to be used in computing the table data. For
-example, ``idna-data --version 9.0.0 make-libdata`` will generate
+example, `idna-data --version 9.0.0 make-libdata` will generate
library data against Unicode 9.0.0.
-Additional Notes
-----------------
+## Additional Notes
* **Packages**. The latest tagged release version is published in the
- `Python Package Index <https://pypi.org/project/idna/>`_.
+ [Python Package Index](https://pypi.org/project/idna/).
* **Version support**. This library supports Python 3.8 and higher.
As this library serves as a low-level toolkit for a variety of
@@ -154,8 +145,7 @@ Additional Notes
* **Testing**. The library has a test suite based on each rule of the
IDNA specification, as well as tests that are provided as part of the
- Unicode Technical Standard 46, `Unicode IDNA Compatibility Processing
- <https://unicode.org/reports/tr46/>`_.
+ Unicode Technical Standard 46, [Unicode IDNA Compatibility Processing](https://unicode.org/reports/tr46/).
* **Emoji**. It is an occasional request to support emoji domains in
this library. Encoding of symbols like emoji is expressly prohibited by
diff --git a/contrib/python/idna/py3/idna/codec.py b/contrib/python/idna/py3/idna/codec.py
index befba40ce59..280dc3972cb 100644
--- a/contrib/python/idna/py3/idna/codec.py
+++ b/contrib/python/idna/py3/idna/codec.py
@@ -1,10 +1,7 @@
import codecs
-import re
from typing import Any, Optional, Tuple
-from .core import IDNAError, alabel, decode, encode, ulabel
-
-_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]")
+from .core import IDNAError, _unicode_dots_re, alabel, decode, encode, ulabel
class Codec(codecs.Codec):
@@ -20,7 +17,7 @@ class Codec(codecs.Codec):
def encode(self, data: str, errors: str = "strict") -> Tuple[bytes, int]: # ty: ignore[invalid-method-override]
if errors != "strict":
- raise IDNAError('Unsupported error handling "{}"'.format(errors))
+ raise IDNAError(f'Unsupported error handling "{errors}"')
if not data:
return b"", 0
@@ -29,7 +26,7 @@ class Codec(codecs.Codec):
def decode(self, data: bytes, errors: str = "strict") -> Tuple[str, int]: # ty: ignore[invalid-method-override]
if errors != "strict":
- raise IDNAError('Unsupported error handling "{}"'.format(errors))
+ raise IDNAError(f'Unsupported error handling "{errors}"')
if not data:
return "", 0
@@ -51,7 +48,7 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]: # ty: ignore[invalid-method-override]
if errors != "strict":
- raise IDNAError('Unsupported error handling "{}"'.format(errors))
+ raise IDNAError(f'Unsupported error handling "{errors}"')
if not data:
return b"", 0
@@ -94,7 +91,7 @@ class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]: # ty: ignore[invalid-method-override]
if errors != "strict":
- raise IDNAError('Unsupported error handling "{}"'.format(errors))
+ raise IDNAError(f'Unsupported error handling "{errors}"')
if not data:
return ("", 0)
diff --git a/contrib/python/idna/py3/idna/core.py b/contrib/python/idna/py3/idna/core.py
index 1804ccaf758..b6f9442deb3 100644
--- a/contrib/python/idna/py3/idna/core.py
+++ b/contrib/python/idna/py3/idna/core.py
@@ -12,6 +12,18 @@ _alabel_prefix = b"xn--"
_unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]")
+# Bidi category sets from RFC 5893, hoisted out of the per-codepoint loop
+_bidi_rtl_first = frozenset({"R", "AL"})
+_bidi_rtl_categories = frozenset({"R", "AL", "AN"})
+_bidi_rtl_allowed = frozenset({"R", "AL", "AN", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"})
+_bidi_rtl_valid_ending = frozenset({"R", "AL", "EN", "AN"})
+_bidi_rtl_numeric = frozenset({"AN", "EN"})
+_bidi_ltr_allowed = frozenset({"L", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"})
+_bidi_ltr_valid_ending = frozenset({"L", "EN"})
+_bidi_joiner_l_or_d = frozenset({ord("L"), ord("D")})
+_bidi_joiner_r_or_d = frozenset({ord("R"), ord("D")})
+
+
class IDNAError(UnicodeError):
"""Base exception for all IDNA-encoding related problems"""
@@ -38,9 +50,8 @@ class InvalidCodepointContext(IDNAError):
def _combining_class(cp: int) -> int:
v = unicodedata.combining(chr(cp))
- if v == 0:
- if not unicodedata.name(chr(cp)):
- raise ValueError("Unknown character in unicodedata")
+ if v == 0 and not unicodedata.name(chr(cp)):
+ raise ValueError("Unknown character in unicodedata")
return v
@@ -53,7 +64,7 @@ def _punycode(s: str) -> bytes:
def _unot(s: int) -> str:
- return "U+{:04X}".format(s)
+ return f"U+{s:04X}"
def valid_label_length(label: Union[bytes, str]) -> bool:
@@ -68,25 +79,21 @@ def valid_label_length(label: Union[bytes, str]) -> bool:
:returns: ``True`` if the label is within the length limit, otherwise
``False``.
"""
- if len(label) > 63:
- return False
- return True
+ return len(label) <= 63
-def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool:
+def valid_string_length(domain: Union[bytes, str], trailing_dot: bool) -> bool:
"""Check that a full domain name does not exceed the maximum length.
Per :rfc:`1035`, a domain name is limited to 253 octets when no trailing
dot is present, or 254 octets when one is included.
- :param label: The full (possibly multi-label) domain name.
- :param trailing_dot: ``True`` if ``label`` includes a trailing ``.``.
+ :param domain: The full (possibly multi-label) domain name.
+ :param trailing_dot: ``True`` if ``domain`` includes a trailing ``.``.
:returns: ``True`` if the domain is within the length limit, otherwise
``False``.
"""
- if len(label) > (254 if trailing_dot else 253):
- return False
- return True
+ return len(domain) <= (254 if trailing_dot else 253)
def check_bidi(label: str, check_ltr: bool = False) -> bool:
@@ -111,20 +118,20 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool:
direction = unicodedata.bidirectional(cp)
if direction == "":
# String likely comes from a newer version of Unicode
- raise IDNABidiError("Unknown directionality in label {} at position {}".format(repr(label), idx))
- if direction in ["R", "AL", "AN"]:
+ raise IDNABidiError(f"Unknown directionality in label {repr(label)} at position {idx}")
+ if direction in _bidi_rtl_categories:
bidi_label = True
if not bidi_label and not check_ltr:
return True
# Bidi rule 1
direction = unicodedata.bidirectional(label[0])
- if direction in ["R", "AL"]:
+ if direction in _bidi_rtl_first:
rtl = True
elif direction == "L":
rtl = False
else:
- raise IDNABidiError("First codepoint in label {} must be directionality L, R or AL".format(repr(label)))
+ raise IDNABidiError(f"First codepoint in label {repr(label)} must be directionality L, R or AL")
valid_ending = False
number_type: Optional[str] = None
@@ -133,26 +140,15 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool:
if rtl:
# Bidi rule 2
- if direction not in [
- "R",
- "AL",
- "AN",
- "EN",
- "ES",
- "CS",
- "ET",
- "ON",
- "BN",
- "NSM",
- ]:
- raise IDNABidiError("Invalid direction for codepoint at position {} in a right-to-left label".format(idx))
+ if direction not in _bidi_rtl_allowed:
+ raise IDNABidiError(f"Invalid direction for codepoint at position {idx} in a right-to-left label")
# Bidi rule 3
- if direction in ["R", "AL", "EN", "AN"]:
+ if direction in _bidi_rtl_valid_ending:
valid_ending = True
elif direction != "NSM":
valid_ending = False
# Bidi rule 4
- if direction in ["AN", "EN"]:
+ if direction in _bidi_rtl_numeric:
if not number_type:
number_type = direction
else:
@@ -160,10 +156,10 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool:
raise IDNABidiError("Can not mix numeral types in a right-to-left label")
else:
# Bidi rule 5
- if direction not in ["L", "EN", "ES", "CS", "ET", "ON", "BN", "NSM"]:
- raise IDNABidiError("Invalid direction for codepoint at position {} in a left-to-right label".format(idx))
+ if direction not in _bidi_ltr_allowed:
+ raise IDNABidiError(f"Invalid direction for codepoint at position {idx} in a left-to-right label")
# Bidi rule 6
- if direction in ["L", "EN"]:
+ if direction in _bidi_ltr_valid_ending:
valid_ending = True
elif direction != "NSM":
valid_ending = False
@@ -235,16 +231,15 @@ def valid_contextj(label: str, pos: int) -> bool:
cp_value = ord(label[pos])
if cp_value == 0x200C:
- if pos > 0:
- if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
- return True
+ if pos > 0 and _combining_class(ord(label[pos - 1])) == _virama_combining_class:
+ return True
ok = False
for i in range(pos - 1, -1, -1):
joining_type = idnadata.joining_types().get(ord(label[i]))
if joining_type == ord("T"):
continue
- elif joining_type in [ord("L"), ord("D")]:
+ elif joining_type in _bidi_joiner_l_or_d:
ok = True
break
else:
@@ -258,7 +253,7 @@ def valid_contextj(label: str, pos: int) -> bool:
joining_type = idnadata.joining_types().get(ord(label[i]))
if joining_type == ord("T"):
continue
- elif joining_type in [ord("R"), ord("D")]:
+ elif joining_type in _bidi_joiner_r_or_d:
ok = True
break
else:
@@ -266,10 +261,7 @@ def valid_contextj(label: str, pos: int) -> bool:
return ok
if cp_value == 0x200D:
- if pos > 0:
- if _combining_class(ord(label[pos - 1])) == _virama_combining_class:
- return True
- return False
+ return pos > 0 and _combining_class(ord(label[pos - 1])) == _virama_combining_class
else:
return False
@@ -292,10 +284,7 @@ def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
cp_value = ord(label[pos])
if cp_value == 0x00B7:
- if 0 < pos < len(label) - 1:
- if ord(label[pos - 1]) == 0x006C and ord(label[pos + 1]) == 0x006C:
- return True
- return False
+ return 0 < pos < len(label) - 1 and ord(label[pos - 1]) == 0x006C and ord(label[pos + 1]) == 0x006C
elif cp_value == 0x0375:
if pos < len(label) - 1 and len(label) > 1:
@@ -316,16 +305,10 @@ def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
return False
elif 0x660 <= cp_value <= 0x669:
- for cp in label:
- if 0x6F0 <= ord(cp) <= 0x06F9:
- return False
- return True
+ return not any(0x6F0 <= ord(cp) <= 0x06F9 for cp in label)
elif 0x6F0 <= cp_value <= 0x6F9:
- for cp in label:
- if 0x660 <= ord(cp) <= 0x0669:
- return False
- return True
+ return not any(0x660 <= ord(cp) <= 0x0669 for cp in label)
return False
@@ -353,6 +336,11 @@ def check_label(label: Union[str, bytes, bytearray]) -> None:
if len(label) == 0:
raise IDNAError("Empty Label")
+ # Reject on domain length rather than label length to support some UTS 46
+ # use cases, while still reducing processing of label contextual rules
+ if not valid_string_length(label, trailing_dot=True):
+ raise IDNAError("Label too long")
+
check_nfc(label)
check_hyphen_ok(label)
check_initial_combiner(label)
@@ -365,23 +353,19 @@ def check_label(label: Union[str, bytes, bytearray]) -> None:
try:
if not valid_contextj(label, pos):
raise InvalidCodepointContext(
- "Joiner {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label))
+ f"Joiner {_unot(cp_value)} not allowed at position {pos + 1} in {repr(label)}"
)
- except ValueError:
+ except ValueError as err:
raise IDNAError(
- "Unknown codepoint adjacent to joiner {} at position {} in {}".format(
- _unot(cp_value), pos + 1, repr(label)
- )
- )
+ f"Unknown codepoint adjacent to joiner {_unot(cp_value)} at position {pos + 1} in {repr(label)}"
+ ) from err
elif intranges_contain(cp_value, idnadata.codepoint_classes["CONTEXTO"]):
if not valid_contexto(label, pos):
raise InvalidCodepointContext(
- "Codepoint {} not allowed at position {} in {}".format(_unot(cp_value), pos + 1, repr(label))
+ f"Codepoint {_unot(cp_value)} not allowed at position {pos + 1} in {repr(label)}"
)
else:
- raise InvalidCodepoint(
- "Codepoint {} at position {} of {} not allowed".format(_unot(cp_value), pos + 1, repr(label))
- )
+ raise InvalidCodepoint(f"Codepoint {_unot(cp_value)} at position {pos + 1} of {repr(label)} not allowed")
check_bidi(label)
@@ -444,7 +428,7 @@ def ulabel(label: Union[str, bytes, bytearray]) -> str:
label_bytes = label_bytes[len(_alabel_prefix) :]
if not label_bytes:
raise IDNAError("Malformed A-label, no Punycode eligible content found")
- if label_bytes.decode("ascii")[-1] == "-":
+ if label_bytes.endswith(b"-"):
raise IDNAError("A-label must not end with a hyphen")
else:
check_label(label_bytes)
@@ -452,8 +436,8 @@ def ulabel(label: Union[str, bytes, bytearray]) -> str:
try:
label = label_bytes.decode("punycode")
- except UnicodeError:
- raise IDNAError("Invalid A-label")
+ except UnicodeError as err:
+ raise IDNAError("Invalid A-label") from err
check_label(label)
return label
@@ -488,18 +472,27 @@ def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False
replacement: Optional[str] = None
if len(uts46row) == 3:
replacement = uts46row[2] # ty: ignore[index-out-of-bounds]
- if status == "V" or (status == "D" and not transitional) or (status == "3" and not std3_rules and replacement is None):
- output += char
- elif replacement is not None and (
+
+ # UTS #46 §4: V is always valid, D is deviation (kept unless transitional),
+ # 3 is disallowed-STD3 (kept unmapped if std3_rules is off and no mapping).
+ keep_as_is = (
+ status == "V" or (status == "D" and not transitional) or (status == "3" and not std3_rules and replacement is None)
+ )
+ # M is mapped, 3-with-replacement and transitional D fall through to the
+ # same replacement output path.
+ use_replacement = replacement is not None and (
status == "M" or (status == "3" and not std3_rules) or (status == "D" and transitional)
- ):
+ )
+
+ if keep_as_is:
+ output += char
+ elif use_replacement:
+ assert replacement is not None # narrowed by use_replacement
output += replacement
elif status == "I":
continue
else:
- raise InvalidCodepoint(
- "Codepoint {} not allowed at position {} in {}".format(_unot(code_point), pos + 1, repr(domain))
- )
+ raise InvalidCodepoint(f"Codepoint {_unot(code_point)} not allowed at position {pos + 1} in {repr(domain)}")
return unicodedata.normalize("NFC", output)
@@ -542,8 +535,8 @@ def encode(
if not isinstance(s, str):
try:
s = str(s, "ascii")
- except (UnicodeDecodeError, TypeError):
- raise IDNAError("should pass a unicode string to the function rather than a byte string.")
+ except (UnicodeDecodeError, TypeError) as err:
+ raise IDNAError("should pass a unicode string to the function rather than a byte string.") from err
if uts46:
s = uts46_remap(s, std3_rules, transitional)
@@ -554,10 +547,7 @@ def encode(
trailing_dot = False
result = []
- if strict:
- labels = s.split(".")
- else:
- labels = _unicode_dots_re.split(s)
+ labels = s.split(".") if strict else _unicode_dots_re.split(s)
if not labels or labels == [""]:
raise IDNAError("Empty domain")
if labels[-1] == "":
@@ -603,8 +593,8 @@ def decode(
if not isinstance(s, str):
try:
s = str(s, "ascii")
- except (UnicodeDecodeError, TypeError):
- raise IDNAError("Invalid ASCII in A-label")
+ except (UnicodeDecodeError, TypeError) as err:
+ raise IDNAError("Invalid ASCII in A-label") from err
if uts46:
s = uts46_remap(s, std3_rules, False)
# Reject inputs that exceed the maximum DNS domain length up-front
@@ -613,10 +603,7 @@ def decode(
raise IDNAError("Domain too long")
trailing_dot = False
result = []
- if not strict:
- labels = _unicode_dots_re.split(s)
- else:
- labels = s.split(".")
+ labels = s.split(".") if strict else _unicode_dots_re.split(s)
if not labels or labels == [""]:
raise IDNAError("Empty domain")
if not labels[-1]:
diff --git a/contrib/python/idna/py3/idna/intranges.py b/contrib/python/idna/py3/idna/intranges.py
index 7bfaa8d80d7..ea3455bb8da 100644
--- a/contrib/python/idna/py3/idna/intranges.py
+++ b/contrib/python/idna/py3/idna/intranges.py
@@ -21,9 +21,8 @@ def intranges_from_list(list_: List[int]) -> Tuple[int, ...]:
ranges = []
last_write = -1
for i in range(len(sorted_list)):
- if i + 1 < len(sorted_list):
- if sorted_list[i] == sorted_list[i + 1] - 1:
- continue
+ if i + 1 < len(sorted_list) and sorted_list[i] == sorted_list[i + 1] - 1:
+ continue
current_range = sorted_list[last_write + 1 : i + 1]
ranges.append(_encode_range(current_range[0], current_range[-1] + 1))
last_write = i
diff --git a/contrib/python/idna/py3/idna/package_data.py b/contrib/python/idna/py3/idna/package_data.py
index 2bd6cdb8838..75debb75d6f 100644
--- a/contrib/python/idna/py3/idna/package_data.py
+++ b/contrib/python/idna/py3/idna/package_data.py
@@ -1 +1 @@
-__version__ = "3.14"
+__version__ = "3.15"
diff --git a/contrib/python/idna/py3/idna/uts46data.py b/contrib/python/idna/py3/idna/uts46data.py
index cc2453eeb43..e13aa90fb86 100644
--- a/contrib/python/idna/py3/idna/uts46data.py
+++ b/contrib/python/idna/py3/idna/uts46data.py
@@ -1,5 +1,4 @@
# This file is automatically generated by tools/idna-data
-# vim: set fileencoding=utf-8 :
from typing import Tuple, Union
diff --git a/contrib/python/idna/py3/tests/test_idna.py b/contrib/python/idna/py3/tests/test_idna.py
index 3d6a25400e2..65eecfd88ee 100644
--- a/contrib/python/idna/py3/tests/test_idna.py
+++ b/contrib/python/idna/py3/tests/test_idna.py
@@ -95,6 +95,30 @@ class IDNATests(unittest.TestCase):
self.assertRaises(idna.IDNAError, idna.decode, payload)
self.assertLess(time.perf_counter() - start, 1.0)
+ def test_oversized_label_rejected_promptly(self):
+ # The whole-domain cap in encode()/decode() does not cover direct
+ # callers of alabel/ulabel/check_label, nor the idna2008
+ # incremental codec which calls alabel/ulabel per label. Without a
+ # per-label cap, a single oversized CONTEXTO-heavy label still
+ # drives validation into quadratic time.
+ import codecs
+ import time
+
+ import idna.codec # noqa: F401 (register the idna2008 codec)
+
+ payload = "・" * 8000 + "漢"
+ start = time.perf_counter()
+ self.assertRaises(idna.IDNAError, idna.check_label, payload)
+ self.assertRaises(idna.IDNAError, idna.alabel, payload)
+ self.assertRaises(idna.IDNAError, idna.ulabel, payload)
+ self.assertRaises(
+ idna.IDNAError,
+ codecs.getincrementalencoder("idna2008")().encode,
+ payload,
+ True,
+ )
+ self.assertLess(time.perf_counter() - start, 1.0)
+
def test_check_bidi(self):
la = "\u0061"
r = "\u05d0"
diff --git a/contrib/python/idna/py3/ya.make b/contrib/python/idna/py3/ya.make
index 7efd5bf83ae..6f0b1006c52 100644
--- a/contrib/python/idna/py3/ya.make
+++ b/contrib/python/idna/py3/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(3.14)
+VERSION(3.15)
LICENSE(BSD-3-Clause)