summary refs log tree commit diff stats
path: root/contrib/python
diff options
context:
space:
mode:
author robot-piglet <[email protected]> 2026-05-27 09:47:32 +0300
committer robot-piglet <[email protected]> 2026-05-27 10:34:13 +0300
commit b3fe130270ac8af19b010f7cdf80789e68c9b78b (patch)
tree 35addf5fe01eb515c714ca5a8c67d8ea0c72ee87 /contrib/python
parent 7a90a58a5986c1826d8ae2f8066a3f4e86c909fc (diff)
Intermediate changes
commit_hash:8d7a61a89d145f2cf8e01f4c370f0200b0b078ce
Diffstat (limited to 'contrib/python')
-rw-r--r-- contrib/python/idna/py3/.dist-info/METADATA 2
-rw-r--r-- contrib/python/idna/py3/idna/codec.py 48
-rw-r--r-- contrib/python/idna/py3/idna/compat.py 26
-rw-r--r-- contrib/python/idna/py3/idna/core.py 207
-rw-r--r-- contrib/python/idna/py3/idna/package_data.py 2
-rw-r--r-- contrib/python/idna/py3/tests/test_idna.py 139
-rw-r--r-- contrib/python/idna/py3/ya.make 2
7 files changed, 354 insertions, 72 deletions
diff --git a/contrib/python/idna/py3/.dist-info/METADATA b/contrib/python/idna/py3/.dist-info/METADATA
index b30fccc5b67..b3024f7cbec 100644
--- a/contrib/python/idna/py3/.dist-info/METADATA
+++ b/contrib/python/idna/py3/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: idna
-Version: 3.13
+Version: 3.14
Summary: Internationalized Domain Names in Applications (IDNA)
Author-email: Kim Davies <[email protected]>
Requires-Python: >=3.8
diff --git a/contrib/python/idna/py3/idna/codec.py b/contrib/python/idna/py3/idna/codec.py
index cbc2e4ff4ec..befba40ce59 100644
--- a/contrib/python/idna/py3/idna/codec.py
+++ b/contrib/python/idna/py3/idna/codec.py
@@ -8,7 +8,17 @@ _unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]")
class Codec(codecs.Codec):
- def encode(self, data: str, errors: str = "strict") -> Tuple[bytes, int]:
+ """Stateless IDNA 2008 codec.
+
+ Implements the :class:`codecs.Codec` protocol so that the whole-domain
+ encoder (:func:`idna.encode`) and decoder (:func:`idna.decode`) are
+ accessible through the standard codec machinery as ``"idna2008"``.
+
+ Only the ``"strict"`` error handler is supported; any other handler
+ raises :exc:`~idna.IDNAError`.
+ """
+
+ def encode(self, data: str, errors: str = "strict") -> Tuple[bytes, int]: # ty: ignore[invalid-method-override]
if errors != "strict":
raise IDNAError('Unsupported error handling "{}"'.format(errors))
@@ -17,7 +27,7 @@ class Codec(codecs.Codec):
return encode(data), len(data)
- def decode(self, data: bytes, errors: str = "strict") -> Tuple[str, int]:
+ def decode(self, data: bytes, errors: str = "strict") -> Tuple[str, int]: # ty: ignore[invalid-method-override]
if errors != "strict":
raise IDNAError('Unsupported error handling "{}"'.format(errors))
@@ -28,7 +38,18 @@ class Codec(codecs.Codec):
class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
- def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]:
+ """Incremental IDNA 2008 encoder.
+
+ Buffers a partial trailing label across calls until either the next
+ label separator is seen or ``final=True``, so that streamed input is
+ encoded one whole label at a time. Any of the four Unicode label
+ separators (``U+002E``, ``U+3002``, ``U+FF0E``, ``U+FF61``) ends a
+ label; the result always uses ``U+002E`` as the separator.
+
+ Only the ``"strict"`` error handler is supported.
+ """
+
+ def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]: # ty: ignore[invalid-method-override]
if errors != "strict":
raise IDNAError('Unsupported error handling "{}"'.format(errors))
@@ -62,7 +83,16 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
- def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]:
+ """Incremental IDNA 2008 decoder.
+
+ Buffers a partial trailing label across calls until either the next
+ label separator is seen or ``final=True``, so that streamed input is
+ decoded one whole label at a time.
+
+ Only the ``"strict"`` error handler is supported.
+ """
+
+ def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]: # ty: ignore[invalid-method-override]
if errors != "strict":
raise IDNAError('Unsupported error handling "{}"'.format(errors))
@@ -106,6 +136,16 @@ class StreamReader(Codec, codecs.StreamReader):
def search_function(name: str) -> Optional[codecs.CodecInfo]:
+ """Codec search function registered with :mod:`codecs`.
+
+ Returns a :class:`codecs.CodecInfo` for the ``"idna2008"`` codec name
+ so that ``str.encode("idna2008")`` and ``bytes.decode("idna2008")``
+ invoke the IDNA 2008 codec defined in this module.
+
+ :param name: The codec name being looked up.
+ :returns: A :class:`codecs.CodecInfo` instance if ``name`` is
+ ``"idna2008"``, otherwise ``None``.
+ """
if name != "idna2008":
return None
return codecs.CodecInfo(
diff --git a/contrib/python/idna/py3/idna/compat.py b/contrib/python/idna/py3/idna/compat.py
index 1df9f2a70e6..1d01e3d9750 100644
--- a/contrib/python/idna/py3/idna/compat.py
+++ b/contrib/python/idna/py3/idna/compat.py
@@ -4,12 +4,38 @@ from .core import decode, encode
def ToASCII(label: str) -> bytes:
+ """Compatibility shim for :rfc:`3490` ``ToASCII``.
+
+ Delegates to :func:`idna.encode` (IDNA 2008). Provided to ease porting
+ of code written against the legacy :mod:`encodings.idna` API; new code
+ should call :func:`idna.encode` directly.
+
+ :param label: The label or domain to encode.
+ :returns: The encoded form as ASCII :class:`bytes`.
+ """
return encode(label)
def ToUnicode(label: Union[bytes, bytearray]) -> str:
+ """Compatibility shim for :rfc:`3490` ``ToUnicode``.
+
+ Delegates to :func:`idna.decode` (IDNA 2008). Provided to ease porting
+ of code written against the legacy :mod:`encodings.idna` API; new code
+ should call :func:`idna.decode` directly.
+
+ :param label: The label or domain to decode.
+ :returns: The decoded Unicode form.
+ """
return decode(label)
def nameprep(s: Any) -> None:
+ """Stub for :rfc:`3491` Nameprep, which is not used by IDNA 2008.
+
+ IDNA 2008 (:rfc:`5891`) replaces Nameprep with the per-codepoint
+ validity classes from :rfc:`5892`; this function exists only to
+ return a clear error if legacy code attempts to call it.
+
+ :raises NotImplementedError: Always.
+ """
raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol")
diff --git a/contrib/python/idna/py3/idna/core.py b/contrib/python/idna/py3/idna/core.py
index e6ac1f984fd..1804ccaf758 100644
--- a/contrib/python/idna/py3/idna/core.py
+++ b/contrib/python/idna/py3/idna/core.py
@@ -57,18 +57,54 @@ def _unot(s: int) -> str:
def valid_label_length(label: Union[bytes, str]) -> bool:
+ """Check that a label does not exceed the maximum permitted length.
+
+ Per :rfc:`1035` (and :rfc:`5891` §4.2.4) a DNS label must not exceed
+ 63 octets. The argument may be either a :class:`str` (a U-label, where
+ length is measured in characters) or :class:`bytes` (an A-label, where
+ length is measured in octets).
+
+ :param label: The label to check.
+ :returns: ``True`` if the label is within the length limit, otherwise
+ ``False``.
+ """
if len(label) > 63:
return False
return True
def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool:
+ """Check that a full domain name does not exceed the maximum length.
+
+ Per :rfc:`1035`, a domain name is limited to 253 octets when no trailing
+ dot is present, or 254 octets when one is included.
+
+ :param label: The full (possibly multi-label) domain name.
+ :param trailing_dot: ``True`` if ``label`` includes a trailing ``.``.
+ :returns: ``True`` if the domain is within the length limit, otherwise
+ ``False``.
+ """
if len(label) > (254 if trailing_dot else 253):
return False
return True
def check_bidi(label: str, check_ltr: bool = False) -> bool:
+ """Validate the Bidi Rule from :rfc:`5893` for a single label.
+
+ The Bidi Rule constrains how bidirectional characters (Hebrew, Arabic,
+ etc.) may appear within a label. By default the check is only applied
+ when the label contains at least one right-to-left character (Unicode
+ bidirectional categories ``R``, ``AL``, or ``AN``); set ``check_ltr``
+ to ``True`` to apply it to LTR-only labels as well.
+
+ :param label: The label to validate, as a Unicode string.
+ :param check_ltr: If ``True``, apply the rules even when the label
+ contains no RTL characters.
+ :returns: ``True`` if the label satisfies the Bidi Rule.
+ :raises IDNABidiError: If any of Bidi Rule conditions 1-6 are violated,
+ or if the directional category of a codepoint cannot be determined.
+ """
# Bidi rules should only be applied if string contains RTL characters
bidi_label = False
for idx, cp in enumerate(label, 1):
@@ -139,12 +175,31 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool:
def check_initial_combiner(label: str) -> bool:
+ """Reject labels that begin with a combining mark.
+
+ Per :rfc:`5891` §4.2.3.2 a label must not start with a character of
+ Unicode general category ``M`` (Mark).
+
+ :param label: The label to check.
+ :returns: ``True`` if the first character is not a combining mark.
+ :raises IDNAError: If the label begins with a combining character.
+ """
if unicodedata.category(label[0])[0] == "M":
raise IDNAError("Label begins with an illegal combining character")
return True
def check_hyphen_ok(label: str) -> bool:
+ """Validate the hyphen restrictions for a label.
+
+ Per :rfc:`5891` §4.2.3.1 a label must not start or end with a hyphen
+ (``U+002D``), and must not have hyphens in both the third and fourth
+ positions (the prefix reserved for A-labels).
+
+ :param label: The label to check.
+ :returns: ``True`` if the hyphen restrictions are satisfied.
+ :raises IDNAError: If any of the hyphen restrictions are violated.
+ """
if label[2:4] == "--":
raise IDNAError("Label has disallowed hyphens in 3rd and 4th position")
if label[0] == "-" or label[-1] == "-":
@@ -153,11 +208,30 @@ def check_hyphen_ok(label: str) -> bool:
def check_nfc(label: str) -> None:
+ """Require that a label is in Unicode Normalization Form C.
+
+ :param label: The label to check.
+ :raises IDNAError: If ``label`` differs from its NFC normalisation.
+ """
if unicodedata.normalize("NFC", label) != label:
raise IDNAError("Label must be in Normalization Form C")
def valid_contextj(label: str, pos: int) -> bool:
+ """Validate the CONTEXTJ rules from :rfc:`5892` Appendix A.
+
+ These rules govern the contextual use of the joiner codepoints
+ ``U+200C`` (ZERO WIDTH NON-JOINER, Appendix A.1) and ``U+200D``
+ (ZERO WIDTH JOINER, Appendix A.2) within a label.
+
+ :param label: The label containing the codepoint.
+ :param pos: Index of the joiner codepoint within ``label``.
+ :returns: ``True`` if the codepoint at ``pos`` satisfies its CONTEXTJ
+ rule, ``False`` otherwise (including when the codepoint at
+ ``pos`` is not a recognised joiner).
+ :raises ValueError: If an adjacent codepoint has no Unicode name when
+ determining its combining class.
+ """
cp_value = ord(label[pos])
if cp_value == 0x200C:
@@ -202,6 +276,19 @@ def valid_contextj(label: str, pos: int) -> bool:
def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
+ """Validate the CONTEXTO rules from :rfc:`5892` Appendix A.
+
+ Covers the contextual rules for codepoints such as MIDDLE DOT
+ (``U+00B7``), Greek lower numeral sign, Hebrew punctuation, Katakana
+ middle dot, and the Arabic-Indic / Extended Arabic-Indic digit ranges.
+
+ :param label: The label containing the codepoint.
+ :param pos: Index of the codepoint within ``label``.
+ :param exception: Reserved for forward compatibility; currently unused.
+ :returns: ``True`` if the codepoint at ``pos`` satisfies its CONTEXTO
+ rule, ``False`` otherwise (including when the codepoint is not a
+ recognised CONTEXTO codepoint).
+ """
cp_value = ord(label[pos])
if cp_value == 0x00B7:
@@ -244,6 +331,23 @@ def valid_contexto(label: str, pos: int, exception: bool = False) -> bool:
def check_label(label: Union[str, bytes, bytearray]) -> None:
+ """Run the full set of IDNA 2008 validity checks on a single label.
+
+ Applies, in order: NFC normalisation (:func:`check_nfc`), hyphen
+ restrictions (:func:`check_hyphen_ok`), the no-leading-combiner rule
+ (:func:`check_initial_combiner`), per-codepoint validity (PVALID,
+ CONTEXTJ, CONTEXTO classes from :rfc:`5892`), and the Bidi Rule
+ (:func:`check_bidi`).
+
+ :param label: The label to validate. ``bytes`` or ``bytearray`` input
+ is decoded as UTF-8 first.
+ :raises IDNAError: If the label is empty or fails a structural rule.
+ :raises InvalidCodepoint: If the label contains a DISALLOWED or
+ UNASSIGNED codepoint.
+ :raises InvalidCodepointContext: If a CONTEXTJ or CONTEXTO codepoint
+ is not valid in its context.
+ :raises IDNABidiError: If the Bidi Rule is violated.
+ """
if isinstance(label, (bytes, bytearray)):
label = label.decode("utf-8")
if len(label) == 0:
@@ -283,6 +387,18 @@ def check_label(label: Union[str, bytes, bytearray]) -> None:
def alabel(label: str) -> bytes:
+ """Convert a single U-label into its A-label form.
+
+ The result is the ASCII-Compatible Encoding (ACE) form per :rfc:`5891`
+ §4: the label is validated, Punycode-encoded, and prefixed with
+ ``xn--``. Pure ASCII labels that are already valid IDNA labels are
+ returned unchanged (as :class:`bytes`).
+
+ :param label: The label to convert, as a Unicode string.
+ :returns: The A-label as ASCII-encoded :class:`bytes`.
+ :raises IDNAError: If the label is invalid or the resulting A-label
+ exceeds 63 octets.
+ """
try:
label_bytes = label.encode("ascii")
ulabel(label_bytes)
@@ -302,6 +418,18 @@ def alabel(label: str) -> bytes:
def ulabel(label: Union[str, bytes, bytearray]) -> str:
+ """Convert a single A-label into its U-label form.
+
+ Performs the inverse of :func:`alabel`: an ``xn--``-prefixed label is
+ Punycode-decoded and validated. Labels that are already Unicode (or
+ plain ASCII without the ACE prefix) are validated and returned as a
+ Unicode string.
+
+ :param label: The label to convert. ``bytes`` or ``bytearray`` input
+ is treated as ASCII.
+ :returns: The U-label as a Unicode string.
+ :raises IDNAError: If the label is malformed or fails validation.
+ """
if not isinstance(label, (bytes, bytearray)):
try:
label_bytes = label.encode("ascii")
@@ -331,7 +459,24 @@ def ulabel(label: Union[str, bytes, bytearray]) -> str:
def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str:
- """Re-map the characters in the string according to UTS46 processing."""
+ """Apply the UTS #46 character mapping to a domain string.
+
+ Implements the mapping table from `UTS #46 §4
+ <https://www.unicode.org/reports/tr46/>`_: each character is kept,
+ replaced, or rejected based on its status (``V``, ``M``, ``D``, ``3``,
+ ``I``). The result is returned in Normalisation Form C.
+
+ :param domain: The full domain name to remap.
+ :param std3_rules: If ``True``, apply the stricter STD3 ASCII rules
+ (status ``3`` codepoints raise instead of being kept or mapped).
+ :param transitional: If ``True``, use transitional processing (status
+ ``D`` codepoints are mapped instead of kept). Transitional
+ processing has been removed from UTS #46 and this option is
+ retained only for backwards compatibility.
+ :returns: The remapped domain, in Normalisation Form C.
+ :raises InvalidCodepoint: If the domain contains a disallowed
+ codepoint under the chosen rules.
+ """
from .uts46data import uts46data
output = ""
@@ -342,7 +487,7 @@ def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False
status = uts46row[1]
replacement: Optional[str] = None
if len(uts46row) == 3:
- replacement = uts46row[2]
+ replacement = uts46row[2] # ty: ignore[index-out-of-bounds]
if status == "V" or (status == "D" and not transitional) or (status == "3" and not std3_rules and replacement is None):
output += char
elif replacement is not None and (
@@ -366,6 +511,27 @@ def encode(
std3_rules: bool = False,
transitional: bool = False,
) -> bytes:
+ """Encode a Unicode domain name into its ASCII (A-label) form.
+
+ Splits the input on label separators (only ``U+002E`` if ``strict`` is
+ set; otherwise also IDEOGRAPHIC FULL STOP ``U+3002``, FULLWIDTH FULL
+ STOP ``U+FF0E``, and HALFWIDTH IDEOGRAPHIC FULL STOP ``U+FF61``),
+ encodes each label with :func:`alabel`, and rejoins them with ``.``.
+ Optionally pre-processes the input through :func:`uts46_remap`.
+
+ :param s: The domain name to encode.
+ :param strict: If ``True``, only ``U+002E`` is recognised as a label
+ separator.
+ :param uts46: If ``True``, apply UTS #46 mapping before encoding.
+ :param std3_rules: Forwarded to :func:`uts46_remap` when ``uts46`` is
+ ``True``.
+ :param transitional: Forwarded to :func:`uts46_remap` when ``uts46``
+ is ``True``. Deprecated: emits a :class:`DeprecationWarning` and
+ will be removed in a future version.
+ :returns: The encoded domain as ASCII :class:`bytes`.
+ :raises IDNAError: If the domain is empty, contains an invalid label,
+ or exceeds the maximum domain length.
+ """
if transitional:
warnings.warn(
"Transitional processing has been removed from UTS #46. "
@@ -376,10 +542,16 @@ def encode(
if not isinstance(s, str):
try:
s = str(s, "ascii")
- except UnicodeDecodeError:
+ except (UnicodeDecodeError, TypeError):
raise IDNAError("should pass a unicode string to the function rather than a byte string.")
if uts46:
s = uts46_remap(s, std3_rules, transitional)
+
+ # Reject inputs that exceed the maximum DNS domain length up-front
+ # to avoid expensive computation on long inputs.
+ if not valid_string_length(s, trailing_dot=True):
+ raise IDNAError("Domain too long")
+
trailing_dot = False
result = []
if strict:
@@ -411,13 +583,34 @@ def decode(
uts46: bool = False,
std3_rules: bool = False,
) -> str:
- try:
- if not isinstance(s, str):
+ """Decode an A-label-encoded domain name back to Unicode.
+
+ Splits the input on label separators (see :func:`encode` for the
+ rules), decodes each label with :func:`ulabel`, and rejoins them
+ with ``.``. Optionally pre-processes the input through
+ :func:`uts46_remap`.
+
+ :param s: The domain name to decode.
+ :param strict: If ``True``, only ``U+002E`` is recognised as a label
+ separator.
+ :param uts46: If ``True``, apply UTS #46 mapping before decoding.
+ :param std3_rules: Forwarded to :func:`uts46_remap` when ``uts46`` is
+ ``True``.
+ :returns: The decoded domain as a Unicode string.
+ :raises IDNAError: If the input is not valid ASCII, contains an
+ invalid label, is empty, or exceeds the maximum domain length.
+ """
+ if not isinstance(s, str):
+ try:
s = str(s, "ascii")
- except UnicodeDecodeError:
- raise IDNAError("Invalid ASCII in A-label")
+ except (UnicodeDecodeError, TypeError):
+ raise IDNAError("Invalid ASCII in A-label")
if uts46:
s = uts46_remap(s, std3_rules, False)
+ # Reject inputs that exceed the maximum DNS domain length up-front
+ # to avoid expensive computation on long inputs.
+ if not valid_string_length(s, trailing_dot=True):
+ raise IDNAError("Domain too long")
trailing_dot = False
result = []
if not strict:
diff --git a/contrib/python/idna/py3/idna/package_data.py b/contrib/python/idna/py3/idna/package_data.py
index fc1788cdab7..2bd6cdb8838 100644
--- a/contrib/python/idna/py3/idna/package_data.py
+++ b/contrib/python/idna/py3/idna/package_data.py
@@ -1 +1 @@
-__version__ = "3.13"
+__version__ = "3.14"
diff --git a/contrib/python/idna/py3/tests/test_idna.py b/contrib/python/idna/py3/tests/test_idna.py
index 1ac3432efb9..3d6a25400e2 100644
--- a/contrib/python/idna/py3/tests/test_idna.py
+++ b/contrib/python/idna/py3/tests/test_idna.py
@@ -2,85 +2,99 @@
import unittest
import warnings
+from typing import List, Tuple
import idna
class IDNATests(unittest.TestCase):
def setUp(self):
- self.tld_strings = [
- ["\u6d4b\u8bd5", b"xn--0zwm56d"],
- ["\u092a\u0930\u0940\u0915\u094d\u0937\u093e", b"xn--11b5bs3a9aj6g"],
- ["\ud55c\uad6d", b"xn--3e0b707e"],
- ["\u09ad\u09be\u09b0\u09a4", b"xn--45brj9c"],
- ["\u09ac\u09be\u0982\u09b2\u09be", b"xn--54b7fta0cc"],
- [
+ self.tld_strings: List[Tuple[str, bytes]] = [
+ ("\u6d4b\u8bd5", b"xn--0zwm56d"),
+ ("\u092a\u0930\u0940\u0915\u094d\u0937\u093e", b"xn--11b5bs3a9aj6g"),
+ ("\ud55c\uad6d", b"xn--3e0b707e"),
+ ("\u09ad\u09be\u09b0\u09a4", b"xn--45brj9c"),
+ ("\u09ac\u09be\u0982\u09b2\u09be", b"xn--54b7fta0cc"),
+ (
"\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435",
b"xn--80akhbyknj4f",
- ],
- ["\u0441\u0440\u0431", b"xn--90a3ac"],
- ["\ud14c\uc2a4\ud2b8", b"xn--9t4b11yi5a"],
- [
+ ),
+ ("\u0441\u0440\u0431", b"xn--90a3ac"),
+ ("\ud14c\uc2a4\ud2b8", b"xn--9t4b11yi5a"),
+ (
"\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd",
b"xn--clchc0ea0b2g2a9gcd",
- ],
- ["\u05d8\u05e2\u05e1\u05d8", b"xn--deba0ad"],
- ["\u4e2d\u56fd", b"xn--fiqs8s"],
- ["\u4e2d\u570b", b"xn--fiqz9s"],
- ["\u0c2d\u0c3e\u0c30\u0c24\u0c4d", b"xn--fpcrj9c3d"],
- ["\u0dbd\u0d82\u0d9a\u0dcf", b"xn--fzc2c9e2c"],
- ["\u6e2c\u8a66", b"xn--g6w251d"],
- ["\u0aad\u0abe\u0ab0\u0aa4", b"xn--gecrj9c"],
- ["\u092d\u093e\u0930\u0924", b"xn--h2brj9c"],
- ["\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc", b"xn--hgbk6aj7f53bba"],
- ["\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8", b"xn--hlcj6aya9esc7a"],
- ["\u0443\u043a\u0440", b"xn--j1amh"],
- ["\u9999\u6e2f", b"xn--j6w193g"],
- ["\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae", b"xn--jxalpdlp"],
- ["\u0625\u062e\u062a\u0628\u0627\u0631", b"xn--kgbechtv"],
- ["\u53f0\u6e7e", b"xn--kprw13d"],
- ["\u53f0\u7063", b"xn--kpry57d"],
- ["\u0627\u0644\u062c\u0632\u0627\u0626\u0631", b"xn--lgbbat1ad8j"],
- ["\u0639\u0645\u0627\u0646", b"xn--mgb9awbf"],
- ["\u0627\u06cc\u0631\u0627\u0646", b"xn--mgba3a4f16a"],
- ["\u0627\u0645\u0627\u0631\u0627\u062a", b"xn--mgbaam7a8h"],
- ["\u067e\u0627\u06a9\u0633\u062a\u0627\u0646", b"xn--mgbai9azgqp6j"],
- ["\u0627\u0644\u0627\u0631\u062f\u0646", b"xn--mgbayh7gpa"],
- ["\u0628\u06be\u0627\u0631\u062a", b"xn--mgbbh1a71e"],
- ["\u0627\u0644\u0645\u063a\u0631\u0628", b"xn--mgbc0a9azcg"],
- ["\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629", b"xn--mgberp4a5d4ar"],
- ["\u10d2\u10d4", b"xn--node"],
- ["\u0e44\u0e17\u0e22", b"xn--o3cw4h"],
- ["\u0633\u0648\u0631\u064a\u0629", b"xn--ogbpf8fl"],
- ["\u0440\u0444", b"xn--p1ai"],
- ["\u062a\u0648\u0646\u0633", b"xn--pgbs0dh"],
- ["\u0a2d\u0a3e\u0a30\u0a24", b"xn--s9brj9c"],
- ["\u0645\u0635\u0631", b"xn--wgbh1c"],
- ["\u0642\u0637\u0631", b"xn--wgbl6a"],
- ["\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8", b"xn--xkc2al3hye2a"],
- ["\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe", b"xn--xkc2dl3a5ee0h"],
- ["\u65b0\u52a0\u5761", b"xn--yfro4i67o"],
- ["\u0641\u0644\u0633\u0637\u064a\u0646", b"xn--ygbi2ammx"],
- ["\u30c6\u30b9\u30c8", b"xn--zckzah"],
- ["\u049b\u0430\u0437", b"xn--80ao21a"],
- ["\u0645\u0644\u064a\u0633\u064a\u0627", b"xn--mgbx4cd0ab"],
- ["\u043c\u043e\u043d", b"xn--l1acc"],
- ["\u0633\u0648\u062f\u0627\u0646", b"xn--mgbpl2fh"],
+ ),
+ ("\u05d8\u05e2\u05e1\u05d8", b"xn--deba0ad"),
+ ("\u4e2d\u56fd", b"xn--fiqs8s"),
+ ("\u4e2d\u570b", b"xn--fiqz9s"),
+ ("\u0c2d\u0c3e\u0c30\u0c24\u0c4d", b"xn--fpcrj9c3d"),
+ ("\u0dbd\u0d82\u0d9a\u0dcf", b"xn--fzc2c9e2c"),
+ ("\u6e2c\u8a66", b"xn--g6w251d"),
+ ("\u0aad\u0abe\u0ab0\u0aa4", b"xn--gecrj9c"),
+ ("\u092d\u093e\u0930\u0924", b"xn--h2brj9c"),
+ ("\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc", b"xn--hgbk6aj7f53bba"),
+ ("\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8", b"xn--hlcj6aya9esc7a"),
+ ("\u0443\u043a\u0440", b"xn--j1amh"),
+ ("\u9999\u6e2f", b"xn--j6w193g"),
+ ("\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae", b"xn--jxalpdlp"),
+ ("\u0625\u062e\u062a\u0628\u0627\u0631", b"xn--kgbechtv"),
+ ("\u53f0\u6e7e", b"xn--kprw13d"),
+ ("\u53f0\u7063", b"xn--kpry57d"),
+ ("\u0627\u0644\u062c\u0632\u0627\u0626\u0631", b"xn--lgbbat1ad8j"),
+ ("\u0639\u0645\u0627\u0646", b"xn--mgb9awbf"),
+ ("\u0627\u06cc\u0631\u0627\u0646", b"xn--mgba3a4f16a"),
+ ("\u0627\u0645\u0627\u0631\u0627\u062a", b"xn--mgbaam7a8h"),
+ ("\u067e\u0627\u06a9\u0633\u062a\u0627\u0646", b"xn--mgbai9azgqp6j"),
+ ("\u0627\u0644\u0627\u0631\u062f\u0646", b"xn--mgbayh7gpa"),
+ ("\u0628\u06be\u0627\u0631\u062a", b"xn--mgbbh1a71e"),
+ ("\u0627\u0644\u0645\u063a\u0631\u0628", b"xn--mgbc0a9azcg"),
+ ("\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629", b"xn--mgberp4a5d4ar"),
+ ("\u10d2\u10d4", b"xn--node"),
+ ("\u0e44\u0e17\u0e22", b"xn--o3cw4h"),
+ ("\u0633\u0648\u0631\u064a\u0629", b"xn--ogbpf8fl"),
+ ("\u0440\u0444", b"xn--p1ai"),
+ ("\u062a\u0648\u0646\u0633", b"xn--pgbs0dh"),
+ ("\u0a2d\u0a3e\u0a30\u0a24", b"xn--s9brj9c"),
+ ("\u0645\u0635\u0631", b"xn--wgbh1c"),
+ ("\u0642\u0637\u0631", b"xn--wgbl6a"),
+ ("\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8", b"xn--xkc2al3hye2a"),
+ ("\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe", b"xn--xkc2dl3a5ee0h"),
+ ("\u65b0\u52a0\u5761", b"xn--yfro4i67o"),
+ ("\u0641\u0644\u0633\u0637\u064a\u0646", b"xn--ygbi2ammx"),
+ ("\u30c6\u30b9\u30c8", b"xn--zckzah"),
+ ("\u049b\u0430\u0437", b"xn--80ao21a"),
+ ("\u0645\u0644\u064a\u0633\u064a\u0627", b"xn--mgbx4cd0ab"),
+ ("\u043c\u043e\u043d", b"xn--l1acc"),
+ ("\u0633\u0648\u062f\u0627\u0646", b"xn--mgbpl2fh"),
]
def testIDNTLDALabels(self):
- for ulabel, alabel in self.tld_strings:
- self.assertEqual(alabel, idna.alabel(ulabel))
+ for u, a in self.tld_strings:
+ self.assertEqual(a, idna.alabel(u))
def testIDNTLDULabels(self):
- for ulabel, alabel in self.tld_strings:
- self.assertEqual(ulabel, idna.ulabel(alabel))
+ for u, a in self.tld_strings:
+ self.assertEqual(u, idna.ulabel(a))
def test_valid_label_length(self):
self.assertTrue(idna.valid_label_length("a" * 63))
self.assertFalse(idna.valid_label_length("a" * 64))
self.assertRaises(idna.IDNAError, idna.encode, "a" * 64)
+ def test_oversized_input_rejected_promptly(self):
+ # GHSA-65pc-fj4g-8rjx: encode/decode must reject inputs that
+ # exceed the maximum DNS domain length before per-codepoint
+ # validation runs, so labels dominated by CONTEXTO codepoints
+ # cannot drive validation into quadratic time.
+ import time
+
+ for payload in ("٠" * 8000, "・" * 8000 + "漢"):
+ start = time.perf_counter()
+ self.assertRaises(idna.IDNAError, idna.encode, payload)
+ self.assertRaises(idna.IDNAError, idna.decode, payload)
+ self.assertLess(time.perf_counter() - start, 1.0)
+
def test_check_bidi(self):
la = "\u0061"
r = "\u05d0"
@@ -303,6 +317,15 @@ class IDNATests(unittest.TestCase):
idna.encode("example.com", uts46=True)
self.assertEqual(len(w), 0)
+ def test_encode_decode_invalid_input_type(self):
+ # encode() and decode() are documented to raise IDNAError on bad
+ # input. Inputs that are not str, bytes, or bytes-like used to leak
+ # a raw TypeError out of str(s, "ascii"); they should be wrapped in
+ # IDNAError just like UnicodeDecodeError already is.
+ for value in (42, None, 1.5, ["a", "b"], {"a": 1}):
+ self.assertRaises(idna.IDNAError, idna.encode, value)
+ self.assertRaises(idna.IDNAError, idna.decode, value)
+
if __name__ == "__main__":
unittest.main()
diff --git a/contrib/python/idna/py3/ya.make b/contrib/python/idna/py3/ya.make
index e36e264a2e4..7efd5bf83ae 100644
--- a/contrib/python/idna/py3/ya.make
+++ b/contrib/python/idna/py3/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(3.13)
+VERSION(3.14)
LICENSE(BSD-3-Clause)