diff options
| author | robot-piglet <[email protected]> | 2026-05-27 09:47:32 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2026-05-27 10:34:13 +0300 |
| commit | b3fe130270ac8af19b010f7cdf80789e68c9b78b (patch) | |
| tree | 35addf5fe01eb515c714ca5a8c67d8ea0c72ee87 /contrib/python | |
| parent | 7a90a58a5986c1826d8ae2f8066a3f4e86c909fc (diff) | |
Intermediate changes
commit_hash:8d7a61a89d145f2cf8e01f4c370f0200b0b078ce
Diffstat (limited to 'contrib/python')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | contrib/python/idna/py3/.dist-info/METADATA | 2 |
| -rw-r--r-- | contrib/python/idna/py3/idna/codec.py | 48 |
| -rw-r--r-- | contrib/python/idna/py3/idna/compat.py | 26 |
| -rw-r--r-- | contrib/python/idna/py3/idna/core.py | 207 |
| -rw-r--r-- | contrib/python/idna/py3/idna/package_data.py | 2 |
| -rw-r--r-- | contrib/python/idna/py3/tests/test_idna.py | 139 |
| -rw-r--r-- | contrib/python/idna/py3/ya.make | 2 |
7 files changed, 354 insertions, 72 deletions
diff --git a/contrib/python/idna/py3/.dist-info/METADATA b/contrib/python/idna/py3/.dist-info/METADATA index b30fccc5b67..b3024f7cbec 100644 --- a/contrib/python/idna/py3/.dist-info/METADATA +++ b/contrib/python/idna/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: idna -Version: 3.13 +Version: 3.14 Summary: Internationalized Domain Names in Applications (IDNA) Author-email: Kim Davies <[email protected]> Requires-Python: >=3.8 diff --git a/contrib/python/idna/py3/idna/codec.py b/contrib/python/idna/py3/idna/codec.py index cbc2e4ff4ec..befba40ce59 100644 --- a/contrib/python/idna/py3/idna/codec.py +++ b/contrib/python/idna/py3/idna/codec.py @@ -8,7 +8,17 @@ _unicode_dots_re = re.compile("[\u002e\u3002\uff0e\uff61]") class Codec(codecs.Codec): - def encode(self, data: str, errors: str = "strict") -> Tuple[bytes, int]: + """Stateless IDNA 2008 codec. + + Implements the :class:`codecs.Codec` protocol so that the whole-domain + encoder (:func:`idna.encode`) and decoder (:func:`idna.decode`) are + accessible through the standard codec machinery as ``"idna2008"``. + + Only the ``"strict"`` error handler is supported; any other handler + raises :exc:`~idna.IDNAError`. + """ + + def encode(self, data: str, errors: str = "strict") -> Tuple[bytes, int]: # ty: ignore[invalid-method-override] if errors != "strict": raise IDNAError('Unsupported error handling "{}"'.format(errors)) @@ -17,7 +27,7 @@ class Codec(codecs.Codec): return encode(data), len(data) - def decode(self, data: bytes, errors: str = "strict") -> Tuple[str, int]: + def decode(self, data: bytes, errors: str = "strict") -> Tuple[str, int]: # ty: ignore[invalid-method-override] if errors != "strict": raise IDNAError('Unsupported error handling "{}"'.format(errors)) @@ -28,7 +38,18 @@ class Codec(codecs.Codec): class IncrementalEncoder(codecs.BufferedIncrementalEncoder): - def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]: + """Incremental IDNA 2008 encoder. 
+ + Buffers a partial trailing label across calls until either the next + label separator is seen or ``final=True``, so that streamed input is + encoded one whole label at a time. Any of the four Unicode label + separators (``U+002E``, ``U+3002``, ``U+FF0E``, ``U+FF61``) ends a + label; the result always uses ``U+002E`` as the separator. + + Only the ``"strict"`` error handler is supported. + """ + + def _buffer_encode(self, data: str, errors: str, final: bool) -> Tuple[bytes, int]: # ty: ignore[invalid-method-override] if errors != "strict": raise IDNAError('Unsupported error handling "{}"'.format(errors)) @@ -62,7 +83,16 @@ class IncrementalEncoder(codecs.BufferedIncrementalEncoder): class IncrementalDecoder(codecs.BufferedIncrementalDecoder): - def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]: + """Incremental IDNA 2008 decoder. + + Buffers a partial trailing label across calls until either the next + label separator is seen or ``final=True``, so that streamed input is + decoded one whole label at a time. + + Only the ``"strict"`` error handler is supported. + """ + + def _buffer_decode(self, data: Any, errors: str, final: bool) -> Tuple[str, int]: # ty: ignore[invalid-method-override] if errors != "strict": raise IDNAError('Unsupported error handling "{}"'.format(errors)) @@ -106,6 +136,16 @@ class StreamReader(Codec, codecs.StreamReader): def search_function(name: str) -> Optional[codecs.CodecInfo]: + """Codec search function registered with :mod:`codecs`. + + Returns a :class:`codecs.CodecInfo` for the ``"idna2008"`` codec name + so that ``str.encode("idna2008")`` and ``bytes.decode("idna2008")`` + invoke the IDNA 2008 codec defined in this module. + + :param name: The codec name being looked up. + :returns: A :class:`codecs.CodecInfo` instance if ``name`` is + ``"idna2008"``, otherwise ``None``. 
+ """ if name != "idna2008": return None return codecs.CodecInfo( diff --git a/contrib/python/idna/py3/idna/compat.py b/contrib/python/idna/py3/idna/compat.py index 1df9f2a70e6..1d01e3d9750 100644 --- a/contrib/python/idna/py3/idna/compat.py +++ b/contrib/python/idna/py3/idna/compat.py @@ -4,12 +4,38 @@ from .core import decode, encode def ToASCII(label: str) -> bytes: + """Compatibility shim for :rfc:`3490` ``ToASCII``. + + Delegates to :func:`idna.encode` (IDNA 2008). Provided to ease porting + of code written against the legacy :mod:`encodings.idna` API; new code + should call :func:`idna.encode` directly. + + :param label: The label or domain to encode. + :returns: The encoded form as ASCII :class:`bytes`. + """ return encode(label) def ToUnicode(label: Union[bytes, bytearray]) -> str: + """Compatibility shim for :rfc:`3490` ``ToUnicode``. + + Delegates to :func:`idna.decode` (IDNA 2008). Provided to ease porting + of code written against the legacy :mod:`encodings.idna` API; new code + should call :func:`idna.decode` directly. + + :param label: The label or domain to decode. + :returns: The decoded Unicode form. + """ return decode(label) def nameprep(s: Any) -> None: + """Stub for :rfc:`3491` Nameprep, which is not used by IDNA 2008. + + IDNA 2008 (:rfc:`5891`) replaces Nameprep with the per-codepoint + validity classes from :rfc:`5892`; this function exists only to + return a clear error if legacy code attempts to call it. + + :raises NotImplementedError: Always. + """ raise NotImplementedError("IDNA 2008 does not utilise nameprep protocol") diff --git a/contrib/python/idna/py3/idna/core.py b/contrib/python/idna/py3/idna/core.py index e6ac1f984fd..1804ccaf758 100644 --- a/contrib/python/idna/py3/idna/core.py +++ b/contrib/python/idna/py3/idna/core.py @@ -57,18 +57,54 @@ def _unot(s: int) -> str: def valid_label_length(label: Union[bytes, str]) -> bool: + """Check that a label does not exceed the maximum permitted length. 
+ + Per :rfc:`1035` (and :rfc:`5891` §4.2.4) a DNS label must not exceed + 63 octets. The argument may be either a :class:`str` (a U-label, where + length is measured in characters) or :class:`bytes` (an A-label, where + length is measured in octets). + + :param label: The label to check. + :returns: ``True`` if the label is within the length limit, otherwise + ``False``. + """ if len(label) > 63: return False return True def valid_string_length(label: Union[bytes, str], trailing_dot: bool) -> bool: + """Check that a full domain name does not exceed the maximum length. + + Per :rfc:`1035`, a domain name is limited to 253 octets when no trailing + dot is present, or 254 octets when one is included. + + :param label: The full (possibly multi-label) domain name. + :param trailing_dot: ``True`` if ``label`` includes a trailing ``.``. + :returns: ``True`` if the domain is within the length limit, otherwise + ``False``. + """ if len(label) > (254 if trailing_dot else 253): return False return True def check_bidi(label: str, check_ltr: bool = False) -> bool: + """Validate the Bidi Rule from :rfc:`5893` for a single label. + + The Bidi Rule constrains how bidirectional characters (Hebrew, Arabic, + etc.) may appear within a label. By default the check is only applied + when the label contains at least one right-to-left character (Unicode + bidirectional categories ``R``, ``AL``, or ``AN``); set ``check_ltr`` + to ``True`` to apply it to LTR-only labels as well. + + :param label: The label to validate, as a Unicode string. + :param check_ltr: If ``True``, apply the rules even when the label + contains no RTL characters. + :returns: ``True`` if the label satisfies the Bidi Rule. + :raises IDNABidiError: If any of Bidi Rule conditions 1-6 are violated, + or if the directional category of a codepoint cannot be determined. 
+ """ # Bidi rules should only be applied if string contains RTL characters bidi_label = False for idx, cp in enumerate(label, 1): @@ -139,12 +175,31 @@ def check_bidi(label: str, check_ltr: bool = False) -> bool: def check_initial_combiner(label: str) -> bool: + """Reject labels that begin with a combining mark. + + Per :rfc:`5891` §4.2.3.2 a label must not start with a character of + Unicode general category ``M`` (Mark). + + :param label: The label to check. + :returns: ``True`` if the first character is not a combining mark. + :raises IDNAError: If the label begins with a combining character. + """ if unicodedata.category(label[0])[0] == "M": raise IDNAError("Label begins with an illegal combining character") return True def check_hyphen_ok(label: str) -> bool: + """Validate the hyphen restrictions for a label. + + Per :rfc:`5891` §4.2.3.1 a label must not start or end with a hyphen + (``U+002D``), and must not have hyphens in both the third and fourth + positions (the prefix reserved for A-labels). + + :param label: The label to check. + :returns: ``True`` if the hyphen restrictions are satisfied. + :raises IDNAError: If any of the hyphen restrictions are violated. + """ if label[2:4] == "--": raise IDNAError("Label has disallowed hyphens in 3rd and 4th position") if label[0] == "-" or label[-1] == "-": @@ -153,11 +208,30 @@ def check_hyphen_ok(label: str) -> bool: def check_nfc(label: str) -> None: + """Require that a label is in Unicode Normalization Form C. + + :param label: The label to check. + :raises IDNAError: If ``label`` differs from its NFC normalisation. + """ if unicodedata.normalize("NFC", label) != label: raise IDNAError("Label must be in Normalization Form C") def valid_contextj(label: str, pos: int) -> bool: + """Validate the CONTEXTJ rules from :rfc:`5892` Appendix A. 
+ + These rules govern the contextual use of the joiner codepoints + ``U+200C`` (ZERO WIDTH NON-JOINER, Appendix A.1) and ``U+200D`` + (ZERO WIDTH JOINER, Appendix A.2) within a label. + + :param label: The label containing the codepoint. + :param pos: Index of the joiner codepoint within ``label``. + :returns: ``True`` if the codepoint at ``pos`` satisfies its CONTEXTJ + rule, ``False`` otherwise (including when the codepoint at + ``pos`` is not a recognised joiner). + :raises ValueError: If an adjacent codepoint has no Unicode name when + determining its combining class. + """ cp_value = ord(label[pos]) if cp_value == 0x200C: @@ -202,6 +276,19 @@ def valid_contextj(label: str, pos: int) -> bool: def valid_contexto(label: str, pos: int, exception: bool = False) -> bool: + """Validate the CONTEXTO rules from :rfc:`5892` Appendix A. + + Covers the contextual rules for codepoints such as MIDDLE DOT + (``U+00B7``), Greek lower numeral sign, Hebrew punctuation, Katakana + middle dot, and the Arabic-Indic / Extended Arabic-Indic digit ranges. + + :param label: The label containing the codepoint. + :param pos: Index of the codepoint within ``label``. + :param exception: Reserved for forward compatibility; currently unused. + :returns: ``True`` if the codepoint at ``pos`` satisfies its CONTEXTO + rule, ``False`` otherwise (including when the codepoint is not a + recognised CONTEXTO codepoint). + """ cp_value = ord(label[pos]) if cp_value == 0x00B7: @@ -244,6 +331,23 @@ def valid_contexto(label: str, pos: int, exception: bool = False) -> bool: def check_label(label: Union[str, bytes, bytearray]) -> None: + """Run the full set of IDNA 2008 validity checks on a single label. + + Applies, in order: NFC normalisation (:func:`check_nfc`), hyphen + restrictions (:func:`check_hyphen_ok`), the no-leading-combiner rule + (:func:`check_initial_combiner`), per-codepoint validity (PVALID, + CONTEXTJ, CONTEXTO classes from :rfc:`5892`), and the Bidi Rule + (:func:`check_bidi`). 
+ + :param label: The label to validate. ``bytes`` or ``bytearray`` input + is decoded as UTF-8 first. + :raises IDNAError: If the label is empty or fails a structural rule. + :raises InvalidCodepoint: If the label contains a DISALLOWED or + UNASSIGNED codepoint. + :raises InvalidCodepointContext: If a CONTEXTJ or CONTEXTO codepoint + is not valid in its context. + :raises IDNABidiError: If the Bidi Rule is violated. + """ if isinstance(label, (bytes, bytearray)): label = label.decode("utf-8") if len(label) == 0: @@ -283,6 +387,18 @@ def check_label(label: Union[str, bytes, bytearray]) -> None: def alabel(label: str) -> bytes: + """Convert a single U-label into its A-label form. + + The result is the ASCII-Compatible Encoding (ACE) form per :rfc:`5891` + §4: the label is validated, Punycode-encoded, and prefixed with + ``xn--``. Pure ASCII labels that are already valid IDNA labels are + returned unchanged (as :class:`bytes`). + + :param label: The label to convert, as a Unicode string. + :returns: The A-label as ASCII-encoded :class:`bytes`. + :raises IDNAError: If the label is invalid or the resulting A-label + exceeds 63 octets. + """ try: label_bytes = label.encode("ascii") ulabel(label_bytes) @@ -302,6 +418,18 @@ def alabel(label: str) -> bytes: def ulabel(label: Union[str, bytes, bytearray]) -> str: + """Convert a single A-label into its U-label form. + + Performs the inverse of :func:`alabel`: an ``xn--``-prefixed label is + Punycode-decoded and validated. Labels that are already Unicode (or + plain ASCII without the ACE prefix) are validated and returned as a + Unicode string. + + :param label: The label to convert. ``bytes`` or ``bytearray`` input + is treated as ASCII. + :returns: The U-label as a Unicode string. + :raises IDNAError: If the label is malformed or fails validation. 
+ """ if not isinstance(label, (bytes, bytearray)): try: label_bytes = label.encode("ascii") @@ -331,7 +459,24 @@ def ulabel(label: Union[str, bytes, bytearray]) -> str: def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False) -> str: - """Re-map the characters in the string according to UTS46 processing.""" + """Apply the UTS #46 character mapping to a domain string. + + Implements the mapping table from `UTS #46 §4 + <https://www.unicode.org/reports/tr46/>`_: each character is kept, + replaced, or rejected based on its status (``V``, ``M``, ``D``, ``3``, + ``I``). The result is returned in Normalisation Form C. + + :param domain: The full domain name to remap. + :param std3_rules: If ``True``, apply the stricter STD3 ASCII rules + (status ``3`` codepoints raise instead of being kept or mapped). + :param transitional: If ``True``, use transitional processing (status + ``D`` codepoints are mapped instead of kept). Transitional + processing has been removed from UTS #46 and this option is + retained only for backwards compatibility. + :returns: The remapped domain, in Normalisation Form C. + :raises InvalidCodepoint: If the domain contains a disallowed + codepoint under the chosen rules. + """ from .uts46data import uts46data output = "" @@ -342,7 +487,7 @@ def uts46_remap(domain: str, std3_rules: bool = True, transitional: bool = False status = uts46row[1] replacement: Optional[str] = None if len(uts46row) == 3: - replacement = uts46row[2] + replacement = uts46row[2] # ty: ignore[index-out-of-bounds] if status == "V" or (status == "D" and not transitional) or (status == "3" and not std3_rules and replacement is None): output += char elif replacement is not None and ( @@ -366,6 +511,27 @@ def encode( std3_rules: bool = False, transitional: bool = False, ) -> bytes: + """Encode a Unicode domain name into its ASCII (A-label) form. 
+ + Splits the input on label separators (only ``U+002E`` if ``strict`` is + set; otherwise also IDEOGRAPHIC FULL STOP ``U+3002``, FULLWIDTH FULL + STOP ``U+FF0E``, and HALFWIDTH IDEOGRAPHIC FULL STOP ``U+FF61``), + encodes each label with :func:`alabel`, and rejoins them with ``.``. + Optionally pre-processes the input through :func:`uts46_remap`. + + :param s: The domain name to encode. + :param strict: If ``True``, only ``U+002E`` is recognised as a label + separator. + :param uts46: If ``True``, apply UTS #46 mapping before encoding. + :param std3_rules: Forwarded to :func:`uts46_remap` when ``uts46`` is + ``True``. + :param transitional: Forwarded to :func:`uts46_remap` when ``uts46`` + is ``True``. Deprecated: emits a :class:`DeprecationWarning` and + will be removed in a future version. + :returns: The encoded domain as ASCII :class:`bytes`. + :raises IDNAError: If the domain is empty, contains an invalid label, + or exceeds the maximum domain length. + """ if transitional: warnings.warn( "Transitional processing has been removed from UTS #46. " @@ -376,10 +542,16 @@ def encode( if not isinstance(s, str): try: s = str(s, "ascii") - except UnicodeDecodeError: + except (UnicodeDecodeError, TypeError): raise IDNAError("should pass a unicode string to the function rather than a byte string.") if uts46: s = uts46_remap(s, std3_rules, transitional) + + # Reject inputs that exceed the maximum DNS domain length up-front + # to avoid expensive computation on long inputs. + if not valid_string_length(s, trailing_dot=True): + raise IDNAError("Domain too long") + trailing_dot = False result = [] if strict: @@ -411,13 +583,34 @@ def decode( uts46: bool = False, std3_rules: bool = False, ) -> str: - try: - if not isinstance(s, str): + """Decode an A-label-encoded domain name back to Unicode. + + Splits the input on label separators (see :func:`encode` for the + rules), decodes each label with :func:`ulabel`, and rejoins them + with ``.``. 
Optionally pre-processes the input through + :func:`uts46_remap`. + + :param s: The domain name to decode. + :param strict: If ``True``, only ``U+002E`` is recognised as a label + separator. + :param uts46: If ``True``, apply UTS #46 mapping before decoding. + :param std3_rules: Forwarded to :func:`uts46_remap` when ``uts46`` is + ``True``. + :returns: The decoded domain as a Unicode string. + :raises IDNAError: If the input is not valid ASCII, contains an + invalid label, or is empty. + """ + if not isinstance(s, str): + try: s = str(s, "ascii") - except UnicodeDecodeError: - raise IDNAError("Invalid ASCII in A-label") + except (UnicodeDecodeError, TypeError): + raise IDNAError("Invalid ASCII in A-label") if uts46: s = uts46_remap(s, std3_rules, False) + # Reject inputs that exceed the maximum DNS domain length up-front + # to avoid expensive computation on long inputs. + if not valid_string_length(s, trailing_dot=True): + raise IDNAError("Domain too long") trailing_dot = False result = [] if not strict: diff --git a/contrib/python/idna/py3/idna/package_data.py b/contrib/python/idna/py3/idna/package_data.py index fc1788cdab7..2bd6cdb8838 100644 --- a/contrib/python/idna/py3/idna/package_data.py +++ b/contrib/python/idna/py3/idna/package_data.py @@ -1 +1 @@ -__version__ = "3.13" +__version__ = "3.14" diff --git a/contrib/python/idna/py3/tests/test_idna.py b/contrib/python/idna/py3/tests/test_idna.py index 1ac3432efb9..3d6a25400e2 100644 --- a/contrib/python/idna/py3/tests/test_idna.py +++ b/contrib/python/idna/py3/tests/test_idna.py @@ -2,85 +2,99 @@ import unittest import warnings +from typing import List, Tuple import idna class IDNATests(unittest.TestCase): def setUp(self): - self.tld_strings = [ - ["\u6d4b\u8bd5", b"xn--0zwm56d"], - ["\u092a\u0930\u0940\u0915\u094d\u0937\u093e", b"xn--11b5bs3a9aj6g"], - ["\ud55c\uad6d", b"xn--3e0b707e"], - ["\u09ad\u09be\u09b0\u09a4", b"xn--45brj9c"], - ["\u09ac\u09be\u0982\u09b2\u09be", b"xn--54b7fta0cc"], - [ + 
self.tld_strings: List[Tuple[str, bytes]] = [ + ("\u6d4b\u8bd5", b"xn--0zwm56d"), + ("\u092a\u0930\u0940\u0915\u094d\u0937\u093e", b"xn--11b5bs3a9aj6g"), + ("\ud55c\uad6d", b"xn--3e0b707e"), + ("\u09ad\u09be\u09b0\u09a4", b"xn--45brj9c"), + ("\u09ac\u09be\u0982\u09b2\u09be", b"xn--54b7fta0cc"), + ( "\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435", b"xn--80akhbyknj4f", - ], - ["\u0441\u0440\u0431", b"xn--90a3ac"], - ["\ud14c\uc2a4\ud2b8", b"xn--9t4b11yi5a"], - [ + ), + ("\u0441\u0440\u0431", b"xn--90a3ac"), + ("\ud14c\uc2a4\ud2b8", b"xn--9t4b11yi5a"), + ( "\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd", b"xn--clchc0ea0b2g2a9gcd", - ], - ["\u05d8\u05e2\u05e1\u05d8", b"xn--deba0ad"], - ["\u4e2d\u56fd", b"xn--fiqs8s"], - ["\u4e2d\u570b", b"xn--fiqz9s"], - ["\u0c2d\u0c3e\u0c30\u0c24\u0c4d", b"xn--fpcrj9c3d"], - ["\u0dbd\u0d82\u0d9a\u0dcf", b"xn--fzc2c9e2c"], - ["\u6e2c\u8a66", b"xn--g6w251d"], - ["\u0aad\u0abe\u0ab0\u0aa4", b"xn--gecrj9c"], - ["\u092d\u093e\u0930\u0924", b"xn--h2brj9c"], - ["\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc", b"xn--hgbk6aj7f53bba"], - ["\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8", b"xn--hlcj6aya9esc7a"], - ["\u0443\u043a\u0440", b"xn--j1amh"], - ["\u9999\u6e2f", b"xn--j6w193g"], - ["\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae", b"xn--jxalpdlp"], - ["\u0625\u062e\u062a\u0628\u0627\u0631", b"xn--kgbechtv"], - ["\u53f0\u6e7e", b"xn--kprw13d"], - ["\u53f0\u7063", b"xn--kpry57d"], - ["\u0627\u0644\u062c\u0632\u0627\u0626\u0631", b"xn--lgbbat1ad8j"], - ["\u0639\u0645\u0627\u0646", b"xn--mgb9awbf"], - ["\u0627\u06cc\u0631\u0627\u0646", b"xn--mgba3a4f16a"], - ["\u0627\u0645\u0627\u0631\u0627\u062a", b"xn--mgbaam7a8h"], - ["\u067e\u0627\u06a9\u0633\u062a\u0627\u0646", b"xn--mgbai9azgqp6j"], - ["\u0627\u0644\u0627\u0631\u062f\u0646", b"xn--mgbayh7gpa"], - ["\u0628\u06be\u0627\u0631\u062a", b"xn--mgbbh1a71e"], - ["\u0627\u0644\u0645\u063a\u0631\u0628", b"xn--mgbc0a9azcg"], - ["\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629", 
b"xn--mgberp4a5d4ar"], - ["\u10d2\u10d4", b"xn--node"], - ["\u0e44\u0e17\u0e22", b"xn--o3cw4h"], - ["\u0633\u0648\u0631\u064a\u0629", b"xn--ogbpf8fl"], - ["\u0440\u0444", b"xn--p1ai"], - ["\u062a\u0648\u0646\u0633", b"xn--pgbs0dh"], - ["\u0a2d\u0a3e\u0a30\u0a24", b"xn--s9brj9c"], - ["\u0645\u0635\u0631", b"xn--wgbh1c"], - ["\u0642\u0637\u0631", b"xn--wgbl6a"], - ["\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8", b"xn--xkc2al3hye2a"], - ["\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe", b"xn--xkc2dl3a5ee0h"], - ["\u65b0\u52a0\u5761", b"xn--yfro4i67o"], - ["\u0641\u0644\u0633\u0637\u064a\u0646", b"xn--ygbi2ammx"], - ["\u30c6\u30b9\u30c8", b"xn--zckzah"], - ["\u049b\u0430\u0437", b"xn--80ao21a"], - ["\u0645\u0644\u064a\u0633\u064a\u0627", b"xn--mgbx4cd0ab"], - ["\u043c\u043e\u043d", b"xn--l1acc"], - ["\u0633\u0648\u062f\u0627\u0646", b"xn--mgbpl2fh"], + ), + ("\u05d8\u05e2\u05e1\u05d8", b"xn--deba0ad"), + ("\u4e2d\u56fd", b"xn--fiqs8s"), + ("\u4e2d\u570b", b"xn--fiqz9s"), + ("\u0c2d\u0c3e\u0c30\u0c24\u0c4d", b"xn--fpcrj9c3d"), + ("\u0dbd\u0d82\u0d9a\u0dcf", b"xn--fzc2c9e2c"), + ("\u6e2c\u8a66", b"xn--g6w251d"), + ("\u0aad\u0abe\u0ab0\u0aa4", b"xn--gecrj9c"), + ("\u092d\u093e\u0930\u0924", b"xn--h2brj9c"), + ("\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc", b"xn--hgbk6aj7f53bba"), + ("\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8", b"xn--hlcj6aya9esc7a"), + ("\u0443\u043a\u0440", b"xn--j1amh"), + ("\u9999\u6e2f", b"xn--j6w193g"), + ("\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae", b"xn--jxalpdlp"), + ("\u0625\u062e\u062a\u0628\u0627\u0631", b"xn--kgbechtv"), + ("\u53f0\u6e7e", b"xn--kprw13d"), + ("\u53f0\u7063", b"xn--kpry57d"), + ("\u0627\u0644\u062c\u0632\u0627\u0626\u0631", b"xn--lgbbat1ad8j"), + ("\u0639\u0645\u0627\u0646", b"xn--mgb9awbf"), + ("\u0627\u06cc\u0631\u0627\u0646", b"xn--mgba3a4f16a"), + ("\u0627\u0645\u0627\u0631\u0627\u062a", b"xn--mgbaam7a8h"), + ("\u067e\u0627\u06a9\u0633\u062a\u0627\u0646", b"xn--mgbai9azgqp6j"), + ("\u0627\u0644\u0627\u0631\u062f\u0646", b"xn--mgbayh7gpa"), 
+ ("\u0628\u06be\u0627\u0631\u062a", b"xn--mgbbh1a71e"), + ("\u0627\u0644\u0645\u063a\u0631\u0628", b"xn--mgbc0a9azcg"), + ("\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629", b"xn--mgberp4a5d4ar"), + ("\u10d2\u10d4", b"xn--node"), + ("\u0e44\u0e17\u0e22", b"xn--o3cw4h"), + ("\u0633\u0648\u0631\u064a\u0629", b"xn--ogbpf8fl"), + ("\u0440\u0444", b"xn--p1ai"), + ("\u062a\u0648\u0646\u0633", b"xn--pgbs0dh"), + ("\u0a2d\u0a3e\u0a30\u0a24", b"xn--s9brj9c"), + ("\u0645\u0635\u0631", b"xn--wgbh1c"), + ("\u0642\u0637\u0631", b"xn--wgbl6a"), + ("\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8", b"xn--xkc2al3hye2a"), + ("\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe", b"xn--xkc2dl3a5ee0h"), + ("\u65b0\u52a0\u5761", b"xn--yfro4i67o"), + ("\u0641\u0644\u0633\u0637\u064a\u0646", b"xn--ygbi2ammx"), + ("\u30c6\u30b9\u30c8", b"xn--zckzah"), + ("\u049b\u0430\u0437", b"xn--80ao21a"), + ("\u0645\u0644\u064a\u0633\u064a\u0627", b"xn--mgbx4cd0ab"), + ("\u043c\u043e\u043d", b"xn--l1acc"), + ("\u0633\u0648\u062f\u0627\u0646", b"xn--mgbpl2fh"), ] def testIDNTLDALabels(self): - for ulabel, alabel in self.tld_strings: - self.assertEqual(alabel, idna.alabel(ulabel)) + for u, a in self.tld_strings: + self.assertEqual(a, idna.alabel(u)) def testIDNTLDULabels(self): - for ulabel, alabel in self.tld_strings: - self.assertEqual(ulabel, idna.ulabel(alabel)) + for u, a in self.tld_strings: + self.assertEqual(u, idna.ulabel(a)) def test_valid_label_length(self): self.assertTrue(idna.valid_label_length("a" * 63)) self.assertFalse(idna.valid_label_length("a" * 64)) self.assertRaises(idna.IDNAError, idna.encode, "a" * 64) + def test_oversized_input_rejected_promptly(self): + # GHSA-65pc-fj4g-8rjx: encode/decode must reject inputs that + # exceed the maximum DNS domain length before per-codepoint + # validation runs, so labels dominated by CONTEXTO codepoints + # cannot drive validation into quadratic time. 
+ import time + + for payload in ("٠" * 8000, "・" * 8000 + "漢"): + start = time.perf_counter() + self.assertRaises(idna.IDNAError, idna.encode, payload) + self.assertRaises(idna.IDNAError, idna.decode, payload) + self.assertLess(time.perf_counter() - start, 1.0) + def test_check_bidi(self): la = "\u0061" r = "\u05d0" @@ -303,6 +317,15 @@ class IDNATests(unittest.TestCase): idna.encode("example.com", uts46=True) self.assertEqual(len(w), 0) + def test_encode_decode_invalid_input_type(self): + # encode() and decode() are documented to raise IDNAError on bad + # input. Inputs that are not str, bytes, or bytes-like used to leak + # a raw TypeError out of str(s, "ascii"); they should be wrapped in + # IDNAError just like UnicodeDecodeError already is. + for value in (42, None, 1.5, ["a", "b"], {"a": 1}): + self.assertRaises(idna.IDNAError, idna.encode, value) + self.assertRaises(idna.IDNAError, idna.decode, value) + if __name__ == "__main__": unittest.main() diff --git a/contrib/python/idna/py3/ya.make b/contrib/python/idna/py3/ya.make index e36e264a2e4..7efd5bf83ae 100644 --- a/contrib/python/idna/py3/ya.make +++ b/contrib/python/idna/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(3.13) +VERSION(3.14) LICENSE(BSD-3-Clause) |
