diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/python/idna/tests | |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/python/idna/tests')
-rwxr-xr-x | contrib/python/idna/tests/test_idna.py | 528 | ||||
-rw-r--r-- | contrib/python/idna/tests/test_idna_codec.py | 142 | ||||
-rw-r--r-- | contrib/python/idna/tests/test_idna_compat.py | 38 | ||||
-rwxr-xr-x | contrib/python/idna/tests/test_idna_other.py | 28 | ||||
-rwxr-xr-x | contrib/python/idna/tests/test_idna_uts46.py | 290 | ||||
-rw-r--r-- | contrib/python/idna/tests/test_intranges.py | 130 | ||||
-rw-r--r-- | contrib/python/idna/tests/ya.make | 48 |
7 files changed, 602 insertions, 602 deletions
diff --git a/contrib/python/idna/tests/test_idna.py b/contrib/python/idna/tests/test_idna.py index 2d8c45014b..c7870da059 100755 --- a/contrib/python/idna/tests/test_idna.py +++ b/contrib/python/idna/tests/test_idna.py @@ -1,264 +1,264 @@ -#!/usr/bin/env python - -import unittest - -import idna - - -class IDNATests(unittest.TestCase): - - def setUp(self): - self.tld_strings = [ - [u'\u6d4b\u8bd5', b'xn--0zwm56d'], - [u'\u092a\u0930\u0940\u0915\u094d\u0937\u093e', b'xn--11b5bs3a9aj6g'], - [u'\ud55c\uad6d', b'xn--3e0b707e'], - [u'\u09ad\u09be\u09b0\u09a4', b'xn--45brj9c'], - [u'\u09ac\u09be\u0982\u09b2\u09be', b'xn--54b7fta0cc'], - [u'\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435', b'xn--80akhbyknj4f'], - [u'\u0441\u0440\u0431', b'xn--90a3ac'], - [u'\ud14c\uc2a4\ud2b8', b'xn--9t4b11yi5a'], - [u'\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd', b'xn--clchc0ea0b2g2a9gcd'], - [u'\u05d8\u05e2\u05e1\u05d8', b'xn--deba0ad'], - [u'\u4e2d\u56fd', b'xn--fiqs8s'], - [u'\u4e2d\u570b', b'xn--fiqz9s'], - [u'\u0c2d\u0c3e\u0c30\u0c24\u0c4d', b'xn--fpcrj9c3d'], - [u'\u0dbd\u0d82\u0d9a\u0dcf', b'xn--fzc2c9e2c'], - [u'\u6e2c\u8a66', b'xn--g6w251d'], - [u'\u0aad\u0abe\u0ab0\u0aa4', b'xn--gecrj9c'], - [u'\u092d\u093e\u0930\u0924', b'xn--h2brj9c'], - [u'\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc', b'xn--hgbk6aj7f53bba'], - [u'\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8', b'xn--hlcj6aya9esc7a'], - [u'\u0443\u043a\u0440', b'xn--j1amh'], - [u'\u9999\u6e2f', b'xn--j6w193g'], - [u'\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae', b'xn--jxalpdlp'], - [u'\u0625\u062e\u062a\u0628\u0627\u0631', b'xn--kgbechtv'], - [u'\u53f0\u6e7e', b'xn--kprw13d'], - [u'\u53f0\u7063', b'xn--kpry57d'], - [u'\u0627\u0644\u062c\u0632\u0627\u0626\u0631', b'xn--lgbbat1ad8j'], - [u'\u0639\u0645\u0627\u0646', b'xn--mgb9awbf'], - [u'\u0627\u06cc\u0631\u0627\u0646', b'xn--mgba3a4f16a'], - [u'\u0627\u0645\u0627\u0631\u0627\u062a', b'xn--mgbaam7a8h'], - [u'\u067e\u0627\u06a9\u0633\u062a\u0627\u0646', b'xn--mgbai9azgqp6j'], - [u'\u0627\u0644\u0627\u0631\u062f\u0646', b'xn--mgbayh7gpa'], - [u'\u0628\u06be\u0627\u0631\u062a', b'xn--mgbbh1a71e'], - [u'\u0627\u0644\u0645\u063a\u0631\u0628', b'xn--mgbc0a9azcg'], - [u'\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629', b'xn--mgberp4a5d4ar'], - [u'\u10d2\u10d4', b'xn--node'], - [u'\u0e44\u0e17\u0e22', b'xn--o3cw4h'], - [u'\u0633\u0648\u0631\u064a\u0629', b'xn--ogbpf8fl'], - [u'\u0440\u0444', b'xn--p1ai'], - [u'\u062a\u0648\u0646\u0633', b'xn--pgbs0dh'], - [u'\u0a2d\u0a3e\u0a30\u0a24', b'xn--s9brj9c'], - [u'\u0645\u0635\u0631', b'xn--wgbh1c'], - [u'\u0642\u0637\u0631', b'xn--wgbl6a'], - [u'\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8', b'xn--xkc2al3hye2a'], - [u'\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe', b'xn--xkc2dl3a5ee0h'], - [u'\u65b0\u52a0\u5761', b'xn--yfro4i67o'], - [u'\u0641\u0644\u0633\u0637\u064a\u0646', b'xn--ygbi2ammx'], - [u'\u30c6\u30b9\u30c8', b'xn--zckzah'], - [u'\u049b\u0430\u0437', b'xn--80ao21a'], - [u'\u0645\u0644\u064a\u0633\u064a\u0627', b'xn--mgbx4cd0ab'], - [u'\u043c\u043e\u043d', b'xn--l1acc'], - [u'\u0633\u0648\u062f\u0627\u0646', b'xn--mgbpl2fh'], - ] - - def testIDNTLDALabels(self): - - for (ulabel, alabel) in self.tld_strings: - self.assertEqual(alabel, idna.alabel(ulabel)) - - def testIDNTLDULabels(self): - - for (ulabel, alabel) in self.tld_strings: - self.assertEqual(ulabel, idna.ulabel(alabel)) - - def test_valid_label_length(self): - - self.assertTrue(idna.valid_label_length('a' * 63)) - self.assertFalse(idna.valid_label_length('a' * 64)) - self.assertRaises(idna.IDNAError, idna.encode, 'a' * 64) - - def test_check_bidi(self): - - l = u'\u0061' - r = u'\u05d0' - al = u'\u0627' - an = u'\u0660' - en = u'\u0030' - es = u'\u002d' - cs = u'\u002c' - et = u'\u0024' - on = u'\u0021' - bn = u'\u200c' - nsm = u'\u0610' - ws = u'\u0020' - - # RFC 5893 Rule 1 - self.assertTrue(idna.check_bidi(l)) - self.assertTrue(idna.check_bidi(r)) - self.assertTrue(idna.check_bidi(al)) - self.assertRaises(idna.IDNABidiError, idna.check_bidi, an) - - # RFC 5893 Rule 2 - self.assertTrue(idna.check_bidi(r + al)) - self.assertTrue(idna.check_bidi(r + al)) - self.assertTrue(idna.check_bidi(r + an)) - self.assertTrue(idna.check_bidi(r + en)) - self.assertTrue(idna.check_bidi(r + es + al)) - self.assertTrue(idna.check_bidi(r + cs + al)) - self.assertTrue(idna.check_bidi(r + et + al)) - self.assertTrue(idna.check_bidi(r + on + al)) - self.assertTrue(idna.check_bidi(r + bn + al)) - self.assertTrue(idna.check_bidi(r + nsm)) - self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + l) - self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + ws) - - # RFC 5893 Rule 3 - self.assertTrue(idna.check_bidi(r + al)) - self.assertTrue(idna.check_bidi(r + en)) - self.assertTrue(idna.check_bidi(r + an)) - self.assertTrue(idna.check_bidi(r + nsm)) - self.assertTrue(idna.check_bidi(r + nsm + nsm)) - self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + on) - - # RFC 5893 Rule 4 - self.assertTrue(idna.check_bidi(r + en)) - self.assertTrue(idna.check_bidi(r + an)) - self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + en + an) - self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + an + en) - - # RFC 5893 Rule 5 - self.assertTrue(idna.check_bidi(l + en, check_ltr=True)) - self.assertTrue(idna.check_bidi(l + es + l, check_ltr=True)) - self.assertTrue(idna.check_bidi(l + cs + l, check_ltr=True)) - self.assertTrue(idna.check_bidi(l + et + l, check_ltr=True)) - self.assertTrue(idna.check_bidi(l + on + l, check_ltr=True)) - self.assertTrue(idna.check_bidi(l + bn + l, check_ltr=True)) - self.assertTrue(idna.check_bidi(l + nsm, check_ltr=True)) - - # RFC 5893 Rule 6 - self.assertTrue(idna.check_bidi(l + l, check_ltr=True)) - self.assertTrue(idna.check_bidi(l + en, check_ltr=True)) - self.assertTrue(idna.check_bidi(l + en + nsm, check_ltr=True)) - self.assertTrue(idna.check_bidi(l + en + nsm + nsm, check_ltr=True)) - self.assertRaises(idna.IDNABidiError, idna.check_bidi, l + cs, check_ltr=True) - - def test_check_initial_combiner(self): - - m = u'\u0300' - a = u'\u0061' - - self.assertTrue(idna.check_initial_combiner(a)) - self.assertTrue(idna.check_initial_combiner(a + m)) - self.assertRaises(idna.IDNAError, idna.check_initial_combiner, m + a) - - def test_check_hyphen_ok(self): - - self.assertTrue(idna.check_hyphen_ok('abc')) - self.assertTrue(idna.check_hyphen_ok('a--b')) - self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, 'aa--') - self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, 'a-') - self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, '-a') - - def test_valid_contextj(self): - - zwnj = u'\u200c' - zwj = u'\u200d' - virama = u'\u094d' - latin = u'\u0061' - - # RFC 5892 Appendix A.1 (Zero Width Non-Joiner) - self.assertFalse(idna.valid_contextj(zwnj, 0)) - self.assertFalse(idna.valid_contextj(latin + zwnj, 1)) # No preceding Virama - self.assertTrue(idna.valid_contextj(virama + zwnj, 1)) # Preceding Virama - - # RFC 5892 Appendix A.2 (Zero Width Joiner) - self.assertFalse(idna.valid_contextj(zwj, 0)) - self.assertFalse(idna.valid_contextj(latin + zwj, 1)) # No preceding Virama - self.assertTrue(idna.valid_contextj(virama + zwj, 1)) # Preceding Virama - - def test_valid_contexto(self): - - latin = u'\u0061' - latin_l = u'\u006c' - greek = u'\u03b1' - hebrew = u'\u05d0' - katakana = u'\u30a1' - hiragana = u'\u3041' - han = u'\u6f22' - arabic_digit = u'\u0660' - ext_arabic_digit = u'\u06f0' - - # RFC 5892 Rule A.3 (Middle Dot) - latin_middle_dot = u'\u00b7' - self.assertTrue(idna.valid_contexto(latin_l + latin_middle_dot + latin_l, 1)) - self.assertFalse(idna.valid_contexto(latin_middle_dot + latin_l, 1)) - self.assertFalse(idna.valid_contexto(latin_l + latin_middle_dot, 0)) - self.assertFalse(idna.valid_contexto(latin_middle_dot, 0)) - self.assertFalse(idna.valid_contexto(latin_l + latin_middle_dot + latin, 1)) - - # RFC 5892 Rule A.4 (Greek Lower Numeral Sign) - glns = u'\u0375' - self.assertTrue(idna.valid_contexto(glns + greek, 0)) - self.assertFalse(idna.valid_contexto(glns + latin, 0)) - self.assertFalse(idna.valid_contexto(glns, 0)) - self.assertFalse(idna.valid_contexto(greek + glns, 1)) - - # RFC 5892 Rule A.5 (Hebrew Punctuation Geresh) - geresh = u'\u05f3' - self.assertTrue(idna.valid_contexto(hebrew + geresh, 1)) - self.assertFalse(idna.valid_contexto(latin + geresh, 1)) - - # RFC 5892 Rule A.6 (Hebrew Punctuation Gershayim) - gershayim = u'\u05f4' - self.assertTrue(idna.valid_contexto(hebrew + gershayim, 1)) - self.assertFalse(idna.valid_contexto(latin + gershayim, 1)) - - # RFC 5892 Rule A.7 (Katakana Middle Dot) - ja_middle_dot = u'\u30fb' - self.assertTrue(idna.valid_contexto(katakana + ja_middle_dot + katakana, 1)) - self.assertTrue(idna.valid_contexto(hiragana + ja_middle_dot + hiragana, 1)) - self.assertTrue(idna.valid_contexto(han + ja_middle_dot + han, 1)) - self.assertTrue(idna.valid_contexto(han + ja_middle_dot + latin, 1)) - self.assertTrue(idna.valid_contexto(u'\u6f22\u30fb\u5b57', 1)) - self.assertFalse(idna.valid_contexto(u'\u0061\u30fb\u0061', 1)) - - # RFC 5892 Rule A.8 (Arabic-Indic Digits) - self.assertTrue(idna.valid_contexto(arabic_digit + arabic_digit, 0)) - self.assertFalse(idna.valid_contexto(arabic_digit + ext_arabic_digit, 0)) - - # RFC 5892 Rule A.9 (Extended Arabic-Indic Digits) - self.assertTrue(idna.valid_contexto(ext_arabic_digit + ext_arabic_digit, 0)) - self.assertFalse(idna.valid_contexto(ext_arabic_digit + arabic_digit, 0)) - - def test_encode(self): - - self.assertEqual(idna.encode('xn--zckzah.xn--zckzah'), b'xn--zckzah.xn--zckzah') - self.assertEqual(idna.encode(u'\u30c6\u30b9\u30c8.xn--zckzah'), b'xn--zckzah.xn--zckzah') - self.assertEqual(idna.encode(u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8'), b'xn--zckzah.xn--zckzah') - self.assertEqual(idna.encode('abc.abc'), b'abc.abc') - self.assertEqual(idna.encode('xn--zckzah.abc'), b'xn--zckzah.abc') - self.assertEqual(idna.encode(u'\u30c6\u30b9\u30c8.abc'), b'xn--zckzah.abc') - self.assertEqual(idna.encode(u'\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa'), - b'xn---------90gglbagaar.aa') - self.assertRaises(idna.IDNAError, idna.encode, - u'\u0521\u0524\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa', uts46=False) - self.assertEqual(idna.encode('a'*63), b'a'*63) - self.assertRaises(idna.IDNAError, idna.encode, 'a'*64) - self.assertRaises(idna.core.InvalidCodepoint, idna.encode, '*') - - def test_decode(self): - - self.assertEqual(idna.decode('xn--zckzah.xn--zckzah'), u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8') - self.assertEqual(idna.decode(u'\u30c6\u30b9\u30c8.xn--zckzah'), u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8') - self.assertEqual(idna.decode(u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8'), - u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8') - self.assertEqual(idna.decode('abc.abc'), u'abc.abc') - self.assertEqual(idna.decode('xn---------90gglbagaar.aa'), - u'\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa') - self.assertRaises(idna.IDNAError, idna.decode, 'XN---------90GGLBAGAAC.AA') - self.assertRaises(idna.IDNAError, idna.decode, 'xn---------90gglbagaac.aa') - self.assertRaises(idna.IDNAError, idna.decode, 'xn--') - -if __name__ == '__main__': - unittest.main() +#!/usr/bin/env python + +import unittest + +import idna + + +class IDNATests(unittest.TestCase): + + def setUp(self): + self.tld_strings = [ + [u'\u6d4b\u8bd5', b'xn--0zwm56d'], + [u'\u092a\u0930\u0940\u0915\u094d\u0937\u093e', b'xn--11b5bs3a9aj6g'], + [u'\ud55c\uad6d', b'xn--3e0b707e'], + [u'\u09ad\u09be\u09b0\u09a4', b'xn--45brj9c'], + [u'\u09ac\u09be\u0982\u09b2\u09be', b'xn--54b7fta0cc'], + [u'\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435', b'xn--80akhbyknj4f'], + [u'\u0441\u0440\u0431', b'xn--90a3ac'], + [u'\ud14c\uc2a4\ud2b8', b'xn--9t4b11yi5a'], + [u'\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd', b'xn--clchc0ea0b2g2a9gcd'], + [u'\u05d8\u05e2\u05e1\u05d8', b'xn--deba0ad'], + [u'\u4e2d\u56fd', b'xn--fiqs8s'], + [u'\u4e2d\u570b', b'xn--fiqz9s'], + [u'\u0c2d\u0c3e\u0c30\u0c24\u0c4d', b'xn--fpcrj9c3d'], + [u'\u0dbd\u0d82\u0d9a\u0dcf', b'xn--fzc2c9e2c'], + [u'\u6e2c\u8a66', b'xn--g6w251d'], + [u'\u0aad\u0abe\u0ab0\u0aa4', b'xn--gecrj9c'], + [u'\u092d\u093e\u0930\u0924', b'xn--h2brj9c'], + [u'\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc', b'xn--hgbk6aj7f53bba'], + [u'\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8', b'xn--hlcj6aya9esc7a'], + [u'\u0443\u043a\u0440', b'xn--j1amh'], + [u'\u9999\u6e2f', b'xn--j6w193g'], + [u'\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae', b'xn--jxalpdlp'], + [u'\u0625\u062e\u062a\u0628\u0627\u0631', b'xn--kgbechtv'], + [u'\u53f0\u6e7e', b'xn--kprw13d'], + [u'\u53f0\u7063', b'xn--kpry57d'], + [u'\u0627\u0644\u062c\u0632\u0627\u0626\u0631', b'xn--lgbbat1ad8j'], + [u'\u0639\u0645\u0627\u0646', b'xn--mgb9awbf'], + [u'\u0627\u06cc\u0631\u0627\u0646', b'xn--mgba3a4f16a'], + [u'\u0627\u0645\u0627\u0631\u0627\u062a', b'xn--mgbaam7a8h'], + [u'\u067e\u0627\u06a9\u0633\u062a\u0627\u0646', b'xn--mgbai9azgqp6j'], + [u'\u0627\u0644\u0627\u0631\u062f\u0646', b'xn--mgbayh7gpa'], + [u'\u0628\u06be\u0627\u0631\u062a', b'xn--mgbbh1a71e'], + [u'\u0627\u0644\u0645\u063a\u0631\u0628', b'xn--mgbc0a9azcg'], + [u'\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629', b'xn--mgberp4a5d4ar'], + [u'\u10d2\u10d4', b'xn--node'], + [u'\u0e44\u0e17\u0e22', b'xn--o3cw4h'], + [u'\u0633\u0648\u0631\u064a\u0629', b'xn--ogbpf8fl'], + [u'\u0440\u0444', b'xn--p1ai'], + [u'\u062a\u0648\u0646\u0633', b'xn--pgbs0dh'], + [u'\u0a2d\u0a3e\u0a30\u0a24', b'xn--s9brj9c'], + [u'\u0645\u0635\u0631', b'xn--wgbh1c'], + [u'\u0642\u0637\u0631', b'xn--wgbl6a'], + [u'\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8', b'xn--xkc2al3hye2a'], + [u'\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe', b'xn--xkc2dl3a5ee0h'], + [u'\u65b0\u52a0\u5761', b'xn--yfro4i67o'], + [u'\u0641\u0644\u0633\u0637\u064a\u0646', b'xn--ygbi2ammx'], + [u'\u30c6\u30b9\u30c8', b'xn--zckzah'], + [u'\u049b\u0430\u0437', b'xn--80ao21a'], + [u'\u0645\u0644\u064a\u0633\u064a\u0627', b'xn--mgbx4cd0ab'], + [u'\u043c\u043e\u043d', b'xn--l1acc'], + [u'\u0633\u0648\u062f\u0627\u0646', b'xn--mgbpl2fh'], + ] + + def testIDNTLDALabels(self): + + for (ulabel, alabel) in self.tld_strings: + self.assertEqual(alabel, idna.alabel(ulabel)) + + def testIDNTLDULabels(self): + + for (ulabel, alabel) in self.tld_strings: + self.assertEqual(ulabel, idna.ulabel(alabel)) + + def test_valid_label_length(self): + + self.assertTrue(idna.valid_label_length('a' * 63)) + self.assertFalse(idna.valid_label_length('a' * 64)) + self.assertRaises(idna.IDNAError, idna.encode, 'a' * 64) + + def test_check_bidi(self): + + l = u'\u0061' + r = u'\u05d0' + al = u'\u0627' + an = u'\u0660' + en = u'\u0030' + es = u'\u002d' + cs = u'\u002c' + et = u'\u0024' + on = u'\u0021' + bn = u'\u200c' + nsm = u'\u0610' + ws = u'\u0020' + + # RFC 5893 Rule 1 + self.assertTrue(idna.check_bidi(l)) + self.assertTrue(idna.check_bidi(r)) + self.assertTrue(idna.check_bidi(al)) + self.assertRaises(idna.IDNABidiError, idna.check_bidi, an) + + # RFC 5893 Rule 2 + self.assertTrue(idna.check_bidi(r + al)) + self.assertTrue(idna.check_bidi(r + al)) + self.assertTrue(idna.check_bidi(r + an)) + self.assertTrue(idna.check_bidi(r + en)) + self.assertTrue(idna.check_bidi(r + es + al)) + self.assertTrue(idna.check_bidi(r + cs + al)) + self.assertTrue(idna.check_bidi(r + et + al)) + self.assertTrue(idna.check_bidi(r + on + al)) + self.assertTrue(idna.check_bidi(r + bn + al)) + self.assertTrue(idna.check_bidi(r + nsm)) + self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + l) + self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + ws) + + # RFC 5893 Rule 3 + self.assertTrue(idna.check_bidi(r + al)) + self.assertTrue(idna.check_bidi(r + en)) + self.assertTrue(idna.check_bidi(r + an)) + self.assertTrue(idna.check_bidi(r + nsm)) + self.assertTrue(idna.check_bidi(r + nsm + nsm)) + self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + on) + + # RFC 5893 Rule 4 + self.assertTrue(idna.check_bidi(r + en)) + self.assertTrue(idna.check_bidi(r + an)) + self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + en + an) + self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + an + en) + + # RFC 5893 Rule 5 + self.assertTrue(idna.check_bidi(l + en, check_ltr=True)) + self.assertTrue(idna.check_bidi(l + es + l, check_ltr=True)) + self.assertTrue(idna.check_bidi(l + cs + l, check_ltr=True)) + self.assertTrue(idna.check_bidi(l + et + l, check_ltr=True)) + self.assertTrue(idna.check_bidi(l + on + l, check_ltr=True)) + self.assertTrue(idna.check_bidi(l + bn + l, check_ltr=True)) + self.assertTrue(idna.check_bidi(l + nsm, check_ltr=True)) + + # RFC 5893 Rule 6 + self.assertTrue(idna.check_bidi(l + l, check_ltr=True)) + self.assertTrue(idna.check_bidi(l + en, check_ltr=True)) + self.assertTrue(idna.check_bidi(l + en + nsm, check_ltr=True)) + self.assertTrue(idna.check_bidi(l + en + nsm + nsm, check_ltr=True)) + self.assertRaises(idna.IDNABidiError, idna.check_bidi, l + cs, check_ltr=True) + + def test_check_initial_combiner(self): + + m = u'\u0300' + a = u'\u0061' + + self.assertTrue(idna.check_initial_combiner(a)) + self.assertTrue(idna.check_initial_combiner(a + m)) + self.assertRaises(idna.IDNAError, idna.check_initial_combiner, m + a) + + def test_check_hyphen_ok(self): + + self.assertTrue(idna.check_hyphen_ok('abc')) + self.assertTrue(idna.check_hyphen_ok('a--b')) + self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, 'aa--') + self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, 'a-') + self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, '-a') + + def test_valid_contextj(self): + + zwnj = u'\u200c' + zwj = u'\u200d' + virama = u'\u094d' + latin = u'\u0061' + + # RFC 5892 Appendix A.1 (Zero Width Non-Joiner) + self.assertFalse(idna.valid_contextj(zwnj, 0)) + self.assertFalse(idna.valid_contextj(latin + zwnj, 1)) # No preceding Virama + self.assertTrue(idna.valid_contextj(virama + zwnj, 1)) # Preceding Virama + + # RFC 5892 Appendix A.2 (Zero Width Joiner) + self.assertFalse(idna.valid_contextj(zwj, 0)) + self.assertFalse(idna.valid_contextj(latin + zwj, 1)) # No preceding Virama + self.assertTrue(idna.valid_contextj(virama + zwj, 1)) # Preceding Virama + + def test_valid_contexto(self): + + latin = u'\u0061' + latin_l = u'\u006c' + greek = u'\u03b1' + hebrew = u'\u05d0' + katakana = u'\u30a1' + hiragana = u'\u3041' + han = u'\u6f22' + arabic_digit = u'\u0660' + ext_arabic_digit = u'\u06f0' + + # RFC 5892 Rule A.3 (Middle Dot) + latin_middle_dot = u'\u00b7' + self.assertTrue(idna.valid_contexto(latin_l + latin_middle_dot + latin_l, 1)) + self.assertFalse(idna.valid_contexto(latin_middle_dot + latin_l, 1)) + self.assertFalse(idna.valid_contexto(latin_l + latin_middle_dot, 0)) + self.assertFalse(idna.valid_contexto(latin_middle_dot, 0)) + self.assertFalse(idna.valid_contexto(latin_l + latin_middle_dot + latin, 1)) + + # RFC 5892 Rule A.4 (Greek Lower Numeral Sign) + glns = u'\u0375' + self.assertTrue(idna.valid_contexto(glns + greek, 0)) + self.assertFalse(idna.valid_contexto(glns + latin, 0)) + self.assertFalse(idna.valid_contexto(glns, 0)) + self.assertFalse(idna.valid_contexto(greek + glns, 1)) + + # RFC 5892 Rule A.5 (Hebrew Punctuation Geresh) + geresh = u'\u05f3' + self.assertTrue(idna.valid_contexto(hebrew + geresh, 1)) + self.assertFalse(idna.valid_contexto(latin + geresh, 1)) + + # RFC 5892 Rule A.6 (Hebrew Punctuation Gershayim) + gershayim = u'\u05f4' + self.assertTrue(idna.valid_contexto(hebrew + gershayim, 1)) + self.assertFalse(idna.valid_contexto(latin + gershayim, 1)) + + # RFC 5892 Rule A.7 (Katakana Middle Dot) + ja_middle_dot = u'\u30fb' + self.assertTrue(idna.valid_contexto(katakana + ja_middle_dot + katakana, 1)) + self.assertTrue(idna.valid_contexto(hiragana + ja_middle_dot + hiragana, 1)) + self.assertTrue(idna.valid_contexto(han + ja_middle_dot + han, 1)) + self.assertTrue(idna.valid_contexto(han + ja_middle_dot + latin, 1)) + self.assertTrue(idna.valid_contexto(u'\u6f22\u30fb\u5b57', 1)) + self.assertFalse(idna.valid_contexto(u'\u0061\u30fb\u0061', 1)) + + # RFC 5892 Rule A.8 (Arabic-Indic Digits) + self.assertTrue(idna.valid_contexto(arabic_digit + arabic_digit, 0)) + self.assertFalse(idna.valid_contexto(arabic_digit + ext_arabic_digit, 0)) + + # RFC 5892 Rule A.9 (Extended Arabic-Indic Digits) + self.assertTrue(idna.valid_contexto(ext_arabic_digit + ext_arabic_digit, 0)) + self.assertFalse(idna.valid_contexto(ext_arabic_digit + arabic_digit, 0)) + + def test_encode(self): + + self.assertEqual(idna.encode('xn--zckzah.xn--zckzah'), b'xn--zckzah.xn--zckzah') + self.assertEqual(idna.encode(u'\u30c6\u30b9\u30c8.xn--zckzah'), b'xn--zckzah.xn--zckzah') + self.assertEqual(idna.encode(u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8'), b'xn--zckzah.xn--zckzah') + self.assertEqual(idna.encode('abc.abc'), b'abc.abc') + self.assertEqual(idna.encode('xn--zckzah.abc'), b'xn--zckzah.abc') + self.assertEqual(idna.encode(u'\u30c6\u30b9\u30c8.abc'), b'xn--zckzah.abc') + self.assertEqual(idna.encode(u'\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa'), + b'xn---------90gglbagaar.aa') + self.assertRaises(idna.IDNAError, idna.encode, + u'\u0521\u0524\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa', uts46=False) + self.assertEqual(idna.encode('a'*63), b'a'*63) + self.assertRaises(idna.IDNAError, idna.encode, 'a'*64) + self.assertRaises(idna.core.InvalidCodepoint, idna.encode, '*') + + def test_decode(self): + + self.assertEqual(idna.decode('xn--zckzah.xn--zckzah'), u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8') + self.assertEqual(idna.decode(u'\u30c6\u30b9\u30c8.xn--zckzah'), u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8') + self.assertEqual(idna.decode(u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8'), + u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8') + self.assertEqual(idna.decode('abc.abc'), u'abc.abc') + self.assertEqual(idna.decode('xn---------90gglbagaar.aa'), + u'\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa') + self.assertRaises(idna.IDNAError, idna.decode, 'XN---------90GGLBAGAAC.AA') + self.assertRaises(idna.IDNAError, idna.decode, 'xn---------90gglbagaac.aa') + self.assertRaises(idna.IDNAError, idna.decode, 'xn--') + +if __name__ == '__main__': + unittest.main() diff --git a/contrib/python/idna/tests/test_idna_codec.py b/contrib/python/idna/tests/test_idna_codec.py index b85b9cfdd0..18511b4c13 100644 --- a/contrib/python/idna/tests/test_idna_codec.py +++ b/contrib/python/idna/tests/test_idna_codec.py @@ -1,71 +1,71 @@ -#!/usr/bin/env python - -import codecs -import sys -import unittest - -import idna.codec - -class IDNACodecTests(unittest.TestCase): - - def testCodec(self): - pass - - def testIncrementalDecoder(self): - - # Tests derived from Python standard library test/test_codecs.py - - incremental_tests = ( - (u"python.org", b"python.org"), - (u"python.org.", b"python.org."), - (u"pyth\xf6n.org", b"xn--pythn-mua.org"), - (u"pyth\xf6n.org.", b"xn--pythn-mua.org."), - ) - - for decoded, encoded in incremental_tests: - if sys.version_info[0] == 2: - self.assertEqual("".join(codecs.iterdecode(encoded, "idna")), - decoded) - else: - self.assertEqual("".join(codecs.iterdecode((bytes([c]) for c in encoded), "idna")), - decoded) - - decoder = codecs.getincrementaldecoder("idna")() - self.assertEqual(decoder.decode(b"xn--xam", ), u"") - self.assertEqual(decoder.decode(b"ple-9ta.o", ), u"\xe4xample.") - self.assertEqual(decoder.decode(b"rg"), u"") - self.assertEqual(decoder.decode(b"", True), u"org") - - decoder.reset() - self.assertEqual(decoder.decode(b"xn--xam", ), u"") - self.assertEqual(decoder.decode(b"ple-9ta.o", ), u"\xe4xample.") - self.assertEqual(decoder.decode(b"rg."), u"org.") - self.assertEqual(decoder.decode(b"", True), u"") - - - def testIncrementalEncoder(self): - - # Tests derived from Python standard library test/test_codecs.py - - incremental_tests = ( - (u"python.org", b"python.org"), - (u"python.org.", b"python.org."), - (u"pyth\xf6n.org", b"xn--pythn-mua.org"), - (u"pyth\xf6n.org.", b"xn--pythn-mua.org."), - ) - for decoded, encoded in incremental_tests: - self.assertEqual(b"".join(codecs.iterencode(decoded, "idna")), - encoded) - - encoder = codecs.getincrementalencoder("idna")() - self.assertEqual(encoder.encode(u"\xe4x"), b"") - self.assertEqual(encoder.encode(u"ample.org"), b"xn--xample-9ta.") - self.assertEqual(encoder.encode(u"", True), b"org") - - encoder.reset() - self.assertEqual(encoder.encode(u"\xe4x"), b"") - self.assertEqual(encoder.encode(u"ample.org."), b"xn--xample-9ta.org.") - self.assertEqual(encoder.encode(u"", True), b"") - -if __name__ == '__main__': - unittest.main() +#!/usr/bin/env python + +import codecs +import sys +import unittest + +import idna.codec + +class IDNACodecTests(unittest.TestCase): + + def testCodec(self): + pass + + def testIncrementalDecoder(self): + + # Tests derived from Python standard library test/test_codecs.py + + incremental_tests = ( + (u"python.org", b"python.org"), + (u"python.org.", b"python.org."), + (u"pyth\xf6n.org", b"xn--pythn-mua.org"), + (u"pyth\xf6n.org.", b"xn--pythn-mua.org."), + ) + + for decoded, encoded in incremental_tests: + if sys.version_info[0] == 2: + self.assertEqual("".join(codecs.iterdecode(encoded, "idna")), + decoded) + else: + self.assertEqual("".join(codecs.iterdecode((bytes([c]) for c in encoded), "idna")), + decoded) + + decoder = codecs.getincrementaldecoder("idna")() + self.assertEqual(decoder.decode(b"xn--xam", ), u"") + self.assertEqual(decoder.decode(b"ple-9ta.o", ), u"\xe4xample.") + self.assertEqual(decoder.decode(b"rg"), u"") + self.assertEqual(decoder.decode(b"", True), u"org") + + decoder.reset() + self.assertEqual(decoder.decode(b"xn--xam", ), u"") + self.assertEqual(decoder.decode(b"ple-9ta.o", ), u"\xe4xample.") + self.assertEqual(decoder.decode(b"rg."), u"org.") + self.assertEqual(decoder.decode(b"", True), u"") + + + def testIncrementalEncoder(self): + + # Tests derived from Python standard library test/test_codecs.py + + incremental_tests = ( + (u"python.org", b"python.org"), + (u"python.org.", b"python.org."), + (u"pyth\xf6n.org", b"xn--pythn-mua.org"), + (u"pyth\xf6n.org.", b"xn--pythn-mua.org."), + ) + for decoded, encoded in incremental_tests: + self.assertEqual(b"".join(codecs.iterencode(decoded, "idna")), + encoded) + + encoder = codecs.getincrementalencoder("idna")() + self.assertEqual(encoder.encode(u"\xe4x"), b"") + self.assertEqual(encoder.encode(u"ample.org"), b"xn--xample-9ta.") + self.assertEqual(encoder.encode(u"", True), b"org") + + encoder.reset() + self.assertEqual(encoder.encode(u"\xe4x"), b"") + self.assertEqual(encoder.encode(u"ample.org."), b"xn--xample-9ta.org.") + self.assertEqual(encoder.encode(u"", True), b"") + +if __name__ == '__main__': + unittest.main() diff --git a/contrib/python/idna/tests/test_idna_compat.py b/contrib/python/idna/tests/test_idna_compat.py index bc670a864e..68b4ce363c 100644 --- a/contrib/python/idna/tests/test_idna_compat.py +++ b/contrib/python/idna/tests/test_idna_compat.py @@ -1,19 +1,19 @@ -#!/usr/bin/env python - -import unittest - -import idna.compat - -class IDNACompatTests(unittest.TestCase): - - def testToASCII(self): - self.assertEqual(idna.compat.ToASCII(u'\u30c6\u30b9\u30c8.xn--zckzah'), b'xn--zckzah.xn--zckzah') - - def testToUnicode(self): - self.assertEqual(idna.compat.ToUnicode(b'xn--zckzah.xn--zckzah'), u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8') - - def test_nameprep(self): - self.assertRaises(NotImplementedError, idna.compat.nameprep, "a") - -if __name__ == '__main__': - unittest.main() +#!/usr/bin/env python + +import unittest + +import idna.compat + +class IDNACompatTests(unittest.TestCase): + + def testToASCII(self): + self.assertEqual(idna.compat.ToASCII(u'\u30c6\u30b9\u30c8.xn--zckzah'), b'xn--zckzah.xn--zckzah') + + def testToUnicode(self): + self.assertEqual(idna.compat.ToUnicode(b'xn--zckzah.xn--zckzah'), u'\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8') + + def test_nameprep(self): + self.assertRaises(NotImplementedError, idna.compat.nameprep, "a") + +if __name__ == '__main__': + unittest.main() diff --git a/contrib/python/idna/tests/test_idna_other.py b/contrib/python/idna/tests/test_idna_other.py index ee03eeac5e..bf0332d875 100755 --- a/contrib/python/idna/tests/test_idna_other.py +++ b/contrib/python/idna/tests/test_idna_other.py @@ -1,14 +1,14 @@ -"""Tests for other functions""" - -import unittest - -import idna - - -class OtherUTS46Tests(unittest.TestCase): - - def test_std3(self): - - self.assertEqual(idna.uts46_remap('A_', std3_rules=False), 'a_') - self.assertRaises(idna.InvalidCodepoint, idna.uts46_remap, 'A_', std3_rules=True) - +"""Tests for other functions""" + +import unittest + +import idna + + +class OtherUTS46Tests(unittest.TestCase): + + def test_std3(self): + + self.assertEqual(idna.uts46_remap('A_', std3_rules=False), 'a_') + self.assertRaises(idna.InvalidCodepoint, idna.uts46_remap, 'A_', std3_rules=True) + diff --git a/contrib/python/idna/tests/test_idna_uts46.py b/contrib/python/idna/tests/test_idna_uts46.py index 1d6c1f2ae0..ebb180b93a 100755 --- a/contrib/python/idna/tests/test_idna_uts46.py +++ b/contrib/python/idna/tests/test_idna_uts46.py @@ -1,145 +1,145 @@ -"""Tests for TR46 code.""" - -import gzip -import os.path -import re -import sys -import unittest - -import idna - -if sys.version_info[0] >= 3: - unichr = chr - unicode = str - -_RE_UNICODE = re.compile(u"\\\\u([0-9a-fA-F]{4})") -_RE_SURROGATE = re.compile(u"[\uD800-\uDBFF][\uDC00-\uDFFF]") -_SKIP_TESTS = [ - # These appear to be errors in the test vectors. All relate to incorrectly applying - # bidi rules across label boundaries. Appears independently confirmed - # at http://www.alvestrand.no/pipermail/idna-update/2017-January/007946.html - u'0\u00E0.\u05D0', u'0a\u0300.\u05D0', u'0A\u0300.\u05D0', u'0\u00C0.\u05D0', 'xn--0-sfa.xn--4db', - u'\u00E0\u02c7.\u05D0', u'a\u0300\u02c7.\u05D0', u'A\u0300\u02c7.\u05D0', u'\u00C0\u02c7.\u05D0', - 'xn--0ca88g.xn--4db', u'0A.\u05D0', u'0a.\u05D0', '0a.xn--4db', 'c.xn--0-eha.xn--4db', - u'c.0\u00FC.\u05D0', u'c.0u\u0308.\u05D0', u'C.0U\u0308.\u05D0', u'C.0\u00DC.\u05D0', - u'\u06B6\u06DF\u3002\u2087\uA806', u'\u06B6\u06DF\u30027\uA806', 'xn--pkb6f.xn--7-x93e', - u'\u06B6\u06DF.7\uA806', u'1.\uAC7E6.\U00010C41\u06D0', u'1.\u1100\u1165\u11B56.\U00010C41\u06D0', - '1.xn--6-945e.xn--glb1794k', - - # These are transitional strings that compute to NV8 and thus are not supported - # in IDNA 2008. - u'\U000102F7\u3002\u200D', - u'\U0001D7F5\u9681\u2BEE\uFF0E\u180D\u200C', - u'9\u9681\u2BEE.\u180D\u200C', - u'\u00DF\u200C\uAAF6\u18A5.\u22B6\u2D21\u2D16', - u'ss\u200C\uAAF6\u18A5.\u22B6\u2D21\u2D16', - u'\u00DF\u200C\uAAF6\u18A5\uFF0E\u22B6\u2D21\u2D16', - u'ss\u200C\uAAF6\u18A5\uFF0E\u22B6\u2D21\u2D16', - u'\U00010A57\u200D\u3002\u2D09\u2D15', - u'\U00010A57\u200D\uFF61\u2D09\u2D15', - u'\U0001D7CF\U0001DA19\u2E16.\u200D', - u'1\U0001DA19\u2E16.\u200D', - u'\U0001D7E04\U000E01D7\U0001D23B\uFF0E\u200D\U000102F5\u26E7\u200D', - u'84\U000E01D7\U0001D23B.\u200D\U000102F5\u26E7\u200D', -] - -def unicode_fixup(string): - """Replace backslash-u-XXXX with appropriate unicode characters.""" - return _RE_SURROGATE.sub(lambda match: unichr( - (ord(match.group(0)[0]) - 0xd800) * 0x400 + - ord(match.group(0)[1]) - 0xdc00 + 0x10000), - _RE_UNICODE.sub(lambda match: unichr(int(match.group(1), 16)), string)) - - -def parse_idna_test_table(inputstream): - """Parse IdnaTest.txt and return a list of tuples.""" - tests = [] - for lineno, line in enumerate(inputstream): - line = line.decode("utf-8").strip() - if "#" in line: - line = line.split("#", 1)[0] - if not line: - continue - tests.append((lineno + 1, tuple(field.strip() - for field in line.split(u";")))) - return tests - - -class TestIdnaTest(unittest.TestCase): - """Run one of the IdnaTest.txt test lines.""" - def __init__(self, lineno=None, fields=None): - super(TestIdnaTest, self).__init__() - self.lineno = lineno - self.fields = fields - - def id(self): - return "%s.%d" % (super(TestIdnaTest, self).id(), self.lineno) - - def shortDescription(self): - if not self.fields: - return "" - return "IdnaTest.txt line %d: %r" % (self.lineno, - u"; ".join(self.fields)) - - def runTest(self): - if not self.fields: - return - try: - types, source, to_unicode, to_ascii = (unicode_fixup(field) - for field in self.fields[:4]) - if (unicode_fixup(u"\\uD804\\uDC39") in source and - sys.version_info[0] < 3): - raise unittest.SkipTest( - "Python 2's Unicode support is too old for this test") - except ValueError: - raise unittest.SkipTest( - "Test requires Python wide Unicode support") - if source in _SKIP_TESTS: - return - if not to_unicode: - to_unicode = source - if not to_ascii: - to_ascii = to_unicode - nv8 = (len(self.fields) > 4 and self.fields[4]) - try: - output = idna.decode(source, uts46=True, strict=True) - if to_unicode[0] == u"[": - self.fail("decode() did not emit required error {0} for {1}".format(to_unicode, repr(source))) - self.assertEqual(output, to_unicode, "unexpected decode() output") - except (idna.IDNAError, UnicodeError, ValueError) as exc: - if unicode(exc).startswith(u"Unknown"): - raise unittest.SkipTest("Test requires support for a newer" - " version of Unicode than this Python supports") - if to_unicode[0] != u"[" and not nv8: - raise - for transitional in { - u"B": (True, False), - u"T": (True,), - u"N": (False,), - }[types]: - try: - output = idna.encode(source, uts46=True, strict=True, - transitional=transitional).decode("ascii") - if to_ascii[0] == u"[": - self.fail( - "encode(transitional={0}) did not emit required error {1} for {2}". - format(transitional, to_ascii, repr(source))) - self.assertEqual(output, to_ascii, - "unexpected encode(transitional={0}) output". - format(transitional)) - except (idna.IDNAError, UnicodeError, ValueError) as exc: - if unicode(exc).startswith(u"Unknown"): - raise unittest.SkipTest("Test requires support for a newer" - " version of Unicode than this Python supports") - if to_ascii[0] != u"[" and not nv8: - raise - - -def load_tests(loader, tests, pattern): - """Create a suite of all the individual tests.""" - suite = unittest.TestSuite() - with gzip.open(os.path.join(os.path.dirname(__file__), - "IdnaTest.txt.gz"), "rb") as tests_file: - suite.addTests(TestIdnaTest(lineno, fields) - for lineno, fields in parse_idna_test_table(tests_file)) - return suite +"""Tests for TR46 code.""" + +import gzip +import os.path +import re +import sys +import unittest + +import idna + +if sys.version_info[0] >= 3: + unichr = chr + unicode = str + +_RE_UNICODE = re.compile(u"\\\\u([0-9a-fA-F]{4})") +_RE_SURROGATE = re.compile(u"[\uD800-\uDBFF][\uDC00-\uDFFF]") +_SKIP_TESTS = [ + # These appear to be errors in the test vectors. All relate to incorrectly applying + # bidi rules across label boundaries. Appears independently confirmed + # at http://www.alvestrand.no/pipermail/idna-update/2017-January/007946.html + u'0\u00E0.\u05D0', u'0a\u0300.\u05D0', u'0A\u0300.\u05D0', u'0\u00C0.\u05D0', 'xn--0-sfa.xn--4db', + u'\u00E0\u02c7.\u05D0', u'a\u0300\u02c7.\u05D0', u'A\u0300\u02c7.\u05D0', u'\u00C0\u02c7.\u05D0', + 'xn--0ca88g.xn--4db', u'0A.\u05D0', u'0a.\u05D0', '0a.xn--4db', 'c.xn--0-eha.xn--4db', + u'c.0\u00FC.\u05D0', u'c.0u\u0308.\u05D0', u'C.0U\u0308.\u05D0', u'C.0\u00DC.\u05D0', + u'\u06B6\u06DF\u3002\u2087\uA806', u'\u06B6\u06DF\u30027\uA806', 'xn--pkb6f.xn--7-x93e', + u'\u06B6\u06DF.7\uA806', u'1.\uAC7E6.\U00010C41\u06D0', u'1.\u1100\u1165\u11B56.\U00010C41\u06D0', + '1.xn--6-945e.xn--glb1794k', + + # These are transitional strings that compute to NV8 and thus are not supported + # in IDNA 2008. + u'\U000102F7\u3002\u200D', + u'\U0001D7F5\u9681\u2BEE\uFF0E\u180D\u200C', + u'9\u9681\u2BEE.\u180D\u200C', + u'\u00DF\u200C\uAAF6\u18A5.\u22B6\u2D21\u2D16', + u'ss\u200C\uAAF6\u18A5.\u22B6\u2D21\u2D16', + u'\u00DF\u200C\uAAF6\u18A5\uFF0E\u22B6\u2D21\u2D16', + u'ss\u200C\uAAF6\u18A5\uFF0E\u22B6\u2D21\u2D16', + u'\U00010A57\u200D\u3002\u2D09\u2D15', + u'\U00010A57\u200D\uFF61\u2D09\u2D15', + u'\U0001D7CF\U0001DA19\u2E16.\u200D', + u'1\U0001DA19\u2E16.\u200D', + u'\U0001D7E04\U000E01D7\U0001D23B\uFF0E\u200D\U000102F5\u26E7\u200D', + u'84\U000E01D7\U0001D23B.\u200D\U000102F5\u26E7\u200D', +] + +def unicode_fixup(string): + """Replace backslash-u-XXXX with appropriate unicode characters.""" + return _RE_SURROGATE.sub(lambda match: unichr( + (ord(match.group(0)[0]) - 0xd800) * 0x400 + + ord(match.group(0)[1]) - 0xdc00 + 0x10000), + _RE_UNICODE.sub(lambda match: unichr(int(match.group(1), 16)), string)) + + +def parse_idna_test_table(inputstream): + """Parse IdnaTest.txt and return a list of tuples.""" + tests = [] + for lineno, line in enumerate(inputstream): + line = line.decode("utf-8").strip() + if "#" in line: + line = line.split("#", 1)[0] + if not line: + continue + tests.append((lineno + 1, tuple(field.strip() + for field in line.split(u";")))) + return tests + + +class TestIdnaTest(unittest.TestCase): + """Run one of the IdnaTest.txt test lines.""" + def __init__(self, lineno=None, fields=None): + super(TestIdnaTest, self).__init__() + self.lineno = lineno + self.fields = fields + + def id(self): + return "%s.%d" % (super(TestIdnaTest, self).id(), self.lineno) + + def shortDescription(self): + if not self.fields: + return "" + return "IdnaTest.txt line %d: %r" % (self.lineno, + u"; ".join(self.fields)) + + def runTest(self): + if not self.fields: + return + try: + types, source, to_unicode, to_ascii = (unicode_fixup(field) + for field in self.fields[:4]) + if (unicode_fixup(u"\\uD804\\uDC39") in source and + sys.version_info[0] < 3): + raise unittest.SkipTest( + "Python 2's Unicode support is too old for this test") + except ValueError: + raise unittest.SkipTest( + "Test requires Python wide Unicode support") + if source in _SKIP_TESTS: + return + if not to_unicode: + to_unicode = source + if not to_ascii: + to_ascii = to_unicode + nv8 = (len(self.fields) > 4 and self.fields[4]) + try: + output = idna.decode(source, uts46=True, strict=True) + if to_unicode[0] == u"[": + self.fail("decode() did not emit required error {0} for {1}".format(to_unicode, repr(source))) + self.assertEqual(output, to_unicode, "unexpected decode() output") + except (idna.IDNAError, UnicodeError, ValueError) as exc: + if unicode(exc).startswith(u"Unknown"): + raise unittest.SkipTest("Test requires support for a newer" + " version of Unicode than this Python supports") + if to_unicode[0] != u"[" and not nv8: + raise + for transitional in { + u"B": (True, False), + u"T": (True,), + u"N": (False,), + }[types]: + try: + output = idna.encode(source, uts46=True, strict=True, + transitional=transitional).decode("ascii") + if to_ascii[0] == u"[": + self.fail( + "encode(transitional={0}) did not emit required error {1} for {2}". + format(transitional, to_ascii, repr(source))) + self.assertEqual(output, to_ascii, + "unexpected encode(transitional={0}) output". + format(transitional)) + except (idna.IDNAError, UnicodeError, ValueError) as exc: + if unicode(exc).startswith(u"Unknown"): + raise unittest.SkipTest("Test requires support for a newer" + " version of Unicode than this Python supports") + if to_ascii[0] != u"[" and not nv8: + raise + + +def load_tests(loader, tests, pattern): + """Create a suite of all the individual tests.""" + suite = unittest.TestSuite() + with gzip.open(os.path.join(os.path.dirname(__file__), + "IdnaTest.txt.gz"), "rb") as tests_file: + suite.addTests(TestIdnaTest(lineno, fields) + for lineno, fields in parse_idna_test_table(tests_file)) + return suite diff --git a/contrib/python/idna/tests/test_intranges.py b/contrib/python/idna/tests/test_intranges.py index 868ab8fd57..2b8b66c40f 100644 --- a/contrib/python/idna/tests/test_intranges.py +++ b/contrib/python/idna/tests/test_intranges.py @@ -1,65 +1,65 @@ -#!/usr/bin/env python - -import unittest - -from idna.intranges import intranges_from_list, intranges_contain, _encode_range - - -class IntrangeTests(unittest.TestCase): - - def test_ranging(self): - self.assertEqual( - intranges_from_list(list(range(293, 499)) + list(range(4888, 9876))), - (_encode_range(293, 499), _encode_range(4888, 9876),) - ) - - def test_ranging_2(self): - self.assertEqual( - intranges_from_list([111]), - (_encode_range(111, 112),) - ) - - def test_skips(self): - self.assertEqual( - intranges_from_list([0, 2, 4, 6, 9, 10, 11, 13, 15,]), - ( - _encode_range(0, 1), - _encode_range(2, 3), - _encode_range(4, 5), - _encode_range(6, 7), - _encode_range(9, 12), - _encode_range(13, 14), - _encode_range(15, 16), - ) - ) - - def test_empty_range(self): - self.assertEqual( - intranges_from_list([]), - () - ) - - -class IntrangeContainsTests(unittest.TestCase): - - def _test_containment(self, ints, disjoint_ints): - ranges = intranges_from_list(ints) - for int_ in ints: - assert intranges_contain(int_, ranges) - for int_ in disjoint_ints: - assert not intranges_contain(int_, ranges) - - def test_simple(self): - self._test_containment(range(10, 20), [2, 3, 68, 3893]) - - def test_skips(self): - self._test_containment( - [0, 2, 4, 6, 9, 10, 11, 13, 15,], - [-1, 1, 3, 5, 7, 4898] - ) - - def test_singleton(self): - self._test_containment([111], [110, 112]) - - def test_empty(self): - self._test_containment([], range(100)) +#!/usr/bin/env python + +import unittest + +from idna.intranges import intranges_from_list, intranges_contain, _encode_range + + +class IntrangeTests(unittest.TestCase): + + def test_ranging(self): + self.assertEqual( + intranges_from_list(list(range(293, 499)) + list(range(4888, 9876))), + (_encode_range(293, 499), _encode_range(4888, 9876),) + ) + + def test_ranging_2(self): + self.assertEqual( + intranges_from_list([111]), + (_encode_range(111, 112),) + ) + + def test_skips(self): + self.assertEqual( + intranges_from_list([0, 2, 4, 6, 9, 10, 11, 13, 15,]), + ( + _encode_range(0, 1), + _encode_range(2, 3), + _encode_range(4, 5), + _encode_range(6, 7), + _encode_range(9, 12), + _encode_range(13, 14), + _encode_range(15, 16), + ) + ) + + def test_empty_range(self): + self.assertEqual( + intranges_from_list([]), + () + ) + + +class IntrangeContainsTests(unittest.TestCase): + + def _test_containment(self, ints, disjoint_ints): + ranges = intranges_from_list(ints) + for int_ in ints: + assert intranges_contain(int_, ranges) + for int_ in disjoint_ints: + assert not intranges_contain(int_, ranges) + + def test_simple(self): + self._test_containment(range(10, 20), [2, 3, 68, 3893]) + + def test_skips(self): + self._test_containment( + [0, 2, 4, 6, 9, 10, 11, 13, 15,], + [-1, 1, 3, 5, 7, 4898] + ) + + def test_singleton(self): + self._test_containment([111], [110, 112]) + + def test_empty(self): + self._test_containment([], range(100)) diff --git a/contrib/python/idna/tests/ya.make b/contrib/python/idna/tests/ya.make index e015365538..01b9571f60 100644 --- a/contrib/python/idna/tests/ya.make +++ b/contrib/python/idna/tests/ya.make @@ -1,24 +1,24 @@ -PY23_TEST() - -OWNER(g:python-contrib yaskevich) - -PEERDIR( - contrib/python/idna -) - -ENV(LC_ALL=ru_RU.UTF-8) -ENV(LANG=ru_RU.UTF-8) - -TEST_SRCS( - test_idna_compat.py - test_idna.py - test_intranges.py - test_idna_codec.py - test_idna_other.py - test_idna_uts46.py -) - -NO_LINT() -FORK_SUBTESTS() - -END() +PY23_TEST() + +OWNER(g:python-contrib yaskevich) + +PEERDIR( + contrib/python/idna +) + +ENV(LC_ALL=ru_RU.UTF-8) +ENV(LANG=ru_RU.UTF-8) + +TEST_SRCS( + test_idna_compat.py + test_idna.py + test_intranges.py + test_idna_codec.py + test_idna_other.py + test_idna_uts46.py +) + +NO_LINT() +FORK_SUBTESTS() + +END() |