summaryrefslogtreecommitdiffstats
path: root/contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py
diff options
context:
space:
mode:
authorDevtools Arcadia <[email protected]>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <[email protected]>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py')
-rw-r--r--contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py44
1 files changed, 44 insertions, 0 deletions
diff --git a/contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py b/contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py
new file mode 100644
index 00000000000..91d099333a0
--- /dev/null
+++ b/contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import unittest
+
+import Cython.Compiler.StringEncoding as StringEncoding
+
+
class StringEncodingTest(unittest.TestCase):
    """
    Test the StringEncoding module.

    Covers the two surrogate detection helpers:
    string_contains_lone_surrogates() and string_contains_surrogates().
    """

    def test_string_contains_lone_surrogates(self):
        """Check the lone-surrogate detection on plain, paired and unpaired input."""
        contains_lone = StringEncoding.string_contains_lone_surrogates

        # Strings without any surrogate code points.
        self.assertFalse(contains_lone(u"abc"))
        self.assertFalse(contains_lone(u"\uABCD"))
        self.assertFalse(contains_lone(u"\N{SNOWMAN}"))

        # This behaves differently in Py2 when freshly parsed and read from a .pyc file,
        # but it seems to be a marshalling bug in Py2, which doesn't hurt us in Cython.
        if sys.version_info[0] != 2:
            self.assertTrue(contains_lone(u"\uD800\uDFFF"))

        # In Py2 with 16bit Unicode, the following is indistinguishable from the 32bit character.
        # Both literals need the u"" prefix: without it, Py2 does not interpret the
        # \u escape and the string would not contain the intended surrogate at all.
        # The pair is built in reverse and flipped so that the parser never sees
        # the high/low surrogate sequence as a literal.
        obfuscated_surrogate_pair = (u"\uDFFF" + u"\uD800")[::-1]
        # sys.maxunicode is 0xFFFF (65535) on narrow (16bit) Unicode builds.
        if sys.version_info[0] == 2 and sys.maxunicode == 0xFFFF:
            self.assertFalse(contains_lone(obfuscated_surrogate_pair))
        else:
            self.assertTrue(contains_lone(obfuscated_surrogate_pair))

        # Single surrogates, a reversed (low, high) sequence, and a pair that is
        # split by a regular character must all be reported.
        self.assertTrue(contains_lone(u"\uD800"))
        self.assertTrue(contains_lone(u"\uDFFF"))
        self.assertTrue(contains_lone(u"\uDFFF\uD800"))
        self.assertTrue(contains_lone(u"\uD800x\uDFFF"))

    def test_string_contains_surrogates(self):
        """Check that any surrogate code point is reported, whether paired or not."""
        contains_surrogates = StringEncoding.string_contains_surrogates

        # Strings without any surrogate code points.
        self.assertFalse(contains_surrogates(u"abc"))
        self.assertFalse(contains_surrogates(u"\uABCD"))
        self.assertFalse(contains_surrogates(u"\N{SNOWMAN}"))

        # Every string below contains at least one code point in the
        # U+D800..U+DFFF range.
        self.assertTrue(contains_surrogates(u"\uD800"))
        self.assertTrue(contains_surrogates(u"\uDFFF"))
        self.assertTrue(contains_surrogates(u"\uD800\uDFFF"))
        self.assertTrue(contains_surrogates(u"\uDFFF\uD800"))
        self.assertTrue(contains_surrogates(u"\uD800x\uDFFF"))