summaryrefslogtreecommitdiffstats
path: root/contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py
diff options
context:
space:
mode:
authoralexv-smirnov <[email protected]>2023-03-15 19:59:12 +0300
committeralexv-smirnov <[email protected]>2023-03-15 19:59:12 +0300
commit056bb284ccf8dd6793ec3a54ffa36c4fb2b9ad11 (patch)
tree4740980126f32e3af7937ba0ca5f83e59baa4ab0 /contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py
parent269126dcced1cc8b53eb4398b4a33e5142f10290 (diff)
add library/cpp/actors, ymake build to ydb oss export
Diffstat (limited to 'contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py')
-rw-r--r--contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py44
1 files changed, 44 insertions, 0 deletions
diff --git a/contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py b/contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py
new file mode 100644
index 00000000000..91d099333a0
--- /dev/null
+++ b/contrib/tools/cython/Cython/Compiler/Tests/TestStringEncoding.py
@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+
+import sys
+import unittest
+
+import Cython.Compiler.StringEncoding as StringEncoding
+
+
+class StringEncodingTest(unittest.TestCase):
+ """
+ Test the StringEncoding module.
+ """
+ def test_string_contains_lone_surrogates(self):
+ self.assertFalse(StringEncoding.string_contains_lone_surrogates(u"abc"))
+ self.assertFalse(StringEncoding.string_contains_lone_surrogates(u"\uABCD"))
+ self.assertFalse(StringEncoding.string_contains_lone_surrogates(u"\N{SNOWMAN}"))
+
+ # This behaves differently in Py2 when freshly parsed and read from a .pyc file,
+ # but it seems to be a marshalling bug in Py2, which doesn't hurt us in Cython.
+ if sys.version_info[0] != 2:
+ self.assertTrue(StringEncoding.string_contains_lone_surrogates(u"\uD800\uDFFF"))
+
+ # In Py2 with 16bit Unicode, the following is indistinguishable from the 32bit character.
+ obfuscated_surrogate_pair = (u"\uDFFF" + "\uD800")[::-1]
+ if sys.version_info[0] == 2 and sys.maxunicode == 65565:
+ self.assertFalse(StringEncoding.string_contains_lone_surrogates(obfuscated_surrogate_pair))
+ else:
+ self.assertTrue(StringEncoding.string_contains_lone_surrogates(obfuscated_surrogate_pair))
+
+ self.assertTrue(StringEncoding.string_contains_lone_surrogates(u"\uD800"))
+ self.assertTrue(StringEncoding.string_contains_lone_surrogates(u"\uDFFF"))
+ self.assertTrue(StringEncoding.string_contains_lone_surrogates(u"\uDFFF\uD800"))
+ self.assertTrue(StringEncoding.string_contains_lone_surrogates(u"\uD800x\uDFFF"))
+
+ def test_string_contains_surrogates(self):
+ self.assertFalse(StringEncoding.string_contains_surrogates(u"abc"))
+ self.assertFalse(StringEncoding.string_contains_surrogates(u"\uABCD"))
+ self.assertFalse(StringEncoding.string_contains_surrogates(u"\N{SNOWMAN}"))
+
+ self.assertTrue(StringEncoding.string_contains_surrogates(u"\uD800"))
+ self.assertTrue(StringEncoding.string_contains_surrogates(u"\uDFFF"))
+ self.assertTrue(StringEncoding.string_contains_surrogates(u"\uD800\uDFFF"))
+ self.assertTrue(StringEncoding.string_contains_surrogates(u"\uDFFF\uD800"))
+ self.assertTrue(StringEncoding.string_contains_surrogates(u"\uD800x\uDFFF"))