about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/tools/cython/Cython/Compiler/StringEncoding.py
diff options
context:
space:
mode:
author shadchin <shadchin@yandex-team.ru> 2022-02-10 16:44:30 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:44:30 +0300
commit 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch)
tree 012bb94d777798f1f56ac1cec429509766d05181 /contrib/tools/cython/Cython/Compiler/StringEncoding.py
parent 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff)
download ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/cython/Cython/Compiler/StringEncoding.py')
-rw-r--r-- contrib/tools/cython/Cython/Compiler/StringEncoding.py 56
1 files changed, 28 insertions, 28 deletions
diff --git a/contrib/tools/cython/Cython/Compiler/StringEncoding.py b/contrib/tools/cython/Cython/Compiler/StringEncoding.py
index c37e8aab79..4bbcd8a3d6 100644
--- a/contrib/tools/cython/Cython/Compiler/StringEncoding.py
+++ b/contrib/tools/cython/Cython/Compiler/StringEncoding.py
@@ -154,34 +154,34 @@ def string_contains_surrogates(ustring):
return False
-def string_contains_lone_surrogates(ustring):
- """
- Check if the unicode string contains lone surrogate code points
- on a CPython platform with wide (UCS-4) or narrow (UTF-16)
- Unicode, i.e. characters that would be spelled as two
- separate code units on a narrow platform, but that do not form a pair.
- """
- last_was_start = False
- unicode_uses_surrogate_encoding = sys.maxunicode == 65535
- for c in map(ord, ustring):
- # surrogates tend to be rare
- if c < 0xD800 or c > 0xDFFF:
- if last_was_start:
- return True
- elif not unicode_uses_surrogate_encoding:
- # on 32bit Unicode platforms, there is never a pair
- return True
- elif c <= 0xDBFF:
- if last_was_start:
- return True # lone start
- last_was_start = True
- else:
- if not last_was_start:
- return True # lone end
- last_was_start = False
- return last_was_start
-
-
+def string_contains_lone_surrogates(ustring):
+ """
+ Check if the unicode string contains lone surrogate code points
+ on a CPython platform with wide (UCS-4) or narrow (UTF-16)
+ Unicode, i.e. characters that would be spelled as two
+ separate code units on a narrow platform, but that do not form a pair.
+ """
+ last_was_start = False
+ unicode_uses_surrogate_encoding = sys.maxunicode == 65535
+ for c in map(ord, ustring):
+ # surrogates tend to be rare
+ if c < 0xD800 or c > 0xDFFF:
+ if last_was_start:
+ return True
+ elif not unicode_uses_surrogate_encoding:
+ # on 32bit Unicode platforms, there is never a pair
+ return True
+ elif c <= 0xDBFF:
+ if last_was_start:
+ return True # lone start
+ last_was_start = True
+ else:
+ if not last_was_start:
+ return True # lone end
+ last_was_start = False
+ return last_was_start
+
+
class BytesLiteral(_bytes):
# bytes subclass that is compatible with EncodedString
encoding = None