diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:30 +0300 |
commit | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (patch) | |
tree | 012bb94d777798f1f56ac1cec429509766d05181 /contrib/tools/cython/Cython/Compiler/StringEncoding.py | |
parent | 6751af0b0c1b952fede40b19b71da8025b5d8bcf (diff) | |
download | ydb-2598ef1d0aee359b4b6d5fdd1758916d5907d04f.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/tools/cython/Cython/Compiler/StringEncoding.py')
-rw-r--r-- | contrib/tools/cython/Cython/Compiler/StringEncoding.py | 56 |
1 files changed, 28 insertions, 28 deletions
diff --git a/contrib/tools/cython/Cython/Compiler/StringEncoding.py b/contrib/tools/cython/Cython/Compiler/StringEncoding.py index c37e8aab79..4bbcd8a3d6 100644 --- a/contrib/tools/cython/Cython/Compiler/StringEncoding.py +++ b/contrib/tools/cython/Cython/Compiler/StringEncoding.py @@ -154,34 +154,34 @@ def string_contains_surrogates(ustring): return False -def string_contains_lone_surrogates(ustring): - """ - Check if the unicode string contains lone surrogate code points - on a CPython platform with wide (UCS-4) or narrow (UTF-16) - Unicode, i.e. characters that would be spelled as two - separate code units on a narrow platform, but that do not form a pair. - """ - last_was_start = False - unicode_uses_surrogate_encoding = sys.maxunicode == 65535 - for c in map(ord, ustring): - # surrogates tend to be rare - if c < 0xD800 or c > 0xDFFF: - if last_was_start: - return True - elif not unicode_uses_surrogate_encoding: - # on 32bit Unicode platforms, there is never a pair - return True - elif c <= 0xDBFF: - if last_was_start: - return True # lone start - last_was_start = True - else: - if not last_was_start: - return True # lone end - last_was_start = False - return last_was_start - - +def string_contains_lone_surrogates(ustring): + """ + Check if the unicode string contains lone surrogate code points + on a CPython platform with wide (UCS-4) or narrow (UTF-16) + Unicode, i.e. characters that would be spelled as two + separate code units on a narrow platform, but that do not form a pair. + """ + last_was_start = False + unicode_uses_surrogate_encoding = sys.maxunicode == 65535 + for c in map(ord, ustring): + # surrogates tend to be rare + if c < 0xD800 or c > 0xDFFF: + if last_was_start: + return True + elif not unicode_uses_surrogate_encoding: + # on 32bit Unicode platforms, there is never a pair + return True + elif c <= 0xDBFF: + if last_was_start: + return True # lone start + last_was_start = True + else: + if not last_was_start: + return True # lone end + last_was_start = False + return last_was_start + + class BytesLiteral(_bytes): # bytes subclass that is compatible with EncodedString encoding = None |