diff options
author | shadchin <[email protected]> | 2022-02-10 16:44:39 +0300 |
---|---|---|
committer | Daniil Cherednik <[email protected]> | 2022-02-10 16:44:39 +0300 |
commit | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch) | |
tree | 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/tools/cython/Cython/Compiler/StringEncoding.py | |
parent | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff) |
Restoring authorship annotation for <[email protected]>. Commit 2 of 2.
Diffstat (limited to 'contrib/tools/cython/Cython/Compiler/StringEncoding.py')
-rw-r--r-- | contrib/tools/cython/Cython/Compiler/StringEncoding.py | 56 |
1 files changed, 28 insertions, 28 deletions
diff --git a/contrib/tools/cython/Cython/Compiler/StringEncoding.py b/contrib/tools/cython/Cython/Compiler/StringEncoding.py index 4bbcd8a3d69..c37e8aab799 100644 --- a/contrib/tools/cython/Cython/Compiler/StringEncoding.py +++ b/contrib/tools/cython/Cython/Compiler/StringEncoding.py @@ -154,34 +154,34 @@ def string_contains_surrogates(ustring): return False -def string_contains_lone_surrogates(ustring): - """ - Check if the unicode string contains lone surrogate code points - on a CPython platform with wide (UCS-4) or narrow (UTF-16) - Unicode, i.e. characters that would be spelled as two - separate code units on a narrow platform, but that do not form a pair. - """ - last_was_start = False - unicode_uses_surrogate_encoding = sys.maxunicode == 65535 - for c in map(ord, ustring): - # surrogates tend to be rare - if c < 0xD800 or c > 0xDFFF: - if last_was_start: - return True - elif not unicode_uses_surrogate_encoding: - # on 32bit Unicode platforms, there is never a pair - return True - elif c <= 0xDBFF: - if last_was_start: - return True # lone start - last_was_start = True - else: - if not last_was_start: - return True # lone end - last_was_start = False - return last_was_start - - +def string_contains_lone_surrogates(ustring): + """ + Check if the unicode string contains lone surrogate code points + on a CPython platform with wide (UCS-4) or narrow (UTF-16) + Unicode, i.e. characters that would be spelled as two + separate code units on a narrow platform, but that do not form a pair. + """ + last_was_start = False + unicode_uses_surrogate_encoding = sys.maxunicode == 65535 + for c in map(ord, ustring): + # surrogates tend to be rare + if c < 0xD800 or c > 0xDFFF: + if last_was_start: + return True + elif not unicode_uses_surrogate_encoding: + # on 32bit Unicode platforms, there is never a pair + return True + elif c <= 0xDBFF: + if last_was_start: + return True # lone start + last_was_start = True + else: + if not last_was_start: + return True # lone end + last_was_start = False + return last_was_start + + class BytesLiteral(_bytes): # bytes subclass that is compatible with EncodedString encoding = None |