aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/python/wcwidth/py2/tests/test_emojis.py
diff options
context:
space:
mode:
authorAlexSm <alex@ydb.tech>2023-12-27 23:31:58 +0100
committerGitHub <noreply@github.com>2023-12-27 23:31:58 +0100
commitd67bfb4b4b7549081543e87a31bc6cb5c46ac973 (patch)
tree8674f2f1570877cb653e7ddcff37ba00288de15a /contrib/python/wcwidth/py2/tests/test_emojis.py
parent1f6bef05ed441c3aa2d565ac792b26cded704ac7 (diff)
downloadydb-d67bfb4b4b7549081543e87a31bc6cb5c46ac973.tar.gz
Import libs 4 (#758)
Diffstat (limited to 'contrib/python/wcwidth/py2/tests/test_emojis.py')
-rw-r--r--contrib/python/wcwidth/py2/tests/test_emojis.py243
1 files changed, 243 insertions, 0 deletions
diff --git a/contrib/python/wcwidth/py2/tests/test_emojis.py b/contrib/python/wcwidth/py2/tests/test_emojis.py
new file mode 100644
index 0000000000..4f88e2330e
--- /dev/null
+++ b/contrib/python/wcwidth/py2/tests/test_emojis.py
@@ -0,0 +1,243 @@
+# std imports
+import os
+import codecs
+
+# 3rd party
+import pytest
+
+try:
+ # python 2
+ _ = unichr
+except NameError:
+ # python 3
+ unichr = chr
+
+# some tests cannot be done on some builds of python, where the internal
+# unicode structure is limited to 0x10000 for memory conservation,
+# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
+try:
+ unichr(0x2fffe)
+ NARROW_ONLY = False
+except ValueError:
+ NARROW_ONLY = True
+
+# local
+import wcwidth
+
+
+def make_sequence_from_line(line):
+ # convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f)
+ return ''.join(unichr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split())
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def emoji_zwj_sequence():
+ u"""
+ Emoji zwj sequence of four codepoints is just 2 cells.
+ """
+ phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
+ u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+ u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
+ u"\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER
+ # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+ expect_length_each = (2, 0, 0, 2)
+ expect_length_phrase = 2
+
+ # exercise,
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
+ length_phrase = wcwidth.wcswidth(phrase)
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_unfinished_zwj_sequence():
+ u"""
+ Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
+ """
+ phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
+ u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+ u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
+ expect_length_each = (2, 0, 0)
+ expect_length_phrase = 2
+
+ # exercise,
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
+ length_phrase = wcwidth.wcswidth(phrase)
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_non_recommended_zwj_sequence():
+ """
+ Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
+ """
+ phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
+ u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+ u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
+ expect_length_each = (2, 0, 0)
+ expect_length_phrase = 2
+
+ # exercise,
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
+ length_phrase = wcwidth.wcswidth(phrase)
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_another_emoji_zwj_sequence():
+ phrase = (
+ u"\u26F9" # PERSON WITH BALL
+ u"\U0001F3FB" # EMOJI MODIFIER FITZPATRICK TYPE-1-2
+ u"\u200D" # ZERO WIDTH JOINER
+ u"\u2640" # FEMALE SIGN
+ u"\uFE0F") # VARIATION SELECTOR-16
+ expect_length_each = (1, 0, 0, 1, 0)
+ expect_length_phrase = 2
+
+ # exercise,
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
+ length_phrase = wcwidth.wcswidth(phrase)
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
+def test_longer_emoji_zwj_sequence():
+ """
+ A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!
+
+ Also test the same sequence in duplicate, verifying multiple VS-16 sequences
+ in a single function call.
+ """
+ # 'Category Code', 'East Asian Width property' -- 'description'
+ phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT
+ u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
+ u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
+ u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART
+ u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16
+ u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
+ u"\U0001F48B" # 'So', 'W' -- KISS MARK
+ u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
+ u"\U0001F9D1" # 'So', 'W' -- ADULT
+ u"\U0001F3FD" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
+ ) * 2
+ # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
+ expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) * 2
+ expect_length_phrase = 4
+
+ # exercise,
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
+ length_phrase = wcwidth.wcswidth(phrase)
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+
+def read_sequences_from_file(filename):
+ fp = codecs.open(os.path.join(os.path.dirname(__file__), filename), 'r', encoding='utf-8')
+ lines = [line.strip()
+ for line in fp.readlines()
+ if not line.startswith('#') and line.strip()]
+ fp.close()
+ sequences = [make_sequence_from_line(line) for line in lines]
+ return lines, sequences
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
+def test_recommended_emoji_zwj_sequences():
+ """
+ Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt
+ """
+ # given,
+ lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')
+
+ errors = []
+ # Exercise, track by zipping with original text file line, a debugging aide
+ num = 0
+ for sequence, line in zip(sequences, lines):
+ num += 1
+ measured_width = wcwidth.wcswidth(sequence)
+ if measured_width != 2:
+ errors.append({
+ 'expected_width': 2,
+ 'line': line,
+ 'measured_width': measured_width,
+ 'sequence': sequence,
+ })
+
+ # verify
+ assert errors == []
+ assert num >= 1468
+
+
+def test_recommended_variation_16_sequences():
+ """
+ Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt
+ """
+ # given,
+ lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
+
+ errors = []
+ num = 0
+ for sequence, line in zip(sequences, lines):
+ num += 1
+ if '\ufe0f' not in sequence:
+ # filter for only \uFE0F (VS-16)
+ continue
+ measured_width = wcwidth.wcswidth(sequence)
+ if measured_width != 2:
+ errors.append({
+ 'expected_width': 2,
+ 'line': line,
+ 'measured_width': wcwidth.wcswidth(sequence),
+ 'sequence': sequence,
+ })
+
+ # verify
+ assert errors == []
+ assert num >= 742
+
+
+def test_unicode_9_vs16():
+ """Verify effect of VS-16 on unicode_version 9.0 and later"""
+ phrase = (u"\u2640" # FEMALE SIGN
+ u"\uFE0F") # VARIATION SELECTOR-16
+
+ expect_length_each = (1, 0)
+ expect_length_phrase = 2
+
+ # exercise,
+ length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase)
+ length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0')
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase
+
+def test_unicode_8_vs16():
+ """Verify that VS-16 has no effect on unicode_version 8.0 and earler"""
+ phrase = (u"\u2640" # FEMALE SIGN
+ u"\uFE0F") # VARIATION SELECTOR-16
+
+ expect_length_each = (1, 0)
+ expect_length_phrase = 1
+
+ # exercise,
+ length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase)
+ length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0')
+
+ # verify.
+ assert length_each == expect_length_each
+ assert length_phrase == expect_length_phrase \ No newline at end of file