diff options
author | AlexSm <alex@ydb.tech> | 2023-12-27 23:31:58 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-27 23:31:58 +0100 |
commit | d67bfb4b4b7549081543e87a31bc6cb5c46ac973 (patch) | |
tree | 8674f2f1570877cb653e7ddcff37ba00288de15a /contrib/python/wcwidth/py2/tests/test_emojis.py | |
parent | 1f6bef05ed441c3aa2d565ac792b26cded704ac7 (diff) | |
download | ydb-d67bfb4b4b7549081543e87a31bc6cb5c46ac973.tar.gz |
Import libs 4 (#758)
Diffstat (limited to 'contrib/python/wcwidth/py2/tests/test_emojis.py')
-rw-r--r-- | contrib/python/wcwidth/py2/tests/test_emojis.py | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/contrib/python/wcwidth/py2/tests/test_emojis.py b/contrib/python/wcwidth/py2/tests/test_emojis.py new file mode 100644 index 0000000000..4f88e2330e --- /dev/null +++ b/contrib/python/wcwidth/py2/tests/test_emojis.py @@ -0,0 +1,243 @@ +# std imports +import os +import codecs + +# 3rd party +import pytest + +try: + # python 2 + _ = unichr +except NameError: + # python 3 + unichr = chr + +# some tests cannot be done on some builds of python, where the internal +# unicode structure is limited to 0x10000 for memory conservation, +# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)" +try: + unichr(0x2fffe) + NARROW_ONLY = False +except ValueError: + NARROW_ONLY = True + +# local +import wcwidth + + +def make_sequence_from_line(line): + # convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f) + return ''.join(unichr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split()) + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def emoji_zwj_sequence(): + u""" + Emoji zwj sequence of four codepoints is just 2 cells. + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + u"\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + expect_length_each = (2, 0, 0, 2) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_unfinished_zwj_sequence(): + u""" + Ensure index-out-of-bounds does not occur for zero-width joiner without any following character + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + expect_length_each = (2, 0, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_non_recommended_zwj_sequence(): + """ + Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + expect_length_each = (2, 0, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_another_emoji_zwj_sequence(): + phrase = ( + u"\u26F9" # PERSON WITH BALL + u"\U0001F3FB" # EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200D" # ZERO WIDTH JOINER + u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION SELECTOR-16 + expect_length_each = (1, 0, 0, 1, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_longer_emoji_zwj_sequence(): + """ + A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells! + + Also test the same sequence in duplicate, verifying multiple VS-16 sequences + in a single function call. + """ + # 'Category Code', 'East Asian Width property' -- 'description' + phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT + u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART + u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16 + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\U0001F48B" # 'So', 'W' -- KISS MARK + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\U0001F9D1" # 'So', 'W' -- ADULT + u"\U0001F3FD" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4 + ) * 2 + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) * 2 + expect_length_phrase = 4 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def read_sequences_from_file(filename): + fp = codecs.open(os.path.join(os.path.dirname(__file__), filename), 'r', encoding='utf-8') + lines = [line.strip() + for line in fp.readlines() + if not line.startswith('#') and line.strip()] + fp.close() + sequences = [make_sequence_from_line(line) for line in lines] + return lines, sequences + + +@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds") +def test_recommended_emoji_zwj_sequences(): + """ + Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt + """ + # given, + lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt') + + errors = [] + # Exercise, track by zipping with original text file line, a debugging aide + num = 0 + for sequence, line in zip(sequences, lines): + num += 1 + measured_width = wcwidth.wcswidth(sequence) + if measured_width != 2: + errors.append({ + 'expected_width': 2, + 'line': line, + 'measured_width': measured_width, + 'sequence': sequence, + }) + + # verify + assert errors == [] + assert num >= 1468 + + +def test_recommended_variation_16_sequences(): + """ + Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt + """ + # given, + lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt') + + errors = [] + num = 0 + for sequence, line in zip(sequences, lines): + num += 1 + if '\ufe0f' not in sequence: + # filter for only \uFE0F (VS-16) + continue + measured_width = wcwidth.wcswidth(sequence) + if measured_width != 2: + errors.append({ + 'expected_width': 2, + 'line': line, + 'measured_width': wcwidth.wcswidth(sequence), + 'sequence': sequence, + }) + + # verify + assert errors == [] + assert num >= 742 + + +def test_unicode_9_vs16(): + """Verify effect of VS-16 on unicode_version 9.0 and later""" + phrase = (u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION SELECTOR-16 + + expect_length_each = (1, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase) + length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0') + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + +def test_unicode_8_vs16(): + """Verify that VS-16 has no effect on unicode_version 8.0 and earler""" + phrase = (u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION SELECTOR-16 + + expect_length_each = (1, 0) + expect_length_phrase = 1 + + # exercise, + length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase) + length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0') + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase
\ No newline at end of file |