diff options
author | robot-contrib <robot-contrib@yandex-team.com> | 2023-11-28 10:22:06 +0300 |
---|---|---|
committer | robot-contrib <robot-contrib@yandex-team.com> | 2023-11-28 11:32:21 +0300 |
commit | e47e2d7f764c4433fc3ccb00c5875361175988f7 (patch) | |
tree | 7fbd42979a8996707562949ec1a98eab234a090d /contrib/python/wcwidth/py3/tests | |
parent | 13de5f3d2da60e60f030c6cde3e389b07a9192d3 (diff) | |
download | ydb-e47e2d7f764c4433fc3ccb00c5875361175988f7.tar.gz |
Update contrib/python/wcwidth/py3 to 0.2.10
Diffstat (limited to 'contrib/python/wcwidth/py3/tests')
-rw-r--r-- | contrib/python/wcwidth/py3/tests/test_core.py | 103 | ||||
-rw-r--r-- | contrib/python/wcwidth/py3/tests/test_emojis.py | 243 |
2 files changed, 243 insertions, 103 deletions
diff --git a/contrib/python/wcwidth/py3/tests/test_core.py b/contrib/python/wcwidth/py3/tests/test_core.py index f0396ea968..d2776cd992 100644 --- a/contrib/python/wcwidth/py3/tests/test_core.py +++ b/contrib/python/wcwidth/py3/tests/test_core.py @@ -10,23 +10,12 @@ except ImportError: # local import wcwidth -# 3rd party -import pytest - -# some tests cannot be done on some builds of python, where the internal -# unicode structure is limited to 0x10000 for memory conservation, -# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)" try: # python 2 _ = unichr except NameError: # python 3 unichr = chr -try: - unichr(0x2fffe) - NARROW_ONLY = False -except ValueError: - NARROW_ONLY = True def test_package_version(): @@ -254,98 +243,6 @@ def test_kr_jamo_filler(): assert length_phrase == expect_length_phrase -@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") -def emoji_zwj_sequence(): - u""" - Emoji zwj sequence of four codepoints is just 2 cells. - """ - phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN - u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER - u"\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER - # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf - expect_length_each = (2, 0, 0, 2) - expect_length_phrase = 2 - - # exercise, - length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase) - - # verify. - assert length_each == expect_length_each - assert length_phrase == expect_length_phrase - - -@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") -def test_unfinished_zwj_sequence(): - u""" - Ensure index-out-of-bounds does not occur for zero-width joiner without any following character - """ - phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN - u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER - expect_length_each = (2, 0, 0) - expect_length_phrase = 2 - - # exercise, - length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase) - - # verify. - assert length_each == expect_length_each - assert length_phrase == expect_length_phrase - - -@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") -def test_non_recommended_zwj_sequence(): - """ - Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify - """ - phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN - u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER - expect_length_each = (2, 0, 0) - expect_length_phrase = 2 - - # exercise, - length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase) - - # verify. - assert length_each == expect_length_each - assert length_phrase == expect_length_phrase - - -@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") -def test_longer_emoji_zwj_sequence(): - """ - A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells! - """ - # 'Category Code', 'East Asian Width property' -- 'description' - phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT - u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 - u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER - u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART - u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16 - u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER - u"\U0001F48B" # 'So', 'W' -- KISS MARK - u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER - u"\U0001F9D1" # 'So', 'W' -- ADULT - u"\U0001F3FD") # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4 - - # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf - expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) - expect_length_phrase = 2 - - # exercise, - length_each = tuple(map(wcwidth.wcwidth, phrase)) - length_phrase = wcwidth.wcswidth(phrase) - - # verify. - assert length_each == expect_length_each - assert length_phrase == expect_length_phrase - - def test_devanagari_script(): """ Attempt to test the measurement width of Devanagari script. diff --git a/contrib/python/wcwidth/py3/tests/test_emojis.py b/contrib/python/wcwidth/py3/tests/test_emojis.py new file mode 100644 index 0000000000..4f88e2330e --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_emojis.py @@ -0,0 +1,243 @@ +# std imports +import os +import codecs + +# 3rd party +import pytest + +try: + # python 2 + _ = unichr +except NameError: + # python 3 + unichr = chr + +# some tests cannot be done on some builds of python, where the internal +# unicode structure is limited to 0x10000 for memory conservation, +# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)" +try: + unichr(0x2fffe) + NARROW_ONLY = False +except ValueError: + NARROW_ONLY = True + +# local +import wcwidth + + +def make_sequence_from_line(line): + # convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f) + return ''.join(unichr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split()) + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def emoji_zwj_sequence(): + u""" + Emoji zwj sequence of four codepoints is just 2 cells. + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + u"\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + expect_length_each = (2, 0, 0, 2) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_unfinished_zwj_sequence(): + u""" + Ensure index-out-of-bounds does not occur for zero-width joiner without any following character + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + expect_length_each = (2, 0, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_non_recommended_zwj_sequence(): + """ + Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify + """ + phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN + u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER + expect_length_each = (2, 0, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_another_emoji_zwj_sequence(): + phrase = ( + u"\u26F9" # PERSON WITH BALL + u"\U0001F3FB" # EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200D" # ZERO WIDTH JOINER + u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION SELECTOR-16 + expect_length_each = (1, 0, 0, 1, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds") +def test_longer_emoji_zwj_sequence(): + """ + A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells! + + Also test the same sequence in duplicate, verifying multiple VS-16 sequences + in a single function call. + """ + # 'Category Code', 'East Asian Width property' -- 'description' + phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT + u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2 + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART + u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16 + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\U0001F48B" # 'So', 'W' -- KISS MARK + u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER + u"\U0001F9D1" # 'So', 'W' -- ADULT + u"\U0001F3FD" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4 + ) * 2 + # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf + expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) * 2 + expect_length_phrase = 4 + + # exercise, + length_each = tuple(map(wcwidth.wcwidth, phrase)) + length_phrase = wcwidth.wcswidth(phrase) + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + + +def read_sequences_from_file(filename): + fp = codecs.open(os.path.join(os.path.dirname(__file__), filename), 'r', encoding='utf-8') + lines = [line.strip() + for line in fp.readlines() + if not line.startswith('#') and line.strip()] + fp.close() + sequences = [make_sequence_from_line(line) for line in lines] + return lines, sequences + + +@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds") +def test_recommended_emoji_zwj_sequences(): + """ + Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt + """ + # given, + lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt') + + errors = [] + # Exercise, track by zipping with original text file line, a debugging aide + num = 0 + for sequence, line in zip(sequences, lines): + num += 1 + measured_width = wcwidth.wcswidth(sequence) + if measured_width != 2: + errors.append({ + 'expected_width': 2, + 'line': line, + 'measured_width': measured_width, + 'sequence': sequence, + }) + + # verify + assert errors == [] + assert num >= 1468 + + +def test_recommended_variation_16_sequences(): + """ + Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt + """ + # given, + lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt') + + errors = [] + num = 0 + for sequence, line in zip(sequences, lines): + num += 1 + if '\ufe0f' not in sequence: + # filter for only \uFE0F (VS-16) + continue + measured_width = wcwidth.wcswidth(sequence) + if measured_width != 2: + errors.append({ + 'expected_width': 2, + 'line': line, + 'measured_width': wcwidth.wcswidth(sequence), + 'sequence': sequence, + }) + + # verify + assert errors == [] + assert num >= 742 + + +def test_unicode_9_vs16(): + """Verify effect of VS-16 on unicode_version 9.0 and later""" + phrase = (u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION SELECTOR-16 + + expect_length_each = (1, 0) + expect_length_phrase = 2 + + # exercise, + length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase) + length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0') + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase + +def test_unicode_8_vs16(): + """Verify that VS-16 has no effect on unicode_version 8.0 and earler""" + phrase = (u"\u2640" # FEMALE SIGN + u"\uFE0F") # VARIATION SELECTOR-16 + + expect_length_each = (1, 0) + expect_length_phrase = 1 + + # exercise, + length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase) + length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0') + + # verify. + assert length_each == expect_length_each + assert length_phrase == expect_length_phrase
\ No newline at end of file |