# contrib/python/wcwidth/py2/tests/test_emojis.py

# std imports
import os
import codecs

# 3rd party
import pytest

# ``unichr`` is a builtin only on python 2; on python 3 expose ``chr`` under
# the same name so the rest of this module can call ``unichr`` everywhere.
try:
    # python 2
    _ = unichr
except NameError:
    # python 3
    unichr = chr

# Some builds of python limit the internal unicode structure to 0x10000 to
# conserve memory, and some tests cannot be done there. Probe for such a
# build by requesting a codepoint beyond that limit.
try:
    unichr(0x2fffe)
except ValueError:
    # "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
    NARROW_ONLY = True
else:
    NARROW_ONLY = False

# local
import wcwidth


def make_sequence_from_line(line):
    """
    Build the string described by the codepoint field of a data-file line.

    Everything before the first ';' is read as whitespace-separated
    hexadecimal codepoints, which are converted to characters and joined::

        '002A FE0F  ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f)
    """
    codepoint_field = line.partition(';')[0]
    characters = [unichr(int(hex_value, 16)) for hex_value in codepoint_field.split()]
    return ''.join(characters)


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_emoji_zwj_sequence():
    u"""
    Emoji zwj sequence of four codepoints is just 2 cells.
    """
    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d"       # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
              u"\U0001f4bb")  # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER
    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    expect_length_each = (2, 0, 0, 2)
    expect_length_phrase = 2

    # exercise,
    length_each = tuple(map(wcwidth.wcwidth, phrase))
    length_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase


# This test was originally defined without the ``test_`` prefix, so pytest's
# default discovery never collected it. The old name is kept as an alias for
# anything that may still refer to it; the alias itself is not collected.
emoji_zwj_sequence = test_emoji_zwj_sequence


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_unfinished_zwj_sequence():
    u"""
    A zero-width joiner that ends the string, with no character following it,
    must be measured without an index-out-of-bounds error.
    """
    # Category, East Asian Width property -- description
    woman = u"\U0001f469"      # So, 'W' -- WOMAN
    skin_tone = u"\U0001f3fb"  # Sk, 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
    joiner = u"\u200d"         # Cf, 'N' -- ZERO WIDTH JOINER
    phrase = woman + skin_tone + joiner

    # exercise,
    widths_by_char = tuple(wcwidth.wcwidth(ucs) for ucs in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert widths_by_char == (2, 0, 0)
    assert width_of_phrase == 2


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_non_recommended_zwj_sequence():
    """
    Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
    """
    # NOTE(review): this phrase is the same one used by
    # test_unfinished_zwj_sequence -- no character follows the joiner here.
    # Confirm whether a non-joinable character was meant to be appended.
    # Category, East Asian Width property -- description
    woman = u"\U0001f469"      # So, 'W' -- WOMAN
    skin_tone = u"\U0001f3fb"  # Sk, 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
    joiner = u"\u200d"         # Cf, 'N' -- ZERO WIDTH JOINER
    phrase = woman + skin_tone + joiner

    # exercise,
    widths_by_char = tuple(wcwidth.wcwidth(ucs) for ucs in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert widths_by_char == (2, 0, 0)
    assert width_of_phrase == 2


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_another_emoji_zwj_sequence():
    """
    A ZWJ sequence ending in VS-16 measures 2 cells as a phrase, while its
    codepoints measure (1, 0, 0, 1, 0) individually.
    """
    codepoints = [
        u"\u26F9",       # PERSON WITH BALL
        u"\U0001F3FB",   # EMOJI MODIFIER FITZPATRICK TYPE-1-2
        u"\u200D",       # ZERO WIDTH JOINER
        u"\u2640",       # FEMALE SIGN
        u"\uFE0F",       # VARIATION SELECTOR-16
    ]
    phrase = u"".join(codepoints)

    # exercise,
    widths_by_char = tuple(wcwidth.wcwidth(ucs) for ucs in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert widths_by_char == (1, 0, 0, 1, 0)
    assert width_of_phrase == 2


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_longer_emoji_zwj_sequence():
    """
    A long emoji ZWJ sequence, 10 codepoints in total, occupies only 2 cells.

    The sequence is measured twice over in one string, so that several VS-16
    sequences are handled within a single function call.
    """
    # 'Category Code', 'East Asian Width property' -- 'description'
    single_sequence = u"".join([
        u"\U0001F9D1",   # 'So', 'W' -- ADULT
        u"\U0001F3FB",   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
        u"\u200d",       # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\u2764",       # 'So', 'N' -- HEAVY BLACK HEART
        u"\uFE0F",       # 'Mn', 'A' -- VARIATION SELECTOR-16
        u"\u200d",       # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\U0001F48B",   # 'So', 'W' -- KISS MARK
        u"\u200d",       # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\U0001F9D1",   # 'So', 'W' -- ADULT
        u"\U0001F3FD",   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
    ])
    phrase = single_sequence * 2
    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    widths_single_sequence = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0)

    # exercise,
    widths_by_char = tuple(wcwidth.wcwidth(ucs) for ucs in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert widths_by_char == widths_single_sequence * 2
    assert width_of_phrase == 4


def read_sequences_from_file(filename):
    """
    Read codepoint sequences from a utf-8 text file beside this module.

    :param filename: name of the file, joined onto this module's directory.
    :returns: tuple ``(lines, sequences)``; ``lines`` are the stripped,
        non-blank lines that do not begin with '#', and ``sequences`` holds
        the result of ``make_sequence_from_line`` for each of those lines,
        in the same order.
    """
    path = os.path.join(os.path.dirname(__file__), filename)
    # 'with' closes the file even when reading or decoding raises, where the
    # previous explicit close() would have been skipped.
    with codecs.open(path, 'r', encoding='utf-8') as fp:
        lines = [line.strip()
                 for line in fp.readlines()
                 if not line.startswith('#') and line.strip()]
    sequences = [make_sequence_from_line(line) for line in lines]
    return lines, sequences


@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
def test_recommended_emoji_zwj_sequences():
    """
    Each sequence of the unicode.org-published emoji-zwj-sequences.txt is
    measured by wcswidth as 2 cells.
    """
    # given,
    lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')

    # exercise, keeping the original text file line beside each measurement
    # as a debugging aid
    measurements = [(line, sequence, wcwidth.wcswidth(sequence))
                    for sequence, line in zip(sequences, lines)]
    errors = [{
        'expected_width': 2,
        'line': line,
        'measured_width': measured_width,
        'sequence': sequence,
    } for line, sequence, measured_width in measurements if measured_width != 2]

    # verify
    assert errors == []
    assert len(measurements) >= 1468


def test_recommended_variation_16_sequences():
    """
    Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt

    Only sequences containing VARIATION SELECTOR-16 (U+FE0F) are measured, and
    each is expected to be 2 cells. Every line read is counted, measured or not.
    """
    # given,
    lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')

    errors = []
    num = 0
    for sequence, line in zip(sequences, lines):
        num += 1
        # The ``u`` prefix matters on python 2: without it the literal is a
        # byte string of six characters (a backslash followed by "ufe0f"),
        # which is never found in a sequence, so every line was skipped and
        # nothing was verified.
        if u'\ufe0f' not in sequence:
            # filter for only \uFE0F (VS-16)
            continue
        measured_width = wcwidth.wcswidth(sequence)
        if measured_width != 2:
            errors.append({
                'expected_width': 2,
                'line': line,
                'measured_width': measured_width,
                'sequence': sequence,
            })

    # verify
    assert errors == []
    assert num >= 742


def test_unicode_9_vs16():
    """With unicode_version 9.0, FEMALE SIGN followed by VS-16 measures 2 cells as a phrase."""
    female_sign = u"\u2640"   # FEMALE SIGN
    vs16 = u"\uFE0F"          # VARIATION SELECTOR-16
    phrase = female_sign + vs16

    # exercise,
    widths_by_char = tuple([wcwidth.wcwidth(ucs, unicode_version='9.0') for ucs in phrase])
    width_of_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0')

    # verify.
    assert widths_by_char == (1, 0)
    assert width_of_phrase == 2

def test_unicode_8_vs16():
    """With unicode_version 8.0, VS-16 has no effect: FEMALE SIGN followed by VS-16 stays 1 cell."""
    female_sign = u"\u2640"   # FEMALE SIGN
    vs16 = u"\uFE0F"          # VARIATION SELECTOR-16
    phrase = female_sign + vs16

    # exercise,
    widths_by_char = tuple([wcwidth.wcwidth(ucs, unicode_version='8.0') for ucs in phrase])
    width_of_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0')

    # verify.
    assert widths_by_char == (1, 0)
    assert width_of_phrase == 1