diff options
Diffstat (limited to 'contrib/python/wcwidth/py3/tests/test_width.py')
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_width.py | 170 |
1 files changed, 148 insertions, 22 deletions
diff --git a/contrib/python/wcwidth/py3/tests/test_width.py b/contrib/python/wcwidth/py3/tests/test_width.py index 67d7b017258..8e43b47b1de 100644 --- a/contrib/python/wcwidth/py3/tests/test_width.py +++ b/contrib/python/wcwidth/py3/tests/test_width.py @@ -1,10 +1,11 @@ """Tests for width() function.""" + # 3rd party import pytest # local import wcwidth -from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN +from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN, INDETERMINATE_EFFECT_SEQUENCE BASIC_WIDTH_CASES = [ ('', 0, 'empty'), @@ -29,7 +30,7 @@ IGNORE_MODE_CASES = [ ('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'), ('hello\x80world', 10, 'C1_control'), ('\x1b', 0, 'lone_ESC'), - ('a\x1bb', 2, 'lone_ESC_between'), + ('a\x1bb', 1, 'fs_sequence_between'), ] @@ -45,8 +46,10 @@ STRICT_RAISES_CASES = [ ('hello\x7fworld', 'DEL'), ('hello\x80world', 'C1_control'), ('hello\nworld', 'LF'), + ('hello\rworld', 'CR'), ('hello\x1b[Hworld', 'cursor_home'), ('hello\x1b[Aworld', 'cursor_up'), + ('hello\x1b[5Gworld', 'hpa'), ] @@ -61,11 +64,11 @@ STRICT_ALLOWED_CASES = [ ('hello\x07world', 10, 'BEL'), ('hello\x00world', 10, 'NUL'), ('abc\bd', 3, 'backspace'), - ('abc\rxy', 3, 'CR'), ('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'), ('a\x1b[2Cb', 4, 'cursor_right'), + ('ab\x1b[Db', 2, 'cursor_left'), ('\x1b', 0, 'lone_ESC'), - ('a\x1bb', 2, 'lone_ESC_between'), + ('a\x1bb', 1, 'fs_sequence_between'), ('\x1b!', 1, 'ESC_unrecognized'), ] @@ -88,6 +91,7 @@ STRICT_INDETERMINATE_SEQUENCES = [ ('\x1b[1X', 'erase_chars'), ('\x1b[1S', 'parm_index'), ('\x1b[1T', 'parm_rindex'), + ('\x1bc', 'full_reset'), ] @@ -106,6 +110,11 @@ PARSE_MODE_CASES = [ ('abcd\x1b[2De', 4, 'cursor_left'), ('\x1b[31mred\x1b[0m', 3, 'SGR'), ('ab\x1b[Hcd', 4, 'indeterminate'), + ('def\x1b[3Dabc', 3, 'cursor_left_overwrite'), + ('def\x1b[10Dabc', 3, 'cursor_left_past_start'), + ('abc\x1b[5Gde', 6, 'hpa_parse'), + ('abc\x1b[Gde', 3, 'hpa_no_param'), + ('\x1b[5Gabc', 7, 'hpa_before_text'), ] @@ -190,29 +199,26 @@ def test_vs16_selector(): def test_zwj_with_non_emoji_chars(): - """ZWJ with non-emoji characters and trailing VS16.""" - # ZWJ (Zero Width Joiner) skips both itself and the following character, treating them as a - # failed emoji ZWJ sequence. When followed by VS16, the VS16 should NOT apply to the earlier - # emoji because VS16 must immediately follow the character it modifies. - # - # In the full parse loop, VS16 checks `last_measured_idx == idx - 1` (immediate adjacency). - # The ZWJ+char skip means VS16 is not adjacent to the smiley, so VS16 has no effect. - # + """ + ZWJ with non-emoji characters and trailing VS16. + + These are invalid Unicode sequences (ZWJ followed by non-emoji), so behavior is implementation- + defined. The emoji base (smiley, width 1) is narrow, and VS16 looks back to it across the ZWJ- + consumed characters, adding 1 cell for a total width of 2. + """ # Control test, assert wcwidth.width("\u263A\uFE0F") == 2 # smiley + VS16 = 2 - # ZWJ followed by non-emoji, VS16 does not apply (not adjacent) - assert wcwidth.width("\u263A\u200Da\uFE0F") == 1 - assert wcwidth.width("\u263A\u200Dx\uFE0F") == 1 - assert wcwidth.width("\u263A\u200Da\u200Db\uFE0F") == 1 + # ZWJ followed by non-emoji: VS16 applies to the smiley base + assert wcwidth.width("\u263A\u200Da\uFE0F") == 2 + assert wcwidth.width("\u263A\u200Dx\uFE0F") == 2 + assert wcwidth.width("\u263A\u200Da\u200Db\uFE0F") == 2 # ZWJ at end of string assert wcwidth.width("\u263A\u200D") == 1 # smiley + ZWJ = 1 # Long strings (>20 chars) use fast path which routes to wcswidth(). - # wcswidth() has more lenient VS16 handling, causing VS16 to incorrectly apply (!) - # Multiply by 10 to exceed threshold: "\u263A\u200Da\uFE0F" (4 chars) * 10 = 40 chars - assert wcwidth.width("\u263A\u200Da\uFE0F" * 10) == 20 # (smiley(1) + ZWJ+a(0) + VS16(+1)) * 10 (!) + assert wcwidth.width("\u263A\u200Da\uFE0F" * 10) == 20 def test_vs16_after_control_chars(): @@ -228,10 +234,9 @@ def test_vs16_after_control_chars(): assert wcwidth.width("\u263A\x0d\uFE0F") == 1 # smiley(1) + CR(reset) + VS16(0), extent=1 # Long strings (>20 chars) use fast path which routes to wcswidth(). - # wcswidth() has more lenient VS16 handling (`last_measured_idx >= 0` vs `== idx - 1`), - # causing VS16 to incorrectly apply when separated by control chars (!) + # In ignore mode, BEL is stripped, so VS16 is adjacent to the smiley and applies correctly. # Multiply by 10 to exceed threshold - assert wcwidth.width(("\u263A\x07\uFE0F") * 10) == 20 # (smiley(1) + BEL(0) + VS16(+1)) * 10 (!) + assert wcwidth.width(("\u263A\x07\uFE0F") * 10) == 20 # (smiley(1) + BEL-stripped(0) + VS16(+1)) * 10 def test_width_long_horizontal_fastpath(): @@ -266,6 +271,42 @@ def test_carriage_return_resets_column(): assert wcwidth.width('abc\rde') == 3 +def test_carriage_return_strict_raises(): + """CR in strict mode raises ValueError (indeterminate starting column).""" + with pytest.raises(ValueError, match='Horizontal movement'): + wcwidth.width('hello\rworld', control_codes='strict') + + +def test_hpa_parse_best_effort(): + """HPA in parse mode assumes string begins at column 0.""" + assert wcwidth.width('abc\x1b[5Gde') == 6 + assert wcwidth.width('abc\x1b[Gde') == 3 + assert wcwidth.width('\x1b[10Ghi') == 11 + + +def test_hpa_strict_raises(): + """HPA in strict mode raises ValueError (indeterminate starting column).""" + with pytest.raises(ValueError, match='horizontal position'): + wcwidth.width('abc\x1b[5Gde', control_codes='strict') + + +def test_cursor_left_strict_out_of_bounds(): + """Cursor-left beyond string start raises ValueError in strict mode.""" + with pytest.raises(ValueError, match='Cursor left movement'): + wcwidth.width('a\x1b[5Da', control_codes='strict') + + +def test_cursor_left_out_of_bounds_parse_no_raise(): + """Cursor-left beyond string start is silently clamped in parse mode.""" + assert wcwidth.width('a\x1b[5Da') == 1 + assert wcwidth.width('abc\x1b[99Ddef') == 3 # 99D clamped to col 0, then b,c,d overwritten + + +def test_cursor_left_out_of_bounds_ignore_mode(): + """Cursor-left beyond string start is zero-width in ignore mode.""" + assert wcwidth.width('a\x1b[5Da', control_codes='ignore') == 2 + + def test_iter_sequences_lone_esc(): """Lone ESC is yielded as a sequence.""" assert list(wcwidth.iter_sequences('\x1b')) == [('\x1b', True)] @@ -449,3 +490,88 @@ def test_fitzpatrick_modifier_standalone_width(): """Standalone Fitzpatrick modifier, however, is wide character in width().""" result = wcwidth.width('\U0001F3FB') assert result == 2 + + +FS_SEQUENCE_CASES = [ + ('\x1bc', 'ris'), + ('\x1bl', 'memory_lock'), + ('\x1bm', 'memory_unlock'), + ('\x1bn', 'ls2'), + ('\x1bo', 'ls3'), + ('\x1b|', 'ls3r'), + ('\x1b}', 'ls2r'), + ('\x1b~', 'ls1r'), +] + + [email protected]('seq,name', FS_SEQUENCE_CASES) +def test_fs_sequences_matched(seq, name): + """Fs (independent function) sequences are matched as zero-width.""" + segments = list(wcwidth.iter_sequences(seq)) + assert segments == [(seq, True)] + assert wcwidth.width(seq) == 0 + + +FP_SEQUENCE_CASES = [ + ('\x1b7', 'decsc'), + ('\x1b8', 'decrc'), + ('\x1b=', 'deckpam'), + ('\x1b>', 'deckpnm'), + ('\x1b0', 'fp_0'), + ('\x1b1', 'fp_1'), + ('\x1b9', 'fp_9'), +] + + [email protected]('seq,name', FP_SEQUENCE_CASES) +def test_fp_sequences_matched(seq, name): + """Fp (private use) sequences are matched as zero-width.""" + segments = list(wcwidth.iter_sequences(seq)) + assert segments == [(seq, True)] + assert wcwidth.width(seq) == 0 + + +NF_SEQUENCE_CASES = [ + ('\x1b F', 's7c1t'), + ('\x1b G', 's8c1t'), + ('\x1b#3', 'decdhl_top'), + ('\x1b#4', 'decdhl_bottom'), + ('\x1b#5', 'decswl'), + ('\x1b#6', 'decdwl'), + ('\x1b#8', 'decaln'), + ('\x1b%G', 'utf8_designate'), + ('\x1b%@', 'iso2022_return'), +] + + [email protected]('seq,name', NF_SEQUENCE_CASES) +def test_nf_sequences_matched(seq, name): + """NF (multi-byte) escape sequences are matched as zero-width.""" + segments = list(wcwidth.iter_sequences(seq)) + assert segments == [(seq, True)] + assert wcwidth.width(seq) == 0 + + +def test_fs_sequence_embedded_in_text(): + """Fs sequence surrounded by text is correctly segmented.""" + segments = list(wcwidth.iter_sequences('abc\x1bcdef')) + assert segments == [('abc', False), ('\x1bc', True), ('def', False)] + assert wcwidth.width('abc\x1bcdef') == 6 + + +def test_nf_sequence_embedded_in_text(): + """NF sequence surrounded by text is correctly segmented.""" + segments = list(wcwidth.iter_sequences('abc\x1b#8def')) + assert segments == [('abc', False), ('\x1b#8', True), ('def', False)] + assert wcwidth.width('abc\x1b#8def') == 6 + + +def test_screen_title_sequences(): + """Screen/tmux title sequence ESC k hello ST.""" + segments = list(wcwidth.iter_sequences('\x1bkhello\x1b\\')) + assert segments[0] == ('\x1bk', True) + + +def test_ris_indeterminate(): + """RIS (ESC c) is flagged as indeterminate effect.""" + assert INDETERMINATE_EFFECT_SEQUENCE.match('\x1bc') |
