diff options
| author | robot-piglet <[email protected]> | 2026-05-19 07:26:19 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2026-05-19 07:47:21 +0300 |
| commit | 5bf2e428d5a1e8e8cef995ebb1857890e73d55fd (patch) | |
| tree | 55ed4b4e95a17bef5df0bfea947e3ee510a983f4 /contrib/python/wcwidth/py3/tests | |
| parent | 9f85ff287c0afb40d523e9261e77f27cd168cc47 (diff) | |
Intermediate changes
commit_hash:ce1258717e3f2e41b5b19e40e84850b1db4aa361
Diffstat (limited to 'contrib/python/wcwidth/py3/tests')
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/conftest.py | 2 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_ambiguous.py | 1 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_benchmarks.py | 179 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_clip.py | 234 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_clip_cjk_emoji.py | 47 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_clip_overtyping.py | 159 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_core.py | 48 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_emojis.py | 1 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_grapheme.py | 1 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_hyperlink.py | 75 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_justify.py | 1 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_sgr_state.py | 1 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_text_sizing.py | 327 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_textwrap.py | 108 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_ucslevel.py | 1 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_width.py | 170 |
16 files changed, 1221 insertions, 134 deletions
diff --git a/contrib/python/wcwidth/py3/tests/conftest.py b/contrib/python/wcwidth/py3/tests/conftest.py index 2d0a2779dbe..ecbbdc876e0 100644 --- a/contrib/python/wcwidth/py3/tests/conftest.py +++ b/contrib/python/wcwidth/py3/tests/conftest.py @@ -1,4 +1,5 @@ """Pytest configuration and fixtures.""" + # 3rd party import pytest @@ -10,6 +11,7 @@ except ImportError: @pytest.fixture def benchmark(): """No-op benchmark fixture for environments without pytest-codspeed.""" + def _passthrough(func, *args, **kwargs): return func(*args, **kwargs) return _passthrough diff --git a/contrib/python/wcwidth/py3/tests/test_ambiguous.py b/contrib/python/wcwidth/py3/tests/test_ambiguous.py index 0c61cdacf9c..20ed4d7d1b3 100644 --- a/contrib/python/wcwidth/py3/tests/test_ambiguous.py +++ b/contrib/python/wcwidth/py3/tests/test_ambiguous.py @@ -1,4 +1,5 @@ """Tests for ambiguous_width parameter.""" + # 3rd party import pytest diff --git a/contrib/python/wcwidth/py3/tests/test_benchmarks.py b/contrib/python/wcwidth/py3/tests/test_benchmarks.py index be940ccc8a1..80c9be01de6 100644 --- a/contrib/python/wcwidth/py3/tests/test_benchmarks.py +++ b/contrib/python/wcwidth/py3/tests/test_benchmarks.py @@ -1,4 +1,5 @@ """Performance benchmarks for wcwidth module.""" + # std imports import os import sys @@ -10,7 +11,7 @@ import pytest # local import wcwidth -_wcwidth_module = sys.modules['wcwidth.wcwidth'] +_width_module = sys.modules['wcwidth._width'] def test_wcwidth_ascii(benchmark): @@ -292,6 +293,149 @@ def test_clip_complex_sgr(benchmark): benchmark(wcwidth.clip, text, 6, 11) +def test_clip_long_cjk_past_window(benchmark): + """Benchmark clip() with long CJK text, narrow window (early-exit path).""" + text = '中文测试字符串' * 100 # 700 chars, no escape sequences + benchmark(wcwidth.clip, text, 0, 50) + + +def test_clip_dense_ansi_past_window(benchmark): + """Benchmark clip() with dense ANSI sequences past clip window (SGR tracking).""" + text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50 + benchmark(wcwidth.clip, text, 6, 30) + + +def test_clip_dense_ansi_no_propagate(benchmark): + """Benchmark clip() with dense ANSI sequences, SGR propagation disabled.""" + text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50 + benchmark(wcwidth.clip, text, 6, 30, propagate_sgr=False) + + +def test_clip_osc8_hyperlinks(benchmark): + """Benchmark clip() with OSC 8 hyperlinks (hyperlink parsing path).""" + text = '\x1b]8;;http://example.com\x07Click Here\x1b]8;;\x07 ' * 20 + benchmark(wcwidth.clip, text, 0, 80) + + +def test_width_osc66(benchmark): + """Benchmark width() with OSC 66 text sizing sequences.""" + text = '\x1b]66;w=2;XY\x07\x1b]66;s=3;ABC\x07' + benchmark(wcwidth.width, text) + + +def test_clip_osc66(benchmark): + """Benchmark clip() with OSC 66 text sizing sequences.""" + text = '\x1b]66;w=2;XY\x07\x1b]66;s=3;ABC\x07' + benchmark(wcwidth.clip, text, 3, 8) + + +def test_clip_cursor_cr_overwrite(benchmark): + """Benchmark clip() with carriage-return overwrite (painter path).""" + text = 'hello\rworld ' * 20 + benchmark(wcwidth.clip, text, 0, 50) + + +def test_clip_cursor_csi_backward(benchmark): + """Benchmark clip() with CSI cursor-backward sequences (painter path).""" + text = 'hello\x1b[2Dxy ' * 20 + benchmark(wcwidth.clip, text, 0, 40) + + +def test_clip_long_ascii_fastpath(benchmark): + """Benchmark clip() with long ASCII string (fast-path slice).""" + text = 'hello world ' * 1000 + benchmark(wcwidth.clip, text, 500, 600) + + +def test_clip_with_ansi_no_overtype(benchmark): + """Benchmark clip() with ANSI sequences, overtyping disabled.""" + text = '\x1b[31m中文字\x1b[0m' + benchmark(wcwidth.clip, text, 0, 3, overtyping=False) + + +def test_clip_complex_sgr_no_overtype(benchmark): + """Benchmark clip() with complex SGR, overtyping disabled.""" + text = '\x1b[1;38;5;208mHello world text\x1b[0m' + benchmark(wcwidth.clip, text, 6, 11, overtyping=False) + + +def test_clip_dense_ansi_no_overtype(benchmark): + """Benchmark clip() with dense ANSI, overtyping disabled.""" + text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50 + benchmark(wcwidth.clip, text, 6, 30, overtyping=False) + + +def test_clip_dense_ansi_no_propagate_no_overtype(benchmark): + """Benchmark clip() with dense ANSI, SGR propagation and overtyping disabled.""" + text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50 + benchmark(wcwidth.clip, text, 6, 30, propagate_sgr=False, overtyping=False) + + +def test_clip_dense_ansi_overtype(benchmark): + """Benchmark clip() with dense ANSI, overtyping forced (painter path).""" + text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50 + benchmark(wcwidth.clip, text, 6, 30, overtyping=True) + + +def test_clip_long_cjk_overtype(benchmark): + """Benchmark clip() with long CJK, overtyping forced (painter path).""" + text = '中文测试字符串' * 100 + benchmark(wcwidth.clip, text, 0, 50, overtyping=True) + + +def test_width_dense_ansi_control_codes_ignore(benchmark): + """Benchmark width() with dense ANSI and control_codes='ignore'.""" + text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50 + benchmark(wcwidth.width, text, control_codes='ignore') + + +def test_width_complex_ansi_control_codes_ignore(benchmark): + """Benchmark width() with complex ANSI and control_codes='ignore'.""" + text = '\x1b[38;2;255;150;100mWARN\x1b[0m: \x1b[1mBold\x1b[0m \x1b[4mUnderline\x1b[0m' + benchmark(wcwidth.width, text, control_codes='ignore') + + +def test_clip_dense_ansi_control_codes_ignore(benchmark): + """Benchmark clip() with dense ANSI, control_codes='ignore' (skips painter/OSC).""" + text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50 + benchmark(wcwidth.clip, text, 6, 30, control_codes='ignore') + + +def test_clip_long_cjk_control_codes_ignore(benchmark): + """Benchmark clip() with long CJK and control_codes='ignore' (early-exit path).""" + text = '中文测试字符串' * 100 + benchmark(wcwidth.clip, text, 0, 50, control_codes='ignore') + + +def test_clip_cursor_cr_control_codes_ignore(benchmark): + """Benchmark clip() with CR overwrite and control_codes='ignore' (painter skipped).""" + text = 'hello\rworld ' * 20 + benchmark(wcwidth.clip, text, 0, 50, control_codes='ignore') + + +def test_clip_dense_ansi_no_propagate_control_codes_ignore(benchmark): + """Benchmark clip() with dense ANSI, propagate_sgr=False and control_codes='ignore'.""" + text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50 + benchmark(wcwidth.clip, text, 6, 30, propagate_sgr=False, control_codes='ignore') + + +def test_clip_long_ascii_control_codes_ignore(benchmark): + """Benchmark clip() with long ASCII and control_codes='ignore' (fast-path slice).""" + text = 'hello world ' * 1000 + benchmark(wcwidth.clip, text, 500, 600, control_codes='ignore') + + +def test_wrap_with_ansi_control_codes_ignore(benchmark): + """Benchmark wrap() with ANSI sequences and control_codes='ignore'.""" + text = '\x1b[31mThe quick brown fox jumps over the lazy dog.\x1b[0m Did it really? ' * 20 + benchmark(wcwidth.wrap, text, 40, control_codes='ignore') + + +def test_ljust_ascii_control_codes_ignore(benchmark): + """Benchmark ljust() with ASCII and control_codes='ignore'.""" + benchmark(wcwidth.ljust, 'hello', 20, control_codes='ignore') + + def test_propagate_sgr_multiline(benchmark): """Benchmark propagate_sgr() with multiple lines.""" lines = ['\x1b[1;31mline one', 'line two', 'line three\x1b[0m'] @@ -327,7 +471,7 @@ def test_iter_sequences_mixed(benchmark): benchmark(lambda: list(wcwidth.iter_sequences(text))) -# Brahmic script benchmarks — text with virama conjuncts +# Brahmic script benchmarks -- text with virama conjuncts BRAHMIC_DEVANAGARI = 'हिन्दी भाषा में लिखा गया पाठ है। क्षत्रिय स्त्री ' * 20 BRAHMIC_BENGALI = 'বাংলা ভাষায় লেখা একটি পাঠ। বাঙ্গালী ভাষা ' * 20 @@ -374,33 +518,48 @@ _udhr_skip = pytest.mark.skipif( reason=f"{os.path.basename(UDHR_FILE)} is missing; run bin/update-tables.py", ) +_py38_skip_pedantic = pytest.mark.skipif( + sys.version_info[:2] < (3, 9), + reason='benchmark.pedantic() not supported in python 3.8 or earlier') + @_udhr_skip +@_py38_skip_pedantic def test_wrap_udhr(benchmark): """Benchmark wrap() with multilingual UDHR text.""" + if not hasattr(benchmark, 'pedantic'): + pytest.skip('pytest-codspeed not installed') result = benchmark.pedantic(wcwidth.wrap, args=(UDHR_TEXT, 80), rounds=1, iterations=1) assert len(result) assert all(0 <= wcwidth.width(_l) <= 80 for _l in result) @_udhr_skip +@_py38_skip_pedantic def test_width_udhr(benchmark): """Benchmark width() with multilingual UDHR text.""" + if not hasattr(benchmark, 'pedantic'): + pytest.skip('pytest-codspeed not installed') result = benchmark.pedantic(wcwidth.width, args=(UDHR_TEXT,), rounds=1, iterations=1) assert result > 0 @_udhr_skip +@_py38_skip_pedantic def test_width_udhr_lines(benchmark): """Benchmark width() on individual UDHR lines.""" + if not hasattr(benchmark, 'pedantic'): + pytest.skip('pytest-codspeed not installed') result = benchmark.pedantic(lambda: sum(wcwidth.width(line) for line in UDHR_LINES), rounds=1, iterations=1) assert result > 0 @_udhr_skip +@_py38_skip_pedantic def test_width_wcswidth_consistency_udhr(benchmark): """Verify width() and wcswidth() agree for printable multilingual text.""" + def check(): failures = [] for line in UDHR_LINES: @@ -411,30 +570,38 @@ def test_width_wcswidth_consistency_udhr(benchmark): if w != wcs: failures.append((line[:60], w, wcs)) return failures + if not hasattr(benchmark, 'pedantic'): + pytest.skip('pytest-codspeed not installed') failures = benchmark.pedantic(check, rounds=1, iterations=1) assert not failures @_udhr_skip +@_py38_skip_pedantic def test_width_fastpath_integrity_udhr(benchmark): """Verify width() produces identical results with and without the fast path.""" - saved = _wcwidth_module._WIDTH_FAST_PATH_MIN_LEN + saved = _width_module._WIDTH_FAST_PATH_MIN_LEN def check(): - _wcwidth_module._WIDTH_FAST_PATH_MIN_LEN = 0 + _width_module._WIDTH_FAST_PATH_MIN_LEN = 0 fast_total = sum(wcwidth.width(line) for line in UDHR_LINES) - _wcwidth_module._WIDTH_FAST_PATH_MIN_LEN = 999_999 + _width_module._WIDTH_FAST_PATH_MIN_LEN = 999_999 parse_total = sum(wcwidth.width(line) for line in UDHR_LINES) return fast_total, parse_total + if not hasattr(benchmark, 'pedantic'): + pytest.skip('pytest-codspeed not installed') fast_total, parse_total = benchmark.pedantic(check, rounds=1, iterations=1) - _wcwidth_module._WIDTH_FAST_PATH_MIN_LEN = saved + _width_module._WIDTH_FAST_PATH_MIN_LEN = saved assert fast_total == parse_total @_udhr_skip +@_py38_skip_pedantic def test_ljust_udhr_lines(benchmark): """Benchmark ljust() on UDHR lines.""" + if not hasattr(benchmark, 'pedantic'): + pytest.skip('pytest-codspeed not installed') benchmark.pedantic(lambda: [wcwidth.ljust(line, w + 1, UDHR_FILLCHAR) for line, w in zip(UDHR_LINES, UDHR_WIDTHS)], rounds=1, iterations=1) diff --git a/contrib/python/wcwidth/py3/tests/test_clip.py b/contrib/python/wcwidth/py3/tests/test_clip.py index 995d383a8ac..8ab3f1d24e8 100644 --- a/contrib/python/wcwidth/py3/tests/test_clip.py +++ b/contrib/python/wcwidth/py3/tests/test_clip.py @@ -1,4 +1,5 @@ """Tests for clip() and strip_sequences() functions.""" + # 3rd party import pytest @@ -23,7 +24,7 @@ STRIP_SEQUENCES_CASES = [ ('\x1b[1m\U0001F468\u200D\U0001F469\u200D\U0001F467\x1b[0m', '\U0001F468\u200D\U0001F469\u200D\U0001F467'), ('\x1b', '\x1b'), - ('a\x1bb', 'a\x1bb'), + ('a\x1bb', 'a'), ('\x1b[', ''), ('text\x1b[mmore', 'textmore'), ] @@ -114,26 +115,162 @@ def test_clip_sequences_after_end(): # With propagate_sgr=True (default), no style active at start, so no prefix assert clip('hello\x1b[31m world\x1b[0m', 0, 5) == 'hello' # With propagate_sgr=False, all sequences preserved - assert clip('hello\x1b[31m world\x1b[0m', 0, 5, propagate_sgr=False) == 'hello\x1b[31m\x1b[0m' + assert repr(clip('hello\x1b[31m world\x1b[0m', 0, 5, propagate_sgr=False)) == repr('hello\x1b[31m\x1b[0m') def test_clip_sequences_multiple(): # With propagate_sgr=True (default), sequences collapsed to minimal assert clip('\x1b[1m\x1b[31mbold red\x1b[0m', 0, 4) == '\x1b[1;31mbold\x1b[0m' # With propagate_sgr=False, all sequences preserved separately - assert clip('\x1b[1m\x1b[31mbold red\x1b[0m', 0, 4, propagate_sgr=False) == '\x1b[1m\x1b[31mbold\x1b[0m' + assert repr(clip('\x1b[1m\x1b[31mbold red\x1b[0m', 0, 4, propagate_sgr=False)) == repr('\x1b[1m\x1b[31mbold\x1b[0m') def test_clip_sequences_only(): # With propagate_sgr=True (default), no visible text means empty result assert clip('\x1b[31m\x1b[0m', 0, 10) == '' # With propagate_sgr=False, sequences preserved - assert clip('\x1b[31m\x1b[0m', 0, 10, propagate_sgr=False) == '\x1b[31m\x1b[0m' + assert repr(clip('\x1b[31m\x1b[0m', 0, 10, propagate_sgr=False)) == repr('\x1b[31m\x1b[0m') def test_clip_sequences_osc_hyperlink(): - assert clip('\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 0, 4) == \ + assert repr(clip('\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 0, 4)) == repr( '\x1b]8;;https://example.com\x07link\x1b]8;;\x07' + ) + + +# OSC 8 hyperlink clipping + +OSC_START_BEL = '\x1b]8;;http://example.com\x07' +OSC_END_BEL = '\x1b]8;;\x07' +OSC_START_ST = '\x1b]8;;http://example.com\x1b\\' +OSC_END_ST = '\x1b]8;;\x1b\\' + + +CLIP_HYPERLINK_CASES = [ + # Full hyperlink visible -- preserved as-is + (f'{OSC_START_BEL}link{OSC_END_BEL}', 0, 4, + f'{OSC_START_BEL}link{OSC_END_BEL}'), + # Clipping middle of hyperlink text -- rebuild around clipped inner text + (f'{OSC_START_BEL}Click This link{OSC_END_BEL}', 6, 10, + f'{OSC_START_BEL}This{OSC_END_BEL}'), + # Clipping from start -- only first portion + (f'{OSC_START_BEL}Click This{OSC_END_BEL}', 0, 5, + f'{OSC_START_BEL}Click{OSC_END_BEL}'), + # Clipping from end -- only last portion + (f'{OSC_START_BEL}Click This{OSC_END_BEL}', 6, 10, + f'{OSC_START_BEL}This{OSC_END_BEL}'), + # Hyperlink entirely before clip window -- dropped + (f'{OSC_START_BEL}link{OSC_END_BEL}world', 0, 4, + f'{OSC_START_BEL}link{OSC_END_BEL}'), + # Hyperlink entirely after clip window -- dropped + (f'hello{OSC_START_BEL}link{OSC_END_BEL}', 0, 5, 'hello'), + # Hyperlink clipped to nothing -- empty hyperlink dropped + (f'{OSC_START_BEL}link{OSC_END_BEL}', 5, 10, ''), + # Empty hyperlink (no inner text) -- dropped + (f'before{OSC_START_BEL}{OSC_END_BEL}after', 0, 11, 'beforeafter'), + # Hyperlink with CJK text clipped + (f'{OSC_START_BEL}中文文字{OSC_END_BEL}', 0, 4, + f'{OSC_START_BEL}中文{OSC_END_BEL}'), + # Hyperlink with CJK text clipped at odd column + (f'{OSC_START_BEL}中文文字{OSC_END_BEL}', 0, 3, + f'{OSC_START_BEL}中 {OSC_END_BEL}'), + # Hyperlink with ST terminator + (f'{OSC_START_ST}Click This{OSC_END_ST}', 0, 5, + f'{OSC_START_ST}Click{OSC_END_ST}'), + # Multiple non-overlapping hyperlinks + (f'{OSC_START_BEL}ab{OSC_END_BEL} {OSC_START_ST}cd{OSC_END_ST}', 0, 5, + f'{OSC_START_BEL}ab{OSC_END_BEL} {OSC_START_ST}cd{OSC_END_ST}'), + # Hyperlink with params preserved + ('\x1b]8;id=myid;http://example.com\x07link\x1b]8;;\x07', 1, 3, + '\x1b]8;id=myid;http://example.com\x07in\x1b]8;;\x07'), + # Hyperlink text before clip window, hyperlink within + (f'before{OSC_START_BEL}link{OSC_END_BEL}', 6, 10, + f'{OSC_START_BEL}link{OSC_END_BEL}'), + # SGR inside hyperlink is preserved + (f'{OSC_START_BEL}\x1b[31mred link\x1b[0m{OSC_END_BEL}', 4, 8, + f'{OSC_START_BEL}\x1b[31mlink\x1b[0m{OSC_END_BEL}'), + # Hyperlink open without matching close -- preserved as regular sequence + ('\x1b]8;;http://example.com\x07link', 0, 4, '\x1b]8;;http://example.com\x07link'), + # Bare ESC between hyperlink markers + ('\x1b]8;;url\x07ab\x1bxcd\x1b]8;;\x07', 0, 6, + '\x1b]8;;url\x07ab\x1bxcd\x1b]8;;\x07'), + # Per OSC 8 spec "A note on opening/closing hyperlinks": terminal + # emulators treat hyperlinks as a state attribute, not nested anchors. + # Opening a new hyperlink replaces the current one; a single close + # terminates the hyperlink regardless of how many opens preceded it. + # + # Two opens, one close: URL "b" replaces "a", close terminates. + ('\x1b]8;;a\x07AB\x1b]8;;b\x07CD\x1b]8;;\x07EF', 0, 6, + '\x1b]8;;a\x07AB\x1b]8;;b\x07CD\x1b]8;;\x07EF'), + # URL switch without closing: "b" replaces "a", no close in input. + ('\x1b]8;;a\x07AB\x1b]8;;b\x07CD', 0, 4, + '\x1b]8;;a\x07AB\x1b]8;;b\x07CD'), + # Multiple opens, close, bare close: "b" replaces "a", first close + # terminates, trailing close is harmless (closing when not open). + ('\x1b]8;;a\x07ABCD \x1b]8;;b\x07XY\x1b]8;;\x07 EF\x1b]8;;\x07', 0, 10, + '\x1b]8;;a\x07ABCD \x1b]8;;b\x07XY\x1b]8;;\x07 EF\x1b]8;;\x07'), +] + + [email protected]('text,start,end,expected', CLIP_HYPERLINK_CASES) +def test_clip_osc_hyperlink_text_clipping(text, start, end, expected): + """OSC 8 hyperlink inner text is clipped and hyperlink rebuilt.""" + assert repr(clip(text, start, end)) == repr(expected) + + +# Control_codes variants with cursor movement into hyperlink +# +# Overwriting hyperlink cells causes corrupted "run on" hyperlinks in practical +# testing with kitty, presumably the hidden "end hyperlink" sequence is +# overwritten, in any case, we make no attempt to parse overwrite of +# hyperlinks, we consider it a "glitch sequence +_HLINK_OVERWRITE = f'{OSC_START_BEL}link{OSC_END_BEL}\x1b[2Dxy' +CLIP_HYPERLINK_CONTROL_CODES_CASES = [ + ('parse', 0, 4, f'{OSC_START_BEL}link{OSC_END_BEL}'), + ('parse', 0, 3, f'{OSC_START_BEL}lin{OSC_END_BEL}'), + ('parse', 0, 2, f'{OSC_START_BEL}li{OSC_END_BEL}'), + ('parse', 0, 1, f'{OSC_START_BEL}l{OSC_END_BEL}'), + # these next two are certainly "in error" + ('parse', 1, 4, f'{OSC_START_BEL}ink{OSC_END_BEL}y'), + ('parse', 1, 3, f'{OSC_START_BEL}in{OSC_END_BEL}x'), + ('parse', 1, 2, f'{OSC_START_BEL}i{OSC_END_BEL}'), + ('ignore', 0, 20, f'{_HLINK_OVERWRITE}'), + # and these two, 'xy' are missing entirely, also "in error" + ('parse', 0, 20, f'{OSC_START_BEL}link{OSC_END_BEL}'), + ('strict', 0, 20, f'{OSC_START_BEL}link{OSC_END_BEL}'), +] + + [email protected]('control_codes,start,end,expected', + CLIP_HYPERLINK_CONTROL_CODES_CASES) +def test_clip_hyperlink_control_codes_overwrite(control_codes, start, end, expected): + assert repr(clip(_HLINK_OVERWRITE, start, end, control_codes=control_codes)) == repr(expected) + + +# Painter-path hyperlink edge cases +CLIP_HYPERLINK_PAINTER_CASES = [ + # Empty hyperlink dropped + (f'\x1b[2D{OSC_START_BEL}{OSC_END_BEL}xy', 'parse', 0, 4, 'xy'), + # Hyperlink entirely after clip window -- skipped + (f'\x1b[2Dab{OSC_START_BEL}cde{OSC_END_BEL}', 'parse', 0, 2, 'ab'), + # Hyperlink entirely before clip window -- skipped + (f'{OSC_START_BEL}ab{OSC_END_BEL}\x1b[2Dcdef', 'parse', 2, 4, 'ef'), + # Hyperlink overlapping clip window -- clipped + (f'\x1b[2D{OSC_START_BEL}abcdef{OSC_END_BEL}', 'parse', 0, 3, + f'{OSC_START_BEL}abc{OSC_END_BEL}'), + # Bare ESC inside hyperlink in painter path + (f'\x1b[2D{OSC_START_BEL}a\x1bb{OSC_END_BEL}', 'parse', 0, 4, + f'{OSC_START_BEL}a\x1bb{OSC_END_BEL}'), + # strict mode: non-hyperlink cells don't overlap hyperlink_cells + (f'{OSC_START_BEL}link{OSC_END_BEL}\x1b[5Chi', 'strict', 0, 11, + f'{OSC_START_BEL}link{OSC_END_BEL} hi'), +] + + [email protected]('text,control_codes,start,end,expected', + CLIP_HYPERLINK_PAINTER_CASES) +def test_clip_hyperlink_painter_cases(text, control_codes, start, end, expected): + assert repr(clip(text, start, end, control_codes=control_codes)) == repr(expected) def test_clip_sequences_cjk_with_sequences(): @@ -148,7 +285,7 @@ def test_clip_sequences_between_chars(): assert clip('a\x1b[31mb\x1b[0mc', 1, 2) == '\x1b[31mb\x1b[0m' -def test_clip_sequences_lone_esc(): +def test_clip_sequences_fs_escape(): assert clip('a\x1bb', 0, 2) == 'a\x1bb' @@ -230,12 +367,13 @@ def test_clip_tab_with_sequences(): CLIP_CONTROL_CHAR_CASES = [ - ('abc\bde', 0, 5, 'abc\bde'), - ('ab\acd', 0, 4, 'ab\acd'), + ('abc\bde', 0, 5, 'abde'), + ('ab\acd', 0, 4, 'ab\x07cd'), ('ab\x00cd', 0, 4, 'ab\x00cd'), - ('abc\rde', 0, 5, 'abc\rde'), - ('\a\b\rHello', 0, 5, '\a\b\rHello'), + ('abc\rde', 0, 5, 'dec'), + ('\a\b\rHello', 0, 5, '\x07Hello'), ('ab\x01\x02cd', 0, 4, 'ab\x01\x02cd'), + ('ab\x1b\x00cd', 0, 4, 'ab\x1b\x00cd'), ] @@ -244,19 +382,73 @@ def test_clip_control_chars_zero_width(text, start, end, expected): assert clip(text, start, end) == expected -CLIP_CURSOR_SEQUENCE_CASES = [ - ('ab\x1b[5Ccd', 0, 4, 'ab\x1b[5Ccd'), - ('abcde\x1b[2Df', 0, 6, 'abcde\x1b[2Df'), - ('ab\x1b[10Ccd', 0, 4, 'ab\x1b[10Ccd'), - ('ab\x1b[Ccd', 0, 4, 'ab\x1b[Ccd'), +def test_clip_tab_first_visible_with_sgr(): + """Tab as first visible character with SGR propagation.""" + assert clip('\x1b[31m\tb', 0, 4, tabsize=8) == '\x1b[31m \x1b[0m' + + +def test_clip_overtyping_override_by_control_codes_ignore(): + """When overtyping=True and control_codes='ignore', overtyping is overridden to False.""" + # elif entered: overtyping=True + control_codes='ignore' → overtyping=False + assert clip('hello world', 0, 5, overtyping=True, control_codes='ignore') == 'hello' + # Verify that overtyping is actually disabled: cursor movement chars are + # treated as zero-width, so the result is the same as without overtyping. + assert clip('ab\x08cd', 0, 4, overtyping=True, control_codes='ignore') == 'ab\x08cd' + + +def test_clip_overtyping_without_ignore(): + """When overtyping=True and control_codes='parse', elif is not entered.""" + # elif skipped: overtyping=True + control_codes='parse' → overtyping stays True + # The painter path is used, cursor movement sequences affect output. + assert clip('ab\x1b[2Dcd', 0, 4, overtyping=True, control_codes='parse') == 'cd' + + +# Indeterminate-effect sequences that raise ValueError in strict mode +# (matching width() behavior). These are not cursor-movement sequences, +# so they exercise the simple (non-overtyping) path. + +INDETERMINATE_SEQUENCES = [ + ('\x1b[K', 'erase_in_line'), + ('\x1b[2K', 'erase_in_line_params'), + ('\x1b[J', 'erase_in_display'), + ('\x1b[2J', 'erase_in_display_params'), + ('\x1b[H', 'cursor_home'), + ('\x1b[1;1H', 'cursor_address'), + ('\x1b[A', 'cursor_up'), + ('\x1b[2A', 'cursor_up_params'), + ('\x1b[B', 'cursor_down'), + ('\x1b[5B', 'cursor_down_params'), + ('\x1b[P', 'delete_character'), + ('\x1b[1P', 'parm_dch'), + ('\x1b[M', 'delete_line'), + ('\x1b[1M', 'parm_delete_line'), + ('\x1b[L', 'insert_line'), + ('\x1b[1L', 'parm_insert_line'), + ('\x1b[@', 'insert_character'), + ('\x1b[1X', 'erase_chars'), + ('\x1b[S', 'scroll_up'), + ('\x1b[T', 'scroll_down'), + ('\x1b[?1049h', 'enter_fullscreen'), + ('\x1b[?1049l', 'exit_fullscreen'), + ('\x1bD', 'scroll_forward'), + ('\x1bM', 'scroll_reverse'), + ('\x1b8', 'restore_cursor'), + ('\x1bc', 'full_reset'), ] [email protected]('text,start,end,expected', CLIP_CURSOR_SEQUENCE_CASES) -def test_clip_cursor_sequences_zero_width(text, start, end, expected): - assert clip(text, start, end) == expected [email protected]('seq,cap_name', INDETERMINATE_SEQUENCES) +def test_clip_strict_indeterminate_raises(seq, cap_name): + """Clip() strict mode raises ValueError on indeterminate-effect sequences.""" + with pytest.raises(ValueError, match='Indeterminate cursor sequence'): + clip(f'hello{seq}world', 0, 10, control_codes='strict') -def test_clip_tab_first_visible_with_sgr(): - """Tab as first visible character with SGR propagation.""" - assert clip('\x1b[31m\tb', 0, 4, tabsize=8) == '\x1b[31m \x1b[0m' [email protected]('seq,cap_name', INDETERMINATE_SEQUENCES) +def test_clip_parse_indeterminate_preserved(seq, cap_name): + """Clip() parse mode preserves indeterminate sequences as zero-width.""" + result = clip(f'hello{seq}world', 0, 10, control_codes='parse') + # The sequence is preserved, visible text is hello + world = 10 chars + assert 'hello' in result + assert 'world' in result + assert seq in result diff --git a/contrib/python/wcwidth/py3/tests/test_clip_cjk_emoji.py b/contrib/python/wcwidth/py3/tests/test_clip_cjk_emoji.py new file mode 100644 index 00000000000..e41bd627663 --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_clip_cjk_emoji.py @@ -0,0 +1,47 @@ +""" +Tests for clip() with CJK and Emoji characters. + +These ensure wide graphemes (CJK / emoji / ZWJ sequences) are clipped correctly: +- Partial columns of a wide grapheme are replaced by fillchar. +- Full grapheme included when fully inside slice. +""" + +# 3rd party +import pytest + +# local +from wcwidth import clip, width + + [email protected]("ch", [ + "中", + "🙂", + "👨\u200d👩\u200d👧", # family ZWJ + "👩\u200d👩\u200d👧" # another ZWJ variant +]) +def test_partial_and_full_wide_grapheme(ch): + w = width(ch) + assert w >= 1 + if w > 1: + # partial clip of first column -> fillchar + assert clip(ch, 0, 1) == ' ' + # full clip covering entire grapheme -> original grapheme + assert clip(ch, 0, w) == ch + # width of clipped full grapheme should match + assert width(clip(ch, 0, w)) == w + else: + # narrow grapheme: trivial + assert clip(ch, 0, 1) == ch + + +def test_mixed_cjk_emoji_sequence(): + text = 'A中🙂B' + total_w = width(text) + # sanity + assert total_w >= 4 + # pick a slice that includes the middle two columns (center of string) + # ensure clip doesn't raise and width matches requested slice + start = 1 + end = 4 + out = clip(text, start, end) + assert width(out) == (end - start) diff --git a/contrib/python/wcwidth/py3/tests/test_clip_overtyping.py b/contrib/python/wcwidth/py3/tests/test_clip_overtyping.py new file mode 100644 index 00000000000..1d106bae4bb --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_clip_overtyping.py @@ -0,0 +1,159 @@ +""" +Tests for clip()'s overtyping (painter) path. + +The painter algorithm is used when the text contains cursor movement sequences +(CSI n C/D, backspace, carriage return, HPA) that require column-level tracking +to determine the final visible output. Auto-detection of the overtyping path +happens in clip() via the presence of \\x08, \\r, or horizontal cursor movement +escape sequences, or can be forced with ``overtyping=True``. + +These tests codify expected visible results when cursor movement sequences +affect horizontal positions. +""" + +# 3rd party +import pytest + +# local +from wcwidth import clip + + [email protected]("text,start,end,kwargs,expected", [ + # Cursor-right introduces a gap that should be filled with spaces + ("hello\x1b[10Cworld", 0, 10, {}, "hello" + " " * 5), + # Clipping just the initial region ignores the later rightward write + ("hello\x1b[10Cworld", 0, 5, {}, "hello"), + # Cursor-left overwrites previous characters + ("hello\x1b[2DXY", 0, 5, {}, "helXY"), + # Cursor-left overwrites entire visible token + ("abc\x1b[3DXY", 0, 5, {}, "XYc"), + # Cursor-left at column 0 (prev_col not > col, no overwrite) + ("\x1b[2Dhi", 0, 2, {}, "hi"), + # Cursor-left with no visible tokens emitted + ("\x1b[5C\x1b[2Dhi", 5, 7, {}, ""), + # Cursor-left overwrites text, seq tokens preserve column spatial order + ("ab\x1b]8;;http://example.com\x07\x1b[2Dcd", 0, 4, {}, "cd\x1b]8;;http://example.com\x07"), + # Cursor-left into wide char twice, second time on empty token triggers i < 0 break + ("中\x1b[D\x1b[Da", 0, 4, {}, "a "), + ('ab\x1b[5Ccd', 0, 4, {}, 'ab '), + ('abcde\x1b[2Df', 0, 6, {}, 'abcfe'), + ('hello\x1b[5Dw', 0, 5, {}, 'wello'), + ('ab\x1b[10Ccd', 0, 4, {}, 'ab '), + ('XY\x1b[Czy', 0, 4, {}, 'XY z'), + ('XY\x1b[Czy', 0, 5, {}, 'XY zy'), + ('XY\x1b[Czy', 1, 3, {}, 'Y '), + ('XY\x1b[Czy', 1, 4, {}, 'Y z'), + ('LOL\x1b[5Clol', 0, 12, {}, 'LOL lol'), + ('LOL\x1b[5Clol', 1, 11, {}, 'OL lol'), + ('LOL\x1b[5Clol', 2, 11, {}, 'L lol'), + ('LOL\x1b[5Clol', 3, 11, {}, ' lol'), + ('LOL\x1b[5Clol', 4, 11, {}, ' lol'), + ('LOL\x1b[5Clol', 5, 11, {}, ' lol'), + ('LOL\x1b[5Clol', 6, 11, {}, ' lol'), + ('LOL\x1b[5Clol', 7, 11, {}, ' lol'), + ('LOL\x1b[5Clol', 8, 11, {}, 'lol'), + ('LOL\x1b[5Clol', 9, 11, {}, 'ol'), + # SGR + cursor movement: SGR state update in painter path (line 245) + ('\x1b[31mab\x1b[2Dcd', 0, 4, {}, '\x1b[31mcd\x1b[0m'), + # Tab tabsize=0 in painter path (line 272->280 else branch) + ('ab\x1b[2D\tcd', 0, 4, {'tabsize': 0}, '\tcd'), + # Zero-width grapheme outside clip window in painter (line 290->301) + ('\x1b[2D\u0301hello', 1, 4, {}, 'ell'), + # Wide char partially clipped in painter (lines 298-299) + ('ab\x1b[2D中d', 1, 4, {}, ' d'), + # walk_col >= end in painter reconstruction (327->328) + ('hello\x1b[2Dxy', 0, 3, {}, 'hel'), + # Hole fillchar in painter reconstruction (345->346) + ('\x1b[5Chi', 0, 7, {}, ' hi'), + # Trailing sequences stored at columns after col_limit (352, 354->355, 355->356) + ('abc\x1b[2D', 0, 2, {}, 'ab'), + # Bare ESC not part of any sequence, pass through in painter path (239->240) + ('a\x1bb\x1b[2Dc', 0, 3, {}, 'c\x1bb'), + # Tab with tabsize>0 in painter; `b` falls at col 4, inside (0,5) (277->284, 278->279, 278->280) + ('\x1b[2Da\tb', 0, 5, {'tabsize': 4}, 'a b'), + # propagate_sgr=False in painter path (225->226) + ('ab\x1b[2Dcd', 0, 4, {'propagate_sgr': False}, 'cd'), + # Non-SGR sequence before any visible text in painter (225->226 True) + ('\x1b]8;;http://example.com\x07ab\x1b[2Dcd', 0, 4, {}, '\x1b]8;;http://example.com\x07cd'), + # Bare ESC at end of text in painter (239->240) + ('ab\x1b[2D\x1b', 0, 2, {}, '\x1bab'), + # Wide char overwritten from right side (212 orphan fixup) + ('a中\x1b[Db', 0, 4, {}, 'a b'), + # Tab expansion with col+=1 not inside clip window (277->279, 293) + ('\x1b[2Ca\tb', 2, 4, {'tabsize': 8}, 'a '), + # CR: carriage return resets column to 0, overwriting earlier cells + ('aaa\r\r\rxxx', 0, 4, {}, 'xxx'), + ('abc\rXY', 0, 5, {}, 'XYc'), + ('hello\rworld', 0, 5, {}, 'world'), + # CR moves back to column 0 then writes within clip window + ('abc\rde', 1, 3, {}, 'ec'), + # BS: backspace overwrites previous character + ('abc\bde', 0, 5, {}, 'abde'), + ('abc\b\bXY', 0, 5, {}, 'aXY'), + ('ab\b\b\bXY', 0, 4, {}, 'XY'), + # HPA: horizontal position absolute (CSI n G) + ('abc\x1b[GXY', 0, 5, {}, 'XYc'), + ('abc\x1b[2GXY', 0, 5, {}, 'aXY'), + ('abc\x1b[5GXY', 0, 7, {}, 'abc XY'), + ('abc\x1b[5GXY', 0, 5, {}, 'abc X'), + ('\x1b[5GXY', 3, 7, {}, ' XY'), + # HPA no-param inside clip window + ('abc\x1b[GXY', 1, 4, {}, 'Yc'), + # walk_col >= end with sequences at column == end (line 351) + ('\x1b[5C\x1b]8;;http://example.com\x07', 0, 5, {'propagate_sgr': False}, ' \x1b]8;;http://example.com\x07'), + # Trailing sequences past col_limit (line 374) + ('\x1b[5C\x1b]8;;http://example.com\x07', 0, 3, {'propagate_sgr': False}, ' \x1b]8;;http://example.com\x07'), + # Lone ESC as first visible thing in painter (captured_style = current_style, line 398) + ('\x1b[D\x1b\x1bXy', 0, 3, {}, '\x1b\x1bXy'), + # Hyperlink VISIBLE after captured_style already set + ('a\x1b[C\x1b]8;;http://x\x07hi\x1b]8;;\x07', 0, 5, {}, 'a \x1b]8;;http://x\x07hi\x1b]8;;\x07'), + # Tab with tabsize=0 as first visible thing in painter + ('\x1b[D\tab', 0, 2, {'tabsize': 0}, '\tab'), + # Zero-width grapheme as first visible thing in painter + ('\x1b[D\u0301x', 0, 3, {}, '\u0301x'), + # Generic escape sequence as first visible in painter + ('\x1b[D\x1b[Hxy', 0, 3, {}, '\x1b[Hxy'), +]) +def test_clip_cursor_sequences_expected_behaviour(text, start, end, kwargs, expected): + """Verify clip() output matches terminal-visible columns after cursor moves.""" + result = clip(text, start, end, **kwargs) + assert repr(result) == repr(expected) + + +def test_clip_cursor_left_strict_out_of_bounds(): + """Clip() with control_codes='strict' raises on cursor-left beyond string start.""" + with pytest.raises(ValueError, match='Cursor left movement'): + clip('a\x1b[5Da', 0, 1, control_codes='strict') + + +def test_clip_cursor_left_strict_out_of_bounds_painter(): + """Clip() strict-mode raises on cursor-left beyond start in painter path.""" + with pytest.raises(ValueError, match='Cursor left movement'): + clip('\x1b[2Dab', 0, 2, control_codes='strict') + + +def test_clip_cursor_left_out_of_bounds_parse_no_raise(): + """Clip() parse mode silently clamps cursor-left beyond start.""" + assert clip('a\x1b[5Da', 0, 1) == 'a' + assert clip('ab\x1b[99Dcd', 0, 4) == 'cd' + + +def test_clip_strict_cr_allowed(): + """Carriage return is allowed in strict mode (text begins at column 0).""" + assert clip('hello\rworld', 0, 5, control_codes='strict') == 'world' + + +def test_clip_strict_hpa_allowed(): + """HPA is allowed in strict mode (text begins at column 0).""" + assert clip('abc\x1b[5Gde', 0, 10, control_codes='strict') == 'abc de' + + +def test_clip_strict_cursor_left_allowed(): + """Cursor-left within bounds is allowed in strict mode.""" + assert clip('hello\x1b[2Dxy', 0, 5, control_codes='strict') == 'helxy' + + +def test_clip_strict_indeterminate_sequence_painter(): + """Clip() strict-mode raises on indeterminate sequence in painter path.""" + with pytest.raises(ValueError, match='Indeterminate cursor sequence'): + clip('a\x1b[D\x1b[Hb', 0, 3, control_codes='strict') diff --git a/contrib/python/wcwidth/py3/tests/test_core.py b/contrib/python/wcwidth/py3/tests/test_core.py index 024dcdba05d..dd1e3b7d1b7 100644 --- a/contrib/python/wcwidth/py3/tests/test_core.py +++ b/contrib/python/wcwidth/py3/tests/test_core.py @@ -1,6 +1,5 @@ """Core tests for wcwidth module.""" # std imports -import sys import importlib.metadata # 3rd party @@ -8,9 +7,7 @@ import pytest # local import wcwidth - -_wcwidth_module = sys.modules['wcwidth.wcwidth'] -_WIDTH_FAST_PATH_MIN_LEN = _wcwidth_module._WIDTH_FAST_PATH_MIN_LEN +from wcwidth._width import _WIDTH_FAST_PATH_MIN_LEN def test_package_version(): @@ -68,9 +65,8 @@ def test_hello_jp(): """ Width of Japanese phrase: コンニチハ, セカイ! - Given a phrase of 5 and 3 Katakana ideographs, joined with - 3 English-ASCII punctuation characters, totaling 11, this - phrase consumes 19 cells of a terminal emulator. + Given a phrase of 5 and 3 Katakana ideographs, joined with 3 English-ASCII punctuation + characters, totaling 11, this phrase consumes 19 cells of a terminal emulator. """ # given, phrase = 'コンニチハ, セカイ!' @@ -90,8 +86,7 @@ def test_wcswidth_substr(): """ Test wcswidth() optional 2nd parameter, ``n``. - ``n`` determines at which position of the string - to stop counting length. + ``n`` determines at which position of the string to stop counting length. """ # given, phrase = 'コンニチハ, セカイ!' @@ -414,13 +409,8 @@ def test_bengali_nukta_mc(): @pytest.mark.parametrize("repeat", [1, _WIDTH_FAST_PATH_MIN_LEN]) def test_mc_width_consistency(repeat): - # width(), wcswidth(), and per-grapheme width sums must all agree. - # - # The repeat parameter ensures both the short (parse) and long (fast) code - # paths of width() are exercised. At repeat=1 the phrases are short enough - # to go through character-by-character parse mode. At repeat=_WIDTH_FAST_PATH_MIN_LEN - # every phrase exceeds the threshold and takes the fast path that delegates - # to wcswidth(). + """Check width() to wcswidth() consistency.""" + # repeat value 'WIDTH_FAST_PATH_MIN_LEN' ensures both "fast" and "slow" paths are taken phrases = [ "\u0915\u094D\u0937\u093F", "\u0b95\u0bcd\u0bb7\u0bcc", @@ -464,6 +454,11 @@ def test_virama_conjunct(phrase, expected): assert wcwidth.width(phrase) == expected +def test_zwj_at_end_of_string(): + """ZWJ at end of string (not after virama) is consumed with zero width.""" + assert wcwidth.wcswidth('a\u200D') == 1 + + def test_soft_hyphen(): # Test SOFT HYPHEN, category 'Cf' usually are zero-width, but most # implementations agree to draw it was '1' cell, visually @@ -493,3 +488,24 @@ def test_prepended_concatenation_mark_width(codepoint, name): """Prepended Concatenation Marks have width 1, not 0.""" # https://github.com/jquast/wcwidth/issues/119 assert wcwidth.wcwidth(chr(codepoint)) == 1 + + +def test_legacy_module(): + """Verify legacy ``wcwidth.wcwidth`` module's public items are importable.""" + # pylint: disable=import-outside-toplevel + # std imports + import sys + + # Access the legacy submodule via sys.modules (matching 0.6.0 where + # 'import wcwidth.wcwidth' returned the function, not the module). + _legacy = sys.modules['wcwidth.wcwidth'] + + for name in _legacy.__all__: + attr = getattr(_legacy, name) + assert attr is not None, f"wcwidth.wcwidth.{name} is None" + + # Verify that individual imports from the legacy path also work, + # e.g. 'from wcwidth.wcwidth import wcswidth' + for name in _legacy.__all__: + obj = getattr(_legacy, name) + assert obj is not None, f"could not import {name} from wcwidth.wcwidth" diff --git a/contrib/python/wcwidth/py3/tests/test_emojis.py b/contrib/python/wcwidth/py3/tests/test_emojis.py index c7ce359939a..fedeff3cffe 100644 --- a/contrib/python/wcwidth/py3/tests/test_emojis.py +++ b/contrib/python/wcwidth/py3/tests/test_emojis.py @@ -1,4 +1,5 @@ """Tests for emoji width measurement and ZWJ sequences.""" + # std imports import os diff --git a/contrib/python/wcwidth/py3/tests/test_grapheme.py b/contrib/python/wcwidth/py3/tests/test_grapheme.py index a02ac5e7529..de4489f8e7e 100644 --- a/contrib/python/wcwidth/py3/tests/test_grapheme.py +++ b/contrib/python/wcwidth/py3/tests/test_grapheme.py @@ -1,4 +1,5 @@ """Tests for grapheme cluster segmentation.""" + # std imports import os diff --git a/contrib/python/wcwidth/py3/tests/test_hyperlink.py b/contrib/python/wcwidth/py3/tests/test_hyperlink.py new file mode 100644 index 00000000000..7b083a82573 --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_hyperlink.py @@ -0,0 +1,75 @@ +"""Tests for OSC 8 hyperlink parsing.""" + +# 3rd party +import pytest + +# local +from wcwidth.hyperlink import Hyperlink, HyperlinkParams + +PARAMS_PARSE_VALID = [ + ('\x1b]8;;http://example.com\x07', 'http://example.com', '', '\x07'), + ('\x1b]8;id=a;http://example.com\x1b\\', 'http://example.com', 'id=a', '\x1b\\'), +] + + [email protected]('seq,url,params,term', PARAMS_PARSE_VALID) +def test_hyperlinkparams_parse_valid(seq, url, params, term): + """Parse a valid OSC 8 open sequence.""" + result = HyperlinkParams.parse(seq) + assert result is not None + assert result.url == url + assert result.params == params + assert result.terminator == term + + [email protected]('seq', [ + 'not an escape', + '\x1b[31m', + '', +]) +def test_hyperlinkparams_parse_invalid(seq): + """Parse an invalid/non-OSC-8 sequence returns None.""" + assert HyperlinkParams.parse(seq) is None + + +def test_hyperlinkparams_make_open(): + assert HyperlinkParams(url='http://example.com', params='id=a', terminator='\x07').make_open() == '\x1b]8;id=a;http://example.com\x07' + + +def test_hyperlinkparams_make_close(): + assert HyperlinkParams(url='http://example.com', terminator='\x07').make_close() == '\x1b]8;;\x07' + + +_HL = '\x1b]8;;http://example.com\x07Hello\x1b]8;;\x07' + + +def test_hyperlink_parse_valid(): + hl = Hyperlink.parse(_HL) + assert hl is not None + assert hl.text == 'Hello' + assert hl.params.url == 'http://example.com' + + [email protected]('text,start', [ + ('Hello world', 0), + ('\x1b[31mHello\x1b[0m', 0), # SGR, not OSC 8 + ('\x1b]8;;http://example.com\x07Hello', 0), # open without close +]) +def test_hyperlink_parse_returns_none(text, start): + assert Hyperlink.parse(text, start) is None + + +def test_hyperlink_find_close_not_found(): + assert Hyperlink.find_close('no escape here', 0) == (-1, -1) + + +def test_hyperlink_make_sequence(): + hl = Hyperlink.parse(_HL) + assert hl is not None + assert hl.make_sequence() == _HL + + +def test_hyperlink_display_width(): + hl = Hyperlink.parse(_HL) + assert hl is not None + assert hl.display_width() == 5 diff --git a/contrib/python/wcwidth/py3/tests/test_justify.py b/contrib/python/wcwidth/py3/tests/test_justify.py index 71dec6199b6..f2639e8ca41 100644 --- a/contrib/python/wcwidth/py3/tests/test_justify.py +++ b/contrib/python/wcwidth/py3/tests/test_justify.py @@ -1,4 +1,5 @@ """Tests for text justification functions.""" + # local from wcwidth import ljust, rjust, width, center diff --git a/contrib/python/wcwidth/py3/tests/test_sgr_state.py b/contrib/python/wcwidth/py3/tests/test_sgr_state.py index db9c8a9c94d..ecba402f237 100644 --- a/contrib/python/wcwidth/py3/tests/test_sgr_state.py +++ b/contrib/python/wcwidth/py3/tests/test_sgr_state.py @@ -1,4 +1,5 @@ """Tests for SGR state tracking and propagation.""" + from __future__ import annotations # std imports diff --git a/contrib/python/wcwidth/py3/tests/test_text_sizing.py b/contrib/python/wcwidth/py3/tests/test_text_sizing.py new file mode 100644 index 00000000000..b5e18085e35 --- /dev/null +++ b/contrib/python/wcwidth/py3/tests/test_text_sizing.py @@ -0,0 +1,327 @@ +"""Tests for Text Sizing Protocol (OSC 66) support.""" + +# 3rd party +import pytest + +# local +from wcwidth import (TextSizing, + TextSizingParams, + clip, + width, + wcswidth, + iter_sequences, + strip_sequences) +from wcwidth.text_sizing import TEXT_FIELD_MAPPING +from wcwidth.escape_sequences import TEXT_SIZING_PATTERN + +_W_HI = TEXT_FIELD_MAPPING['w'].high +_N_HI = TEXT_FIELD_MAPPING['n'].high +_D_HI = TEXT_FIELD_MAPPING['d'].high + +CONTROL_CODES_PARAMS_CASES = [ + ('x=2', "", "Unknown text sizing field 'x' in "), + ('s=3:x=3', "s=3", "Unknown text sizing field 'x' in "), + ('s=2:x=3:w=9', f"s=2:w={_W_HI}", "Unknown text sizing field 'x' in "), + ('xyz=2', "", "Unknown text sizing field 'xyz' in "), + ('xxx', "", "Expected '=' in text sizing parameter"), + ('s=xxx', "", "Illegal text sizing value 'xxx' in "), + ('s=-99', "", "Out of bounds text sizing value '-99' in "), + ('s=99', f"s={_W_HI}", "Out of bounds text sizing value '99' in "), + ('w=-1', "", "Out of bounds text sizing value '-1' in "), + ('w=8', f"w={_W_HI}", "Out of bounds text sizing value '8' in "), + ('n=20', f"n={_N_HI}", "Out of bounds text sizing value '20' in "), + ('d=99', f"d={_D_HI}", "Out of bounds text sizing value '99' in "), + ('v=5', "v=2", "Out of bounds text sizing value '5' in "), + ('h=3', "h=2", "Out of bounds text sizing value '3' in "), +] + + [email protected]('given_params,expected_remainder,expected_exc,', CONTROL_CODES_PARAMS_CASES) +def test_text_sizing_params_control_codes(given_params, expected_remainder, expected_exc): + """Verify control_codes='strict' and 'parse' behavior in TextSizingParams.from_params().""" + # assert control_codes='strict' raises expected exception, + with pytest.raises(ValueError) as exc_info: + TextSizingParams.from_params(given_params, control_codes='strict') + assert exc_info.value.args[0].startswith(expected_exc) + + # when 'parse' (default), any illegal argument or value is filtered, excluded, or clipped + params = TextSizingParams.from_params(given_params) + assert params.make_sequence() == expected_remainder + + [email protected]('given_params,expected_remainder,expected_exc,', CONTROL_CODES_PARAMS_CASES) +def test_text_sizing_width_control_codes(given_params, expected_remainder, expected_exc): + """Verify control_codes='strict' with invalid OSC 66 sequences in wciwdth.width().""" + seq1 = '\x1b]66;' + given_params + ';ABC' + '\x07' + seq2 = '\x1b]66;' + given_params + ';ABC' + '\x1b\\' + for seq in (seq1, seq2): + with pytest.raises(ValueError) as exc_info: + width(seq, control_codes='strict') + assert exc_info.value.args[0].startswith(expected_exc) + + [email protected]('params,expected_repr', [ + (TextSizingParams(), 'TextSizingParams()'), + (TextSizingParams(scale=2, width=1), 'TextSizingParams(scale=2, width=1)'), + (TextSizingParams(scale=2, width=3, numerator=1, denominator=2, + vertical_align=1, horizontal_align=2), + 'TextSizingParams(scale=2, width=3, numerator=1, denominator=2, ' + 'vertical_align=1, horizontal_align=2)'), +]) +def test_text_sizing_params_repr(params, expected_repr): + """Verify TextSizingParams.__repr__ output.""" + assert repr(params) == expected_repr + + [email protected]('params,text,expected_width', [ + # cases of static width=N values, + (TextSizingParams(scale=2, width=1), 'climclam', 2), + (TextSizingParams(scale=2, width=3), 'anything', 6), + (TextSizingParams(scale=1, width=5), '', 5), + (TextSizingParams(scale=3, width=1), 'x', 3), + # and automatic width (width=0) values, + (TextSizingParams(), '', 0), + (TextSizingParams(), 'AB', 2), + (TextSizingParams(), '中', 2), + (TextSizingParams(scale=2), 'AB', 4), + (TextSizingParams(scale=2), '中', 4), + (TextSizingParams(scale=3), '', 0), + (TextSizingParams(scale=7, width=7, numerator=15, denominator=15, + vertical_align=2, horizontal_align=2), 'x!yzzy', 49), +]) +def test_text_sizing_width(params, text, expected_width): + """Verify width using with both kinds of terminator.""" + # verify internal TextSizing.display_width() result, + assert TextSizing(params, text, terminator='\x07').display_width() == expected_width + assert TextSizing(params, text, terminator='\x1b\\').display_width() == expected_width + seq1 = TextSizing(params, text, terminator='\x07').make_sequence() + seq2 = TextSizing(params, text, terminator='\x1b\\').make_sequence() + + # verify round-trip + ts_match1, ts_match2 = TEXT_SIZING_PATTERN.match(seq1), TEXT_SIZING_PATTERN.match(seq2) + assert ts_match1 and ts_match2 + assert TextSizing.from_match(ts_match1) == TextSizing(params, text, terminator='\x07') + assert TextSizing.from_match(ts_match2) == TextSizing(params, text, terminator='\x1b\\') + + # and external width(), + assert width(seq1) == expected_width + assert width(seq2) == expected_width + + # verify 'strict' does not raise ValueError + width(seq1, control_codes='strict') + width(seq2, control_codes='strict') + + # and verify 'ignore' measures only inner_text (does not parse scale or width) + assert width(seq1, control_codes='ignore') == wcswidth(text) + assert width(seq2, control_codes='ignore') == wcswidth(text) + + [email protected]('given_sequence,expected_text,expected_params,expected_width', [ + ('\x1b]66;s=2:w=2;AB\x07', 'AB', 's=2:w=2', 4), + ('\x1b]66;s=2:w=2;\u4e2d\x07', '\u4e2d', 's=2:w=2', 4), + ('\x1b]66;s=3:w=1;x\x07', 'x', 's=3:w=1', 3), + ('\x1b]66;w=5;hello\x07', 'hello', 'w=5', 5), + ('\x1b]66;s=2:w=3;anything\x07', 'anything', 's=2:w=3', 6), + ('\x1b]66;w=3;x\x07', 'x', 'w=3', 3), + ('\x1b]66;s=1;AB\x07', 'AB', '', 2), + ('\x1b]66;s=2;AB\x07', 'AB', 's=2', 4), + ('\x1b]66;s=2;中\x07', '中', 's=2', 4), + ('\x1b]66;s=2;\x07', '', 's=2', 0), + ('\x1b]66;s=1:w=1;\x07', '', 'w=1', 1), + ('\x1b]66;w=2;A\x07', 'A', 'w=2', 2), + ('\x1b]66;s=2:w=3;text\x1b\\', 'text', 's=2:w=3', 6), +]) +def test_text_sizing_sequence(given_sequence, expected_text, expected_params, expected_width): + """Verify parsing and measured width of raw OSC 66 sequence.""" + ts_match = TEXT_SIZING_PATTERN.match(given_sequence) + assert ts_match is not None + text_size = TextSizing.from_match(ts_match) + assert text_size.params.make_sequence() == expected_params + assert text_size.text == expected_text + assert width(given_sequence, control_codes='parse') == expected_width + assert width(given_sequence, control_codes='strict') == expected_width + assert width(given_sequence, control_codes='ignore') == wcswidth(expected_text) + + [email protected]('text,expected', [ + ('\x1b]66;s=2:w=3:n=1:d=2:v=1:h=2;x!yzzy\x1b\\', 6), + ('\x1b]66;s=2:w=3;anything\x07', 6), + ('\x1b]66;w=3;x\x07', 3), + ('\x1b]66;s=1:w=0;AB\x07', 2), + ('\x1b]66;s=2:w=0;AB\x07', 4), + ('\x1b]66;s=2:w=0;\u4e2d\x07', 4), # '中' + ('\x1b]66;s=1:w=0;\x07', 0), + ('abc\x1b]66;w=3;x\x07def', 9), + ('\x1b]66;w=2;A\x07\x1b]66;w=3;B\x07', 5), + ('\x1b]66;s=2:w=3;text\x1b\\', 6), + ('\x1b[31m\x1b]66;w=2;AB\x07\x1b[0m', 2), +]) +def test_strings_with_text_sizing(text, expected): + """Verify measured width strings containing OSC66.""" + assert width(text) == expected + assert width(text, control_codes='strict') == expected + + [email protected]('text,expected', [ + ('\x1b]66;s=2;hello\x07', 'hello'), + ('\x1b]66;s=2;hello\x1b\\', 'hello'), + ('\x1b]66;;text\x07', 'text'), + ('\x1b]66;s=3:w=2;\x07', ''), + ('abc\x1b]66;w=2;XY\x07def', 'abcXYdef'), + ('\x1b[31m\x1b]66;s=2;red\x07\x1b[0m', 'red'), + ('\x1b]66;w=1;A\x07\x1b]66;w=1;B\x07', 'AB'), +]) +def test_strip_strings_with_text_sizing(text, expected): + assert strip_sequences(text) == expected + + [email protected]('text,expected_segs', [ + ('abc\x1b]66;s=2;hello\x07def', [('abc', False), ('\x1b]66;s=2;hello\x07', True), ('def', False)]), + ('abc\x1b]66;s=2;n=1,d=2,w=3;hello\x1b\\def', [('abc', False), ('\x1b]66;s=2;n=1,d=2,w=3;hello\x1b\\', True), ('def', False)]), +]) +def test_iter_sequences_text_sizing(text, expected_segs): + assert list(iter_sequences(text)) == expected_segs + + [email protected]('text,start,end,expected', [ + ('\x1b]66;w=3;ABC\x07', 0, 3, '\x1b]66;w=3;ABC\x07'), + ('\x1b]66;w=3;ABC\x07', 0, 2, '\x1b]66;w=2;AB\x07'), + ('\x1b]66;w=3;ABC\x07', 1, 3, '\x1b]66;w=2;BC\x07'), + ('ab\x1b]66;w=2;XY\x07cd', 0, 6, 'ab\x1b]66;w=2;XY\x07cd'), + ('ab\x1b]66;w=2;XY\x07cd', 0, 3, 'ab\x1b]66;w=1;X\x07'), + ('ab\x1b]66;w=2;XY\x07cd', 3, 6, '\x1b]66;w=1;Y\x07cd'), + ('ab\x1b]66;w=2;XY\x07cd', 4, 6, 'cd'), +]) +def test_clip_text_sizing_basic(text, start, end, expected): + """Test basic support of clip() with text sizing sequence.""" + assert repr(clip(text, start, end)) == repr(expected) + + [email protected]('text,start,end,expected', [ + ('\x1b]66;s=2;ABC\x07', 0, 0, ''), + ('\x1b]66;s=2;ABC\x07', 6, 6, ''), + ('\x1b]66;s=2;ABC\x07', 0, 2, '\x1b]66;s=2;A\x07'), + ('\x1b]66;s=2;ABC\x07', 0, 4, '\x1b]66;s=2;AB\x07'), + ('\x1b]66;s=2;ABC\x07', 0, 6, '\x1b]66;s=2;ABC\x07'), + ('\x1b]66;s=2;ABC\x07', 2, 6, '\x1b]66;s=2;BC\x07'), + ('\x1b]66;s=2;ABC\x07', 4, 6, '\x1b]66;s=2;C\x07'), +]) +def test_clip_text_sizing_scaled(text, start, end, expected): + """Test support of clip() with scale=N arguments.""" + assert repr(clip(text, start, end)) == repr(expected) + + [email protected]('text,start,end,expected', [ + # a b c + # === === === + # 012 345 678 + # . + # .. + # *a* + # *a* . + # ... *b* + # ... *b* . + # ... *b* .. + # ... *b* *c* + ('\x1b]66;s=3;ABC\x07', 0, 0, ''), + ('\x1b]66;s=3;ABC\x07', 0, 1, '.'), + ('\x1b]66;s=3;ABC\x07', 0, 2, '..'), + ('\x1b]66;s=3;ABC\x07', 0, 3, '\x1b]66;s=3;A\x07'), + ('\x1b]66;s=3;ABC\x07', 0, 4, '\x1b]66;s=3;A\x07.'), + ('\x1b]66;s=3;ABC\x07', 0, 5, '\x1b]66;s=3;A\x07..'), + ('\x1b]66;s=3;ABC\x07', 0, 6, '\x1b]66;s=3;AB\x07'), + ('\x1b]66;s=3;ABC\x07', 0, 7, '\x1b]66;s=3;AB\x07.'), + ('\x1b]66;s=3;ABC\x07', 0, 8, '\x1b]66;s=3;AB\x07..'), + ('\x1b]66;s=3;ABC\x07', 0, 9, '\x1b]66;s=3;ABC\x07'), + ('\x1b]66;s=3;ABC\x07', 0, 10, '\x1b]66;s=3;ABC\x07'), + # a b + # === === === + # 012 345 678 + # . 1, 2 + # .. 1, 3 + # .. . 1, 4 + # .. .. 1, 5 + # .. *b* 1, 6 + # .. *b* . 1, 7 + # .. *b* .. 1, 8 + # .. *b* *c* 1, 9 + ('\x1b]66;s=3;ABC\x07', 1, 1, ''), + ('\x1b]66;s=3;ABC\x07', 1, 2, '.'), + ('\x1b]66;s=3;ABC\x07', 1, 3, '..'), + ('\x1b]66;s=3;ABC\x07', 1, 4, '...'), + ('\x1b]66;s=3;ABC\x07', 1, 5, '....'), + ('\x1b]66;s=3;ABC\x07', 1, 6, '..\x1b]66;s=3;B\x07'), + ('\x1b]66;s=3;ABC\x07', 1, 7, '..\x1b]66;s=3;B\x07.'), + ('\x1b]66;s=3;ABC\x07', 1, 8, '..\x1b]66;s=3;B\x07..'), + ('\x1b]66;s=3;ABC\x07', 1, 9, '..\x1b]66;s=3;BC\x07'), + ('\x1b]66;s=3;ABC\x07', 1, 10, '..\x1b]66;s=3;BC\x07'), + # two-thirds of string 'A' and half of string 'B' is fillchar + # ('\x1b]66;s=3;ABC\x07', 2, 4, '..'), + # half of string 'A' and all of string 'B' + # a b + # === === === + # 012 345 678 + # . 2, 3 + # . . 2, 4 + # . .. 2, 5 + # . *b* 2, 6 + # . *b* . 2, 7 + # . *b* .. 2, 8 + # . *b* *c* 2, 9 + ('\x1b]66;s=3;ABC\x07', 2, 2, ''), + ('\x1b]66;s=3;ABC\x07', 2, 3, '.'), + ('\x1b]66;s=3;ABC\x07', 2, 4, '..'), + ('\x1b]66;s=3;ABC\x07', 2, 5, '...'), + ('\x1b]66;s=3;ABC\x07', 2, 6, '.\x1b]66;s=3;B\x07'), + ('\x1b]66;s=3;ABC\x07', 2, 7, '.\x1b]66;s=3;B\x07.'), + ('\x1b]66;s=3;ABC\x07', 2, 8, '.\x1b]66;s=3;B\x07..'), + ('\x1b]66;s=3;ABC\x07', 2, 9, '.\x1b]66;s=3;BC\x07'), + ('\x1b]66;s=3;ABC\x07', 2, 10, '.\x1b]66;s=3;BC\x07'), + # and now 3:10, should be easy ... + ('\x1b]66;s=3;ABC\x07', 3, 3, ''), + ('\x1b]66;s=3;ABC\x07', 3, 4, '.'), + ('\x1b]66;s=3;ABC\x07', 3, 5, '..'), + ('\x1b]66;s=3;ABC\x07', 3, 6, '\x1b]66;s=3;B\x07'), + ('\x1b]66;s=3;ABC\x07', 3, 7, '\x1b]66;s=3;B\x07.'), + ('\x1b]66;s=3;ABC\x07', 3, 8, '\x1b]66;s=3;B\x07..'), + ('\x1b]66;s=3;ABC\x07', 3, 9, '\x1b]66;s=3;BC\x07'), + ('\x1b]66;s=3;ABC\x07', 3, 10, '\x1b]66;s=3;BC\x07'), +]) +def test_clip_text_sizing_scaled_with_fillchar(text, start, end, expected): + """Test support of clip() with scale=N and fillchar is needed to fill remainder.""" + assert repr(clip(text, start, end, fillchar='.')) == repr(expected) + + +def test_clip_simple_path_padding(): + """Simple-path clip with w=N larger than text length exercises padding loop.""" + # w=4 but only 1 grapheme 'X' — 3 empty units are padded. + # Clip window (0, 1) forces partial overlap, triggering + # _text_sizing_clip_simple's padding branch. + assert repr(clip('\x1b]66;w=4;X\x07', 0, 1)) == repr('\x1b]66;w=1;X\x07') + + [email protected]('text,start,end,expected', [ + # CR forces painter path; fully-visible text sizing sequence + ('\r\x1b]66;w=2;XY\x07', 0, 3, '\x1b]66;w=2;XY\x07'), + # CR painter path, text sizing partially clipped (first unit visible) + ('\r\x1b]66;w=2;XY\x07', 0, 1, '\x1b]66;w=1;X\x07'), + # BS forces painter path; text sizing fully visible + ('ab\b\b\x1b]66;w=2;XY\x07', 0, 4, '\x1b]66;w=2;XY\x07'), + # Painter path with partial text sizing overlap (exercises _text_sizing_clip_painter) + ('\ra\x1b]66;s=2;BC\x07', 0, 3, 'a\x1b]66;s=2;B\x07'), + # Painter path: text sizing scaled partial overlap with fillchar + ('\r\x1b]66;s=3;ABC\x07', 1, 6, ' \x1b]66;s=3;B\x07'), + # CSI movement + text sizing fully visible + ('ab\x1b[2D\x1b]66;w=2;XY\x07', 0, 4, '\x1b]66;w=2;XY\x07'), + # Painter path: text sizing entirely outside clip window (before start) + ('\r\x1b]66;w=2;XY\x07', 2, 4, ''), + # CR + text sizing with auto-width (w=0), partial overlap + ('\ra\x1b]66;s=2;BC\x07', 0, 5, 'a\x1b]66;s=2;BC\x07'), + # Painter path: padding when w=N has more units than graphemes + ('\r\x1b]66;w=3;A\x07', 0, 2, '\x1b]66;w=2;A\x07'), + # Painter path: text sizing with unit entirely before clip window (skip path) + ('\r\x1b]66;s=2;ABCD\x07', 4, 8, '\x1b]66;s=2;CD\x07'), +]) +def test_clip_text_sizing_painter(text, start, end, expected): + """Test clip() with text sizing sequences in the cursor-movement (painter) path.""" + assert repr(clip(text, start, end)) == repr(expected) diff --git a/contrib/python/wcwidth/py3/tests/test_textwrap.py b/contrib/python/wcwidth/py3/tests/test_textwrap.py index 094c8e56725..33da72a4fa9 100644 --- a/contrib/python/wcwidth/py3/tests/test_textwrap.py +++ b/contrib/python/wcwidth/py3/tests/test_textwrap.py @@ -1,4 +1,5 @@ """Tests for sequence-aware text wrapping functions.""" + # std imports import sys import platform @@ -76,17 +77,14 @@ def _colorize(text): ) -EDGE_CASES = [ [email protected]('text,w,expected', [ ('', 10, []), (' ', 10, []), ('\u5973', 0, ['\u5973']), ('\u5973', 1, ['\u5973']), (ZWJ_FAMILY, 1, [ZWJ_FAMILY]), (HANGUL_GA, 1, [HANGUL_GA]), -] - - [email protected]('text,w,expected', EDGE_CASES) +]) def test_wrap_edge_cases(text, w, expected): assert wrap(text, w) == expected @@ -95,28 +93,22 @@ def test_wrap_initial_indent(): assert wrap('hello world', 10, initial_indent='> ') == ['> hello', 'world'] -LONG_WORD_CASES = [ [email protected]('text,w,break_long,expected', [ ('abcdefghij', 3, True, ['abc', 'def', 'ghi', 'j']), ('abcdefghij', 3, False, ['abcdefghij']), -] - - [email protected]('text,w,break_long,expected', LONG_WORD_CASES) +]) def test_wrap_long_words(text, w, break_long, expected): assert wrap(text, w, break_long_words=break_long) == expected -HYPHEN_LONG_WORD_CASES = [ [email protected]('text,w,break_hyphens,propagate,expected', [ ('a-b-c-d', 3, True, True, ['a-', 'b-', 'c-d']), ('a-b-c-d', 3, False, True, ['a-b', '-c-', 'd']), ('---', 2, True, True, ['--', '-']), ('a---b', 2, True, True, ['a-', '--', 'b']), ('a-\x1b[31mb', 2, True, True, ['a-\x1b[31m\x1b[0m', '\x1b[31mb\x1b[0m']), ('a-\x1b[31mb', 2, True, False, ['a-\x1b[31m', 'b']), -] - - [email protected]('text,w,break_hyphens,propagate,expected', HYPHEN_LONG_WORD_CASES) +]) def test_wrap_hyphen_long_words(text, w, break_hyphens, propagate, expected): assert wrap(text, w, break_on_hyphens=break_hyphens, propagate_sgr=propagate) == expected @@ -182,7 +174,7 @@ def test_wrap_multiline_matches_stdlib(): assert wrap(given, 30) == textwrap.wrap(given, 30) -UNICODE_CASES = [ [email protected]('text,w,expected', [ # CJK (2 cells each) ('\u4e2d\u6587\u5b57\u7b26', 4, ['\u4e2d\u6587', '\u5b57\u7b26']), ('\u4e2d\u6587\u5b57', 5, ['\u4e2d\u6587', '\u5b57']), @@ -192,18 +184,14 @@ UNICODE_CASES = [ (f'{FAMILY_ZWJ} ab', 4, [FAMILY_ZWJ, 'ab']), (f'{SMILEY_VS16} ab', 3, [SMILEY_VS16, 'ab']), ('\U0001F469\U0001F467\U0001F466', 4, ['\U0001F469\U0001F467', '\U0001F466']), -] - - [email protected]('text,w,expected', UNICODE_CASES) +]) def test_wrap_unicode(benchmark, text, w, expected): kwargs = {'break_on_hyphens': False} if '-' in text else {} result = benchmark(wrap, text, w, **kwargs) assert result == expected -# Escape sequence preservation (with propagate_sgr=True default) -SEQUENCE_CASES = [ [email protected]('text,w,expected', [ # SGR sequences propagated across lines (f'{SGR_RED}red{SGR_RESET} blue', 4, [f'{SGR_RED}red{SGR_RESET}', 'blue']), # SGR at end of line propagates to next line @@ -221,43 +209,36 @@ SEQUENCE_CASES = [ # Sequences in long word breaking - red starts after 'x', continues across lines ('x\x1b[31mabcdefghij\x1b[0m', 3, ['x\x1b[31mab\x1b[0m', '\x1b[31mcde\x1b[0m', '\x1b[31mfgh\x1b[0m', '\x1b[31mij\x1b[0m']), - # Lone ESC - not a valid SGR sequence, stays with preceding text - ('abc\x1bdefghij', 3, ['abc\x1b', 'def', 'ghi', 'j']), -] - -SEQUENCE_CASES_NO_PROPAGATE = [ - (f'hello{SGR_RED} world', 6, [f'hello{SGR_RED}', 'world']), - ('x\x1b[31mabcdefghij\x1b[0m', 3, ['x\x1b[31mab', 'cde', 'fgh', 'ij\x1b[0m']), -] - - [email protected]('text,w,expected', SEQUENCE_CASES) + # Fs sequence (ESC d) - zero-width, stays with preceding text + ('abc\x1bdefghij', 3, ['abc\x1bd', 'efg', 'hij']), +]) def test_wrap_sequences(benchmark, text, w, expected): + """Escape sequence preservation (with propagate_sgr=True default)""" assert benchmark(wrap, text, w) == expected [email protected]('text,w,expected', SEQUENCE_CASES_NO_PROPAGATE) [email protected]('text,w,expected', [ + (f'hello{SGR_RED} world', 6, [f'hello{SGR_RED}', 'world']), + ('x\x1b[31mabcdefghij\x1b[0m', 3, ['x\x1b[31mab', 'cde', 'fgh', 'ij\x1b[0m']), +] +) def test_wrap_sequences_no_propagate(text, w, expected): result = wrap(text, w, propagate_sgr=False) assert result == expected -# Mixed: sequences + unicode -MIXED_CASES = [ [email protected]('text,w,expected', [ (f'{SGR_RED}\u4e2d\u6587{SGR_RESET} ab', 5, [f'{SGR_RED}\u4e2d\u6587{SGR_RESET}', 'ab']), (f'{SGR_RED}{FAMILY_ZWJ}{SGR_RESET} ab', 4, [f'{SGR_RED}{FAMILY_ZWJ}{SGR_RESET}', 'ab']), (f'{SGR_BOLD}\u4e2d{SGR_RESET}y z', 4, [f'{SGR_BOLD}\u4e2d{SGR_RESET}y', 'z']), -] - - [email protected]('text,w,expected', MIXED_CASES) +]) def test_wrap_mixed(benchmark, text, w, expected): + """Test mixed sequences + unicode.""" result = benchmark(wrap, text, w) assert result == expected -# Tabsize with wide characters - tests column alignment with different cell widths -TABSIZE_WIDE_CASES = [ [email protected]('text,w,tabsize,expected', [ # CJK (2 cells) + tab: tabsize=4, '\u4e2d' is 2 cols, tab expands to col 4 ('\u4e2d\ta b', 6, 4, ['\u4e2d a', 'b']), # CJK + tab with tabsize=8: '\u4e2d' is 2 cols, tab expands to col 8 @@ -268,10 +249,7 @@ TABSIZE_WIDE_CASES = [ ('\u4e2d\u6587\ta', 8, 4, ['\u4e2d\u6587 a']), # ASCII + tab + CJK: 'a' is 1 col, tab to 4 (3 spaces), CJK is 2 cols ('a\t\u4e2d b', 8, 4, ['a \u4e2d b']), -] - - [email protected]('text,w,tabsize,expected', TABSIZE_WIDE_CASES) +]) @pytest.mark.skipif( platform.python_implementation() == 'PyPy' and sys.version_info < (3, 9), reason='PyPy 3.8 str.expandtabs() counts UTF-8 bytes instead of characters' @@ -286,7 +264,8 @@ OSC_END_ST = '\x1b]8;;\x1b\\' OSC_START_BEL = '\x1b]8;;http://example.com\x07' OSC_END_BEL = '\x1b]8;;\x07' -HYPERLINK_WORD_BOUNDARY_CASES = [ + [email protected]('text,w,expected', [ ( # standard, ST-variant, f'{OSC_START_ST}link{OSC_END_ST}more', 5, @@ -408,18 +387,14 @@ HYPERLINK_WORD_BOUNDARY_CASES = [ '\x1b]8;foo=bar:id=mylink;http://example.com\x1b\\Click\x1b]8;;\x1b\\', '\x1b]8;foo=bar:id=mylink;http://example.com\x1b\\here\x1b]8;;\x1b\\', ], - ), -] - - [email protected]('text,w,expected', HYPERLINK_WORD_BOUNDARY_CASES) + ),]) def test_wrap_hyperlink_word_boundary(text, w, expected): """OSC hyperlink sequences should act as word boundaries.""" result = wrap(text, w) assert result == expected -PLACEHOLDER_STDLIB_CASES = [ [email protected]('text,kwargs', [ ('The quick brown fox jumps over the lazy dog', {'width': 10, 'max_lines': 3, 'placeholder': '...'}), ('1234567890 1234567890 extra', @@ -444,10 +419,7 @@ PLACEHOLDER_STDLIB_CASES = [ {'width': 10, 'subsequent_indent': ' ', 'max_lines': 2, 'placeholder': '...'}), ('hello world foo bar', {'width': 10, 'initial_indent': '> ', 'max_lines': 2, 'placeholder': '...'}), -] - - [email protected]('text,kwargs', PLACEHOLDER_STDLIB_CASES) +]) def test_wrap_max_lines_matches_stdlib(text, kwargs): expected = _adjust_stdlib_result(textwrap.wrap(text, **kwargs), kwargs) assert wrap(text, **kwargs) == expected @@ -460,7 +432,7 @@ def test_wrap_placeholder_too_large(): textwrap.wrap('fox', width=1, max_lines=3, placeholder='...') -MAX_LINES_SEQUENCE_CASES = [ [email protected]('text,w,ml,ph,expected', [ (f'{SGR_RED}hello world foo bar{SGR_RESET}', 8, 2, '...', [f'{SGR_RED}hello{SGR_RESET}', f'{SGR_RED}world...{SGR_RESET}']), (f'{SGR_RED}hello{SGR_RESET} world foo', @@ -470,10 +442,7 @@ MAX_LINES_SEQUENCE_CASES = [ ('\u4e2d\u6587 \u5b57\u7b26 hello', 5, 1, '~', ['\u4e2d\u6587~']), ('\u4e2d\u6587 \u5b57\u7b26 hello world', 5, 2, '~', ['\u4e2d\u6587', '\u5b57\u7b26~']), ('\u4e2d\u6587\u5b57\u7b26 hello', 12, 1, '...', ['\u4e2d\u6587\u5b57\u7b26...']), -] - - [email protected]('text,w,ml,ph,expected', MAX_LINES_SEQUENCE_CASES) +]) def test_wrap_max_lines_sequences(text, w, ml, ph, expected): assert wrap(text, w, max_lines=ml, placeholder=ph) == expected @@ -494,19 +463,14 @@ def test_wrap_max_lines_hyperlink_close_on_prev_line(): assert result == [f'{OSC_START_ST}ab{OSC_END_ST}...'] -# -- expand_tabs, replace_whitespace, fix_sentence_endings -- - -STDLIB_PARAM_CASES = [ [email protected]('text,kwargs', [ ('hello\tworld', {'width': 20, 'expand_tabs': False, 'replace_whitespace': False}), ('hello\tworld foo\tbar baz', {'width': 12, 'expand_tabs': False, 'tabsize': 8}), ('hello\nworld', {'width': 20, 'replace_whitespace': False}), ('a\t b\n c', {'width': 20, 'replace_whitespace': False}), ('Hello world. This is a test. More text.', {'width': 20, 'fix_sentence_endings': True}), ('Dr. Smith went to Washington. He left.', {'width': 20, 'fix_sentence_endings': True}), -] - - [email protected]('text,kwargs', STDLIB_PARAM_CASES) +]) def test_wrap_stdlib_params(text, kwargs): assert wrap(text, **kwargs) == textwrap.wrap(text, **kwargs) @@ -521,3 +485,9 @@ def test_wrap_replace_whitespace_false_newlines_zero_width(): """Newlines have zero display width, so more text fits per line than stdlib.""" assert wrap('hello\nworld foo\nbar', 10, replace_whitespace=False) == [ 'hello\nworld', 'foo\nbar'] + + +def test_wrap_bare_esc(): + """Bare ESC not part of a recognized sequence is treated as zero-width.""" + assert wrap('ab\x1bcd ef', 5) == ['ab\x1bcd', 'ef'] + assert wrap('ab\x1b\x00cdef', 3) == ['ab\x1b\x00c', 'def'] diff --git a/contrib/python/wcwidth/py3/tests/test_ucslevel.py b/contrib/python/wcwidth/py3/tests/test_ucslevel.py index 979cfe0fe8d..9aea2c9b73c 100644 --- a/contrib/python/wcwidth/py3/tests/test_ucslevel.py +++ b/contrib/python/wcwidth/py3/tests/test_ucslevel.py @@ -1,4 +1,5 @@ """Unicode version level tests for wcwidth.""" + # local import wcwidth diff --git a/contrib/python/wcwidth/py3/tests/test_width.py b/contrib/python/wcwidth/py3/tests/test_width.py index 67d7b017258..8e43b47b1de 100644 --- a/contrib/python/wcwidth/py3/tests/test_width.py +++ b/contrib/python/wcwidth/py3/tests/test_width.py @@ -1,10 +1,11 @@ """Tests for width() function.""" + # 3rd party import pytest # local import wcwidth -from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN +from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN, INDETERMINATE_EFFECT_SEQUENCE BASIC_WIDTH_CASES = [ ('', 0, 'empty'), @@ -29,7 +30,7 @@ IGNORE_MODE_CASES = [ ('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'), ('hello\x80world', 10, 'C1_control'), ('\x1b', 0, 'lone_ESC'), - ('a\x1bb', 2, 'lone_ESC_between'), + ('a\x1bb', 1, 'fs_sequence_between'), ] @@ -45,8 +46,10 @@ STRICT_RAISES_CASES = [ ('hello\x7fworld', 'DEL'), ('hello\x80world', 'C1_control'), ('hello\nworld', 'LF'), + ('hello\rworld', 'CR'), ('hello\x1b[Hworld', 'cursor_home'), ('hello\x1b[Aworld', 'cursor_up'), + ('hello\x1b[5Gworld', 'hpa'), ] @@ -61,11 +64,11 @@ STRICT_ALLOWED_CASES = [ ('hello\x07world', 10, 'BEL'), ('hello\x00world', 10, 'NUL'), ('abc\bd', 3, 'backspace'), - ('abc\rxy', 3, 'CR'), ('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'), ('a\x1b[2Cb', 4, 'cursor_right'), + ('ab\x1b[Db', 2, 'cursor_left'), ('\x1b', 0, 'lone_ESC'), - ('a\x1bb', 2, 'lone_ESC_between'), + ('a\x1bb', 1, 'fs_sequence_between'), ('\x1b!', 1, 'ESC_unrecognized'), ] @@ -88,6 +91,7 @@ STRICT_INDETERMINATE_SEQUENCES = [ ('\x1b[1X', 'erase_chars'), ('\x1b[1S', 'parm_index'), ('\x1b[1T', 'parm_rindex'), + ('\x1bc', 'full_reset'), ] @@ -106,6 +110,11 @@ PARSE_MODE_CASES = [ ('abcd\x1b[2De', 4, 'cursor_left'), ('\x1b[31mred\x1b[0m', 3, 'SGR'), ('ab\x1b[Hcd', 4, 'indeterminate'), + ('def\x1b[3Dabc', 3, 'cursor_left_overwrite'), + ('def\x1b[10Dabc', 3, 'cursor_left_past_start'), + ('abc\x1b[5Gde', 6, 'hpa_parse'), + ('abc\x1b[Gde', 3, 'hpa_no_param'), + ('\x1b[5Gabc', 7, 'hpa_before_text'), ] @@ -190,29 +199,26 @@ def test_vs16_selector(): def test_zwj_with_non_emoji_chars(): - """ZWJ with non-emoji characters and trailing VS16.""" - # ZWJ (Zero Width Joiner) skips both itself and the following character, treating them as a - # failed emoji ZWJ sequence. When followed by VS16, the VS16 should NOT apply to the earlier - # emoji because VS16 must immediately follow the character it modifies. - # - # In the full parse loop, VS16 checks `last_measured_idx == idx - 1` (immediate adjacency). - # The ZWJ+char skip means VS16 is not adjacent to the smiley, so VS16 has no effect. - # + """ + ZWJ with non-emoji characters and trailing VS16. + + These are invalid Unicode sequences (ZWJ followed by non-emoji), so behavior is implementation- + defined. The emoji base (smiley, width 1) is narrow, and VS16 looks back to it across the ZWJ- + consumed characters, adding 1 cell for a total width of 2. + """ # Control test, assert wcwidth.width("\u263A\uFE0F") == 2 # smiley + VS16 = 2 - # ZWJ followed by non-emoji, VS16 does not apply (not adjacent) - assert wcwidth.width("\u263A\u200Da\uFE0F") == 1 - assert wcwidth.width("\u263A\u200Dx\uFE0F") == 1 - assert wcwidth.width("\u263A\u200Da\u200Db\uFE0F") == 1 + # ZWJ followed by non-emoji: VS16 applies to the smiley base + assert wcwidth.width("\u263A\u200Da\uFE0F") == 2 + assert wcwidth.width("\u263A\u200Dx\uFE0F") == 2 + assert wcwidth.width("\u263A\u200Da\u200Db\uFE0F") == 2 # ZWJ at end of string assert wcwidth.width("\u263A\u200D") == 1 # smiley + ZWJ = 1 # Long strings (>20 chars) use fast path which routes to wcswidth(). - # wcswidth() has more lenient VS16 handling, causing VS16 to incorrectly apply (!) - # Multiply by 10 to exceed threshold: "\u263A\u200Da\uFE0F" (4 chars) * 10 = 40 chars - assert wcwidth.width("\u263A\u200Da\uFE0F" * 10) == 20 # (smiley(1) + ZWJ+a(0) + VS16(+1)) * 10 (!) + assert wcwidth.width("\u263A\u200Da\uFE0F" * 10) == 20 def test_vs16_after_control_chars(): @@ -228,10 +234,9 @@ def test_vs16_after_control_chars(): assert wcwidth.width("\u263A\x0d\uFE0F") == 1 # smiley(1) + CR(reset) + VS16(0), extent=1 # Long strings (>20 chars) use fast path which routes to wcswidth(). - # wcswidth() has more lenient VS16 handling (`last_measured_idx >= 0` vs `== idx - 1`), - # causing VS16 to incorrectly apply when separated by control chars (!) + # In ignore mode, BEL is stripped, so VS16 is adjacent to the smiley and applies correctly. # Multiply by 10 to exceed threshold - assert wcwidth.width(("\u263A\x07\uFE0F") * 10) == 20 # (smiley(1) + BEL(0) + VS16(+1)) * 10 (!) + assert wcwidth.width(("\u263A\x07\uFE0F") * 10) == 20 # (smiley(1) + BEL-stripped(0) + VS16(+1)) * 10 def test_width_long_horizontal_fastpath(): @@ -266,6 +271,42 @@ def test_carriage_return_resets_column(): assert wcwidth.width('abc\rde') == 3 +def test_carriage_return_strict_raises(): + """CR in strict mode raises ValueError (indeterminate starting column).""" + with pytest.raises(ValueError, match='Horizontal movement'): + wcwidth.width('hello\rworld', control_codes='strict') + + +def test_hpa_parse_best_effort(): + """HPA in parse mode assumes string begins at column 0.""" + assert wcwidth.width('abc\x1b[5Gde') == 6 + assert wcwidth.width('abc\x1b[Gde') == 3 + assert wcwidth.width('\x1b[10Ghi') == 11 + + +def test_hpa_strict_raises(): + """HPA in strict mode raises ValueError (indeterminate starting column).""" + with pytest.raises(ValueError, match='horizontal position'): + wcwidth.width('abc\x1b[5Gde', control_codes='strict') + + +def test_cursor_left_strict_out_of_bounds(): + """Cursor-left beyond string start raises ValueError in strict mode.""" + with pytest.raises(ValueError, match='Cursor left movement'): + wcwidth.width('a\x1b[5Da', control_codes='strict') + + +def test_cursor_left_out_of_bounds_parse_no_raise(): + """Cursor-left beyond string start is silently clamped in parse mode.""" + assert wcwidth.width('a\x1b[5Da') == 1 + assert wcwidth.width('abc\x1b[99Ddef') == 3 # 99D clamped to col 0, then b,c,d overwritten + + +def test_cursor_left_out_of_bounds_ignore_mode(): + """Cursor-left beyond string start is zero-width in ignore mode.""" + assert wcwidth.width('a\x1b[5Da', control_codes='ignore') == 2 + + def test_iter_sequences_lone_esc(): """Lone ESC is yielded as a sequence.""" assert list(wcwidth.iter_sequences('\x1b')) == [('\x1b', True)] @@ -449,3 +490,88 @@ def test_fitzpatrick_modifier_standalone_width(): """Standalone Fitzpatrick modifier, however, is wide character in width().""" result = wcwidth.width('\U0001F3FB') assert result == 2 + + +FS_SEQUENCE_CASES = [ + ('\x1bc', 'ris'), + ('\x1bl', 'memory_lock'), + ('\x1bm', 'memory_unlock'), + ('\x1bn', 'ls2'), + ('\x1bo', 'ls3'), + ('\x1b|', 'ls3r'), + ('\x1b}', 'ls2r'), + ('\x1b~', 'ls1r'), +] + + [email protected]('seq,name', FS_SEQUENCE_CASES) +def test_fs_sequences_matched(seq, name): + """Fs (independent function) sequences are matched as zero-width.""" + segments = list(wcwidth.iter_sequences(seq)) + assert segments == [(seq, True)] + assert wcwidth.width(seq) == 0 + + +FP_SEQUENCE_CASES = [ + ('\x1b7', 'decsc'), + ('\x1b8', 'decrc'), + ('\x1b=', 'deckpam'), + ('\x1b>', 'deckpnm'), + ('\x1b0', 'fp_0'), + ('\x1b1', 'fp_1'), + ('\x1b9', 'fp_9'), +] + + [email protected]('seq,name', FP_SEQUENCE_CASES) +def test_fp_sequences_matched(seq, name): + """Fp (private use) sequences are matched as zero-width.""" + segments = list(wcwidth.iter_sequences(seq)) + assert segments == [(seq, True)] + assert wcwidth.width(seq) == 0 + + +NF_SEQUENCE_CASES = [ + ('\x1b F', 's7c1t'), + ('\x1b G', 's8c1t'), + ('\x1b#3', 'decdhl_top'), + ('\x1b#4', 'decdhl_bottom'), + ('\x1b#5', 'decswl'), + ('\x1b#6', 'decdwl'), + ('\x1b#8', 'decaln'), + ('\x1b%G', 'utf8_designate'), + ('\x1b%@', 'iso2022_return'), +] + + [email protected]('seq,name', NF_SEQUENCE_CASES) +def test_nf_sequences_matched(seq, name): + """NF (multi-byte) escape sequences are matched as zero-width.""" + segments = list(wcwidth.iter_sequences(seq)) + assert segments == [(seq, True)] + assert wcwidth.width(seq) == 0 + + +def test_fs_sequence_embedded_in_text(): + """Fs sequence surrounded by text is correctly segmented.""" + segments = list(wcwidth.iter_sequences('abc\x1bcdef')) + assert segments == [('abc', False), ('\x1bc', True), ('def', False)] + assert wcwidth.width('abc\x1bcdef') == 6 + + +def test_nf_sequence_embedded_in_text(): + """NF sequence surrounded by text is correctly segmented.""" + segments = list(wcwidth.iter_sequences('abc\x1b#8def')) + assert segments == [('abc', False), ('\x1b#8', True), ('def', False)] + assert wcwidth.width('abc\x1b#8def') == 6 + + +def test_screen_title_sequences(): + """Screen/tmux title sequence ESC k hello ST.""" + segments = list(wcwidth.iter_sequences('\x1bkhello\x1b\\')) + assert segments[0] == ('\x1bk', True) + + +def test_ris_indeterminate(): + """RIS (ESC c) is flagged as indeterminate effect.""" + assert INDETERMINATE_EFFECT_SEQUENCE.match('\x1bc') |
