diff options
| author | robot-piglet <[email protected]> | 2026-02-11 10:55:54 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2026-02-11 11:24:08 +0300 |
| commit | 9b5f29efa00bba424cd32471a95ececc583fe046 (patch) | |
| tree | 58936c6fc2147c49fc2a4aec657f63fb6f789336 /contrib/python/wcwidth/py3/tests | |
| parent | df75a44af0e3c0cfce907e22f61d6c91fc3bbc39 (diff) | |
Intermediate changes
commit_hash:721c786fcb8a37574bec0881ae2194859f790fae
Diffstat (limited to 'contrib/python/wcwidth/py3/tests')
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_benchmarks.py | 56 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_grapheme.py | 111 | ||||
| -rw-r--r-- | contrib/python/wcwidth/py3/tests/test_textwrap.py | 69 |
3 files changed, 234 insertions, 2 deletions
diff --git a/contrib/python/wcwidth/py3/tests/test_benchmarks.py b/contrib/python/wcwidth/py3/tests/test_benchmarks.py index 5c929f7ec37..6e0ffadcc45 100644 --- a/contrib/python/wcwidth/py3/tests/test_benchmarks.py +++ b/contrib/python/wcwidth/py3/tests/test_benchmarks.py @@ -1,6 +1,7 @@ """Performance benchmarks for wcwidth module.""" # std imports import os +import unicodedata # local import wcwidth @@ -54,6 +55,31 @@ def test_wcswidth_emoji_sequence(benchmark): benchmark(wcwidth.wcswidth, text) +# NFC vs NFD comparison - text with combining marks +DIACRITICS_COMPOSED = 'café résumé naïve ' * 100 +DIACRITICS_DECOMPOSED = unicodedata.normalize('NFD', DIACRITICS_COMPOSED) + + +def test_wcswidth_composed(benchmark): + """Benchmark wcswidth() with NFC-composed text.""" + benchmark(wcwidth.wcswidth, DIACRITICS_COMPOSED) + + +def test_wcswidth_decomposed(benchmark): + """Benchmark wcswidth() with NFD-decomposed text.""" + benchmark(wcwidth.wcswidth, DIACRITICS_DECOMPOSED) + + +def test_width_composed(benchmark): + """Benchmark width() with NFC-composed text.""" + benchmark(wcwidth.width, DIACRITICS_COMPOSED) + + +def test_width_decomposed(benchmark): + """Benchmark width() with NFD-decomposed text.""" + benchmark(wcwidth.width, DIACRITICS_DECOMPOSED) + + def test_width_ascii(benchmark): """Benchmark width() with ASCII string.""" benchmark(wcwidth.width, 'hello world') @@ -88,6 +114,36 @@ def test_iter_graphemes_combining(benchmark): benchmark(lambda: list(wcwidth.iter_graphemes(text))) +def test_grapheme_boundary_before_short(benchmark): + """Benchmark grapheme_boundary_before() near start of short string.""" + text = 'Hello 👋🏻!' + benchmark(wcwidth.grapheme_boundary_before, text, 8) + + +def test_grapheme_boundary_before_long_end(benchmark): + """Benchmark grapheme_boundary_before() near end of long line.""" + text = 'x' * 95 + '👨\u200d👩\u200d👧!' 
+ benchmark(wcwidth.grapheme_boundary_before, text, 100) + + +def test_grapheme_boundary_before_long_mid(benchmark): + """Benchmark grapheme_boundary_before() in middle of long line.""" + text = 'x' * 50 + '👨\u200d👩\u200d👧' + 'y' * 50 + benchmark(wcwidth.grapheme_boundary_before, text, 55) + + +def test_iter_graphemes_reverse_short(benchmark): + """Benchmark iter_graphemes_reverse() with short string.""" + text = 'café\u0301 🇫🇷!' + benchmark(lambda: list(wcwidth.iter_graphemes_reverse(text))) + + +def test_iter_graphemes_reverse_long(benchmark): + """Benchmark iter_graphemes_reverse() with long string.""" + text = 'The quick brown 🦊 jumps over the lazy 🐕. ' * 5 + benchmark(lambda: list(wcwidth.iter_graphemes_reverse(text))) + + def test_ljust_ascii(benchmark): """Benchmark ljust() with ASCII string.""" benchmark(wcwidth.ljust, 'hello', 20) diff --git a/contrib/python/wcwidth/py3/tests/test_grapheme.py b/contrib/python/wcwidth/py3/tests/test_grapheme.py index f344ad32fde..d2cfa86c1c8 100644 --- a/contrib/python/wcwidth/py3/tests/test_grapheme.py +++ b/contrib/python/wcwidth/py3/tests/test_grapheme.py @@ -6,7 +6,7 @@ import os import pytest # local -from wcwidth import iter_graphemes +from wcwidth import iter_graphemes, iter_graphemes_reverse, grapheme_boundary_before try: chr(0x2fffe) @@ -145,3 +145,112 @@ def test_wide_unicode_graphemes(input_str, expected): def test_unicode_grapheme_break_test(input_str, expected): """Validate against official Unicode GraphemeBreakTest.txt.""" assert list(iter_graphemes(input_str)) == expected + + +# Prepend: Arabic Number Sign +PREPEND_CHAR = '\u0600' +# Multiple combining marks: e + acute + grave +MULTI_COMBINE = 'e\u0301\u0300' + + +# grapheme_boundary_before(text, pos) returns start of grapheme cluster before pos. 
+# (text, pos, expected): pos=search from here, expected=where cluster starts +@pytest.mark.parametrize(("text", "pos", "expected"), [ + # 'abc': 0=a, 1=b, 2=c + ('abc', 3, 2), # from end -> 'c' at 2 + ('abc', 2, 1), # from 'c' -> 'b' at 1 + ('abc', 1, 0), # from 'b' -> 'a' at 0 + # 'a\r\nb': CRLF is one cluster (GB3) + ('a\r\nb', 3, 1), # from 'b' -> '\r\n' at 1 + # 'café': e + combining acute is one cluster (GB9) + ('cafe\u0301', 5, 3), # from end -> 'é' at 3 + ('cafe\u0301', 4, 3), # from acute -> still 'é' at 3 + # Multiple combining marks: e + acute + grave (GB9) + ('a' + MULTI_COMBINE + 'b', 4, 1), # from 'b' -> e+marks at 1 + # Prepend + char is one cluster (GB9b) + (PREPEND_CHAR + 'a', 2, 0), # whole cluster + # Prepend + Control: control breaks (GB4) + (PREPEND_CHAR + '\n', 2, 1), # '\n' separate at 1 + # C1 control (NEL, 0x85) stops backward scan in _find_cluster_start (GB4) + ('X\x85\u0301', 3, 2), +]) +def test_grapheme_boundary_before_basic(text, pos, expected): + """Basic grapheme_boundary_before tests.""" + assert grapheme_boundary_before(text, pos) == expected + + +@pytest.mark.skipif(NARROW_ONLY, reason="requires wide Unicode") +@pytest.mark.parametrize(("text", "pos", "expected"), [ + # 'Hi 👋🏻!': 0=H,1=i,2=space,3=wave,4=skin,5=!; wave+skin is one cluster + ('Hi \U0001F44B\U0001F3FB!', 6, 5), # from end -> '!' at 5 + ('Hi \U0001F44B\U0001F3FB!', 5, 3), # from '!' 
-> wave+skin at 3 + ('Hi \U0001F44B\U0001F3FB!', 3, 2), # from wave -> space at 2 + # 'a🇺🇸b': 0=a,1-2=flag,3=b; flag is one cluster (GB12/13) + ('a' + FLAG_US + 'b', 4, 3), # from end -> 'b' at 3 + ('a' + FLAG_US + 'b', 3, 1), # from 'b' -> flag at 1 + # Three RIs (🇺🇸🇦): flag + solo RI + (FLAG_US + RI_A, 3, 2), # from end -> solo RI at 2 + (FLAG_US + RI_A, 2, 0), # from solo -> flag at 0 + # 'a👨‍👩‍👧b': 0=a,1-5=family,6=b; ZWJ sequence is one cluster (GB11) + ('a' + FAMILY + 'b', 7, 6), # from end -> 'b' at 6 + ('a' + FAMILY + 'b', 6, 1), # from 'b' -> family at 1 +]) +def test_grapheme_boundary_before_unicode(text, pos, expected): + """grapheme_boundary_before with emoji and wide Unicode.""" + assert grapheme_boundary_before(text, pos) == expected + + +@pytest.mark.parametrize(("input_str", "expected"), [ + ('', []), + ('abc', ['c', 'b', 'a']), + # café with combining mark mixed with CRLF + ('cafe\u0301\r\nok', ['k', 'o', '\r\n', 'e\u0301', 'f', 'a', 'c']), +]) +def test_iter_graphemes_reverse_basic(input_str, expected): + """Basic iter_graphemes_reverse tests.""" + assert list(iter_graphemes_reverse(input_str)) == expected + + +@pytest.mark.skipif(NARROW_ONLY, reason="requires wide Unicode") +@pytest.mark.parametrize(("input_str", "expected"), [ + # Multiple emoji types in one string + ('cafe\u0301 ' + WAVE_SKIN + ' ' + FLAG_US + '!', + ['!', FLAG_US, ' ', WAVE_SKIN, ' ', 'e\u0301', 'f', 'a', 'c']), + # Two families + (FAMILY + FAMILY, [FAMILY, FAMILY]), + # Flag + solo RI + text + ('Hi' + FLAG_US + RI_A + '!', ['!', RI_A, FLAG_US, 'i', 'H']), +]) +def test_iter_graphemes_reverse_unicode(input_str, expected): + """iter_graphemes_reverse with wide Unicode.""" + assert list(iter_graphemes_reverse(input_str)) == expected + + +@pytest.mark.skipif(NARROW_ONLY, reason="requires wide Unicode") +@pytest.mark.parametrize(("input_str", "expected"), read_grapheme_break_test()) +def test_grapheme_roundtrip_consistency(input_str, expected): + """Forward and reverse iteration produce identical boundaries.""" 
+ forward = list(iter_graphemes(input_str)) + reverse = list(iter_graphemes_reverse(input_str))[::-1] + assert forward == reverse + + +def test_grapheme_boundary_before_edge_cases(): + """Edge cases for grapheme_boundary_before.""" + assert grapheme_boundary_before('abc', 0) == 0 + assert grapheme_boundary_before('abc', 100) == 2 # pos > len clamps + assert grapheme_boundary_before('', 0) == 0 + + +def test_iter_graphemes_reverse_edge_cases(): + """Edge cases for iter_graphemes_reverse.""" + assert list(iter_graphemes_reverse('abcdef', start=2, end=5)) == ['e', 'd', 'c'] + assert list(iter_graphemes_reverse('abc', start=0, end=100)) == ['c', 'b', 'a'] + assert not list(iter_graphemes_reverse('abc', start=5)) + assert not list(iter_graphemes_reverse('abc', start=2, end=2)) + # PREPEND + char is one grapheme (GB9b), so start=1 yields nothing (won't split) + assert not list(iter_graphemes_reverse(PREPEND_CHAR + 'a', start=1)) + # But start=0 yields the full grapheme + assert list(iter_graphemes_reverse(PREPEND_CHAR + 'a', start=0)) == [PREPEND_CHAR + 'a'] + # Negative start is clamped to 0 + assert list(iter_graphemes_reverse('abc', start=-5)) == ['c', 'b', 'a'] diff --git a/contrib/python/wcwidth/py3/tests/test_textwrap.py b/contrib/python/wcwidth/py3/tests/test_textwrap.py index c2f28bffe1a..fc15f1917f9 100644 --- a/contrib/python/wcwidth/py3/tests/test_textwrap.py +++ b/contrib/python/wcwidth/py3/tests/test_textwrap.py @@ -12,6 +12,7 @@ from wcwidth import iter_sequences from wcwidth.textwrap import SequenceTextWrapper, wrap SGR_RED = '\x1b[31m' +SGR_BLUE = '\x1b[34m' SGR_BOLD = '\x1b[1m' SGR_RESET = '\x1b[0m' ATTRS = ('\x1b[31m', '\x1b[34m', '\x1b[4m', '\x1b[7m', '\x1b[41m', '\x1b[37m', '\x1b[107m') @@ -203,7 +204,7 @@ SEQUENCE_CASES = [ # Empty/adjacent sequences (f'{SGR_RED}{SGR_RESET}', 10, [f'{SGR_RED}{SGR_RESET}']), (f'hello {SGR_RED}{SGR_RESET}world', 6, ['hello', f'{SGR_RED}{SGR_RESET}world']), - # OSC hyperlinks + # OSC hyperlinks (with space separator) 
(f'{OSC_HYPERLINK} text', 5, [OSC_HYPERLINK, 'text']), # CSI cursor sequences (f'{CSI_CURSOR}text here', 10, [f'{CSI_CURSOR}text', 'here']), @@ -262,3 +263,69 @@ TABSIZE_WIDE_CASES = [ def test_wrap_tabsize_wide_chars(text, w, tabsize, expected): """Verify tabsize respects wide character column positions.""" assert wrap(text, w, tabsize=tabsize) == expected + + +OSC_START_ST = '\x1b]8;;http://example.com\x1b\\' +OSC_END_ST = '\x1b]8;;\x1b\\' +OSC_START_BEL = '\x1b]8;;http://example.com\x07' +OSC_END_BEL = '\x1b]8;;\x07' + +HYPERLINK_WORD_BOUNDARY_CASES = [ + ( # standard, ST-variant, + f'{OSC_START_ST}link{OSC_END_ST}more', + 5, + [f'{OSC_START_ST}link{OSC_END_ST}', 'more'], + ), + ( # BEL-variant, + f'{OSC_START_BEL}link{OSC_END_BEL}more', + 5, + [f'{OSC_START_BEL}link{OSC_END_BEL}', 'more'], + ), + ( # hyperlink breaks after word, 'prefix', + f'prefix{OSC_START_ST}link{OSC_END_ST}', + 6, + ['prefix', f'{OSC_START_ST}link{OSC_END_ST}'], + ), + ( + f'prefix{OSC_START_BEL}link{OSC_END_BEL}', + 6, + ['prefix', f'{OSC_START_BEL}link{OSC_END_BEL}'], + ), + ( # hyperlink breaks before following, 'suffix', + f'prefix{OSC_START_ST}link{OSC_END_ST}suffix', + 6, + ['prefix', f'{OSC_START_ST}link{OSC_END_ST}', 'suffix'], + ), + ( + f'prefix{OSC_START_BEL}link{OSC_END_BEL}suffix', + 6, + ['prefix', f'{OSC_START_BEL}link{OSC_END_BEL}', 'suffix'], + ), + ( # hyperlink *surrounded* by SGR attributes + f'foo {SGR_RED}{OSC_START_ST}link{OSC_END_ST}{SGR_RESET} bar', + 6, + ['foo', f'{SGR_RED}{OSC_START_ST}link{OSC_END_ST}{SGR_RESET}', 'bar'], + ), + ( + f'foo {SGR_RED}{OSC_START_BEL}link{OSC_END_BEL}{SGR_RESET} bar', + 6, + ['foo', f'{SGR_RED}{OSC_START_BEL}link{OSC_END_BEL}{SGR_RESET}', 'bar'], + ), + ( # hyperlink *containing* SGR attributes + f'foo {OSC_START_ST}{SGR_RED}link{SGR_RESET}{OSC_END_ST} bar', + 6, + ['foo', f'{OSC_START_ST}{SGR_RED}link{SGR_RESET}{OSC_END_ST}', 'bar'], + ), + ( + f'foo {OSC_START_BEL}{SGR_RED}link{SGR_RESET}{OSC_END_BEL} bar', + 6, + ['foo', 
f'{OSC_START_BEL}{SGR_RED}link{SGR_RESET}{OSC_END_BEL}', 'bar'], + ), +] + + +@pytest.mark.parametrize('text,w,expected', HYPERLINK_WORD_BOUNDARY_CASES) +def test_wrap_hyperlink_word_boundary(text, w, expected): + """OSC hyperlink sequences should act as word boundaries.""" + result = wrap(text, w) + assert result == expected |
