summary refs log tree commit diff stats
path: root/contrib/python/wcwidth/py3/tests
diff options
context:
space:
mode:
author robot-piglet <[email protected]> 2026-02-11 10:55:54 +0300
committer robot-piglet <[email protected]> 2026-02-11 11:24:08 +0300
commit 9b5f29efa00bba424cd32471a95ececc583fe046 (patch)
tree 58936c6fc2147c49fc2a4aec657f63fb6f789336 /contrib/python/wcwidth/py3/tests
parent df75a44af0e3c0cfce907e22f61d6c91fc3bbc39 (diff)
Intermediate changes
commit_hash:721c786fcb8a37574bec0881ae2194859f790fae
Diffstat (limited to 'contrib/python/wcwidth/py3/tests')
-rw-r--r-- contrib/python/wcwidth/py3/tests/test_benchmarks.py 56
-rw-r--r-- contrib/python/wcwidth/py3/tests/test_grapheme.py 111
-rw-r--r-- contrib/python/wcwidth/py3/tests/test_textwrap.py 69
3 files changed, 234 insertions, 2 deletions
diff --git a/contrib/python/wcwidth/py3/tests/test_benchmarks.py b/contrib/python/wcwidth/py3/tests/test_benchmarks.py
index 5c929f7ec37..6e0ffadcc45 100644
--- a/contrib/python/wcwidth/py3/tests/test_benchmarks.py
+++ b/contrib/python/wcwidth/py3/tests/test_benchmarks.py
@@ -1,6 +1,7 @@
"""Performance benchmarks for wcwidth module."""
# std imports
import os
+import unicodedata
# local
import wcwidth
@@ -54,6 +55,31 @@ def test_wcswidth_emoji_sequence(benchmark):
benchmark(wcwidth.wcswidth, text)
+# NFC vs NFD comparison - text with combining marks
+DIACRITICS_COMPOSED = 'café résumé naïve ' * 100
+DIACRITICS_DECOMPOSED = unicodedata.normalize('NFD', DIACRITICS_COMPOSED)
+
+
+def test_wcswidth_composed(benchmark):
+ """Benchmark wcswidth() with NFC-composed text."""
+ benchmark(wcwidth.wcswidth, DIACRITICS_COMPOSED)
+
+
+def test_wcswidth_decomposed(benchmark):
+ """Benchmark wcswidth() with NFD-decomposed text."""
+ benchmark(wcwidth.wcswidth, DIACRITICS_DECOMPOSED)
+
+
+def test_width_composed(benchmark):
+ """Benchmark width() with NFC-composed text."""
+ benchmark(wcwidth.width, DIACRITICS_COMPOSED)
+
+
+def test_width_decomposed(benchmark):
+ """Benchmark width() with NFD-decomposed text."""
+ benchmark(wcwidth.width, DIACRITICS_DECOMPOSED)
+
+
def test_width_ascii(benchmark):
"""Benchmark width() with ASCII string."""
benchmark(wcwidth.width, 'hello world')
@@ -88,6 +114,36 @@ def test_iter_graphemes_combining(benchmark):
benchmark(lambda: list(wcwidth.iter_graphemes(text)))
+def test_grapheme_boundary_before_short(benchmark):
+ """Benchmark grapheme_boundary_before() near start of short string."""
+ text = 'Hello 👋🏻!'
+ benchmark(wcwidth.grapheme_boundary_before, text, 8)
+
+
+def test_grapheme_boundary_before_long_end(benchmark):
+ """Benchmark grapheme_boundary_before() near end of long line."""
+ text = 'x' * 95 + '👨\u200d👩\u200d👧!'
+ benchmark(wcwidth.grapheme_boundary_before, text, 100)
+
+
+def test_grapheme_boundary_before_long_mid(benchmark):
+ """Benchmark grapheme_boundary_before() in middle of long line."""
+ text = 'x' * 50 + '👨\u200d👩\u200d👧' + 'y' * 50
+ benchmark(wcwidth.grapheme_boundary_before, text, 55)
+
+
+def test_iter_graphemes_reverse_short(benchmark):
+ """Benchmark iter_graphemes_reverse() with short string."""
+ text = 'café\u0301 🇫🇷!'
+ benchmark(lambda: list(wcwidth.iter_graphemes_reverse(text)))
+
+
+def test_iter_graphemes_reverse_long(benchmark):
+ """Benchmark iter_graphemes_reverse() with long string."""
+ text = 'The quick brown 🦊 jumps over the lazy 🐕. ' * 5
+ benchmark(lambda: list(wcwidth.iter_graphemes_reverse(text)))
+
+
def test_ljust_ascii(benchmark):
"""Benchmark ljust() with ASCII string."""
benchmark(wcwidth.ljust, 'hello', 20)
diff --git a/contrib/python/wcwidth/py3/tests/test_grapheme.py b/contrib/python/wcwidth/py3/tests/test_grapheme.py
index f344ad32fde..d2cfa86c1c8 100644
--- a/contrib/python/wcwidth/py3/tests/test_grapheme.py
+++ b/contrib/python/wcwidth/py3/tests/test_grapheme.py
@@ -6,7 +6,7 @@ import os
import pytest
# local
-from wcwidth import iter_graphemes
+from wcwidth import iter_graphemes, iter_graphemes_reverse, grapheme_boundary_before
try:
chr(0x2fffe)
@@ -145,3 +145,112 @@ def test_wide_unicode_graphemes(input_str, expected):
def test_unicode_grapheme_break_test(input_str, expected):
"""Validate against official Unicode GraphemeBreakTest.txt."""
assert list(iter_graphemes(input_str)) == expected
+
+
+# Prepend: Arabic Number Sign
+PREPEND_CHAR = '\u0600'
+# Multiple combining marks: e + acute + grave
+MULTI_COMBINE = 'e\u0301\u0300'
+
+
+# grapheme_boundary_before(text, pos) returns start of grapheme cluster before pos.
+# (text, pos, expected): pos=search from here, expected=where cluster starts
+@pytest.mark.parametrize(("text", "pos", "expected"), [
+ # 'abc': 0=a, 1=b, 2=c
+ ('abc', 3, 2), # from end -> 'c' at 2
+ ('abc', 2, 1), # from 'c' -> 'b' at 1
+ ('abc', 1, 0), # from 'b' -> 'a' at 0
+ # 'a\r\nb': CRLF is one cluster (GB3)
+ ('a\r\nb', 3, 1), # from 'b' -> '\r\n' at 1
+ # 'café': e + combining acute is one cluster (GB9)
+ ('cafe\u0301', 5, 3), # from end -> 'é' at 3
+ ('cafe\u0301', 4, 3), # from acute -> still 'é' at 3
+ # Multiple combining marks: e + acute + grave (GB9)
+ ('a' + MULTI_COMBINE + 'b', 4, 1), # from 'b' -> e+marks at 1
+ # Prepend + char is one cluster (GB9b)
+ (PREPEND_CHAR + 'a', 2, 0), # whole cluster
+ # Prepend + Control: control breaks (GB4)
+ (PREPEND_CHAR + '\n', 2, 1), # '\n' separate at 1
+ # C1 control (NEL, 0x85) stops backward scan in _find_cluster_start (GB4)
+ ('X\x85\u0301', 3, 2),
+])
+def test_grapheme_boundary_before_basic(text, pos, expected):
+ """Basic grapheme_boundary_before tests."""
+ assert grapheme_boundary_before(text, pos) == expected
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="requires wide Unicode")
+@pytest.mark.parametrize(("text", "pos", "expected"), [
+ # 'Hi 👋🏻!': 0=H,1=i,2=space,3=wave,4=skin,5=!; wave+skin is one cluster
+ ('Hi \U0001F44B\U0001F3FB!', 6, 5), # from end -> '!' at 5
+ ('Hi \U0001F44B\U0001F3FB!', 5, 3), # from '!' -> wave+skin at 3
+ ('Hi \U0001F44B\U0001F3FB!', 3, 2), # from wave -> space at 2
+ # 'a🇺🇸b': 0=a,1-2=flag,3=b; flag is one cluster (GB12/13)
+ ('a' + FLAG_US + 'b', 4, 3), # from end -> 'b' at 3
+ ('a' + FLAG_US + 'b', 3, 1), # from 'b' -> flag at 1
+ # Three RIs (🇺🇸🇦): flag + solo RI
+ (FLAG_US + RI_A, 3, 2), # from end -> solo RI at 2
+ (FLAG_US + RI_A, 2, 0), # from solo -> flag at 0
+ # 'a👨‍👩‍👧b': 0=a,1-5=family,6=b; ZWJ sequence is one cluster (GB11)
+ ('a' + FAMILY + 'b', 7, 6), # from end -> 'b' at 6
+ ('a' + FAMILY + 'b', 6, 1), # from 'b' -> family at 1
+])
+def test_grapheme_boundary_before_unicode(text, pos, expected):
+ """grapheme_boundary_before with emoji and wide Unicode."""
+ assert grapheme_boundary_before(text, pos) == expected
+
+
+@pytest.mark.parametrize(("input_str", "expected"), [
+ ('', []),
+ ('abc', ['c', 'b', 'a']),
+ # café with combining mark mixed with CRLF
+ ('cafe\u0301\r\nok', ['k', 'o', '\r\n', 'e\u0301', 'f', 'a', 'c']),
+])
+def test_iter_graphemes_reverse_basic(input_str, expected):
+ """Basic iter_graphemes_reverse tests."""
+ assert list(iter_graphemes_reverse(input_str)) == expected
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="requires wide Unicode")
+@pytest.mark.parametrize(("input_str", "expected"), [
+ # Multiple emoji types in one string
+ ('cafe\u0301 ' + WAVE_SKIN + ' ' + FLAG_US + '!',
+ ['!', FLAG_US, ' ', WAVE_SKIN, ' ', 'e\u0301', 'f', 'a', 'c']),
+ # Two families
+ (FAMILY + FAMILY, [FAMILY, FAMILY]),
+ # Flag + solo RI + text
+ ('Hi' + FLAG_US + RI_A + '!', ['!', RI_A, FLAG_US, 'i', 'H']),
+])
+def test_iter_graphemes_reverse_unicode(input_str, expected):
+ """iter_graphemes_reverse with wide Unicode."""
+ assert list(iter_graphemes_reverse(input_str)) == expected
+
+
+@pytest.mark.skipif(NARROW_ONLY, reason="requires wide Unicode")
+@pytest.mark.parametrize(("input_str", "expected"), read_grapheme_break_test())
+def test_grapheme_roundtrip_consistency(input_str, expected):
+ """Forward and reverse iteration produce identical boundaries."""
+ forward = list(iter_graphemes(input_str))
+ reverse = list(iter_graphemes_reverse(input_str))[::-1]
+ assert forward == reverse
+
+
+def test_grapheme_boundary_before_edge_cases():
+ """Edge cases for grapheme_boundary_before."""
+ assert grapheme_boundary_before('abc', 0) == 0
+ assert grapheme_boundary_before('abc', 100) == 2 # pos > len clamps
+ assert grapheme_boundary_before('', 0) == 0
+
+
+def test_iter_graphemes_reverse_edge_cases():
+ """Edge cases for iter_graphemes_reverse."""
+ assert list(iter_graphemes_reverse('abcdef', start=2, end=5)) == ['e', 'd', 'c']
+ assert list(iter_graphemes_reverse('abc', start=0, end=100)) == ['c', 'b', 'a']
+ assert not list(iter_graphemes_reverse('abc', start=5))
+ assert not list(iter_graphemes_reverse('abc', start=2, end=2))
+ # PREPEND + char is one grapheme (GB9b), so start=1 yields nothing (won't split)
+ assert not list(iter_graphemes_reverse(PREPEND_CHAR + 'a', start=1))
+ # But start=0 yields the full grapheme
+ assert list(iter_graphemes_reverse(PREPEND_CHAR + 'a', start=0)) == [PREPEND_CHAR + 'a']
+ # Negative start is clamped to 0
+ assert list(iter_graphemes_reverse('abc', start=-5)) == ['c', 'b', 'a']
diff --git a/contrib/python/wcwidth/py3/tests/test_textwrap.py b/contrib/python/wcwidth/py3/tests/test_textwrap.py
index c2f28bffe1a..fc15f1917f9 100644
--- a/contrib/python/wcwidth/py3/tests/test_textwrap.py
+++ b/contrib/python/wcwidth/py3/tests/test_textwrap.py
@@ -12,6 +12,7 @@ from wcwidth import iter_sequences
from wcwidth.textwrap import SequenceTextWrapper, wrap
SGR_RED = '\x1b[31m'
+SGR_BLUE = '\x1b[34m'
SGR_BOLD = '\x1b[1m'
SGR_RESET = '\x1b[0m'
ATTRS = ('\x1b[31m', '\x1b[34m', '\x1b[4m', '\x1b[7m', '\x1b[41m', '\x1b[37m', '\x1b[107m')
@@ -203,7 +204,7 @@ SEQUENCE_CASES = [
# Empty/adjacent sequences
(f'{SGR_RED}{SGR_RESET}', 10, [f'{SGR_RED}{SGR_RESET}']),
(f'hello {SGR_RED}{SGR_RESET}world', 6, ['hello', f'{SGR_RED}{SGR_RESET}world']),
- # OSC hyperlinks
+ # OSC hyperlinks (with space separator)
(f'{OSC_HYPERLINK} text', 5, [OSC_HYPERLINK, 'text']),
# CSI cursor sequences
(f'{CSI_CURSOR}text here', 10, [f'{CSI_CURSOR}text', 'here']),
@@ -262,3 +263,69 @@ TABSIZE_WIDE_CASES = [
def test_wrap_tabsize_wide_chars(text, w, tabsize, expected):
"""Verify tabsize respects wide character column positions."""
assert wrap(text, w, tabsize=tabsize) == expected
+
+
+OSC_START_ST = '\x1b]8;;http://example.com\x1b\\'
+OSC_END_ST = '\x1b]8;;\x1b\\'
+OSC_START_BEL = '\x1b]8;;http://example.com\x07'
+OSC_END_BEL = '\x1b]8;;\x07'
+
+HYPERLINK_WORD_BOUNDARY_CASES = [
+ ( # standard, ST-variant,
+ f'{OSC_START_ST}link{OSC_END_ST}more',
+ 5,
+ [f'{OSC_START_ST}link{OSC_END_ST}', 'more'],
+ ),
+ ( # BEL-variant,
+ f'{OSC_START_BEL}link{OSC_END_BEL}more',
+ 5,
+ [f'{OSC_START_BEL}link{OSC_END_BEL}', 'more'],
+ ),
+ ( # hyperlink breaks after word, 'prefix',
+ f'prefix{OSC_START_ST}link{OSC_END_ST}',
+ 6,
+ ['prefix', f'{OSC_START_ST}link{OSC_END_ST}'],
+ ),
+ (
+ f'prefix{OSC_START_BEL}link{OSC_END_BEL}',
+ 6,
+ ['prefix', f'{OSC_START_BEL}link{OSC_END_BEL}'],
+ ),
+ ( # hyperlink breaks before following, 'suffix',
+ f'prefix{OSC_START_ST}link{OSC_END_ST}suffix',
+ 6,
+ ['prefix', f'{OSC_START_ST}link{OSC_END_ST}', 'suffix'],
+ ),
+ (
+ f'prefix{OSC_START_BEL}link{OSC_END_BEL}suffix',
+ 6,
+ ['prefix', f'{OSC_START_BEL}link{OSC_END_BEL}', 'suffix'],
+ ),
+ ( # hyperlink *surrounded* by SGR attributes
+ f'foo {SGR_RED}{OSC_START_ST}link{OSC_END_ST}{SGR_RESET} bar',
+ 6,
+ ['foo', f'{SGR_RED}{OSC_START_ST}link{OSC_END_ST}{SGR_RESET}', 'bar'],
+ ),
+ (
+ f'foo {SGR_RED}{OSC_START_BEL}link{OSC_END_BEL}{SGR_RESET} bar',
+ 6,
+ ['foo', f'{SGR_RED}{OSC_START_BEL}link{OSC_END_BEL}{SGR_RESET}', 'bar'],
+ ),
+ ( # hyperlink *containing* SGR attributes
+ f'foo {OSC_START_ST}{SGR_RED}link{SGR_RESET}{OSC_END_ST} bar',
+ 6,
+ ['foo', f'{OSC_START_ST}{SGR_RED}link{SGR_RESET}{OSC_END_ST}', 'bar'],
+ ),
+ (
+ f'foo {OSC_START_BEL}{SGR_RED}link{SGR_RESET}{OSC_END_BEL} bar',
+ 6,
+ ['foo', f'{OSC_START_BEL}{SGR_RED}link{SGR_RESET}{OSC_END_BEL}', 'bar'],
+ ),
+]
+
+
+@pytest.mark.parametrize('text,w,expected', HYPERLINK_WORD_BOUNDARY_CASES)
+def test_wrap_hyperlink_word_boundary(text, w, expected):
+ """OSC hyperlink sequences should act as word boundaries."""
+ result = wrap(text, w)
+ assert result == expected