Intermediate changes

commit_hash:721c786fcb8a37574bec0881ae2194859f790fae
author: robot-piglet <[email protected]> 2026-02-11 10:55:54 +0300
committer: robot-piglet <[email protected]> 2026-02-11 11:24:08 +0300
commit: 9b5f29efa00bba424cd32471a95ececc583fe046 (patch)
tree: 58936c6fc2147c49fc2a4aec657f63fb6f789336 /contrib/python/wcwidth
parent: df75a44af0e3c0cfce907e22f61d6c91fc3bbc39 (diff)
8 files changed, 403 insertions, 13 deletions
diff --git a/contrib/python/wcwidth/py3/.dist-info/METADATA b/contrib/python/wcwidth/py3/.dist-info/METADATA
index de002938d9d..c80ecb6d722 100644
--- a/contrib/python/wcwidth/py3/.dist-info/METADATA
+++ b/contrib/python/wcwidth/py3/.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wcwidth
-Version: 0.3.5
+Version: 0.4.0
 Summary: Measures the displayed width of unicode strings in a terminal
 Project-URL: Homepage, https://github.com/jquast/wcwidth
 Author-email: Jeff Quast <[email protected]>
@@ -78,8 +78,10 @@ Text-justification is solved by the grapheme and sequence-aware functions `ljust
 of the same names.
 
 The iterator functions `iter_graphemes()`_ and `iter_sequences()`_ allow for careful navigation of
-grapheme and terminal control sequence boundaries.  The `clip()`_ function extracts substrings by
-display column positions, and `strip_sequences()`_ removes terminal escape sequences from text.
+grapheme and terminal control sequence boundaries.  `iter_graphemes_reverse()`_ and
+`grapheme_boundary_before()`_ are useful for editing and searching of complex unicode.  The
+`clip()`_ function extracts substrings by display column positions, and `strip_sequences()`_ removes
+terminal escape sequences from text altogether.
 
 Discrepancies
 -------------
@@ -472,6 +474,10 @@ languages.
 History
 =======
 
+0.4.0 *2026-01-25*
+  * **New** Functions `iter_graphemes_reverse()`_, `grapheme_boundary_before()`_.
+  * **Bugfix** OSC hyperlinks are no longer broken by ``wrap()``.
+
 0.3.5 *2026-01-24*
   * **Bugfix** packaging of 0.3.4 contains a failing test.
 
@@ -690,6 +696,8 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
 .. _`wcswidth()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.wcswidth
 .. _`width()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.width
 .. _`iter_graphemes()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_graphemes
+.. _`iter_graphemes_reverse()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_graphemes_reverse
+.. _`grapheme_boundary_before()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.grapheme_boundary_before
 .. _`ljust()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.ljust
 .. _`rjust()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.rjust
 .. _`center()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.center
diff --git a/contrib/python/wcwidth/py3/tests/test_benchmarks.py b/contrib/python/wcwidth/py3/tests/test_benchmarks.py
index 5c929f7ec37..6e0ffadcc45 100644
--- a/contrib/python/wcwidth/py3/tests/test_benchmarks.py
+++ b/contrib/python/wcwidth/py3/tests/test_benchmarks.py
@@ -1,6 +1,7 @@
 """Performance benchmarks for wcwidth module."""
 # std imports
 import os
+import unicodedata
 
 # local
 import wcwidth
@@ -54,6 +55,31 @@ def test_wcswidth_emoji_sequence(benchmark):
     benchmark(wcwidth.wcswidth, text)
 
 
+# NFC vs NFD comparison - text with combining marks
+DIACRITICS_COMPOSED = 'café résumé naïve ' * 100
+DIACRITICS_DECOMPOSED = unicodedata.normalize('NFD', DIACRITICS_COMPOSED)
+
+
+def test_wcswidth_composed(benchmark):
+    """Benchmark wcswidth() with NFC-composed text."""
+    benchmark(wcwidth.wcswidth, DIACRITICS_COMPOSED)
+
+
+def test_wcswidth_decomposed(benchmark):
+    """Benchmark wcswidth() with NFD-decomposed text."""
+    benchmark(wcwidth.wcswidth, DIACRITICS_DECOMPOSED)
+
+
+def test_width_composed(benchmark):
+    """Benchmark width() with NFC-composed text."""
+    benchmark(wcwidth.width, DIACRITICS_COMPOSED)
+
+
+def test_width_decomposed(benchmark):
+    """Benchmark width() with NFD-decomposed text."""
+    benchmark(wcwidth.width, DIACRITICS_DECOMPOSED)
+
+
 def test_width_ascii(benchmark):
     """Benchmark width() with ASCII string."""
     benchmark(wcwidth.width, 'hello world')
@@ -88,6 +114,36 @@ def test_iter_graphemes_combining(benchmark):
     benchmark(lambda: list(wcwidth.iter_graphemes(text)))
 
 
+def test_grapheme_boundary_before_short(benchmark):
+    """Benchmark grapheme_boundary_before() near end of short string."""
+    text = 'Hello 👋🏻!'
+    benchmark(wcwidth.grapheme_boundary_before, text, 8)
+
+
+def test_grapheme_boundary_before_long_end(benchmark):
+    """Benchmark grapheme_boundary_before() near end of long line."""
+    text = 'x' * 95 + '👨\u200d👩\u200d👧!'
+    benchmark(wcwidth.grapheme_boundary_before, text, 100)
+
+
+def test_grapheme_boundary_before_long_mid(benchmark):
+    """Benchmark grapheme_boundary_before() in middle of long line."""
+    text = 'x' * 50 + '👨\u200d👩\u200d👧' + 'y' * 50
+    benchmark(wcwidth.grapheme_boundary_before, text, 55)
+
+
+def test_iter_graphemes_reverse_short(benchmark):
+    """Benchmark iter_graphemes_reverse() with short string."""
+    text = 'café\u0301 🇫🇷!'
+    benchmark(lambda: list(wcwidth.iter_graphemes_reverse(text)))
+
+
+def test_iter_graphemes_reverse_long(benchmark):
+    """Benchmark iter_graphemes_reverse() with long string."""
+    text = 'The quick brown 🦊 jumps over the lazy 🐕. ' * 5
+    benchmark(lambda: list(wcwidth.iter_graphemes_reverse(text)))
+
+
 def test_ljust_ascii(benchmark):
     """Benchmark ljust() with ASCII string."""
     benchmark(wcwidth.ljust, 'hello', 20)
diff --git a/contrib/python/wcwidth/py3/tests/test_grapheme.py b/contrib/python/wcwidth/py3/tests/test_grapheme.py
index f344ad32fde..d2cfa86c1c8 100644
--- a/contrib/python/wcwidth/py3/tests/test_grapheme.py
+++ b/contrib/python/wcwidth/py3/tests/test_grapheme.py
@@ -6,7 +6,7 @@ import os
 import pytest
 
 # local
-from wcwidth import iter_graphemes
+from wcwidth import iter_graphemes, iter_graphemes_reverse, grapheme_boundary_before
 
 try:
     chr(0x2fffe)
@@ -145,3 +145,112 @@ def test_wide_unicode_graphemes(input_str, expected):
 def test_unicode_grapheme_break_test(input_str, expected):
     """Validate against official Unicode GraphemeBreakTest.txt."""
     assert list(iter_graphemes(input_str)) == expected
+
+
+# Prepend: Arabic Number Sign
+PREPEND_CHAR = '\u0600'
+# Multiple combining marks: e + acute + grave
+MULTI_COMBINE = 'e\u0301\u0300'
+
+
+# grapheme_boundary_before(text, pos) returns start of grapheme cluster before pos.
+# (text, pos, expected): pos=search from here, expected=where cluster starts
+[email protected](("text", "pos", "expected"), [
+    # 'abc': 0=a, 1=b, 2=c
+    ('abc', 3, 2),  # from end -> 'c' at 2
+    ('abc', 2, 1),  # from 'c' -> 'b' at 1
+    ('abc', 1, 0),  # from 'b' -> 'a' at 0
+    # 'a\r\nb': CRLF is one cluster (GB3)
+    ('a\r\nb', 3, 1),  # from 'b' -> '\r\n' at 1
+    # 'café': e + combining acute is one cluster (GB9)
+    ('cafe\u0301', 5, 3),  # from end -> 'é' at 3
+    ('cafe\u0301', 4, 3),  # from acute -> still 'é' at 3
+    # Multiple combining marks: e + acute + grave (GB9)
+    ('a' + MULTI_COMBINE + 'b', 4, 1),  # from 'b' -> e+marks at 1
+    # Prepend + char is one cluster (GB9b)
+    (PREPEND_CHAR + 'a', 2, 0),  # whole cluster
+    # Prepend + Control: control breaks (GB4)
+    (PREPEND_CHAR + '\n', 2, 1),  # '\n' separate at 1
+    # C1 control (NEL, 0x85) stops backward scan in _find_cluster_start (GB4)
+    ('X\x85\u0301', 3, 2),
+])
+def test_grapheme_boundary_before_basic(text, pos, expected):
+    """Basic grapheme_boundary_before tests."""
+    assert grapheme_boundary_before(text, pos) == expected
+
+
+[email protected](NARROW_ONLY, reason="requires wide Unicode")
+[email protected](("text", "pos", "expected"), [
+    # 'Hi 👋🏻!': 0=H,1=i,2=space,3=wave,4=skin,5=!; wave+skin is one cluster
+    ('Hi \U0001F44B\U0001F3FB!', 6, 5),  # from end -> '!' at 5
+    ('Hi \U0001F44B\U0001F3FB!', 5, 3),  # from '!' -> wave+skin at 3
+    ('Hi \U0001F44B\U0001F3FB!', 3, 2),  # from wave -> space at 2
+    # 'a🇺🇸b': 0=a,1-2=flag,3=b; flag is one cluster (GB12/13)
+    ('a' + FLAG_US + 'b', 4, 3),  # from end -> 'b' at 3
+    ('a' + FLAG_US + 'b', 3, 1),  # from 'b' -> flag at 1
+    # Three RIs (🇺🇸🇦): flag + solo RI
+    (FLAG_US + RI_A, 3, 2),  # from end -> solo RI at 2
+    (FLAG_US + RI_A, 2, 0),  # from solo -> flag at 0
+    # 'a👨‍👩‍👧b': 0=a,1-5=family,6=b; ZWJ sequence is one cluster (GB11)
+    ('a' + FAMILY + 'b', 7, 6),  # from end -> 'b' at 6
+    ('a' + FAMILY + 'b', 6, 1),  # from 'b' -> family at 1
+])
+def test_grapheme_boundary_before_unicode(text, pos, expected):
+    """grapheme_boundary_before with emoji and wide Unicode."""
+    assert grapheme_boundary_before(text, pos) == expected
+
+
+[email protected](("input_str", "expected"), [
+    ('', []),
+    ('abc', ['c', 'b', 'a']),
+    # café with combining mark mixed with CRLF
+    ('cafe\u0301\r\nok', ['k', 'o', '\r\n', 'e\u0301', 'f', 'a', 'c']),
+])
+def test_iter_graphemes_reverse_basic(input_str, expected):
+    """Basic iter_graphemes_reverse tests."""
+    assert list(iter_graphemes_reverse(input_str)) == expected
+
+
+[email protected](NARROW_ONLY, reason="requires wide Unicode")
+[email protected](("input_str", "expected"), [
+    # Multiple emoji types in one string
+    ('cafe\u0301 ' + WAVE_SKIN + ' ' + FLAG_US + '!',
+     ['!', FLAG_US, ' ', WAVE_SKIN, ' ', 'e\u0301', 'f', 'a', 'c']),
+    # Two families
+    (FAMILY + FAMILY, [FAMILY, FAMILY]),
+    # Flag + solo RI + text
+    ('Hi' + FLAG_US + RI_A + '!', ['!', RI_A, FLAG_US, 'i', 'H']),
+])
+def test_iter_graphemes_reverse_unicode(input_str, expected):
+    """iter_graphemes_reverse with wide Unicode."""
+    assert list(iter_graphemes_reverse(input_str)) == expected
+
+
+[email protected](NARROW_ONLY, reason="requires wide Unicode")
+[email protected](("input_str", "expected"), read_grapheme_break_test())
+def test_grapheme_roundtrip_consistency(input_str, expected):
+    """Forward and reverse iteration produce identical boundaries."""
+    forward = list(iter_graphemes(input_str))
+    reverse = list(iter_graphemes_reverse(input_str))[::-1]
+    assert forward == reverse
+
+
+def test_grapheme_boundary_before_edge_cases():
+    """Edge cases for grapheme_boundary_before."""
+    assert grapheme_boundary_before('abc', 0) == 0
+    assert grapheme_boundary_before('abc', 100) == 2  # pos > len clamps
+    assert grapheme_boundary_before('', 0) == 0
+
+
+def test_iter_graphemes_reverse_edge_cases():
+    """Edge cases for iter_graphemes_reverse."""
+    assert list(iter_graphemes_reverse('abcdef', start=2, end=5)) == ['e', 'd', 'c']
+    assert list(iter_graphemes_reverse('abc', start=0, end=100)) == ['c', 'b', 'a']
+    assert not list(iter_graphemes_reverse('abc', start=5))
+    assert not list(iter_graphemes_reverse('abc', start=2, end=2))
+    # PREPEND + char is one grapheme (GB9b), so start=1 yields nothing (won't split)
+    assert not list(iter_graphemes_reverse(PREPEND_CHAR + 'a', start=1))
+    # But start=0 yields the full grapheme
+    assert list(iter_graphemes_reverse(PREPEND_CHAR + 'a', start=0)) == [PREPEND_CHAR + 'a']
+    # Negative start is clamped to 0
+    assert list(iter_graphemes_reverse('abc', start=-5)) == ['c', 'b', 'a']
diff --git a/contrib/python/wcwidth/py3/tests/test_textwrap.py b/contrib/python/wcwidth/py3/tests/test_textwrap.py
index c2f28bffe1a..fc15f1917f9 100644
--- a/contrib/python/wcwidth/py3/tests/test_textwrap.py
+++ b/contrib/python/wcwidth/py3/tests/test_textwrap.py
@@ -12,6 +12,7 @@ from wcwidth import iter_sequences
 from wcwidth.textwrap import SequenceTextWrapper, wrap
 
 SGR_RED = '\x1b[31m'
+SGR_BLUE = '\x1b[34m'
 SGR_BOLD = '\x1b[1m'
 SGR_RESET = '\x1b[0m'
 ATTRS = ('\x1b[31m', '\x1b[34m', '\x1b[4m', '\x1b[7m', '\x1b[41m', '\x1b[37m', '\x1b[107m')
@@ -203,7 +204,7 @@ SEQUENCE_CASES = [
     # Empty/adjacent sequences
     (f'{SGR_RED}{SGR_RESET}', 10, [f'{SGR_RED}{SGR_RESET}']),
     (f'hello {SGR_RED}{SGR_RESET}world', 6, ['hello', f'{SGR_RED}{SGR_RESET}world']),
-    # OSC hyperlinks
+    # OSC hyperlinks (with space separator)
     (f'{OSC_HYPERLINK} text', 5, [OSC_HYPERLINK, 'text']),
     # CSI cursor sequences
     (f'{CSI_CURSOR}text here', 10, [f'{CSI_CURSOR}text', 'here']),
@@ -262,3 +263,69 @@ TABSIZE_WIDE_CASES = [
 def test_wrap_tabsize_wide_chars(text, w, tabsize, expected):
     """Verify tabsize respects wide character column positions."""
     assert wrap(text, w, tabsize=tabsize) == expected
+
+
+OSC_START_ST = '\x1b]8;;http://example.com\x1b\\'
+OSC_END_ST = '\x1b]8;;\x1b\\'
+OSC_START_BEL = '\x1b]8;;http://example.com\x07'
+OSC_END_BEL = '\x1b]8;;\x07'
+
+HYPERLINK_WORD_BOUNDARY_CASES = [
+    (   # standard, ST-variant,
+        f'{OSC_START_ST}link{OSC_END_ST}more',
+        5,
+        [f'{OSC_START_ST}link{OSC_END_ST}', 'more'],
+    ),
+    (   # BEL-variant,
+        f'{OSC_START_BEL}link{OSC_END_BEL}more',
+        5,
+        [f'{OSC_START_BEL}link{OSC_END_BEL}', 'more'],
+    ),
+    (   # hyperlink breaks after word, 'prefix',
+        f'prefix{OSC_START_ST}link{OSC_END_ST}',
+        6,
+        ['prefix', f'{OSC_START_ST}link{OSC_END_ST}'],
+    ),
+    (
+        f'prefix{OSC_START_BEL}link{OSC_END_BEL}',
+        6,
+        ['prefix', f'{OSC_START_BEL}link{OSC_END_BEL}'],
+    ),
+    (   # hyperlink breaks before following, 'suffix',
+        f'prefix{OSC_START_ST}link{OSC_END_ST}suffix',
+        6,
+        ['prefix', f'{OSC_START_ST}link{OSC_END_ST}', 'suffix'],
+    ),
+    (
+        f'prefix{OSC_START_BEL}link{OSC_END_BEL}suffix',
+        6,
+        ['prefix', f'{OSC_START_BEL}link{OSC_END_BEL}', 'suffix'],
+    ),
+    (   # hyperlink *surrounded* by SGR attributes
+        f'foo {SGR_RED}{OSC_START_ST}link{OSC_END_ST}{SGR_RESET} bar',
+        6,
+        ['foo', f'{SGR_RED}{OSC_START_ST}link{OSC_END_ST}{SGR_RESET}', 'bar'],
+    ),
+    (
+        f'foo {SGR_RED}{OSC_START_BEL}link{OSC_END_BEL}{SGR_RESET} bar',
+        6,
+        ['foo', f'{SGR_RED}{OSC_START_BEL}link{OSC_END_BEL}{SGR_RESET}', 'bar'],
+    ),
+    (   # hyperlink *containing* SGR attributes
+        f'foo {OSC_START_ST}{SGR_RED}link{SGR_RESET}{OSC_END_ST} bar',
+        6,
+        ['foo', f'{OSC_START_ST}{SGR_RED}link{SGR_RESET}{OSC_END_ST}', 'bar'],
+    ),
+    (
+        f'foo {OSC_START_BEL}{SGR_RED}link{SGR_RESET}{OSC_END_BEL} bar',
+        6,
+        ['foo', f'{OSC_START_BEL}{SGR_RED}link{SGR_RESET}{OSC_END_BEL}', 'bar'],
+    ),
+]
+
+
+[email protected]('text,w,expected', HYPERLINK_WORD_BOUNDARY_CASES)
+def test_wrap_hyperlink_word_boundary(text, w, expected):
+    """OSC hyperlink sequences should act as word boundaries."""
+    result = wrap(text, w)
+    assert result == expected
diff --git a/contrib/python/wcwidth/py3/wcwidth/__init__.py b/contrib/python/wcwidth/py3/wcwidth/__init__.py
index ed29279dcb8..03279ff863b 100644
--- a/contrib/python/wcwidth/py3/wcwidth/__init__.py
+++ b/contrib/python/wcwidth/py3/wcwidth/__init__.py
@@ -26,16 +26,18 @@ from .wcwidth import (WIDE_EASTASIAN,
                       _wcmatch_version,
                       _wcversion_value)
 from .bisearch import bisearch as _bisearch
-from .grapheme import iter_graphemes  # noqa
+from .grapheme import grapheme_boundary_before  # noqa
+from .grapheme import iter_graphemes, iter_graphemes_reverse
 from .textwrap import SequenceTextWrapper, wrap
 
 # The __all__ attribute defines the items exported from statement,
 # 'from wcwidth import *', but also to say, "This is the public API".
 __all__ = ('wcwidth', 'wcswidth', 'width', 'iter_sequences', 'iter_graphemes',
+           'iter_graphemes_reverse', 'grapheme_boundary_before',
            'ljust', 'rjust', 'center', 'wrap', 'clip', 'strip_sequences',
            'list_versions')
 
 # We also used pkg_resources to load unicode version tables from version.json,
 # generated by bin/update-tables.py, but some environments are unable to
 # import pkg_resources for one reason or another, yikes!
-__version__ = '0.3.5'
+__version__ = '0.4.0'
diff --git a/contrib/python/wcwidth/py3/wcwidth/grapheme.py b/contrib/python/wcwidth/py3/wcwidth/grapheme.py
index 1a83668b066..63713b9070d 100644
--- a/contrib/python/wcwidth/py3/wcwidth/grapheme.py
+++ b/contrib/python/wcwidth/py3/wcwidth/grapheme.py
@@ -36,6 +36,10 @@ if TYPE_CHECKING:  # pragma: no cover
     # std imports
     from collections.abc import Iterator
 
+# Maximum backward scan distance when finding grapheme cluster boundaries.
+# Covers all known Unicode grapheme clusters with margin; longer sequences are pathological.
+MAX_GRAPHEME_SCAN = 32
+
 
 class GCB(IntEnum):
     """Grapheme Cluster Break property values."""
@@ -304,3 +308,118 @@ def iter_graphemes(
 
     # Yield the final cluster
     yield unistr[cluster_start:end]
+
+
+def _find_cluster_start(text: str, pos: int) -> int:
+    """
+    Find the start of the grapheme cluster containing the character before pos.
+
+    Scans backwards from pos to find a safe starting point, then iterates forward using standard
+    break rules to find the actual cluster boundary.
+
+    :param text: The Unicode string.
+    :param pos: Position to search before (exclusive).
+    :returns: Start position of the grapheme cluster.
+    """
+    target_cp = ord(text[pos - 1])
+
+    # GB3: CR x LF - LF after CR is part of same cluster
+    if target_cp == 0x0A and pos >= 2 and text[pos - 2] == '\r':
+        return pos - 2
+
+    # Fast path: ASCII (except LF) starts its own cluster
+    if target_cp < 0x80:
+        # GB9b: Check for preceding PREPEND (rare: Arabic/Brahmic)
+        if pos >= 2 and target_cp >= 0x20:
+            prev_cp = ord(text[pos - 2])
+            if prev_cp >= 0x80 and _grapheme_cluster_break(prev_cp) == GCB.PREPEND:
+                return _find_cluster_start(text, pos - 1)
+        return pos - 1
+
+    # Scan backward to find a safe starting point
+    safe_start = pos - 1
+    while safe_start > 0 and (pos - safe_start) < MAX_GRAPHEME_SCAN:
+        cp = ord(text[safe_start])
+        if 0x20 <= cp < 0x80:  # ASCII always starts a cluster
+            break
+        if _grapheme_cluster_break(cp) == GCB.CONTROL:  # GB4
+            break
+        safe_start -= 1
+
+    # Verify forward to find the actual cluster boundary
+    cluster_start = safe_start
+    left_gcb = _grapheme_cluster_break(ord(text[safe_start]))
+    ri_count = 1 if left_gcb == GCB.REGIONAL_INDICATOR else 0
+
+    for i in range(safe_start + 1, pos):
+        right_gcb = _grapheme_cluster_break(ord(text[i]))
+        result = _should_break(left_gcb, right_gcb, text, i, ri_count)
+        ri_count = result.ri_count
+        if result.should_break:
+            cluster_start = i
+        left_gcb = right_gcb
+
+    return cluster_start
+
+
+def grapheme_boundary_before(unistr: str, pos: int) -> int:
+    r"""
+    Find the grapheme cluster boundary immediately before a position.
+
+    :param unistr: The Unicode string to search.
+    :param pos: Position in the string (0 < pos <= len(unistr)).
+    :returns: Start index of the grapheme cluster containing the character at pos-1.
+
+    Example::
+
+        >>> grapheme_boundary_before('Hello \U0001F44B\U0001F3FB', 8)
+        6
+        >>> grapheme_boundary_before('a\r\nb', 3)
+        1
+
+    .. versionadded:: 0.4.0
+    """
+    if pos <= 0:
+        return 0
+    return _find_cluster_start(unistr, min(pos, len(unistr)))
+
+
+def iter_graphemes_reverse(
+    unistr: str,
+    start: int = 0,
+    end: int | None = None,
+) -> Iterator[str]:
+    r"""
+    Iterate over grapheme clusters in reverse order (last to first).
+
+    :param unistr: The Unicode string to segment.
+    :param start: Starting index (default 0).
+    :param end: Ending index (default len(unistr)).
+    :yields: Grapheme cluster substrings in reverse order.
+
+    Example::
+
+        >>> list(iter_graphemes_reverse('cafe\u0301'))
+        ['e\u0301', 'f', 'a', 'c']
+
+    .. versionadded:: 0.4.0
+    """
+    if not unistr:
+        return
+
+    length = len(unistr)
+
+    end = length if end is None else min(end, length)
+    start = max(start, 0)
+
+    if start >= end or start >= length:
+        return
+
+    pos = end
+    while pos > start:
+        cluster_start = _find_cluster_start(unistr, pos)
+        # Don't yield partial graphemes that extend before start
+        if cluster_start < start:
+            break
+        yield unistr[cluster_start:pos]
+        pos = cluster_start
diff --git a/contrib/python/wcwidth/py3/wcwidth/textwrap.py b/contrib/python/wcwidth/py3/wcwidth/textwrap.py
index 8b91d6ff9ce..41d89a3dba0 100644
--- a/contrib/python/wcwidth/py3/wcwidth/textwrap.py
+++ b/contrib/python/wcwidth/py3/wcwidth/textwrap.py
@@ -34,6 +34,9 @@ class SequenceTextWrapper(textwrap.TextWrapper):
     The key difference from the blessed implementation is the addition of grapheme cluster support
     via :func:`~.iter_graphemes`, providing width calculation for ZWJ emoji sequences, VS-16 emojis
     and variations, regional indicator flags, and combining characters.
+
+    OSC hyperlink sequences are treated as word boundaries, ensuring that text adjacent to
+    hyperlinks wraps correctly without breaking the hyperlink structure.
     """
 
     def __init__(self, width: int = 70, *,
@@ -77,17 +80,25 @@ class SequenceTextWrapper(textwrap.TextWrapper):
         return ''.join(result)
 
     def _split(self, text: str) -> list[str]:  # pylint: disable=too-many-locals
-        """
+        r"""
         Sequence-aware variant of :meth:`textwrap.TextWrapper._split`.
 
         This method ensures that terminal escape sequences don't interfere with the text splitting
         logic, particularly for hyphen-based word breaking. It builds a position mapping from
         stripped text to original text, calls the parent's _split on stripped text, then maps chunks
         back.
+
+        OSC hyperlink sequences are treated as word boundaries::
+
+            >>> wrap('foo \x1b]8;;https://example.com\x07link\x1b]8;;\x07 bar', 6)
+            ['foo', '\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 'bar']
+
+        Both BEL (``\x07``) and ST (``\x1b\\``) terminators are supported.
         """
         # pylint: disable=too-many-locals,too-many-branches
         # Build a mapping from stripped text positions to original text positions.
-        # We track where each character ENDS so that sequences between characters
+        #
+        # Track where each character ENDS so that sequences between characters
         # attach to the following text (not preceding text). This ensures sequences
         # aren't lost when whitespace is dropped.
         #
@@ -95,16 +106,32 @@ class SequenceTextWrapper(textwrap.TextWrapper):
         char_end: list[int] = []
         stripped_text = ''
         original_pos = 0
+        prev_was_hyperlink_close = False
 
         for segment, is_seq in iter_sequences(text):
             if not is_seq:
+                # Conditionally insert space after hyperlink close to force word boundary
+                if prev_was_hyperlink_close and segment and not segment[0].isspace():
+                    stripped_text += ' '
+                    char_end.append(original_pos)
                 for char in segment:
                     original_pos += 1
                     char_end.append(original_pos)
                     stripped_text += char
+                prev_was_hyperlink_close = False
             else:
+                # Conditionally insert space before OSC sequences to artificially create word
+                # boundary, but *not* before hyperlink close sequences, to ensure hyperlink is
+                # terminated on the same line.
+                is_hyperlink_close = segment.startswith(('\x1b]8;;\x1b\\', '\x1b]8;;\x07'))
+                if (segment.startswith('\x1b]') and stripped_text and not
+                        stripped_text[-1].isspace()):
+                    if not is_hyperlink_close:
+                        stripped_text += ' '
+                        char_end.append(original_pos)
                 # Escape sequences advance position but don't add to stripped text
                 original_pos += len(segment)
+                prev_was_hyperlink_close = is_hyperlink_close
 
         # Add sentinel for final position
         char_end.append(original_pos)
@@ -137,7 +164,9 @@ class SequenceTextWrapper(textwrap.TextWrapper):
                 end_orig = char_end[stripped_pos + chunk_len - 1]
 
             # Extract the corresponding portion from the original text
-            result.append(text[start_orig:end_orig])
+            # Skip empty chunks (from virtual spaces inserted at OSC boundaries)
+            if start_orig != end_orig:
+                result.append(text[start_orig:end_orig])
             stripped_pos += chunk_len
 
         return result
@@ -303,8 +332,8 @@ class SequenceTextWrapper(textwrap.TextWrapper):
                     idx = match.end()
                     continue
 
-            # Get grapheme
-            grapheme = next(iter_graphemes(text[idx:]))
+            # Get grapheme (use start= to avoid slice allocation)
+            grapheme = next(iter_graphemes(text, start=idx))
 
             grapheme_width = self._width(grapheme)
             if width_so_far + grapheme_width > max_width:
diff --git a/contrib/python/wcwidth/py3/ya.make b/contrib/python/wcwidth/py3/ya.make
index 36c1c8b8ee2..542d8f80c0f 100644
--- a/contrib/python/wcwidth/py3/ya.make
+++ b/contrib/python/wcwidth/py3/ya.make
@@ -2,7 +2,7 @@
 
 PY3_LIBRARY()
 
-VERSION(0.3.5)
+VERSION(0.4.0)
 
 LICENSE(MIT)
author	robot-piglet <[email protected]>	2026-02-11 10:55:54 +0300
committer	robot-piglet <[email protected]>	2026-02-11 11:24:08 +0300
commit	9b5f29efa00bba424cd32471a95ececc583fe046 (patch)
tree	58936c6fc2147c49fc2a4aec657f63fb6f789336 /contrib/python/wcwidth
parent	df75a44af0e3c0cfce907e22f61d6c91fc3bbc39 (diff)