1 files changed, 148 insertions, 22 deletions
diff --git a/contrib/python/wcwidth/py3/tests/test_width.py b/contrib/python/wcwidth/py3/tests/test_width.py
index 67d7b017258..8e43b47b1de 100644
--- a/contrib/python/wcwidth/py3/tests/test_width.py
+++ b/contrib/python/wcwidth/py3/tests/test_width.py
@@ -1,10 +1,11 @@
 """Tests for width() function."""
+
 # 3rd party
 import pytest
 
 # local
 import wcwidth
-from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN
+from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN, INDETERMINATE_EFFECT_SEQUENCE
 
 BASIC_WIDTH_CASES = [
     ('', 0, 'empty'),
@@ -29,7 +30,7 @@ IGNORE_MODE_CASES = [
     ('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'),
     ('hello\x80world', 10, 'C1_control'),
     ('\x1b', 0, 'lone_ESC'),
-    ('a\x1bb', 2, 'lone_ESC_between'),
+    ('a\x1bb', 1, 'fs_sequence_between'),
 ]
 
 
@@ -45,8 +46,10 @@ STRICT_RAISES_CASES = [
     ('hello\x7fworld', 'DEL'),
     ('hello\x80world', 'C1_control'),
     ('hello\nworld', 'LF'),
+    ('hello\rworld', 'CR'),
     ('hello\x1b[Hworld', 'cursor_home'),
     ('hello\x1b[Aworld', 'cursor_up'),
+    ('hello\x1b[5Gworld', 'hpa'),
 ]
 
 
@@ -61,11 +64,11 @@ STRICT_ALLOWED_CASES = [
     ('hello\x07world', 10, 'BEL'),
     ('hello\x00world', 10, 'NUL'),
     ('abc\bd', 3, 'backspace'),
-    ('abc\rxy', 3, 'CR'),
     ('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'),
     ('a\x1b[2Cb', 4, 'cursor_right'),
+    ('ab\x1b[Db', 2, 'cursor_left'),
     ('\x1b', 0, 'lone_ESC'),
-    ('a\x1bb', 2, 'lone_ESC_between'),
+    ('a\x1bb', 1, 'fs_sequence_between'),
     ('\x1b!', 1, 'ESC_unrecognized'),
 ]
 
@@ -88,6 +91,7 @@ STRICT_INDETERMINATE_SEQUENCES = [
     ('\x1b[1X', 'erase_chars'),
     ('\x1b[1S', 'parm_index'),
     ('\x1b[1T', 'parm_rindex'),
+    ('\x1bc', 'full_reset'),
 ]
 
 
@@ -106,6 +110,11 @@ PARSE_MODE_CASES = [
     ('abcd\x1b[2De', 4, 'cursor_left'),
     ('\x1b[31mred\x1b[0m', 3, 'SGR'),
     ('ab\x1b[Hcd', 4, 'indeterminate'),
+    ('def\x1b[3Dabc', 3, 'cursor_left_overwrite'),
+    ('def\x1b[10Dabc', 3, 'cursor_left_past_start'),
+    ('abc\x1b[5Gde', 6, 'hpa_parse'),
+    ('abc\x1b[Gde', 3, 'hpa_no_param'),
+    ('\x1b[5Gabc', 7, 'hpa_before_text'),
 ]
 
 
@@ -190,29 +199,26 @@ def test_vs16_selector():
 
 
 def test_zwj_with_non_emoji_chars():
-    """ZWJ with non-emoji characters and trailing VS16."""
-    # ZWJ (Zero Width Joiner) skips both itself and the following character, treating them as a
-    # failed emoji ZWJ sequence. When followed by VS16, the VS16 should NOT apply to the earlier
-    # emoji because VS16 must immediately follow the character it modifies.
-    #
-    # In the full parse loop, VS16 checks `last_measured_idx == idx - 1` (immediate adjacency).
-    # The ZWJ+char skip means VS16 is not adjacent to the smiley, so VS16 has no effect.
-    #
+    """
+    ZWJ with non-emoji characters and trailing VS16.
+
+    These are invalid Unicode sequences (ZWJ followed by non-emoji), so behavior is implementation-
+    defined.  The emoji base (smiley, width 1) is narrow, and VS16 looks back to it across the ZWJ-
+    consumed characters, adding 1 cell for a total width of 2.
+    """
     # Control test,
     assert wcwidth.width("\u263A\uFE0F") == 2  # smiley + VS16 = 2
 
-    # ZWJ followed by non-emoji, VS16 does not apply (not adjacent)
-    assert wcwidth.width("\u263A\u200Da\uFE0F") == 1
-    assert wcwidth.width("\u263A\u200Dx\uFE0F") == 1
-    assert wcwidth.width("\u263A\u200Da\u200Db\uFE0F") == 1
+    # ZWJ followed by non-emoji: VS16 applies to the smiley base
+    assert wcwidth.width("\u263A\u200Da\uFE0F") == 2
+    assert wcwidth.width("\u263A\u200Dx\uFE0F") == 2
+    assert wcwidth.width("\u263A\u200Da\u200Db\uFE0F") == 2
 
     # ZWJ at end of string
     assert wcwidth.width("\u263A\u200D") == 1  # smiley + ZWJ = 1
 
     # Long strings (>20 chars) use fast path which routes to wcswidth().
-    # wcswidth() has more lenient VS16 handling, causing VS16 to incorrectly apply (!)
-    # Multiply by 10 to exceed threshold: "\u263A\u200Da\uFE0F" (4 chars) * 10 = 40 chars
-    assert wcwidth.width("\u263A\u200Da\uFE0F" * 10) == 20  # (smiley(1) + ZWJ+a(0) + VS16(+1)) * 10 (!)
+    assert wcwidth.width("\u263A\u200Da\uFE0F" * 10) == 20
 
 
 def test_vs16_after_control_chars():
@@ -228,10 +234,9 @@ def test_vs16_after_control_chars():
     assert wcwidth.width("\u263A\x0d\uFE0F") == 1  # smiley(1) + CR(reset) + VS16(0), extent=1
 
     # Long strings (>20 chars) use fast path which routes to wcswidth().
-    # wcswidth() has more lenient VS16 handling (`last_measured_idx >= 0` vs `== idx - 1`),
-    # causing VS16 to incorrectly apply when separated by control chars (!)
+    # In ignore mode, BEL is stripped, so VS16 is adjacent to the smiley and applies correctly.
     # Multiply by 10 to exceed threshold
-    assert wcwidth.width(("\u263A\x07\uFE0F") * 10) == 20  # (smiley(1) + BEL(0) + VS16(+1)) * 10 (!)
+    assert wcwidth.width(("\u263A\x07\uFE0F") * 10) == 20  # (smiley(1) + BEL-stripped(0) + VS16(+1)) * 10
 
 
 def test_width_long_horizontal_fastpath():
@@ -266,6 +271,42 @@ def test_carriage_return_resets_column():
     assert wcwidth.width('abc\rde') == 3
 
 
+def test_carriage_return_strict_raises():
+    """CR in strict mode raises ValueError (indeterminate starting column)."""
+    with pytest.raises(ValueError, match='Horizontal movement'):
+        wcwidth.width('hello\rworld', control_codes='strict')
+
+
+def test_hpa_parse_best_effort():
+    """HPA in parse mode assumes string begins at column 0."""
+    assert wcwidth.width('abc\x1b[5Gde') == 6
+    assert wcwidth.width('abc\x1b[Gde') == 3
+    assert wcwidth.width('\x1b[10Ghi') == 11
+
+
+def test_hpa_strict_raises():
+    """HPA in strict mode raises ValueError (indeterminate starting column)."""
+    with pytest.raises(ValueError, match='horizontal position'):
+        wcwidth.width('abc\x1b[5Gde', control_codes='strict')
+
+
+def test_cursor_left_strict_out_of_bounds():
+    """Cursor-left beyond string start raises ValueError in strict mode."""
+    with pytest.raises(ValueError, match='Cursor left movement'):
+        wcwidth.width('a\x1b[5Da', control_codes='strict')
+
+
+def test_cursor_left_out_of_bounds_parse_no_raise():
+    """Cursor-left beyond string start is silently clamped in parse mode."""
+    assert wcwidth.width('a\x1b[5Da') == 1
+    assert wcwidth.width('abc\x1b[99Ddef') == 3  # 99D clamped to col 0, then b,c,d overwritten
+
+
+def test_cursor_left_out_of_bounds_ignore_mode():
+    """Cursor-left beyond string start is zero-width in ignore mode."""
+    assert wcwidth.width('a\x1b[5Da', control_codes='ignore') == 2
+
+
 def test_iter_sequences_lone_esc():
     """Lone ESC is yielded as a sequence."""
     assert list(wcwidth.iter_sequences('\x1b')) == [('\x1b', True)]
@@ -449,3 +490,88 @@ def test_fitzpatrick_modifier_standalone_width():
     """Standalone Fitzpatrick modifier, however, is wide character in width()."""
     result = wcwidth.width('\U0001F3FB')
     assert result == 2
+
+
+FS_SEQUENCE_CASES = [
+    ('\x1bc', 'ris'),
+    ('\x1bl', 'memory_lock'),
+    ('\x1bm', 'memory_unlock'),
+    ('\x1bn', 'ls2'),
+    ('\x1bo', 'ls3'),
+    ('\x1b|', 'ls3r'),
+    ('\x1b}', 'ls2r'),
+    ('\x1b~', 'ls1r'),
+]
+
+
+[email protected]('seq,name', FS_SEQUENCE_CASES)
+def test_fs_sequences_matched(seq, name):
+    """Fs (independent function) sequences are matched as zero-width."""
+    segments = list(wcwidth.iter_sequences(seq))
+    assert segments == [(seq, True)]
+    assert wcwidth.width(seq) == 0
+
+
+FP_SEQUENCE_CASES = [
+    ('\x1b7', 'decsc'),
+    ('\x1b8', 'decrc'),
+    ('\x1b=', 'deckpam'),
+    ('\x1b>', 'deckpnm'),
+    ('\x1b0', 'fp_0'),
+    ('\x1b1', 'fp_1'),
+    ('\x1b9', 'fp_9'),
+]
+
+
+[email protected]('seq,name', FP_SEQUENCE_CASES)
+def test_fp_sequences_matched(seq, name):
+    """Fp (private use) sequences are matched as zero-width."""
+    segments = list(wcwidth.iter_sequences(seq))
+    assert segments == [(seq, True)]
+    assert wcwidth.width(seq) == 0
+
+
+NF_SEQUENCE_CASES = [
+    ('\x1b F', 's7c1t'),
+    ('\x1b G', 's8c1t'),
+    ('\x1b#3', 'decdhl_top'),
+    ('\x1b#4', 'decdhl_bottom'),
+    ('\x1b#5', 'decswl'),
+    ('\x1b#6', 'decdwl'),
+    ('\x1b#8', 'decaln'),
+    ('\x1b%G', 'utf8_designate'),
+    ('\x1b%@', 'iso2022_return'),
+]
+
+
+[email protected]('seq,name', NF_SEQUENCE_CASES)
+def test_nf_sequences_matched(seq, name):
+    """NF (multi-byte) escape sequences are matched as zero-width."""
+    segments = list(wcwidth.iter_sequences(seq))
+    assert segments == [(seq, True)]
+    assert wcwidth.width(seq) == 0
+
+
+def test_fs_sequence_embedded_in_text():
+    """Fs sequence surrounded by text is correctly segmented."""
+    segments = list(wcwidth.iter_sequences('abc\x1bcdef'))
+    assert segments == [('abc', False), ('\x1bc', True), ('def', False)]
+    assert wcwidth.width('abc\x1bcdef') == 6
+
+
+def test_nf_sequence_embedded_in_text():
+    """NF sequence surrounded by text is correctly segmented."""
+    segments = list(wcwidth.iter_sequences('abc\x1b#8def'))
+    assert segments == [('abc', False), ('\x1b#8', True), ('def', False)]
+    assert wcwidth.width('abc\x1b#8def') == 6
+
+
+def test_screen_title_sequences():
+    """Screen/tmux title sequence ESC k hello ST."""
+    segments = list(wcwidth.iter_sequences('\x1bkhello\x1b\\'))
+    assert segments[0] == ('\x1bk', True)
+
+
+def test_ris_indeterminate():
+    """RIS (ESC c) is flagged as indeterminate effect."""
+    assert INDETERMINATE_EFFECT_SEQUENCE.match('\x1bc')