summaryrefslogtreecommitdiffstats
path: root/contrib/python/wcwidth/py3
diff options
context:
space:
mode:
authorrobot-piglet <[email protected]>2026-05-19 07:26:19 +0300
committerrobot-piglet <[email protected]>2026-05-19 07:47:21 +0300
commit5bf2e428d5a1e8e8cef995ebb1857890e73d55fd (patch)
tree55ed4b4e95a17bef5df0bfea947e3ee510a983f4 /contrib/python/wcwidth/py3
parent9f85ff287c0afb40d523e9261e77f27cd168cc47 (diff)
Intermediate changes
commit_hash:ce1258717e3f2e41b5b19e40e84850b1db4aa361
Diffstat (limited to 'contrib/python/wcwidth/py3')
-rw-r--r--contrib/python/wcwidth/py3/.dist-info/METADATA169
-rw-r--r--contrib/python/wcwidth/py3/tests/conftest.py2
-rw-r--r--contrib/python/wcwidth/py3/tests/test_ambiguous.py1
-rw-r--r--contrib/python/wcwidth/py3/tests/test_benchmarks.py179
-rw-r--r--contrib/python/wcwidth/py3/tests/test_clip.py234
-rw-r--r--contrib/python/wcwidth/py3/tests/test_clip_cjk_emoji.py47
-rw-r--r--contrib/python/wcwidth/py3/tests/test_clip_overtyping.py159
-rw-r--r--contrib/python/wcwidth/py3/tests/test_core.py48
-rw-r--r--contrib/python/wcwidth/py3/tests/test_emojis.py1
-rw-r--r--contrib/python/wcwidth/py3/tests/test_grapheme.py1
-rw-r--r--contrib/python/wcwidth/py3/tests/test_hyperlink.py75
-rw-r--r--contrib/python/wcwidth/py3/tests/test_justify.py1
-rw-r--r--contrib/python/wcwidth/py3/tests/test_sgr_state.py1
-rw-r--r--contrib/python/wcwidth/py3/tests/test_text_sizing.py327
-rw-r--r--contrib/python/wcwidth/py3/tests/test_textwrap.py108
-rw-r--r--contrib/python/wcwidth/py3/tests/test_ucslevel.py1
-rw-r--r--contrib/python/wcwidth/py3/tests/test_width.py170
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/__init__.py59
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/_clip.py809
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/_constants.py65
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/_wcswidth.py150
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/_wcwidth.py158
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/_width.py339
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/align.py136
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/bisearch.py5
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/escape_sequences.py137
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/grapheme.py12
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/hyperlink.py142
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/sgr_state.py3
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py2
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_grapheme.py2
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_mc.py2
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_vs16.py2
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_wide.py6
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/table_zero.py2
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/text_sizing.py200
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/textwrap.py89
-rw-r--r--contrib/python/wcwidth/py3/wcwidth/wcwidth.py1009
-rw-r--r--contrib/python/wcwidth/py3/ya.make10
39 files changed, 3608 insertions, 1255 deletions
diff --git a/contrib/python/wcwidth/py3/.dist-info/METADATA b/contrib/python/wcwidth/py3/.dist-info/METADATA
index f6f0235df8f..0667f4042a5 100644
--- a/contrib/python/wcwidth/py3/.dist-info/METADATA
+++ b/contrib/python/wcwidth/py3/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: wcwidth
-Version: 0.6.0
+Version: 0.7.0
Summary: Measures the displayed width of unicode strings in a terminal
Project-URL: Homepage, https://github.com/jquast/wcwidth
Author-email: Jeff Quast <[email protected]>
@@ -28,6 +28,7 @@ Classifier: Typing :: Typed
Requires-Python: >=3.8
Description-Content-Type: text/x-rst
+
|pypi_downloads| |codecov| |license|
============
@@ -65,33 +66,42 @@ Some examples of **incorrect results**:
Solution
--------
-The lowest-level functions in this library are the POSIX.1-2001 and POSIX.1-2008 `wcwidth(3)`_ and
-`wcswidth(3)`_, which this library precisely copies by interface as `wcwidth()`_ and `wcswidth()`_.
-These functions return -1 when C0 and C1 control codes are present.
+The lowest-level functions in this library are derived from POSIX.1-2001 and POSIX.1-2008
+`wcwidth(3)`_ and `wcswidth(3)`_, which this library precisely copies by interface as `wcwidth()`_
+and `wcswidth()`_. These functions return -1 when C0 and C1 control codes are present.
An easy-to-use `width()`_ function is provided as a wrapper of `wcswidth()`_ that is also capable of
measuring most terminal control codes and sequences, like colors, bold, tabstops, and horizontal
cursor movement.
-Text-justification is solved by the grapheme and sequence-aware functions `ljust()`_,
-`rjust()`_, `center()`_, and `wrap()`_, serving as drop-in replacements to python standard functions
-of the same names.
+Text-justification is solved by the sequence-aware functions `ljust()`_, `rjust()`_, `center()`_,
+and the grapheme-aware function `wrap()`_, serving as drop-in replacements to python standard
+functions.
+
+The `clip()`_ function extracts substrings by their displayed column positions, and
+`strip_sequences()`_ removes terminal escape sequences from text altogether.
The iterator functions `iter_graphemes()`_ and `iter_sequences()`_ allow for careful navigation of
-grapheme and terminal control sequence boundaries. `iter_graphemes_reverse()`_, and
-`grapheme_boundary_before()`_ are useful for editing and searching of complex unicode. The
-`clip()`_ function extracts substrings by display column positions, and `strip_sequences()`_ removes
-terminal escape sequences from text altogether.
+grapheme and terminal control sequence boundaries as required by editors or REPLs with cursor
+control. `iter_graphemes_reverse()`_ and `grapheme_boundary_before()`_ are often necessary for
+backward cursor control over complex unicode.
Discrepancies
-------------
-You may find that support *varies* for complex unicode sequences or codepoints.
+You may find that support *varies* for complex unicode sequences or codepoints. This library may be
+considered to presume the terminal is enabled for DEC Private Mode 2027 ("Grapheme Clustering"), but
+the specification does not fully describe varying unicode versions, feature levels, or details of
+specific language support. This library does *not* support any alternate "legacy width"
+measurement.
-A companion utility, `jquast/ucs-detect`_ was authored to gather and publish the results of Wide
-character, language/grapheme clustering and complex script support, emojis and zero-width joiner,
-variations, and regional indicator (flags) as a `General Tabulated Summary`_ by terminal emulator
-software and version.
+See `Grapheme Clusters and Terminal Emulators`_, `terminal-unicode-core.tex`_, and `State of
+Terminal Emulators in 2025`_ for more details on Mode 2027 and unicode-aware terminals.
+
+The `jquast/ucs-detect`_ utility is used to gather and publish the results of compliance to our
+standard for Wide character, Languages, grapheme clustering, complex or combining scripts, emojis,
+zero-width joiner, variations, and regional indicator (flags) as a `General
+Tabulated Summary`_ by terminal emulator software and version.
========
Overview
@@ -148,30 +158,61 @@ Use function `width()`_ to measure a string with improved handling of ``control_
>>> # same support as wcswidth(), eg. regional indicator flag:
>>> wcwidth.width('\U0001F1FF\U0001F1FC')
2
- >>> # but also supports SGR colored text, 'WARN', followed by SGR reset
+ >>> # but also supports sequences, like SGR colored text, "WARN", followed by reset
>>> wcwidth.width('\x1b[38;2;255;150;100mWARN\x1b[0m')
4
- >>> # tabs,
+ >>> # tabs are measured as though the string begins at a tabstop,
>>> wcwidth.width('\t', tabsize=4)
4
- >>> # or, tab and all other control characters can be ignored
- >>> wcwidth.width('\t', control_codes='ignore')
- 0
- >>> # "vertical" control characters are ignored
- >>> wcwidth.width('\n')
+ >>> # or, all control characters can be ignored (including tab)
+ >>> wcwidth.width('\t\n\a\r', control_codes='ignore')
0
- >>> # as well as sequences with "indeterminate" effects like Home + Clear
+ >>> # sequences with "indeterminate" effects like Home + Clear are zero-width
>>> wcwidth.width('\x1b[H\x1b[2J')
0
+ >>> # horizontal cursor movements are parsed,
+ >>> wcwidth.width('hello\b\b\b\b\bworld')
+ 5
+ >>> wcwidth.width('hello\x1b[5Dworld')
+ 5
+ >>> # or ignored,
+ >>> wcwidth.width('hello\x1b[5Dworld', control_codes='ignore')
+ 10
+ >>> # Measure width of text using kitty text sizing protocol (OSC 66),
+ >>> width('\x1b]66;w=2;XY\x07')
+ 2
+ >>> # Scaled text sizing: each grapheme occupies 'scale' cells
+ >>> width('\x1b]66;s=2;ABC\x07')
+ 6
+
+Use ``control_codes='ignore'`` when the input is known not to contain any control characters or
+terminal sequences for slightly improved performance. Note that TAB (``'\t'``) is a control
+character and is also ignored; you may want to use `str.expandtabs()`_ first.
+
+Use ``control_codes='strict'`` when input is known to contain some control sequences, such as
+SGR color, bold, hyperlinks and cursor movement. Any sequence that cannot be accurately parsed,
+such as clearing the screen, vertical, or absolute cursor movement will raise ``ValueError``:
+
+.. code-block:: python
+
>>> # or, raise ValueError for "indeterminate" effects using control_codes='strict'
>>> wcwidth.width('\n', control_codes='strict')
Traceback (most recent call last):
...
ValueError: Vertical movement character 0xa at position 0
-Use ``control_codes='ignore'`` when the input is known not to contain any control characters or
-terminal sequences for slightly improved performance. Note that TAB (``'\t'``) is a control
-character and is also ignored, you may want to use `str.expandtabs()`_, first.
+
+ >>> wcwidth.width('\x1b[H\x1b[2J', control_codes='strict')
+ Traceback (most recent call last):
+ ...
+ ValueError: Indeterminate cursor sequence at position 0, '\x1b[H'
+
+
+ >>> # cursor left movement beyond string start raises in strict mode,
+ >>> wcwidth.width('a\x1b[5Da', control_codes='strict')
+ Traceback (most recent call last):
+ ...
+ ValueError: Cursor left movement at position 1 would move 5 cells left from column 1, exceeding string start
iter_sequences()
----------------
@@ -290,9 +331,29 @@ Use `clip()`_ to extract a substring by column positions, preserving terminal se
>>> clip('\x1b[1;31mHello world\x1b[0m', 6, 11)
'\x1b[1;31mworld\x1b[0m'
- >>> # Disable SGR propagation to preserve original sequences as-is
- >>> clip('\x1b[31m中文\x1b[0m', 0, 3, propagate_sgr=False)
- '\x1b[31m中 \x1b[0m'
+ >>> # Disable SGR propagation to preserve sequence order outside of clip boundary
+ >>> clip('\x1b[31m中文\x1b[32m', 0, 3, propagate_sgr=False)
+ '\x1b[31m中 \x1b[32m'
+
+ >>> # Cursor-left overwrites previous text (painter's algorithm)
+ >>> clip('hello\x1b[2DXY', 0, 5)
+ 'helXY'
+ >>> # Carriage return resets to column 0, overwriting earlier cells
+ >>> clip('abc\rXY', 0, 5)
+ 'XYc'
+
+ >>> # even OSC 8 hyperlink text may be clipped, 'Click This link' -> 'is link' !
+ >>> clip('\x1b]8;;http://example.com\x07Click This link\x1b]8;;\x07', 8, 15)
+ '\x1b]8;;http://example.com\x07is link\x1b]8;;\x07'
+
+ >>> # and OSC 66 kitty text sizing, supporting width and scale, 'Look' -> '...ook'
+ >>> clip('\x1b]66;w=4:s=4;Look\x07', 1, 16, fillchar='.')
+ '...\x1b]66;s=4:w=3;ook\x07'
+
+Use ``overtyping=False`` when the input is known not to contain any cursor movement characters
+(``\b``, ``\r``, ``CSI C``, ``CSI D``, ``CSI G``) for improved performance. When
+``overtyping=None`` (default), a slower "Painter's algorithm" may be used after testing for the
+presence of these characters. ``overtyping`` has no effect when ``control_codes='ignore'``.
strip_sequences()
-----------------
@@ -336,7 +397,7 @@ mode is to display an ambiguous character surrounded by a pair of Cursor Positio
queries with a terminal in cooked or raw mode, and to parse the responses for their ``(y, x)``
locations and measure the difference ``x``.
-This code should also be careful check whether it is attached to a terminal and be careful of
+This code should also be careful to check whether it is attached to a terminal and be careful of
possible timeout, slow network, or non-response when working with "dumb terminals" like a CI build.
`jquast/blessed`_ library provides such a helping `Terminal.detect_ambiguous_width()`_ method:
@@ -429,9 +490,18 @@ This library is used in:
- `jquast/blessed`_: a thin, practical wrapper around terminal capabilities in
Python.
+- `jquast/telix`_: A Modern telnet client especially designed for BBSs and MUDs.
+
- `prompt-toolkit/python-prompt-toolkit`_: a Library for building powerful
interactive command lines in Python.
+- `urwid/urwid`_: Console user interface library for Python
+
+- `prettytable/prettytable`_: Display tabular data in a visually appealing ASCII table format
+
+- `leviathan0992/Pylsy`_: Pylsy is a simple python library to draw tables in the Terminal. Just two
+ lines of code.
+
- `dbcli/pgcli`_: Postgres CLI with autocompletion and syntax highlighting.
- `thomasballinger/curtsies`_: a Curses-like terminal wrapper with a display
@@ -448,8 +518,8 @@ This library is used in:
- `nbedos/termtosvg`_: Terminal recorder that renders sessions as SVG
animations.
-- `peterbrittain/asciimatics`_: Package to help people create full-screen text
- UIs.
+- `peterbrittain/asciimatics`_: A cross platform package to do curses-like operations, plus higher
+ level APIs and widgets to create text UIs and ASCII art animations
- `python-cmd2/cmd2`_: A tool for building interactive command line apps
@@ -469,6 +539,10 @@ Other Languages
There are similar implementations of the `wcwidth()`_ and `wcswidth()`_ functions in other
languages.
+- `contour-terminal/libunicode`_: C++20
+- `ridiculousfish/widecharwidth`_: Python
+- `termux/wcwidth`_: C
+- `powerman/wcwidth-icons`_: C
- `timoxley/wcwidth`_: JavaScript
- `janlelis/unicode-display_width`_: Ruby
- `alecrabbit/php-wcwidth`_: PHP
@@ -478,6 +552,9 @@ languages.
- `grepsuzette/wcwidth`_: Haxe
- `aperezdc/lua-wcwidth`_: Lua
- `joachimschmidt557/zig-wcwidth`_: Zig
+- `mycoboco/wcwidth.js`_: JavaScript
+- `ainame/swift-displaywidth`_: Swift
+- `pmonks/clj-wcwidth`_: Clojure
- `fumiyas/wcwidth-cjk`_: `LD_PRELOAD` override
- `joshuarubin/wcwidth9`_: Unicode version 9 in C
- `spectreconsole/wcwidth`_: C#
@@ -486,6 +563,15 @@ languages.
History
=======
+0.7.0 *2026-05-02*
+ * **New** support for `kitty text sizing protocol`_ (OSC 66) in `width()`_ and `clip()`_.
+ * **New** `clip()`_ parameter ``control_codes``, accepting ``'parse'``, ``'ignore'``, or ``'strict'``.
+ `clip()`_ is now able to clip OSC 8 hyperlinks and OSC 66 text sizing sequences.
+ * **Improved** `clip()`_ and `width()`_ to support horizontal cursor sequences (``cub``, ``cuf``,
+ ``hpa``). Cursor-left (``cub``) or backspace (``\b``) now overwrites text. ``column_address``
+ (``hpa``) and carriage return (``\r``) are now parsed, and more values conditionally raise
+ ``ValueError`` when ``control_codes='strict'``.
+
0.6.0 *2026-02-06*
* **New** Parameters ``expand_tabs``, ``replace_whitespace``, ``fix_sentence_endings``,
``drop_whitespace``, ``max_lines``, and ``placeholder`` for `wrap()`_, completing stdlib
@@ -711,6 +797,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
.. _`Issue #155`: https://github.com/jquast/wcwidth/issues/155
.. _`Issue #190`: https://github.com/jquast/wcwidth/issues/190
.. _`jquast/blessed`: https://github.com/jquast/blessed
+.. _`jquast/telix`: https://github.com/jquast/telix
.. _`selectel/pyte`: https://github.com/selectel/pyte
.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies
.. _`dbcli/pgcli`: https://github.com/dbcli/pgcli
@@ -735,10 +822,20 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
.. _`fumiyas/wcwidth-cjk`: https://github.com/fumiyas/wcwidth-cjk
.. _`joshuarubin/wcwidth9`: https://github.com/joshuarubin/wcwidth9
.. _`spectreconsole/wcwidth`: https://github.com/spectreconsole/wcwidth
+.. _`contour-terminal/libunicode`: https://github.com/contour-terminal/libunicode
+.. _`ridiculousfish/widecharwidth`: https://github.com/ridiculousfish/widecharwidth
+.. _`termux/wcwidth`: https://github.com/termux/wcwidth
+.. _`powerman/wcwidth-icons`: https://github.com/powerman/wcwidth-icons
+.. _`mycoboco/wcwidth.js`: https://github.com/mycoboco/wcwidth.js
+.. _`ainame/swift-displaywidth`: https://github.com/ainame/swift-displaywidth
+.. _`pmonks/clj-wcwidth`: https://github.com/pmonks/clj-wcwidth
.. _`python-cmd2/cmd2`: https://github.com/python-cmd2/cmd2
.. _`stratis-storage/stratis-cli`: https://github.com/stratis-storage/stratis-cli
.. _`ihabunek/toot`: https://github.com/ihabunek/toot
.. _`saulpw/visidata`: https://github.com/saulpw/visidata
+.. _`urwid/urwid`: https://github.com/urwid/urwid
+.. _`prettytable/prettytable`: https://github.com/prettytable/prettytable
+.. _`leviathan0992/Pylsy`: https://github.com/leviathan0992/Pylsy
.. _`pip-tools`: https://pip-tools.readthedocs.io/
.. _`sphinx`: https://www.sphinx-doc.org/
.. _`textwrap.wrap()`: https://docs.python.org/3/library/textwrap.html#textwrap.wrap
@@ -760,11 +857,17 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
.. _`clip()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.clip
.. _`strip_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.strip_sequences
.. _`propagate_sgr()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.propagate_sgr
+.. _`TextSizing`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.TextSizing
+.. _`TextSizingParams`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.TextSizingParams
.. _`iter_sequences()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_sequences
.. _`list_versions()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.list_versions
.. _`Unicode Standard Annex #29`: https://www.unicode.org/reports/tr29/
.. _`Terminal.detect_ambiguous_width()`: https://blessed.readthedocs.io/en/latest/api/terminal.html#blessed.terminal.Terminal.detect_ambiguous_width
.. _`parity padding`: https://jazcap53.github.io/pythons-eccentric-strcenter.html
+.. _`kitty text sizing protocol`: https://sw.kovidgoyal.net/kitty/text-sizing-protocol/
+.. _`Grapheme Clusters and Terminal Emulators`: https://mitchellh.com/writing/grapheme-clusters-in-terminals
+.. _`terminal-unicode-core.tex`: https://github.com/contour-terminal/terminal-unicode-core/blob/master/spec/terminal-unicode-core.tex
+.. _`State of Terminal Emulators in 2025`: https://www.jeffquast.com/post/state-of-terminal-emulation-2025/
.. |pypi_downloads| image:: https://img.shields.io/pypi/dm/wcwidth.svg?logo=pypi
:alt: Downloads
:target: https://pypi.org/project/wcwidth/
diff --git a/contrib/python/wcwidth/py3/tests/conftest.py b/contrib/python/wcwidth/py3/tests/conftest.py
index 2d0a2779dbe..ecbbdc876e0 100644
--- a/contrib/python/wcwidth/py3/tests/conftest.py
+++ b/contrib/python/wcwidth/py3/tests/conftest.py
@@ -1,4 +1,5 @@
"""Pytest configuration and fixtures."""
+
# 3rd party
import pytest
@@ -10,6 +11,7 @@ except ImportError:
@pytest.fixture
def benchmark():
"""No-op benchmark fixture for environments without pytest-codspeed."""
+
def _passthrough(func, *args, **kwargs):
return func(*args, **kwargs)
return _passthrough
diff --git a/contrib/python/wcwidth/py3/tests/test_ambiguous.py b/contrib/python/wcwidth/py3/tests/test_ambiguous.py
index 0c61cdacf9c..20ed4d7d1b3 100644
--- a/contrib/python/wcwidth/py3/tests/test_ambiguous.py
+++ b/contrib/python/wcwidth/py3/tests/test_ambiguous.py
@@ -1,4 +1,5 @@
"""Tests for ambiguous_width parameter."""
+
# 3rd party
import pytest
diff --git a/contrib/python/wcwidth/py3/tests/test_benchmarks.py b/contrib/python/wcwidth/py3/tests/test_benchmarks.py
index be940ccc8a1..80c9be01de6 100644
--- a/contrib/python/wcwidth/py3/tests/test_benchmarks.py
+++ b/contrib/python/wcwidth/py3/tests/test_benchmarks.py
@@ -1,4 +1,5 @@
"""Performance benchmarks for wcwidth module."""
+
# std imports
import os
import sys
@@ -10,7 +11,7 @@ import pytest
# local
import wcwidth
-_wcwidth_module = sys.modules['wcwidth.wcwidth']
+_width_module = sys.modules['wcwidth._width']
def test_wcwidth_ascii(benchmark):
@@ -292,6 +293,149 @@ def test_clip_complex_sgr(benchmark):
benchmark(wcwidth.clip, text, 6, 11)
+def test_clip_long_cjk_past_window(benchmark):
+ """Benchmark clip() with long CJK text, narrow window (early-exit path)."""
+ text = '中文测试字符串' * 100 # 700 chars, no escape sequences
+ benchmark(wcwidth.clip, text, 0, 50)
+
+
+def test_clip_dense_ansi_past_window(benchmark):
+ """Benchmark clip() with dense ANSI sequences past clip window (SGR tracking)."""
+ text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50
+ benchmark(wcwidth.clip, text, 6, 30)
+
+
+def test_clip_dense_ansi_no_propagate(benchmark):
+ """Benchmark clip() with dense ANSI sequences, SGR propagation disabled."""
+ text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50
+ benchmark(wcwidth.clip, text, 6, 30, propagate_sgr=False)
+
+
+def test_clip_osc8_hyperlinks(benchmark):
+ """Benchmark clip() with OSC 8 hyperlinks (hyperlink parsing path)."""
+ text = '\x1b]8;;http://example.com\x07Click Here\x1b]8;;\x07 ' * 20
+ benchmark(wcwidth.clip, text, 0, 80)
+
+
+def test_width_osc66(benchmark):
+ """Benchmark width() with OSC 66 text sizing sequences."""
+ text = '\x1b]66;w=2;XY\x07\x1b]66;s=3;ABC\x07'
+ benchmark(wcwidth.width, text)
+
+
+def test_clip_osc66(benchmark):
+ """Benchmark clip() with OSC 66 text sizing sequences."""
+ text = '\x1b]66;w=2;XY\x07\x1b]66;s=3;ABC\x07'
+ benchmark(wcwidth.clip, text, 3, 8)
+
+
+def test_clip_cursor_cr_overwrite(benchmark):
+ """Benchmark clip() with carriage-return overwrite (painter path)."""
+ text = 'hello\rworld ' * 20
+ benchmark(wcwidth.clip, text, 0, 50)
+
+
+def test_clip_cursor_csi_backward(benchmark):
+ """Benchmark clip() with CSI cursor-backward sequences (painter path)."""
+ text = 'hello\x1b[2Dxy ' * 20
+ benchmark(wcwidth.clip, text, 0, 40)
+
+
+def test_clip_long_ascii_fastpath(benchmark):
+ """Benchmark clip() with long ASCII string (fast-path slice)."""
+ text = 'hello world ' * 1000
+ benchmark(wcwidth.clip, text, 500, 600)
+
+
+def test_clip_with_ansi_no_overtype(benchmark):
+ """Benchmark clip() with ANSI sequences, overtyping disabled."""
+ text = '\x1b[31m中文字\x1b[0m'
+ benchmark(wcwidth.clip, text, 0, 3, overtyping=False)
+
+
+def test_clip_complex_sgr_no_overtype(benchmark):
+ """Benchmark clip() with complex SGR, overtyping disabled."""
+ text = '\x1b[1;38;5;208mHello world text\x1b[0m'
+ benchmark(wcwidth.clip, text, 6, 11, overtyping=False)
+
+
+def test_clip_dense_ansi_no_overtype(benchmark):
+ """Benchmark clip() with dense ANSI, overtyping disabled."""
+ text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50
+ benchmark(wcwidth.clip, text, 6, 30, overtyping=False)
+
+
+def test_clip_dense_ansi_no_propagate_no_overtype(benchmark):
+ """Benchmark clip() with dense ANSI, SGR propagation and overtyping disabled."""
+ text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50
+ benchmark(wcwidth.clip, text, 6, 30, propagate_sgr=False, overtyping=False)
+
+
+def test_clip_dense_ansi_overtype(benchmark):
+ """Benchmark clip() with dense ANSI, overtyping forced (painter path)."""
+ text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50
+ benchmark(wcwidth.clip, text, 6, 30, overtyping=True)
+
+
+def test_clip_long_cjk_overtype(benchmark):
+ """Benchmark clip() with long CJK, overtyping forced (painter path)."""
+ text = '中文测试字符串' * 100
+ benchmark(wcwidth.clip, text, 0, 50, overtyping=True)
+
+
+def test_width_dense_ansi_control_codes_ignore(benchmark):
+ """Benchmark width() with dense ANSI and control_codes='ignore'."""
+ text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50
+ benchmark(wcwidth.width, text, control_codes='ignore')
+
+
+def test_width_complex_ansi_control_codes_ignore(benchmark):
+ """Benchmark width() with complex ANSI and control_codes='ignore'."""
+ text = '\x1b[38;2;255;150;100mWARN\x1b[0m: \x1b[1mBold\x1b[0m \x1b[4mUnderline\x1b[0m'
+ benchmark(wcwidth.width, text, control_codes='ignore')
+
+
+def test_clip_dense_ansi_control_codes_ignore(benchmark):
+ """Benchmark clip() with dense ANSI, control_codes='ignore' (skips painter/OSC)."""
+ text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50
+ benchmark(wcwidth.clip, text, 6, 30, control_codes='ignore')
+
+
+def test_clip_long_cjk_control_codes_ignore(benchmark):
+ """Benchmark clip() with long CJK and control_codes='ignore' (early-exit path)."""
+ text = '中文测试字符串' * 100
+ benchmark(wcwidth.clip, text, 0, 50, control_codes='ignore')
+
+
+def test_clip_cursor_cr_control_codes_ignore(benchmark):
+ """Benchmark clip() with CR overwrite and control_codes='ignore' (painter skipped)."""
+ text = 'hello\rworld ' * 20
+ benchmark(wcwidth.clip, text, 0, 50, control_codes='ignore')
+
+
+def test_clip_dense_ansi_no_propagate_control_codes_ignore(benchmark):
+ """Benchmark clip() with dense ANSI, propagate_sgr=False and control_codes='ignore'."""
+ text = '\x1b[31mred\x1b[0m \x1b[32mgreen\x1b[0m \x1b[33myellow\x1b[0m ' * 50
+ benchmark(wcwidth.clip, text, 6, 30, propagate_sgr=False, control_codes='ignore')
+
+
+def test_clip_long_ascii_control_codes_ignore(benchmark):
+ """Benchmark clip() with long ASCII and control_codes='ignore' (fast-path slice)."""
+ text = 'hello world ' * 1000
+ benchmark(wcwidth.clip, text, 500, 600, control_codes='ignore')
+
+
+def test_wrap_with_ansi_control_codes_ignore(benchmark):
+ """Benchmark wrap() with ANSI sequences and control_codes='ignore'."""
+ text = '\x1b[31mThe quick brown fox jumps over the lazy dog.\x1b[0m Did it really? ' * 20
+ benchmark(wcwidth.wrap, text, 40, control_codes='ignore')
+
+
+def test_ljust_ascii_control_codes_ignore(benchmark):
+ """Benchmark ljust() with ASCII and control_codes='ignore'."""
+ benchmark(wcwidth.ljust, 'hello', 20, control_codes='ignore')
+
+
def test_propagate_sgr_multiline(benchmark):
"""Benchmark propagate_sgr() with multiple lines."""
lines = ['\x1b[1;31mline one', 'line two', 'line three\x1b[0m']
@@ -327,7 +471,7 @@ def test_iter_sequences_mixed(benchmark):
benchmark(lambda: list(wcwidth.iter_sequences(text)))
-# Brahmic script benchmarks — text with virama conjuncts
+# Brahmic script benchmarks -- text with virama conjuncts
BRAHMIC_DEVANAGARI = 'हिन्दी भाषा में लिखा गया पाठ है। क्षत्रिय स्त्री ' * 20
BRAHMIC_BENGALI = 'বাংলা ভাষায় লেখা একটি পাঠ। বাঙ্গালী ভাষা ' * 20
@@ -374,33 +518,48 @@ _udhr_skip = pytest.mark.skipif(
reason=f"{os.path.basename(UDHR_FILE)} is missing; run bin/update-tables.py",
)
+_py38_skip_pedantic = pytest.mark.skipif(
+ sys.version_info[:2] < (3, 9),
+ reason='benchmark.pedantic() not supported in python 3.8 or earlier')
+
@_udhr_skip
+@_py38_skip_pedantic
def test_wrap_udhr(benchmark):
"""Benchmark wrap() with multilingual UDHR text."""
+ if not hasattr(benchmark, 'pedantic'):
+ pytest.skip('pytest-codspeed not installed')
result = benchmark.pedantic(wcwidth.wrap, args=(UDHR_TEXT, 80), rounds=1, iterations=1)
assert len(result)
assert all(0 <= wcwidth.width(_l) <= 80 for _l in result)
@_udhr_skip
+@_py38_skip_pedantic
def test_width_udhr(benchmark):
"""Benchmark width() with multilingual UDHR text."""
+ if not hasattr(benchmark, 'pedantic'):
+ pytest.skip('pytest-codspeed not installed')
result = benchmark.pedantic(wcwidth.width, args=(UDHR_TEXT,), rounds=1, iterations=1)
assert result > 0
@_udhr_skip
+@_py38_skip_pedantic
def test_width_udhr_lines(benchmark):
"""Benchmark width() on individual UDHR lines."""
+ if not hasattr(benchmark, 'pedantic'):
+ pytest.skip('pytest-codspeed not installed')
result = benchmark.pedantic(lambda: sum(wcwidth.width(line) for line in UDHR_LINES),
rounds=1, iterations=1)
assert result > 0
@_udhr_skip
+@_py38_skip_pedantic
def test_width_wcswidth_consistency_udhr(benchmark):
"""Verify width() and wcswidth() agree for printable multilingual text."""
+
def check():
failures = []
for line in UDHR_LINES:
@@ -411,30 +570,38 @@ def test_width_wcswidth_consistency_udhr(benchmark):
if w != wcs:
failures.append((line[:60], w, wcs))
return failures
+ if not hasattr(benchmark, 'pedantic'):
+ pytest.skip('pytest-codspeed not installed')
failures = benchmark.pedantic(check, rounds=1, iterations=1)
assert not failures
@_udhr_skip
+@_py38_skip_pedantic
def test_width_fastpath_integrity_udhr(benchmark):
"""Verify width() produces identical results with and without the fast path."""
- saved = _wcwidth_module._WIDTH_FAST_PATH_MIN_LEN
+ saved = _width_module._WIDTH_FAST_PATH_MIN_LEN
def check():
- _wcwidth_module._WIDTH_FAST_PATH_MIN_LEN = 0
+ _width_module._WIDTH_FAST_PATH_MIN_LEN = 0
fast_total = sum(wcwidth.width(line) for line in UDHR_LINES)
- _wcwidth_module._WIDTH_FAST_PATH_MIN_LEN = 999_999
+ _width_module._WIDTH_FAST_PATH_MIN_LEN = 999_999
parse_total = sum(wcwidth.width(line) for line in UDHR_LINES)
return fast_total, parse_total
+ if not hasattr(benchmark, 'pedantic'):
+ pytest.skip('pytest-codspeed not installed')
fast_total, parse_total = benchmark.pedantic(check, rounds=1, iterations=1)
- _wcwidth_module._WIDTH_FAST_PATH_MIN_LEN = saved
+ _width_module._WIDTH_FAST_PATH_MIN_LEN = saved
assert fast_total == parse_total
@_udhr_skip
+@_py38_skip_pedantic
def test_ljust_udhr_lines(benchmark):
"""Benchmark ljust() on UDHR lines."""
+ if not hasattr(benchmark, 'pedantic'):
+ pytest.skip('pytest-codspeed not installed')
benchmark.pedantic(lambda: [wcwidth.ljust(line, w + 1, UDHR_FILLCHAR)
for line, w in zip(UDHR_LINES, UDHR_WIDTHS)],
rounds=1, iterations=1)
diff --git a/contrib/python/wcwidth/py3/tests/test_clip.py b/contrib/python/wcwidth/py3/tests/test_clip.py
index 995d383a8ac..8ab3f1d24e8 100644
--- a/contrib/python/wcwidth/py3/tests/test_clip.py
+++ b/contrib/python/wcwidth/py3/tests/test_clip.py
@@ -1,4 +1,5 @@
"""Tests for clip() and strip_sequences() functions."""
+
# 3rd party
import pytest
@@ -23,7 +24,7 @@ STRIP_SEQUENCES_CASES = [
('\x1b[1m\U0001F468\u200D\U0001F469\u200D\U0001F467\x1b[0m',
'\U0001F468\u200D\U0001F469\u200D\U0001F467'),
('\x1b', '\x1b'),
- ('a\x1bb', 'a\x1bb'),
+ ('a\x1bb', 'a'),
('\x1b[', ''),
('text\x1b[mmore', 'textmore'),
]
@@ -114,26 +115,162 @@ def test_clip_sequences_after_end():
# With propagate_sgr=True (default), no style active at start, so no prefix
assert clip('hello\x1b[31m world\x1b[0m', 0, 5) == 'hello'
# With propagate_sgr=False, all sequences preserved
- assert clip('hello\x1b[31m world\x1b[0m', 0, 5, propagate_sgr=False) == 'hello\x1b[31m\x1b[0m'
+ assert repr(clip('hello\x1b[31m world\x1b[0m', 0, 5, propagate_sgr=False)) == repr('hello\x1b[31m\x1b[0m')
def test_clip_sequences_multiple():
# With propagate_sgr=True (default), sequences collapsed to minimal
assert clip('\x1b[1m\x1b[31mbold red\x1b[0m', 0, 4) == '\x1b[1;31mbold\x1b[0m'
# With propagate_sgr=False, all sequences preserved separately
- assert clip('\x1b[1m\x1b[31mbold red\x1b[0m', 0, 4, propagate_sgr=False) == '\x1b[1m\x1b[31mbold\x1b[0m'
+ assert repr(clip('\x1b[1m\x1b[31mbold red\x1b[0m', 0, 4, propagate_sgr=False)) == repr('\x1b[1m\x1b[31mbold\x1b[0m')
def test_clip_sequences_only():
# With propagate_sgr=True (default), no visible text means empty result
assert clip('\x1b[31m\x1b[0m', 0, 10) == ''
# With propagate_sgr=False, sequences preserved
- assert clip('\x1b[31m\x1b[0m', 0, 10, propagate_sgr=False) == '\x1b[31m\x1b[0m'
+ assert repr(clip('\x1b[31m\x1b[0m', 0, 10, propagate_sgr=False)) == repr('\x1b[31m\x1b[0m')
def test_clip_sequences_osc_hyperlink():
- assert clip('\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 0, 4) == \
+ assert repr(clip('\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 0, 4)) == repr(
'\x1b]8;;https://example.com\x07link\x1b]8;;\x07'
+ )
+
+
+# OSC 8 hyperlink clipping
+
+OSC_START_BEL = '\x1b]8;;http://example.com\x07'
+OSC_END_BEL = '\x1b]8;;\x07'
+OSC_START_ST = '\x1b]8;;http://example.com\x1b\\'
+OSC_END_ST = '\x1b]8;;\x1b\\'
+
+
+CLIP_HYPERLINK_CASES = [
+ # Full hyperlink visible -- preserved as-is
+ (f'{OSC_START_BEL}link{OSC_END_BEL}', 0, 4,
+ f'{OSC_START_BEL}link{OSC_END_BEL}'),
+ # Clipping middle of hyperlink text -- rebuild around clipped inner text
+ (f'{OSC_START_BEL}Click This link{OSC_END_BEL}', 6, 10,
+ f'{OSC_START_BEL}This{OSC_END_BEL}'),
+ # Clipping from start -- only first portion
+ (f'{OSC_START_BEL}Click This{OSC_END_BEL}', 0, 5,
+ f'{OSC_START_BEL}Click{OSC_END_BEL}'),
+ # Clipping from end -- only last portion
+ (f'{OSC_START_BEL}Click This{OSC_END_BEL}', 6, 10,
+ f'{OSC_START_BEL}This{OSC_END_BEL}'),
+ # Hyperlink fills the clip window -- trailing plain text is dropped
+ (f'{OSC_START_BEL}link{OSC_END_BEL}world', 0, 4,
+ f'{OSC_START_BEL}link{OSC_END_BEL}'),
+ # Hyperlink entirely after clip window -- dropped
+ (f'hello{OSC_START_BEL}link{OSC_END_BEL}', 0, 5, 'hello'),
+ # Hyperlink clipped to nothing -- empty hyperlink dropped
+ (f'{OSC_START_BEL}link{OSC_END_BEL}', 5, 10, ''),
+ # Empty hyperlink (no inner text) -- dropped
+ (f'before{OSC_START_BEL}{OSC_END_BEL}after', 0, 11, 'beforeafter'),
+ # Hyperlink with CJK text clipped
+ (f'{OSC_START_BEL}中文文字{OSC_END_BEL}', 0, 4,
+ f'{OSC_START_BEL}中文{OSC_END_BEL}'),
+ # Hyperlink with CJK text clipped at odd column
+ (f'{OSC_START_BEL}中文文字{OSC_END_BEL}', 0, 3,
+ f'{OSC_START_BEL}中 {OSC_END_BEL}'),
+ # Hyperlink with ST terminator
+ (f'{OSC_START_ST}Click This{OSC_END_ST}', 0, 5,
+ f'{OSC_START_ST}Click{OSC_END_ST}'),
+ # Multiple non-overlapping hyperlinks
+ (f'{OSC_START_BEL}ab{OSC_END_BEL} {OSC_START_ST}cd{OSC_END_ST}', 0, 5,
+ f'{OSC_START_BEL}ab{OSC_END_BEL} {OSC_START_ST}cd{OSC_END_ST}'),
+ # Hyperlink with params preserved
+ ('\x1b]8;id=myid;http://example.com\x07link\x1b]8;;\x07', 1, 3,
+ '\x1b]8;id=myid;http://example.com\x07in\x1b]8;;\x07'),
+ # Hyperlink text before clip window, hyperlink within
+ (f'before{OSC_START_BEL}link{OSC_END_BEL}', 6, 10,
+ f'{OSC_START_BEL}link{OSC_END_BEL}'),
+ # SGR inside hyperlink is preserved
+ (f'{OSC_START_BEL}\x1b[31mred link\x1b[0m{OSC_END_BEL}', 4, 8,
+ f'{OSC_START_BEL}\x1b[31mlink\x1b[0m{OSC_END_BEL}'),
+ # Hyperlink open without matching close -- preserved as regular sequence
+ ('\x1b]8;;http://example.com\x07link', 0, 4, '\x1b]8;;http://example.com\x07link'),
+ # Bare ESC between hyperlink markers
+ ('\x1b]8;;url\x07ab\x1bxcd\x1b]8;;\x07', 0, 6,
+ '\x1b]8;;url\x07ab\x1bxcd\x1b]8;;\x07'),
+ # Per OSC 8 spec "A note on opening/closing hyperlinks": terminal
+ # emulators treat hyperlinks as a state attribute, not nested anchors.
+ # Opening a new hyperlink replaces the current one; a single close
+ # terminates the hyperlink regardless of how many opens preceded it.
+ #
+ # Two opens, one close: URL "b" replaces "a", close terminates.
+ ('\x1b]8;;a\x07AB\x1b]8;;b\x07CD\x1b]8;;\x07EF', 0, 6,
+ '\x1b]8;;a\x07AB\x1b]8;;b\x07CD\x1b]8;;\x07EF'),
+ # URL switch without closing: "b" replaces "a", no close in input.
+ ('\x1b]8;;a\x07AB\x1b]8;;b\x07CD', 0, 4,
+ '\x1b]8;;a\x07AB\x1b]8;;b\x07CD'),
+ # Multiple opens, close, bare close: "b" replaces "a", first close
+ # terminates, trailing close is harmless (closing when not open).
+ ('\x1b]8;;a\x07ABCD \x1b]8;;b\x07XY\x1b]8;;\x07 EF\x1b]8;;\x07', 0, 10,
+ '\x1b]8;;a\x07ABCD \x1b]8;;b\x07XY\x1b]8;;\x07 EF\x1b]8;;\x07'),
+]
+
+
+@pytest.mark.parametrize('text,start,end,expected', CLIP_HYPERLINK_CASES)
+def test_clip_osc_hyperlink_text_clipping(text, start, end, expected):
+ """OSC 8 hyperlink inner text is clipped and hyperlink rebuilt."""
+ assert repr(clip(text, start, end)) == repr(expected)
+
+
+# Control_codes variants with cursor movement into hyperlink
+#
+# Overwriting hyperlink cells causes corrupted "run on" hyperlinks in practical
+# testing with kitty; presumably the hidden "end hyperlink" sequence is
+# overwritten. In any case, we make no attempt to parse overwrites of
+# hyperlinks: we consider it a "glitch sequence".
+_HLINK_OVERWRITE = f'{OSC_START_BEL}link{OSC_END_BEL}\x1b[2Dxy'
+CLIP_HYPERLINK_CONTROL_CODES_CASES = [
+ ('parse', 0, 4, f'{OSC_START_BEL}link{OSC_END_BEL}'),
+ ('parse', 0, 3, f'{OSC_START_BEL}lin{OSC_END_BEL}'),
+ ('parse', 0, 2, f'{OSC_START_BEL}li{OSC_END_BEL}'),
+ ('parse', 0, 1, f'{OSC_START_BEL}l{OSC_END_BEL}'),
+ # these next two are certainly "in error"
+ ('parse', 1, 4, f'{OSC_START_BEL}ink{OSC_END_BEL}y'),
+ ('parse', 1, 3, f'{OSC_START_BEL}in{OSC_END_BEL}x'),
+ ('parse', 1, 2, f'{OSC_START_BEL}i{OSC_END_BEL}'),
+ ('ignore', 0, 20, f'{_HLINK_OVERWRITE}'),
+ # and these two, 'xy' are missing entirely, also "in error"
+ ('parse', 0, 20, f'{OSC_START_BEL}link{OSC_END_BEL}'),
+ ('strict', 0, 20, f'{OSC_START_BEL}link{OSC_END_BEL}'),
+]
+
+
+@pytest.mark.parametrize('control_codes,start,end,expected',
+ CLIP_HYPERLINK_CONTROL_CODES_CASES)
+def test_clip_hyperlink_control_codes_overwrite(control_codes, start, end, expected):
+ assert repr(clip(_HLINK_OVERWRITE, start, end, control_codes=control_codes)) == repr(expected)
+
+
+# Painter-path hyperlink edge cases
+CLIP_HYPERLINK_PAINTER_CASES = [
+ # Empty hyperlink dropped
+ (f'\x1b[2D{OSC_START_BEL}{OSC_END_BEL}xy', 'parse', 0, 4, 'xy'),
+ # Hyperlink entirely after clip window -- skipped
+ (f'\x1b[2Dab{OSC_START_BEL}cde{OSC_END_BEL}', 'parse', 0, 2, 'ab'),
+ # Hyperlink entirely before clip window -- skipped
+ (f'{OSC_START_BEL}ab{OSC_END_BEL}\x1b[2Dcdef', 'parse', 2, 4, 'ef'),
+ # Hyperlink overlapping clip window -- clipped
+ (f'\x1b[2D{OSC_START_BEL}abcdef{OSC_END_BEL}', 'parse', 0, 3,
+ f'{OSC_START_BEL}abc{OSC_END_BEL}'),
+ # Bare ESC inside hyperlink in painter path
+ (f'\x1b[2D{OSC_START_BEL}a\x1bb{OSC_END_BEL}', 'parse', 0, 4,
+ f'{OSC_START_BEL}a\x1bb{OSC_END_BEL}'),
+ # strict mode: non-hyperlink cells don't overlap hyperlink_cells
+ (f'{OSC_START_BEL}link{OSC_END_BEL}\x1b[5Chi', 'strict', 0, 11,
+ f'{OSC_START_BEL}link{OSC_END_BEL} hi'),
+]
+
+
+@pytest.mark.parametrize('text,control_codes,start,end,expected',
+ CLIP_HYPERLINK_PAINTER_CASES)
+def test_clip_hyperlink_painter_cases(text, control_codes, start, end, expected):
+ assert repr(clip(text, start, end, control_codes=control_codes)) == repr(expected)
def test_clip_sequences_cjk_with_sequences():
@@ -148,7 +285,7 @@ def test_clip_sequences_between_chars():
assert clip('a\x1b[31mb\x1b[0mc', 1, 2) == '\x1b[31mb\x1b[0m'
-def test_clip_sequences_lone_esc():
+def test_clip_sequences_fs_escape():
assert clip('a\x1bb', 0, 2) == 'a\x1bb'
@@ -230,12 +367,13 @@ def test_clip_tab_with_sequences():
CLIP_CONTROL_CHAR_CASES = [
- ('abc\bde', 0, 5, 'abc\bde'),
- ('ab\acd', 0, 4, 'ab\acd'),
+ ('abc\bde', 0, 5, 'abde'),
+ ('ab\acd', 0, 4, 'ab\x07cd'),
('ab\x00cd', 0, 4, 'ab\x00cd'),
- ('abc\rde', 0, 5, 'abc\rde'),
- ('\a\b\rHello', 0, 5, '\a\b\rHello'),
+ ('abc\rde', 0, 5, 'dec'),
+ ('\a\b\rHello', 0, 5, '\x07Hello'),
('ab\x01\x02cd', 0, 4, 'ab\x01\x02cd'),
+ ('ab\x1b\x00cd', 0, 4, 'ab\x1b\x00cd'),
]
@@ -244,19 +382,73 @@ def test_clip_control_chars_zero_width(text, start, end, expected):
assert clip(text, start, end) == expected
-CLIP_CURSOR_SEQUENCE_CASES = [
- ('ab\x1b[5Ccd', 0, 4, 'ab\x1b[5Ccd'),
- ('abcde\x1b[2Df', 0, 6, 'abcde\x1b[2Df'),
- ('ab\x1b[10Ccd', 0, 4, 'ab\x1b[10Ccd'),
- ('ab\x1b[Ccd', 0, 4, 'ab\x1b[Ccd'),
+def test_clip_tab_first_visible_with_sgr():
+ """Tab as first visible character with SGR propagation."""
+ assert clip('\x1b[31m\tb', 0, 4, tabsize=8) == '\x1b[31m \x1b[0m'
+
+
+def test_clip_overtyping_override_by_control_codes_ignore():
+ """When overtyping=True and control_codes='ignore', overtyping is overridden to False."""
+ # elif entered: overtyping=True + control_codes='ignore' → overtyping=False
+ assert clip('hello world', 0, 5, overtyping=True, control_codes='ignore') == 'hello'
+ # Verify that overtyping is actually disabled: cursor movement chars are
+ # treated as zero-width, so the result is the same as without overtyping.
+ assert clip('ab\x08cd', 0, 4, overtyping=True, control_codes='ignore') == 'ab\x08cd'
+
+
+def test_clip_overtyping_without_ignore():
+ """When overtyping=True and control_codes='parse', elif is not entered."""
+ # elif skipped: overtyping=True + control_codes='parse' → overtyping stays True
+ # The painter path is used, cursor movement sequences affect output.
+ assert clip('ab\x1b[2Dcd', 0, 4, overtyping=True, control_codes='parse') == 'cd'
+
+
+# Indeterminate-effect sequences that raise ValueError in strict mode
+# (matching width() behavior). These are not cursor-movement sequences,
+# so they exercise the simple (non-overtyping) path.
+
+INDETERMINATE_SEQUENCES = [
+ ('\x1b[K', 'erase_in_line'),
+ ('\x1b[2K', 'erase_in_line_params'),
+ ('\x1b[J', 'erase_in_display'),
+ ('\x1b[2J', 'erase_in_display_params'),
+ ('\x1b[H', 'cursor_home'),
+ ('\x1b[1;1H', 'cursor_address'),
+ ('\x1b[A', 'cursor_up'),
+ ('\x1b[2A', 'cursor_up_params'),
+ ('\x1b[B', 'cursor_down'),
+ ('\x1b[5B', 'cursor_down_params'),
+ ('\x1b[P', 'delete_character'),
+ ('\x1b[1P', 'parm_dch'),
+ ('\x1b[M', 'delete_line'),
+ ('\x1b[1M', 'parm_delete_line'),
+ ('\x1b[L', 'insert_line'),
+ ('\x1b[1L', 'parm_insert_line'),
+ ('\x1b[@', 'insert_character'),
+ ('\x1b[1X', 'erase_chars'),
+ ('\x1b[S', 'scroll_up'),
+ ('\x1b[T', 'scroll_down'),
+ ('\x1b[?1049h', 'enter_fullscreen'),
+ ('\x1b[?1049l', 'exit_fullscreen'),
+ ('\x1bD', 'scroll_forward'),
+ ('\x1bM', 'scroll_reverse'),
+ ('\x1b8', 'restore_cursor'),
+ ('\x1bc', 'full_reset'),
]
-@pytest.mark.parametrize('text,start,end,expected', CLIP_CURSOR_SEQUENCE_CASES)
-def test_clip_cursor_sequences_zero_width(text, start, end, expected):
- assert clip(text, start, end) == expected
+@pytest.mark.parametrize('seq,cap_name', INDETERMINATE_SEQUENCES)
+def test_clip_strict_indeterminate_raises(seq, cap_name):
+ """Clip() strict mode raises ValueError on indeterminate-effect sequences."""
+ with pytest.raises(ValueError, match='Indeterminate cursor sequence'):
+ clip(f'hello{seq}world', 0, 10, control_codes='strict')
-def test_clip_tab_first_visible_with_sgr():
- """Tab as first visible character with SGR propagation."""
- assert clip('\x1b[31m\tb', 0, 4, tabsize=8) == '\x1b[31m \x1b[0m'
+@pytest.mark.parametrize('seq,cap_name', INDETERMINATE_SEQUENCES)
+def test_clip_parse_indeterminate_preserved(seq, cap_name):
+ """Clip() parse mode preserves indeterminate sequences as zero-width."""
+ result = clip(f'hello{seq}world', 0, 10, control_codes='parse')
+ # The sequence is preserved, visible text is hello + world = 10 chars
+ assert 'hello' in result
+ assert 'world' in result
+ assert seq in result
diff --git a/contrib/python/wcwidth/py3/tests/test_clip_cjk_emoji.py b/contrib/python/wcwidth/py3/tests/test_clip_cjk_emoji.py
new file mode 100644
index 00000000000..e41bd627663
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_clip_cjk_emoji.py
@@ -0,0 +1,47 @@
+"""
+Tests for clip() with CJK and Emoji characters.
+
+These ensure wide graphemes (CJK / emoji / ZWJ sequences) are clipped correctly:
+- Partial columns of a wide grapheme are replaced by fillchar.
+- Full grapheme included when fully inside slice.
+"""
+
+# 3rd party
+import pytest
+
+# local
+from wcwidth import clip, width
+
+
+@pytest.mark.parametrize("ch", [
+ "中",
+ "🙂",
+ "👨\u200d👩\u200d👧", # family ZWJ
+ "👩\u200d👩\u200d👧" # another ZWJ variant
+])
+def test_partial_and_full_wide_grapheme(ch):
+ w = width(ch)
+ assert w >= 1
+ if w > 1:
+ # partial clip of first column -> fillchar
+ assert clip(ch, 0, 1) == ' '
+ # full clip covering entire grapheme -> original grapheme
+ assert clip(ch, 0, w) == ch
+ # width of clipped full grapheme should match
+ assert width(clip(ch, 0, w)) == w
+ else:
+ # narrow grapheme: trivial
+ assert clip(ch, 0, 1) == ch
+
+
+def test_mixed_cjk_emoji_sequence():
+ text = 'A中🙂B'
+ total_w = width(text)
+ # sanity
+ assert total_w >= 4
+ # pick a slice that includes the middle two columns (center of string)
+ # ensure clip doesn't raise and width matches requested slice
+ start = 1
+ end = 4
+ out = clip(text, start, end)
+ assert width(out) == (end - start)
diff --git a/contrib/python/wcwidth/py3/tests/test_clip_overtyping.py b/contrib/python/wcwidth/py3/tests/test_clip_overtyping.py
new file mode 100644
index 00000000000..1d106bae4bb
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_clip_overtyping.py
@@ -0,0 +1,159 @@
+"""
+Tests for clip()'s overtyping (painter) path.
+
+The painter algorithm is used when the text contains cursor movement sequences
+(CSI n C/D, backspace, carriage return, HPA) that require column-level tracking
+to determine the final visible output. Auto-detection of the overtyping path
+happens in clip() via the presence of \\x08, \\r, or horizontal cursor movement
+escape sequences, or can be forced with ``overtyping=True``.
+
+These tests codify expected visible results when cursor movement sequences
+affect horizontal positions.
+"""
+
+# 3rd party
+import pytest
+
+# local
+from wcwidth import clip
+
+
+@pytest.mark.parametrize("text,start,end,kwargs,expected", [
+ # Cursor-right introduces a gap that should be filled with spaces
+ ("hello\x1b[10Cworld", 0, 10, {}, "hello" + " " * 5),
+ # Clipping just the initial region ignores the later rightward write
+ ("hello\x1b[10Cworld", 0, 5, {}, "hello"),
+ # Cursor-left overwrites previous characters
+ ("hello\x1b[2DXY", 0, 5, {}, "helXY"),
+ # Cursor-left overwrites entire visible token
+ ("abc\x1b[3DXY", 0, 5, {}, "XYc"),
+ # Cursor-left at column 0 (prev_col not > col, no overwrite)
+ ("\x1b[2Dhi", 0, 2, {}, "hi"),
+ # Cursor-left with no visible tokens emitted
+ ("\x1b[5C\x1b[2Dhi", 5, 7, {}, ""),
+ # Cursor-left overwrites text, seq tokens preserve column spatial order
+ ("ab\x1b]8;;http://example.com\x07\x1b[2Dcd", 0, 4, {}, "cd\x1b]8;;http://example.com\x07"),
+ # Cursor-left into wide char twice, second time on empty token triggers i < 0 break
+ ("中\x1b[D\x1b[Da", 0, 4, {}, "a "),
+ ('ab\x1b[5Ccd', 0, 4, {}, 'ab '),
+ ('abcde\x1b[2Df', 0, 6, {}, 'abcfe'),
+ ('hello\x1b[5Dw', 0, 5, {}, 'wello'),
+ ('ab\x1b[10Ccd', 0, 4, {}, 'ab '),
+ ('XY\x1b[Czy', 0, 4, {}, 'XY z'),
+ ('XY\x1b[Czy', 0, 5, {}, 'XY zy'),
+ ('XY\x1b[Czy', 1, 3, {}, 'Y '),
+ ('XY\x1b[Czy', 1, 4, {}, 'Y z'),
+ ('LOL\x1b[5Clol', 0, 12, {}, 'LOL lol'),
+ ('LOL\x1b[5Clol', 1, 11, {}, 'OL lol'),
+ ('LOL\x1b[5Clol', 2, 11, {}, 'L lol'),
+ ('LOL\x1b[5Clol', 3, 11, {}, ' lol'),
+ ('LOL\x1b[5Clol', 4, 11, {}, ' lol'),
+ ('LOL\x1b[5Clol', 5, 11, {}, ' lol'),
+ ('LOL\x1b[5Clol', 6, 11, {}, ' lol'),
+ ('LOL\x1b[5Clol', 7, 11, {}, ' lol'),
+ ('LOL\x1b[5Clol', 8, 11, {}, 'lol'),
+ ('LOL\x1b[5Clol', 9, 11, {}, 'ol'),
+ # SGR + cursor movement: SGR state update in painter path (line 245)
+ ('\x1b[31mab\x1b[2Dcd', 0, 4, {}, '\x1b[31mcd\x1b[0m'),
+ # Tab tabsize=0 in painter path (line 272->280 else branch)
+ ('ab\x1b[2D\tcd', 0, 4, {'tabsize': 0}, '\tcd'),
+ # Zero-width grapheme outside clip window in painter (line 290->301)
+ ('\x1b[2D\u0301hello', 1, 4, {}, 'ell'),
+ # Wide char partially clipped in painter (lines 298-299)
+ ('ab\x1b[2D中d', 1, 4, {}, ' d'),
+ # walk_col >= end in painter reconstruction (327->328)
+ ('hello\x1b[2Dxy', 0, 3, {}, 'hel'),
+ # Hole fillchar in painter reconstruction (345->346)
+ ('\x1b[5Chi', 0, 7, {}, ' hi'),
+ # Trailing sequences stored at columns after col_limit (352, 354->355, 355->356)
+ ('abc\x1b[2D', 0, 2, {}, 'ab'),
+ # Bare ESC not part of any sequence, pass through in painter path (239->240)
+ ('a\x1bb\x1b[2Dc', 0, 3, {}, 'c\x1bb'),
+ # Tab with tabsize>0 in painter; `b` falls at col 4, inside (0,5) (277->284, 278->279, 278->280)
+ ('\x1b[2Da\tb', 0, 5, {'tabsize': 4}, 'a b'),
+ # propagate_sgr=False in painter path (225->226)
+ ('ab\x1b[2Dcd', 0, 4, {'propagate_sgr': False}, 'cd'),
+ # Non-SGR sequence before any visible text in painter (225->226 True)
+ ('\x1b]8;;http://example.com\x07ab\x1b[2Dcd', 0, 4, {}, '\x1b]8;;http://example.com\x07cd'),
+ # Bare ESC at end of text in painter (239->240)
+ ('ab\x1b[2D\x1b', 0, 2, {}, '\x1bab'),
+ # Wide char overwritten from right side (212 orphan fixup)
+ ('a中\x1b[Db', 0, 4, {}, 'a b'),
+ # Tab expansion with col+=1 not inside clip window (277->279, 293)
+ ('\x1b[2Ca\tb', 2, 4, {'tabsize': 8}, 'a '),
+ # CR: carriage return resets column to 0, overwriting earlier cells
+ ('aaa\r\r\rxxx', 0, 4, {}, 'xxx'),
+ ('abc\rXY', 0, 5, {}, 'XYc'),
+ ('hello\rworld', 0, 5, {}, 'world'),
+ # CR moves back to column 0 then writes within clip window
+ ('abc\rde', 1, 3, {}, 'ec'),
+ # BS: backspace overwrites previous character
+ ('abc\bde', 0, 5, {}, 'abde'),
+ ('abc\b\bXY', 0, 5, {}, 'aXY'),
+ ('ab\b\b\bXY', 0, 4, {}, 'XY'),
+ # HPA: horizontal position absolute (CSI n G)
+ ('abc\x1b[GXY', 0, 5, {}, 'XYc'),
+ ('abc\x1b[2GXY', 0, 5, {}, 'aXY'),
+ ('abc\x1b[5GXY', 0, 7, {}, 'abc XY'),
+ ('abc\x1b[5GXY', 0, 5, {}, 'abc X'),
+ ('\x1b[5GXY', 3, 7, {}, ' XY'),
+ # HPA no-param inside clip window
+ ('abc\x1b[GXY', 1, 4, {}, 'Yc'),
+ # walk_col >= end with sequences at column == end (line 351)
+ ('\x1b[5C\x1b]8;;http://example.com\x07', 0, 5, {'propagate_sgr': False}, ' \x1b]8;;http://example.com\x07'),
+ # Trailing sequences past col_limit (line 374)
+ ('\x1b[5C\x1b]8;;http://example.com\x07', 0, 3, {'propagate_sgr': False}, ' \x1b]8;;http://example.com\x07'),
+ # Lone ESC as first visible thing in painter (captured_style = current_style, line 398)
+ ('\x1b[D\x1b\x1bXy', 0, 3, {}, '\x1b\x1bXy'),
+ # Hyperlink VISIBLE after captured_style already set
+ ('a\x1b[C\x1b]8;;http://x\x07hi\x1b]8;;\x07', 0, 5, {}, 'a \x1b]8;;http://x\x07hi\x1b]8;;\x07'),
+ # Tab with tabsize=0 as first visible thing in painter
+ ('\x1b[D\tab', 0, 2, {'tabsize': 0}, '\tab'),
+ # Zero-width grapheme as first visible thing in painter
+ ('\x1b[D\u0301x', 0, 3, {}, '\u0301x'),
+ # Generic escape sequence as first visible in painter
+ ('\x1b[D\x1b[Hxy', 0, 3, {}, '\x1b[Hxy'),
+])
+def test_clip_cursor_sequences_expected_behaviour(text, start, end, kwargs, expected):
+ """Verify clip() output matches terminal-visible columns after cursor moves."""
+ result = clip(text, start, end, **kwargs)
+ assert repr(result) == repr(expected)
+
+
+def test_clip_cursor_left_strict_out_of_bounds():
+ """Clip() with control_codes='strict' raises on cursor-left beyond string start."""
+ with pytest.raises(ValueError, match='Cursor left movement'):
+ clip('a\x1b[5Da', 0, 1, control_codes='strict')
+
+
+def test_clip_cursor_left_strict_out_of_bounds_painter():
+ """Clip() strict-mode raises on cursor-left beyond start in painter path."""
+ with pytest.raises(ValueError, match='Cursor left movement'):
+ clip('\x1b[2Dab', 0, 2, control_codes='strict')
+
+
+def test_clip_cursor_left_out_of_bounds_parse_no_raise():
+ """Clip() parse mode silently clamps cursor-left beyond start."""
+ assert clip('a\x1b[5Da', 0, 1) == 'a'
+ assert clip('ab\x1b[99Dcd', 0, 4) == 'cd'
+
+
+def test_clip_strict_cr_allowed():
+ """Carriage return is allowed in strict mode (text begins at column 0)."""
+ assert clip('hello\rworld', 0, 5, control_codes='strict') == 'world'
+
+
+def test_clip_strict_hpa_allowed():
+ """HPA is allowed in strict mode (text begins at column 0)."""
+ assert clip('abc\x1b[5Gde', 0, 10, control_codes='strict') == 'abc de'
+
+
+def test_clip_strict_cursor_left_allowed():
+ """Cursor-left within bounds is allowed in strict mode."""
+ assert clip('hello\x1b[2Dxy', 0, 5, control_codes='strict') == 'helxy'
+
+
+def test_clip_strict_indeterminate_sequence_painter():
+ """Clip() strict-mode raises on indeterminate sequence in painter path."""
+ with pytest.raises(ValueError, match='Indeterminate cursor sequence'):
+ clip('a\x1b[D\x1b[Hb', 0, 3, control_codes='strict')
diff --git a/contrib/python/wcwidth/py3/tests/test_core.py b/contrib/python/wcwidth/py3/tests/test_core.py
index 024dcdba05d..dd1e3b7d1b7 100644
--- a/contrib/python/wcwidth/py3/tests/test_core.py
+++ b/contrib/python/wcwidth/py3/tests/test_core.py
@@ -1,6 +1,5 @@
"""Core tests for wcwidth module."""
# std imports
-import sys
import importlib.metadata
# 3rd party
@@ -8,9 +7,7 @@ import pytest
# local
import wcwidth
-
-_wcwidth_module = sys.modules['wcwidth.wcwidth']
-_WIDTH_FAST_PATH_MIN_LEN = _wcwidth_module._WIDTH_FAST_PATH_MIN_LEN
+from wcwidth._width import _WIDTH_FAST_PATH_MIN_LEN
def test_package_version():
@@ -68,9 +65,8 @@ def test_hello_jp():
"""
Width of Japanese phrase: コンニチハ, セカイ!
- Given a phrase of 5 and 3 Katakana ideographs, joined with
- 3 English-ASCII punctuation characters, totaling 11, this
- phrase consumes 19 cells of a terminal emulator.
+ Given a phrase of 5 and 3 Katakana ideographs, joined with 3 English-ASCII punctuation
+ characters, totaling 11, this phrase consumes 19 cells of a terminal emulator.
"""
# given,
phrase = 'コンニチハ, セカイ!'
@@ -90,8 +86,7 @@ def test_wcswidth_substr():
"""
Test wcswidth() optional 2nd parameter, ``n``.
- ``n`` determines at which position of the string
- to stop counting length.
+ ``n`` determines at which position of the string to stop counting length.
"""
# given,
phrase = 'コンニチハ, セカイ!'
@@ -414,13 +409,8 @@ def test_bengali_nukta_mc():
@pytest.mark.parametrize("repeat", [1, _WIDTH_FAST_PATH_MIN_LEN])
def test_mc_width_consistency(repeat):
- # width(), wcswidth(), and per-grapheme width sums must all agree.
- #
- # The repeat parameter ensures both the short (parse) and long (fast) code
- # paths of width() are exercised. At repeat=1 the phrases are short enough
- # to go through character-by-character parse mode. At repeat=_WIDTH_FAST_PATH_MIN_LEN
- # every phrase exceeds the threshold and takes the fast path that delegates
- # to wcswidth().
+ """Check width() to wcswidth() consistency."""
+ # repeat value 'WIDTH_FAST_PATH_MIN_LEN' ensures both "fast" and "slow" paths are taken
phrases = [
"\u0915\u094D\u0937\u093F",
"\u0b95\u0bcd\u0bb7\u0bcc",
@@ -464,6 +454,11 @@ def test_virama_conjunct(phrase, expected):
assert wcwidth.width(phrase) == expected
+def test_zwj_at_end_of_string():
+ """ZWJ at end of string (not after virama) is consumed with zero width."""
+ assert wcwidth.wcswidth('a\u200D') == 1
+
+
def test_soft_hyphen():
# Test SOFT HYPHEN, category 'Cf' usually are zero-width, but most
# implementations agree to draw it was '1' cell, visually
@@ -493,3 +488,24 @@ def test_prepended_concatenation_mark_width(codepoint, name):
"""Prepended Concatenation Marks have width 1, not 0."""
# https://github.com/jquast/wcwidth/issues/119
assert wcwidth.wcwidth(chr(codepoint)) == 1
+
+
+def test_legacy_module():
+ """Verify legacy ``wcwidth.wcwidth`` module's public items are importable."""
+ # pylint: disable=import-outside-toplevel
+ # std imports
+ import sys
+
+ # Access the legacy submodule via sys.modules (matching 0.6.0 where
+ # 'import wcwidth.wcwidth' returned the function, not the module).
+ _legacy = sys.modules['wcwidth.wcwidth']
+
+ for name in _legacy.__all__:
+ attr = getattr(_legacy, name)
+ assert attr is not None, f"wcwidth.wcwidth.{name} is None"
+
+ # Verify that individual imports from the legacy path also work,
+ # e.g. 'from wcwidth.wcwidth import wcswidth'
+ for name in _legacy.__all__:
+ obj = getattr(_legacy, name)
+ assert obj is not None, f"could not import {name} from wcwidth.wcwidth"
diff --git a/contrib/python/wcwidth/py3/tests/test_emojis.py b/contrib/python/wcwidth/py3/tests/test_emojis.py
index c7ce359939a..fedeff3cffe 100644
--- a/contrib/python/wcwidth/py3/tests/test_emojis.py
+++ b/contrib/python/wcwidth/py3/tests/test_emojis.py
@@ -1,4 +1,5 @@
"""Tests for emoji width measurement and ZWJ sequences."""
+
# std imports
import os
diff --git a/contrib/python/wcwidth/py3/tests/test_grapheme.py b/contrib/python/wcwidth/py3/tests/test_grapheme.py
index a02ac5e7529..de4489f8e7e 100644
--- a/contrib/python/wcwidth/py3/tests/test_grapheme.py
+++ b/contrib/python/wcwidth/py3/tests/test_grapheme.py
@@ -1,4 +1,5 @@
"""Tests for grapheme cluster segmentation."""
+
# std imports
import os
diff --git a/contrib/python/wcwidth/py3/tests/test_hyperlink.py b/contrib/python/wcwidth/py3/tests/test_hyperlink.py
new file mode 100644
index 00000000000..7b083a82573
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_hyperlink.py
@@ -0,0 +1,75 @@
+"""Tests for OSC 8 hyperlink parsing."""
+
+# 3rd party
+import pytest
+
+# local
+from wcwidth.hyperlink import Hyperlink, HyperlinkParams
+
+PARAMS_PARSE_VALID = [
+ ('\x1b]8;;http://example.com\x07', 'http://example.com', '', '\x07'),
+ ('\x1b]8;id=a;http://example.com\x1b\\', 'http://example.com', 'id=a', '\x1b\\'),
+]
+
+
+@pytest.mark.parametrize('seq,url,params,term', PARAMS_PARSE_VALID)
+def test_hyperlinkparams_parse_valid(seq, url, params, term):
+ """Parse a valid OSC 8 open sequence."""
+ result = HyperlinkParams.parse(seq)
+ assert result is not None
+ assert result.url == url
+ assert result.params == params
+ assert result.terminator == term
+
+
+@pytest.mark.parametrize('seq', [
+ 'not an escape',
+ '\x1b[31m',
+ '',
+])
+def test_hyperlinkparams_parse_invalid(seq):
+ """Parse an invalid/non-OSC-8 sequence returns None."""
+ assert HyperlinkParams.parse(seq) is None
+
+
+def test_hyperlinkparams_make_open():
+ assert HyperlinkParams(url='http://example.com', params='id=a', terminator='\x07').make_open() == '\x1b]8;id=a;http://example.com\x07'
+
+
+def test_hyperlinkparams_make_close():
+ assert HyperlinkParams(url='http://example.com', terminator='\x07').make_close() == '\x1b]8;;\x07'
+
+
+_HL = '\x1b]8;;http://example.com\x07Hello\x1b]8;;\x07'
+
+
+def test_hyperlink_parse_valid():
+ hl = Hyperlink.parse(_HL)
+ assert hl is not None
+ assert hl.text == 'Hello'
+ assert hl.params.url == 'http://example.com'
+
+
+@pytest.mark.parametrize('text,start', [
+ ('Hello world', 0),
+ ('\x1b[31mHello\x1b[0m', 0), # SGR, not OSC 8
+ ('\x1b]8;;http://example.com\x07Hello', 0), # open without close
+])
+def test_hyperlink_parse_returns_none(text, start):
+ assert Hyperlink.parse(text, start) is None
+
+
+def test_hyperlink_find_close_not_found():
+ assert Hyperlink.find_close('no escape here', 0) == (-1, -1)
+
+
+def test_hyperlink_make_sequence():
+ hl = Hyperlink.parse(_HL)
+ assert hl is not None
+ assert hl.make_sequence() == _HL
+
+
+def test_hyperlink_display_width():
+ hl = Hyperlink.parse(_HL)
+ assert hl is not None
+ assert hl.display_width() == 5
diff --git a/contrib/python/wcwidth/py3/tests/test_justify.py b/contrib/python/wcwidth/py3/tests/test_justify.py
index 71dec6199b6..f2639e8ca41 100644
--- a/contrib/python/wcwidth/py3/tests/test_justify.py
+++ b/contrib/python/wcwidth/py3/tests/test_justify.py
@@ -1,4 +1,5 @@
"""Tests for text justification functions."""
+
# local
from wcwidth import ljust, rjust, width, center
diff --git a/contrib/python/wcwidth/py3/tests/test_sgr_state.py b/contrib/python/wcwidth/py3/tests/test_sgr_state.py
index db9c8a9c94d..ecba402f237 100644
--- a/contrib/python/wcwidth/py3/tests/test_sgr_state.py
+++ b/contrib/python/wcwidth/py3/tests/test_sgr_state.py
@@ -1,4 +1,5 @@
"""Tests for SGR state tracking and propagation."""
+
from __future__ import annotations
# std imports
diff --git a/contrib/python/wcwidth/py3/tests/test_text_sizing.py b/contrib/python/wcwidth/py3/tests/test_text_sizing.py
new file mode 100644
index 00000000000..b5e18085e35
--- /dev/null
+++ b/contrib/python/wcwidth/py3/tests/test_text_sizing.py
@@ -0,0 +1,327 @@
+"""Tests for Text Sizing Protocol (OSC 66) support."""
+
+# 3rd party
+import pytest
+
+# local
+from wcwidth import (TextSizing,
+ TextSizingParams,
+ clip,
+ width,
+ wcswidth,
+ iter_sequences,
+ strip_sequences)
+from wcwidth.text_sizing import TEXT_FIELD_MAPPING
+from wcwidth.escape_sequences import TEXT_SIZING_PATTERN
+
+_W_HI = TEXT_FIELD_MAPPING['w'].high
+_N_HI = TEXT_FIELD_MAPPING['n'].high
+_D_HI = TEXT_FIELD_MAPPING['d'].high
+
+CONTROL_CODES_PARAMS_CASES = [
+ ('x=2', "", "Unknown text sizing field 'x' in "),
+ ('s=3:x=3', "s=3", "Unknown text sizing field 'x' in "),
+ ('s=2:x=3:w=9', f"s=2:w={_W_HI}", "Unknown text sizing field 'x' in "),
+ ('xyz=2', "", "Unknown text sizing field 'xyz' in "),
+ ('xxx', "", "Expected '=' in text sizing parameter"),
+ ('s=xxx', "", "Illegal text sizing value 'xxx' in "),
+ ('s=-99', "", "Out of bounds text sizing value '-99' in "),
+ ('s=99', f"s={_W_HI}", "Out of bounds text sizing value '99' in "),
+ ('w=-1', "", "Out of bounds text sizing value '-1' in "),
+ ('w=8', f"w={_W_HI}", "Out of bounds text sizing value '8' in "),
+ ('n=20', f"n={_N_HI}", "Out of bounds text sizing value '20' in "),
+ ('d=99', f"d={_D_HI}", "Out of bounds text sizing value '99' in "),
+ ('v=5', "v=2", "Out of bounds text sizing value '5' in "),
+ ('h=3', "h=2", "Out of bounds text sizing value '3' in "),
+]
+
+
[email protected]('given_params,expected_remainder,expected_exc,', CONTROL_CODES_PARAMS_CASES)
+def test_text_sizing_params_control_codes(given_params, expected_remainder, expected_exc):
+ """Verify control_codes='strict' and 'parse' behavior in TextSizingParams.from_params()."""
+ # assert control_codes='strict' raises expected exception,
+ with pytest.raises(ValueError) as exc_info:
+ TextSizingParams.from_params(given_params, control_codes='strict')
+ assert exc_info.value.args[0].startswith(expected_exc)
+
+ # when 'parse' (default), any illegal argument or value is filtered, excluded, or clipped
+ params = TextSizingParams.from_params(given_params)
+ assert params.make_sequence() == expected_remainder
+
+
[email protected]('given_params,expected_remainder,expected_exc,', CONTROL_CODES_PARAMS_CASES)
+def test_text_sizing_width_control_codes(given_params, expected_remainder, expected_exc):
+ """Verify control_codes='strict' with invalid OSC 66 sequences in wcwidth.width()."""
+ seq1 = '\x1b]66;' + given_params + ';ABC' + '\x07'
+ seq2 = '\x1b]66;' + given_params + ';ABC' + '\x1b\\'
+ for seq in (seq1, seq2):
+ with pytest.raises(ValueError) as exc_info:
+ width(seq, control_codes='strict')
+ assert exc_info.value.args[0].startswith(expected_exc)
+
+
[email protected]('params,expected_repr', [
+ (TextSizingParams(), 'TextSizingParams()'),
+ (TextSizingParams(scale=2, width=1), 'TextSizingParams(scale=2, width=1)'),
+ (TextSizingParams(scale=2, width=3, numerator=1, denominator=2,
+ vertical_align=1, horizontal_align=2),
+ 'TextSizingParams(scale=2, width=3, numerator=1, denominator=2, '
+ 'vertical_align=1, horizontal_align=2)'),
+])
+def test_text_sizing_params_repr(params, expected_repr):
+ """Verify TextSizingParams.__repr__ output."""
+ assert repr(params) == expected_repr
+
+
[email protected]('params,text,expected_width', [
+ # cases of static width=N values,
+ (TextSizingParams(scale=2, width=1), 'climclam', 2),
+ (TextSizingParams(scale=2, width=3), 'anything', 6),
+ (TextSizingParams(scale=1, width=5), '', 5),
+ (TextSizingParams(scale=3, width=1), 'x', 3),
+ # and automatic width (width=0) values,
+ (TextSizingParams(), '', 0),
+ (TextSizingParams(), 'AB', 2),
+ (TextSizingParams(), '中', 2),
+ (TextSizingParams(scale=2), 'AB', 4),
+ (TextSizingParams(scale=2), '中', 4),
+ (TextSizingParams(scale=3), '', 0),
+ (TextSizingParams(scale=7, width=7, numerator=15, denominator=15,
+ vertical_align=2, horizontal_align=2), 'x!yzzy', 49),
+])
+def test_text_sizing_width(params, text, expected_width):
+ """Verify width with both kinds of terminator."""
+ # verify internal TextSizing.display_width() result,
+ assert TextSizing(params, text, terminator='\x07').display_width() == expected_width
+ assert TextSizing(params, text, terminator='\x1b\\').display_width() == expected_width
+ seq1 = TextSizing(params, text, terminator='\x07').make_sequence()
+ seq2 = TextSizing(params, text, terminator='\x1b\\').make_sequence()
+
+ # verify round-trip
+ ts_match1, ts_match2 = TEXT_SIZING_PATTERN.match(seq1), TEXT_SIZING_PATTERN.match(seq2)
+ assert ts_match1 and ts_match2
+ assert TextSizing.from_match(ts_match1) == TextSizing(params, text, terminator='\x07')
+ assert TextSizing.from_match(ts_match2) == TextSizing(params, text, terminator='\x1b\\')
+
+ # and external width(),
+ assert width(seq1) == expected_width
+ assert width(seq2) == expected_width
+
+ # verify 'strict' does not raise ValueError
+ width(seq1, control_codes='strict')
+ width(seq2, control_codes='strict')
+
+ # and verify 'ignore' measures only inner_text (does not parse scale or width)
+ assert width(seq1, control_codes='ignore') == wcswidth(text)
+ assert width(seq2, control_codes='ignore') == wcswidth(text)
+
+
[email protected]('given_sequence,expected_text,expected_params,expected_width', [
+ ('\x1b]66;s=2:w=2;AB\x07', 'AB', 's=2:w=2', 4),
+ ('\x1b]66;s=2:w=2;\u4e2d\x07', '\u4e2d', 's=2:w=2', 4),
+ ('\x1b]66;s=3:w=1;x\x07', 'x', 's=3:w=1', 3),
+ ('\x1b]66;w=5;hello\x07', 'hello', 'w=5', 5),
+ ('\x1b]66;s=2:w=3;anything\x07', 'anything', 's=2:w=3', 6),
+ ('\x1b]66;w=3;x\x07', 'x', 'w=3', 3),
+ ('\x1b]66;s=1;AB\x07', 'AB', '', 2),
+ ('\x1b]66;s=2;AB\x07', 'AB', 's=2', 4),
+ ('\x1b]66;s=2;中\x07', '中', 's=2', 4),
+ ('\x1b]66;s=2;\x07', '', 's=2', 0),
+ ('\x1b]66;s=1:w=1;\x07', '', 'w=1', 1),
+ ('\x1b]66;w=2;A\x07', 'A', 'w=2', 2),
+ ('\x1b]66;s=2:w=3;text\x1b\\', 'text', 's=2:w=3', 6),
+])
+def test_text_sizing_sequence(given_sequence, expected_text, expected_params, expected_width):
+ """Verify parsing and measured width of raw OSC 66 sequence."""
+ ts_match = TEXT_SIZING_PATTERN.match(given_sequence)
+ assert ts_match is not None
+ text_size = TextSizing.from_match(ts_match)
+ assert text_size.params.make_sequence() == expected_params
+ assert text_size.text == expected_text
+ assert width(given_sequence, control_codes='parse') == expected_width
+ assert width(given_sequence, control_codes='strict') == expected_width
+ assert width(given_sequence, control_codes='ignore') == wcswidth(expected_text)
+
+
[email protected]('text,expected', [
+ ('\x1b]66;s=2:w=3:n=1:d=2:v=1:h=2;x!yzzy\x1b\\', 6),
+ ('\x1b]66;s=2:w=3;anything\x07', 6),
+ ('\x1b]66;w=3;x\x07', 3),
+ ('\x1b]66;s=1:w=0;AB\x07', 2),
+ ('\x1b]66;s=2:w=0;AB\x07', 4),
+ ('\x1b]66;s=2:w=0;\u4e2d\x07', 4), # '中'
+ ('\x1b]66;s=1:w=0;\x07', 0),
+ ('abc\x1b]66;w=3;x\x07def', 9),
+ ('\x1b]66;w=2;A\x07\x1b]66;w=3;B\x07', 5),
+ ('\x1b]66;s=2:w=3;text\x1b\\', 6),
+ ('\x1b[31m\x1b]66;w=2;AB\x07\x1b[0m', 2),
+])
+def test_strings_with_text_sizing(text, expected):
+ """Verify measured width of strings containing OSC 66."""
+ assert width(text) == expected
+ assert width(text, control_codes='strict') == expected
+
+
[email protected]('text,expected', [
+ ('\x1b]66;s=2;hello\x07', 'hello'),
+ ('\x1b]66;s=2;hello\x1b\\', 'hello'),
+ ('\x1b]66;;text\x07', 'text'),
+ ('\x1b]66;s=3:w=2;\x07', ''),
+ ('abc\x1b]66;w=2;XY\x07def', 'abcXYdef'),
+ ('\x1b[31m\x1b]66;s=2;red\x07\x1b[0m', 'red'),
+ ('\x1b]66;w=1;A\x07\x1b]66;w=1;B\x07', 'AB'),
+])
+def test_strip_strings_with_text_sizing(text, expected):
+ assert strip_sequences(text) == expected
+
+
[email protected]('text,expected_segs', [
+ ('abc\x1b]66;s=2;hello\x07def', [('abc', False), ('\x1b]66;s=2;hello\x07', True), ('def', False)]),
+ ('abc\x1b]66;s=2;n=1,d=2,w=3;hello\x1b\\def', [('abc', False), ('\x1b]66;s=2;n=1,d=2,w=3;hello\x1b\\', True), ('def', False)]),
+])
+def test_iter_sequences_text_sizing(text, expected_segs):
+ assert list(iter_sequences(text)) == expected_segs
+
+
[email protected]('text,start,end,expected', [
+ ('\x1b]66;w=3;ABC\x07', 0, 3, '\x1b]66;w=3;ABC\x07'),
+ ('\x1b]66;w=3;ABC\x07', 0, 2, '\x1b]66;w=2;AB\x07'),
+ ('\x1b]66;w=3;ABC\x07', 1, 3, '\x1b]66;w=2;BC\x07'),
+ ('ab\x1b]66;w=2;XY\x07cd', 0, 6, 'ab\x1b]66;w=2;XY\x07cd'),
+ ('ab\x1b]66;w=2;XY\x07cd', 0, 3, 'ab\x1b]66;w=1;X\x07'),
+ ('ab\x1b]66;w=2;XY\x07cd', 3, 6, '\x1b]66;w=1;Y\x07cd'),
+ ('ab\x1b]66;w=2;XY\x07cd', 4, 6, 'cd'),
+])
+def test_clip_text_sizing_basic(text, start, end, expected):
+ """Test basic support of clip() with text sizing sequence."""
+ assert repr(clip(text, start, end)) == repr(expected)
+
+
[email protected]('text,start,end,expected', [
+ ('\x1b]66;s=2;ABC\x07', 0, 0, ''),
+ ('\x1b]66;s=2;ABC\x07', 6, 6, ''),
+ ('\x1b]66;s=2;ABC\x07', 0, 2, '\x1b]66;s=2;A\x07'),
+ ('\x1b]66;s=2;ABC\x07', 0, 4, '\x1b]66;s=2;AB\x07'),
+ ('\x1b]66;s=2;ABC\x07', 0, 6, '\x1b]66;s=2;ABC\x07'),
+ ('\x1b]66;s=2;ABC\x07', 2, 6, '\x1b]66;s=2;BC\x07'),
+ ('\x1b]66;s=2;ABC\x07', 4, 6, '\x1b]66;s=2;C\x07'),
+])
+def test_clip_text_sizing_scaled(text, start, end, expected):
+ """Test support of clip() with scale=N arguments."""
+ assert repr(clip(text, start, end)) == repr(expected)
+
+
[email protected]('text,start,end,expected', [
+ # a b c
+ # === === ===
+ # 012 345 678
+ # .
+ # ..
+ # *a*
+ # *a* .
+ # ... *b*
+ # ... *b* .
+ # ... *b* ..
+ # ... *b* *c*
+ ('\x1b]66;s=3;ABC\x07', 0, 0, ''),
+ ('\x1b]66;s=3;ABC\x07', 0, 1, '.'),
+ ('\x1b]66;s=3;ABC\x07', 0, 2, '..'),
+ ('\x1b]66;s=3;ABC\x07', 0, 3, '\x1b]66;s=3;A\x07'),
+ ('\x1b]66;s=3;ABC\x07', 0, 4, '\x1b]66;s=3;A\x07.'),
+ ('\x1b]66;s=3;ABC\x07', 0, 5, '\x1b]66;s=3;A\x07..'),
+ ('\x1b]66;s=3;ABC\x07', 0, 6, '\x1b]66;s=3;AB\x07'),
+ ('\x1b]66;s=3;ABC\x07', 0, 7, '\x1b]66;s=3;AB\x07.'),
+ ('\x1b]66;s=3;ABC\x07', 0, 8, '\x1b]66;s=3;AB\x07..'),
+ ('\x1b]66;s=3;ABC\x07', 0, 9, '\x1b]66;s=3;ABC\x07'),
+ ('\x1b]66;s=3;ABC\x07', 0, 10, '\x1b]66;s=3;ABC\x07'),
+ # a b
+ # === === ===
+ # 012 345 678
+ # . 1, 2
+ # .. 1, 3
+ # .. . 1, 4
+ # .. .. 1, 5
+ # .. *b* 1, 6
+ # .. *b* . 1, 7
+ # .. *b* .. 1, 8
+ # .. *b* *c* 1, 9
+ ('\x1b]66;s=3;ABC\x07', 1, 1, ''),
+ ('\x1b]66;s=3;ABC\x07', 1, 2, '.'),
+ ('\x1b]66;s=3;ABC\x07', 1, 3, '..'),
+ ('\x1b]66;s=3;ABC\x07', 1, 4, '...'),
+ ('\x1b]66;s=3;ABC\x07', 1, 5, '....'),
+ ('\x1b]66;s=3;ABC\x07', 1, 6, '..\x1b]66;s=3;B\x07'),
+ ('\x1b]66;s=3;ABC\x07', 1, 7, '..\x1b]66;s=3;B\x07.'),
+ ('\x1b]66;s=3;ABC\x07', 1, 8, '..\x1b]66;s=3;B\x07..'),
+ ('\x1b]66;s=3;ABC\x07', 1, 9, '..\x1b]66;s=3;BC\x07'),
+ ('\x1b]66;s=3;ABC\x07', 1, 10, '..\x1b]66;s=3;BC\x07'),
+ # two-thirds of string 'A' and half of string 'B' is fillchar
+ # ('\x1b]66;s=3;ABC\x07', 2, 4, '..'),
+ # one-third of string 'A' and all of string 'B'
+ # a b
+ # === === ===
+ # 012 345 678
+ # . 2, 3
+ # . . 2, 4
+ # . .. 2, 5
+ # . *b* 2, 6
+ # . *b* . 2, 7
+ # . *b* .. 2, 8
+ # . *b* *c* 2, 9
+ ('\x1b]66;s=3;ABC\x07', 2, 2, ''),
+ ('\x1b]66;s=3;ABC\x07', 2, 3, '.'),
+ ('\x1b]66;s=3;ABC\x07', 2, 4, '..'),
+ ('\x1b]66;s=3;ABC\x07', 2, 5, '...'),
+ ('\x1b]66;s=3;ABC\x07', 2, 6, '.\x1b]66;s=3;B\x07'),
+ ('\x1b]66;s=3;ABC\x07', 2, 7, '.\x1b]66;s=3;B\x07.'),
+ ('\x1b]66;s=3;ABC\x07', 2, 8, '.\x1b]66;s=3;B\x07..'),
+ ('\x1b]66;s=3;ABC\x07', 2, 9, '.\x1b]66;s=3;BC\x07'),
+ ('\x1b]66;s=3;ABC\x07', 2, 10, '.\x1b]66;s=3;BC\x07'),
+ # and now 3:10, should be easy ...
+ ('\x1b]66;s=3;ABC\x07', 3, 3, ''),
+ ('\x1b]66;s=3;ABC\x07', 3, 4, '.'),
+ ('\x1b]66;s=3;ABC\x07', 3, 5, '..'),
+ ('\x1b]66;s=3;ABC\x07', 3, 6, '\x1b]66;s=3;B\x07'),
+ ('\x1b]66;s=3;ABC\x07', 3, 7, '\x1b]66;s=3;B\x07.'),
+ ('\x1b]66;s=3;ABC\x07', 3, 8, '\x1b]66;s=3;B\x07..'),
+ ('\x1b]66;s=3;ABC\x07', 3, 9, '\x1b]66;s=3;BC\x07'),
+ ('\x1b]66;s=3;ABC\x07', 3, 10, '\x1b]66;s=3;BC\x07'),
+])
+def test_clip_text_sizing_scaled_with_fillchar(text, start, end, expected):
+ """Test support of clip() with scale=N when fillchar is needed to fill the remainder."""
+ assert repr(clip(text, start, end, fillchar='.')) == repr(expected)
+
+
+def test_clip_simple_path_padding():
+ """Simple-path clip with w=N larger than text length exercises padding loop."""
+ # w=4 but only 1 grapheme 'X' — 3 empty units are padded.
+ # Clip window (0, 1) forces partial overlap, triggering
+ # _text_sizing_clip_simple's padding branch.
+ assert repr(clip('\x1b]66;w=4;X\x07', 0, 1)) == repr('\x1b]66;w=1;X\x07')
+
+
[email protected]('text,start,end,expected', [
+ # CR forces painter path; fully-visible text sizing sequence
+ ('\r\x1b]66;w=2;XY\x07', 0, 3, '\x1b]66;w=2;XY\x07'),
+ # CR painter path, text sizing partially clipped (first unit visible)
+ ('\r\x1b]66;w=2;XY\x07', 0, 1, '\x1b]66;w=1;X\x07'),
+ # BS forces painter path; text sizing fully visible
+ ('ab\b\b\x1b]66;w=2;XY\x07', 0, 4, '\x1b]66;w=2;XY\x07'),
+ # Painter path with partial text sizing overlap (exercises _text_sizing_clip_painter)
+ ('\ra\x1b]66;s=2;BC\x07', 0, 3, 'a\x1b]66;s=2;B\x07'),
+ # Painter path: text sizing scaled partial overlap with fillchar
+ ('\r\x1b]66;s=3;ABC\x07', 1, 6, ' \x1b]66;s=3;B\x07'),
+ # CSI movement + text sizing fully visible
+ ('ab\x1b[2D\x1b]66;w=2;XY\x07', 0, 4, '\x1b]66;w=2;XY\x07'),
+ # Painter path: text sizing entirely outside clip window (before start)
+ ('\r\x1b]66;w=2;XY\x07', 2, 4, ''),
+ # CR + text sizing with auto-width (w=0), fully visible
+ ('\ra\x1b]66;s=2;BC\x07', 0, 5, 'a\x1b]66;s=2;BC\x07'),
+ # Painter path: padding when w=N has more units than graphemes
+ ('\r\x1b]66;w=3;A\x07', 0, 2, '\x1b]66;w=2;A\x07'),
+ # Painter path: text sizing with unit entirely before clip window (skip path)
+ ('\r\x1b]66;s=2;ABCD\x07', 4, 8, '\x1b]66;s=2;CD\x07'),
+])
+def test_clip_text_sizing_painter(text, start, end, expected):
+ """Test clip() with text sizing sequences in the cursor-movement (painter) path."""
+ assert repr(clip(text, start, end)) == repr(expected)
diff --git a/contrib/python/wcwidth/py3/tests/test_textwrap.py b/contrib/python/wcwidth/py3/tests/test_textwrap.py
index 094c8e56725..33da72a4fa9 100644
--- a/contrib/python/wcwidth/py3/tests/test_textwrap.py
+++ b/contrib/python/wcwidth/py3/tests/test_textwrap.py
@@ -1,4 +1,5 @@
"""Tests for sequence-aware text wrapping functions."""
+
# std imports
import sys
import platform
@@ -76,17 +77,14 @@ def _colorize(text):
)
-EDGE_CASES = [
[email protected]('text,w,expected', [
('', 10, []),
(' ', 10, []),
('\u5973', 0, ['\u5973']),
('\u5973', 1, ['\u5973']),
(ZWJ_FAMILY, 1, [ZWJ_FAMILY]),
(HANGUL_GA, 1, [HANGUL_GA]),
-]
-
-
[email protected]('text,w,expected', EDGE_CASES)
+])
def test_wrap_edge_cases(text, w, expected):
assert wrap(text, w) == expected
@@ -95,28 +93,22 @@ def test_wrap_initial_indent():
assert wrap('hello world', 10, initial_indent='> ') == ['> hello', 'world']
-LONG_WORD_CASES = [
[email protected]('text,w,break_long,expected', [
('abcdefghij', 3, True, ['abc', 'def', 'ghi', 'j']),
('abcdefghij', 3, False, ['abcdefghij']),
-]
-
-
[email protected]('text,w,break_long,expected', LONG_WORD_CASES)
+])
def test_wrap_long_words(text, w, break_long, expected):
assert wrap(text, w, break_long_words=break_long) == expected
-HYPHEN_LONG_WORD_CASES = [
[email protected]('text,w,break_hyphens,propagate,expected', [
('a-b-c-d', 3, True, True, ['a-', 'b-', 'c-d']),
('a-b-c-d', 3, False, True, ['a-b', '-c-', 'd']),
('---', 2, True, True, ['--', '-']),
('a---b', 2, True, True, ['a-', '--', 'b']),
('a-\x1b[31mb', 2, True, True, ['a-\x1b[31m\x1b[0m', '\x1b[31mb\x1b[0m']),
('a-\x1b[31mb', 2, True, False, ['a-\x1b[31m', 'b']),
-]
-
-
[email protected]('text,w,break_hyphens,propagate,expected', HYPHEN_LONG_WORD_CASES)
+])
def test_wrap_hyphen_long_words(text, w, break_hyphens, propagate, expected):
assert wrap(text, w, break_on_hyphens=break_hyphens, propagate_sgr=propagate) == expected
@@ -182,7 +174,7 @@ def test_wrap_multiline_matches_stdlib():
assert wrap(given, 30) == textwrap.wrap(given, 30)
-UNICODE_CASES = [
[email protected]('text,w,expected', [
# CJK (2 cells each)
('\u4e2d\u6587\u5b57\u7b26', 4, ['\u4e2d\u6587', '\u5b57\u7b26']),
('\u4e2d\u6587\u5b57', 5, ['\u4e2d\u6587', '\u5b57']),
@@ -192,18 +184,14 @@ UNICODE_CASES = [
(f'{FAMILY_ZWJ} ab', 4, [FAMILY_ZWJ, 'ab']),
(f'{SMILEY_VS16} ab', 3, [SMILEY_VS16, 'ab']),
('\U0001F469\U0001F467\U0001F466', 4, ['\U0001F469\U0001F467', '\U0001F466']),
-]
-
-
[email protected]('text,w,expected', UNICODE_CASES)
+])
def test_wrap_unicode(benchmark, text, w, expected):
kwargs = {'break_on_hyphens': False} if '-' in text else {}
result = benchmark(wrap, text, w, **kwargs)
assert result == expected
-# Escape sequence preservation (with propagate_sgr=True default)
-SEQUENCE_CASES = [
[email protected]('text,w,expected', [
# SGR sequences propagated across lines
(f'{SGR_RED}red{SGR_RESET} blue', 4, [f'{SGR_RED}red{SGR_RESET}', 'blue']),
# SGR at end of line propagates to next line
@@ -221,43 +209,36 @@ SEQUENCE_CASES = [
# Sequences in long word breaking - red starts after 'x', continues across lines
('x\x1b[31mabcdefghij\x1b[0m', 3,
['x\x1b[31mab\x1b[0m', '\x1b[31mcde\x1b[0m', '\x1b[31mfgh\x1b[0m', '\x1b[31mij\x1b[0m']),
- # Lone ESC - not a valid SGR sequence, stays with preceding text
- ('abc\x1bdefghij', 3, ['abc\x1b', 'def', 'ghi', 'j']),
-]
-
-SEQUENCE_CASES_NO_PROPAGATE = [
- (f'hello{SGR_RED} world', 6, [f'hello{SGR_RED}', 'world']),
- ('x\x1b[31mabcdefghij\x1b[0m', 3, ['x\x1b[31mab', 'cde', 'fgh', 'ij\x1b[0m']),
-]
-
-
[email protected]('text,w,expected', SEQUENCE_CASES)
+ # Fs sequence (ESC d) - zero-width, stays with preceding text
+ ('abc\x1bdefghij', 3, ['abc\x1bd', 'efg', 'hij']),
+])
def test_wrap_sequences(benchmark, text, w, expected):
+ """Escape sequence preservation (with propagate_sgr=True default)."""
assert benchmark(wrap, text, w) == expected
[email protected]('text,w,expected', SEQUENCE_CASES_NO_PROPAGATE)
[email protected]('text,w,expected', [
+ (f'hello{SGR_RED} world', 6, [f'hello{SGR_RED}', 'world']),
+ ('x\x1b[31mabcdefghij\x1b[0m', 3, ['x\x1b[31mab', 'cde', 'fgh', 'ij\x1b[0m']),
+]
+)
def test_wrap_sequences_no_propagate(text, w, expected):
result = wrap(text, w, propagate_sgr=False)
assert result == expected
-# Mixed: sequences + unicode
-MIXED_CASES = [
[email protected]('text,w,expected', [
(f'{SGR_RED}\u4e2d\u6587{SGR_RESET} ab', 5, [f'{SGR_RED}\u4e2d\u6587{SGR_RESET}', 'ab']),
(f'{SGR_RED}{FAMILY_ZWJ}{SGR_RESET} ab', 4, [f'{SGR_RED}{FAMILY_ZWJ}{SGR_RESET}', 'ab']),
(f'{SGR_BOLD}\u4e2d{SGR_RESET}y z', 4, [f'{SGR_BOLD}\u4e2d{SGR_RESET}y', 'z']),
-]
-
-
[email protected]('text,w,expected', MIXED_CASES)
+])
def test_wrap_mixed(benchmark, text, w, expected):
+ """Test mixed sequences + unicode."""
result = benchmark(wrap, text, w)
assert result == expected
-# Tabsize with wide characters - tests column alignment with different cell widths
-TABSIZE_WIDE_CASES = [
[email protected]('text,w,tabsize,expected', [
# CJK (2 cells) + tab: tabsize=4, '\u4e2d' is 2 cols, tab expands to col 4
('\u4e2d\ta b', 6, 4, ['\u4e2d a', 'b']),
# CJK + tab with tabsize=8: '\u4e2d' is 2 cols, tab expands to col 8
@@ -268,10 +249,7 @@ TABSIZE_WIDE_CASES = [
('\u4e2d\u6587\ta', 8, 4, ['\u4e2d\u6587 a']),
# ASCII + tab + CJK: 'a' is 1 col, tab to 4 (3 spaces), CJK is 2 cols
('a\t\u4e2d b', 8, 4, ['a \u4e2d b']),
-]
-
-
[email protected]('text,w,tabsize,expected', TABSIZE_WIDE_CASES)
+])
@pytest.mark.skipif(
platform.python_implementation() == 'PyPy' and sys.version_info < (3, 9),
reason='PyPy 3.8 str.expandtabs() counts UTF-8 bytes instead of characters'
@@ -286,7 +264,8 @@ OSC_END_ST = '\x1b]8;;\x1b\\'
OSC_START_BEL = '\x1b]8;;http://example.com\x07'
OSC_END_BEL = '\x1b]8;;\x07'
-HYPERLINK_WORD_BOUNDARY_CASES = [
+
[email protected]('text,w,expected', [
( # standard, ST-variant,
f'{OSC_START_ST}link{OSC_END_ST}more',
5,
@@ -408,18 +387,14 @@ HYPERLINK_WORD_BOUNDARY_CASES = [
'\x1b]8;foo=bar:id=mylink;http://example.com\x1b\\Click\x1b]8;;\x1b\\',
'\x1b]8;foo=bar:id=mylink;http://example.com\x1b\\here\x1b]8;;\x1b\\',
],
- ),
-]
-
-
[email protected]('text,w,expected', HYPERLINK_WORD_BOUNDARY_CASES)
+ ),])
def test_wrap_hyperlink_word_boundary(text, w, expected):
"""OSC hyperlink sequences should act as word boundaries."""
result = wrap(text, w)
assert result == expected
-PLACEHOLDER_STDLIB_CASES = [
[email protected]('text,kwargs', [
('The quick brown fox jumps over the lazy dog',
{'width': 10, 'max_lines': 3, 'placeholder': '...'}),
('1234567890 1234567890 extra',
@@ -444,10 +419,7 @@ PLACEHOLDER_STDLIB_CASES = [
{'width': 10, 'subsequent_indent': ' ', 'max_lines': 2, 'placeholder': '...'}),
('hello world foo bar',
{'width': 10, 'initial_indent': '> ', 'max_lines': 2, 'placeholder': '...'}),
-]
-
-
[email protected]('text,kwargs', PLACEHOLDER_STDLIB_CASES)
+])
def test_wrap_max_lines_matches_stdlib(text, kwargs):
expected = _adjust_stdlib_result(textwrap.wrap(text, **kwargs), kwargs)
assert wrap(text, **kwargs) == expected
@@ -460,7 +432,7 @@ def test_wrap_placeholder_too_large():
textwrap.wrap('fox', width=1, max_lines=3, placeholder='...')
-MAX_LINES_SEQUENCE_CASES = [
[email protected]('text,w,ml,ph,expected', [
(f'{SGR_RED}hello world foo bar{SGR_RESET}',
8, 2, '...', [f'{SGR_RED}hello{SGR_RESET}', f'{SGR_RED}world...{SGR_RESET}']),
(f'{SGR_RED}hello{SGR_RESET} world foo',
@@ -470,10 +442,7 @@ MAX_LINES_SEQUENCE_CASES = [
('\u4e2d\u6587 \u5b57\u7b26 hello', 5, 1, '~', ['\u4e2d\u6587~']),
('\u4e2d\u6587 \u5b57\u7b26 hello world', 5, 2, '~', ['\u4e2d\u6587', '\u5b57\u7b26~']),
('\u4e2d\u6587\u5b57\u7b26 hello', 12, 1, '...', ['\u4e2d\u6587\u5b57\u7b26...']),
-]
-
-
[email protected]('text,w,ml,ph,expected', MAX_LINES_SEQUENCE_CASES)
+])
def test_wrap_max_lines_sequences(text, w, ml, ph, expected):
assert wrap(text, w, max_lines=ml, placeholder=ph) == expected
@@ -494,19 +463,14 @@ def test_wrap_max_lines_hyperlink_close_on_prev_line():
assert result == [f'{OSC_START_ST}ab{OSC_END_ST}...']
-# -- expand_tabs, replace_whitespace, fix_sentence_endings --
-
-STDLIB_PARAM_CASES = [
[email protected]('text,kwargs', [
('hello\tworld', {'width': 20, 'expand_tabs': False, 'replace_whitespace': False}),
('hello\tworld foo\tbar baz', {'width': 12, 'expand_tabs': False, 'tabsize': 8}),
('hello\nworld', {'width': 20, 'replace_whitespace': False}),
('a\t b\n c', {'width': 20, 'replace_whitespace': False}),
('Hello world. This is a test. More text.', {'width': 20, 'fix_sentence_endings': True}),
('Dr. Smith went to Washington. He left.', {'width': 20, 'fix_sentence_endings': True}),
-]
-
-
[email protected]('text,kwargs', STDLIB_PARAM_CASES)
+])
def test_wrap_stdlib_params(text, kwargs):
assert wrap(text, **kwargs) == textwrap.wrap(text, **kwargs)
@@ -521,3 +485,9 @@ def test_wrap_replace_whitespace_false_newlines_zero_width():
"""Newlines have zero display width, so more text fits per line than stdlib."""
assert wrap('hello\nworld foo\nbar', 10, replace_whitespace=False) == [
'hello\nworld', 'foo\nbar']
+
+
+def test_wrap_bare_esc():
+ """Bare ESC not part of a recognized sequence is treated as zero-width."""
+ assert wrap('ab\x1bcd ef', 5) == ['ab\x1bcd', 'ef']
+ assert wrap('ab\x1b\x00cdef', 3) == ['ab\x1b\x00c', 'def']
diff --git a/contrib/python/wcwidth/py3/tests/test_ucslevel.py b/contrib/python/wcwidth/py3/tests/test_ucslevel.py
index 979cfe0fe8d..9aea2c9b73c 100644
--- a/contrib/python/wcwidth/py3/tests/test_ucslevel.py
+++ b/contrib/python/wcwidth/py3/tests/test_ucslevel.py
@@ -1,4 +1,5 @@
"""Unicode version level tests for wcwidth."""
+
# local
import wcwidth
diff --git a/contrib/python/wcwidth/py3/tests/test_width.py b/contrib/python/wcwidth/py3/tests/test_width.py
index 67d7b017258..8e43b47b1de 100644
--- a/contrib/python/wcwidth/py3/tests/test_width.py
+++ b/contrib/python/wcwidth/py3/tests/test_width.py
@@ -1,10 +1,11 @@
"""Tests for width() function."""
+
# 3rd party
import pytest
# local
import wcwidth
-from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN
+from wcwidth.escape_sequences import ZERO_WIDTH_PATTERN, INDETERMINATE_EFFECT_SEQUENCE
BASIC_WIDTH_CASES = [
('', 0, 'empty'),
@@ -29,7 +30,7 @@ IGNORE_MODE_CASES = [
('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'),
('hello\x80world', 10, 'C1_control'),
('\x1b', 0, 'lone_ESC'),
- ('a\x1bb', 2, 'lone_ESC_between'),
+ ('a\x1bb', 1, 'fs_sequence_between'),
]
@@ -45,8 +46,10 @@ STRICT_RAISES_CASES = [
('hello\x7fworld', 'DEL'),
('hello\x80world', 'C1_control'),
('hello\nworld', 'LF'),
+ ('hello\rworld', 'CR'),
('hello\x1b[Hworld', 'cursor_home'),
('hello\x1b[Aworld', 'cursor_up'),
+ ('hello\x1b[5Gworld', 'hpa'),
]
@@ -61,11 +64,11 @@ STRICT_ALLOWED_CASES = [
('hello\x07world', 10, 'BEL'),
('hello\x00world', 10, 'NUL'),
('abc\bd', 3, 'backspace'),
- ('abc\rxy', 3, 'CR'),
('\x1b[31mred\x1b[0m', 3, 'SGR_sequence'),
('a\x1b[2Cb', 4, 'cursor_right'),
+ ('ab\x1b[Db', 2, 'cursor_left'),
('\x1b', 0, 'lone_ESC'),
- ('a\x1bb', 2, 'lone_ESC_between'),
+ ('a\x1bb', 1, 'fs_sequence_between'),
('\x1b!', 1, 'ESC_unrecognized'),
]
@@ -88,6 +91,7 @@ STRICT_INDETERMINATE_SEQUENCES = [
('\x1b[1X', 'erase_chars'),
('\x1b[1S', 'parm_index'),
('\x1b[1T', 'parm_rindex'),
+ ('\x1bc', 'full_reset'),
]
@@ -106,6 +110,11 @@ PARSE_MODE_CASES = [
('abcd\x1b[2De', 4, 'cursor_left'),
('\x1b[31mred\x1b[0m', 3, 'SGR'),
('ab\x1b[Hcd', 4, 'indeterminate'),
+ ('def\x1b[3Dabc', 3, 'cursor_left_overwrite'),
+ ('def\x1b[10Dabc', 3, 'cursor_left_past_start'),
+ ('abc\x1b[5Gde', 6, 'hpa_parse'),
+ ('abc\x1b[Gde', 3, 'hpa_no_param'),
+ ('\x1b[5Gabc', 7, 'hpa_before_text'),
]
@@ -190,29 +199,26 @@ def test_vs16_selector():
def test_zwj_with_non_emoji_chars():
- """ZWJ with non-emoji characters and trailing VS16."""
- # ZWJ (Zero Width Joiner) skips both itself and the following character, treating them as a
- # failed emoji ZWJ sequence. When followed by VS16, the VS16 should NOT apply to the earlier
- # emoji because VS16 must immediately follow the character it modifies.
- #
- # In the full parse loop, VS16 checks `last_measured_idx == idx - 1` (immediate adjacency).
- # The ZWJ+char skip means VS16 is not adjacent to the smiley, so VS16 has no effect.
- #
+ """
+ ZWJ with non-emoji characters and trailing VS16.
+
+ These are invalid Unicode sequences (ZWJ followed by non-emoji), so behavior is
+ implementation-defined. The emoji base (smiley, width 1) is narrow, and VS16 looks back to it
+ across the ZWJ-consumed characters, adding 1 cell for a total width of 2.
+ """
# Control test,
assert wcwidth.width("\u263A\uFE0F") == 2 # smiley + VS16 = 2
- # ZWJ followed by non-emoji, VS16 does not apply (not adjacent)
- assert wcwidth.width("\u263A\u200Da\uFE0F") == 1
- assert wcwidth.width("\u263A\u200Dx\uFE0F") == 1
- assert wcwidth.width("\u263A\u200Da\u200Db\uFE0F") == 1
+ # ZWJ followed by non-emoji: VS16 applies to the smiley base
+ assert wcwidth.width("\u263A\u200Da\uFE0F") == 2
+ assert wcwidth.width("\u263A\u200Dx\uFE0F") == 2
+ assert wcwidth.width("\u263A\u200Da\u200Db\uFE0F") == 2
# ZWJ at end of string
assert wcwidth.width("\u263A\u200D") == 1 # smiley + ZWJ = 1
# Long strings (>20 chars) use fast path which routes to wcswidth().
- # wcswidth() has more lenient VS16 handling, causing VS16 to incorrectly apply (!)
- # Multiply by 10 to exceed threshold: "\u263A\u200Da\uFE0F" (4 chars) * 10 = 40 chars
- assert wcwidth.width("\u263A\u200Da\uFE0F" * 10) == 20 # (smiley(1) + ZWJ+a(0) + VS16(+1)) * 10 (!)
+ assert wcwidth.width("\u263A\u200Da\uFE0F" * 10) == 20
def test_vs16_after_control_chars():
@@ -228,10 +234,9 @@ def test_vs16_after_control_chars():
assert wcwidth.width("\u263A\x0d\uFE0F") == 1 # smiley(1) + CR(reset) + VS16(0), extent=1
# Long strings (>20 chars) use fast path which routes to wcswidth().
- # wcswidth() has more lenient VS16 handling (`last_measured_idx >= 0` vs `== idx - 1`),
- # causing VS16 to incorrectly apply when separated by control chars (!)
+ # In ignore mode, BEL is stripped, so VS16 is adjacent to the smiley and applies correctly.
# Multiply by 10 to exceed threshold
- assert wcwidth.width(("\u263A\x07\uFE0F") * 10) == 20 # (smiley(1) + BEL(0) + VS16(+1)) * 10 (!)
+ assert wcwidth.width(("\u263A\x07\uFE0F") * 10) == 20 # (smiley(1) + BEL-stripped(0) + VS16(+1)) * 10
def test_width_long_horizontal_fastpath():
@@ -266,6 +271,42 @@ def test_carriage_return_resets_column():
assert wcwidth.width('abc\rde') == 3
+def test_carriage_return_strict_raises():
+ """CR in strict mode raises ValueError (indeterminate starting column)."""
+ with pytest.raises(ValueError, match='Horizontal movement'):
+ wcwidth.width('hello\rworld', control_codes='strict')
+
+
+def test_hpa_parse_best_effort():
+ """HPA in parse mode assumes string begins at column 0."""
+ assert wcwidth.width('abc\x1b[5Gde') == 6
+ assert wcwidth.width('abc\x1b[Gde') == 3
+ assert wcwidth.width('\x1b[10Ghi') == 11
+
+
+def test_hpa_strict_raises():
+ """HPA in strict mode raises ValueError (indeterminate starting column)."""
+ with pytest.raises(ValueError, match='horizontal position'):
+ wcwidth.width('abc\x1b[5Gde', control_codes='strict')
+
+
+def test_cursor_left_strict_out_of_bounds():
+ """Cursor-left beyond string start raises ValueError in strict mode."""
+ with pytest.raises(ValueError, match='Cursor left movement'):
+ wcwidth.width('a\x1b[5Da', control_codes='strict')
+
+
+def test_cursor_left_out_of_bounds_parse_no_raise():
+ """Cursor-left beyond string start is silently clamped in parse mode."""
+ assert wcwidth.width('a\x1b[5Da') == 1
+ assert wcwidth.width('abc\x1b[99Ddef') == 3 # 99D clamped to col 0, then a,b,c overwritten by d,e,f
+
+
+def test_cursor_left_out_of_bounds_ignore_mode():
+ """Cursor-left beyond string start is zero-width in ignore mode."""
+ assert wcwidth.width('a\x1b[5Da', control_codes='ignore') == 2
+
+
def test_iter_sequences_lone_esc():
"""Lone ESC is yielded as a sequence."""
assert list(wcwidth.iter_sequences('\x1b')) == [('\x1b', True)]
@@ -449,3 +490,88 @@ def test_fitzpatrick_modifier_standalone_width():
"""Standalone Fitzpatrick modifier, however, is wide character in width()."""
result = wcwidth.width('\U0001F3FB')
assert result == 2
+
+
+FS_SEQUENCE_CASES = [
+ ('\x1bc', 'ris'),
+ ('\x1bl', 'memory_lock'),
+ ('\x1bm', 'memory_unlock'),
+ ('\x1bn', 'ls2'),
+ ('\x1bo', 'ls3'),
+ ('\x1b|', 'ls3r'),
+ ('\x1b}', 'ls2r'),
+ ('\x1b~', 'ls1r'),
+]
+
+
[email protected]('seq,name', FS_SEQUENCE_CASES)
+def test_fs_sequences_matched(seq, name):
+ """Fs (independent function) sequences are matched as zero-width."""
+ segments = list(wcwidth.iter_sequences(seq))
+ assert segments == [(seq, True)]
+ assert wcwidth.width(seq) == 0
+
+
+FP_SEQUENCE_CASES = [
+ ('\x1b7', 'decsc'),
+ ('\x1b8', 'decrc'),
+ ('\x1b=', 'deckpam'),
+ ('\x1b>', 'deckpnm'),
+ ('\x1b0', 'fp_0'),
+ ('\x1b1', 'fp_1'),
+ ('\x1b9', 'fp_9'),
+]
+
+
[email protected]('seq,name', FP_SEQUENCE_CASES)
+def test_fp_sequences_matched(seq, name):
+ """Fp (private use) sequences are matched as zero-width."""
+ segments = list(wcwidth.iter_sequences(seq))
+ assert segments == [(seq, True)]
+ assert wcwidth.width(seq) == 0
+
+
+NF_SEQUENCE_CASES = [
+ ('\x1b F', 's7c1t'),
+ ('\x1b G', 's8c1t'),
+ ('\x1b#3', 'decdhl_top'),
+ ('\x1b#4', 'decdhl_bottom'),
+ ('\x1b#5', 'decswl'),
+ ('\x1b#6', 'decdwl'),
+ ('\x1b#8', 'decaln'),
+ ('\x1b%G', 'utf8_designate'),
+ ('\x1b%@', 'iso2022_return'),
+]
+
+
[email protected]('seq,name', NF_SEQUENCE_CASES)
+def test_nf_sequences_matched(seq, name):
+ """NF (multi-byte) escape sequences are matched as zero-width."""
+ segments = list(wcwidth.iter_sequences(seq))
+ assert segments == [(seq, True)]
+ assert wcwidth.width(seq) == 0
+
+
+def test_fs_sequence_embedded_in_text():
+ """Fs sequence surrounded by text is correctly segmented."""
+ segments = list(wcwidth.iter_sequences('abc\x1bcdef'))
+ assert segments == [('abc', False), ('\x1bc', True), ('def', False)]
+ assert wcwidth.width('abc\x1bcdef') == 6
+
+
+def test_nf_sequence_embedded_in_text():
+ """NF sequence surrounded by text is correctly segmented."""
+ segments = list(wcwidth.iter_sequences('abc\x1b#8def'))
+ assert segments == [('abc', False), ('\x1b#8', True), ('def', False)]
+ assert wcwidth.width('abc\x1b#8def') == 6
+
+
+def test_screen_title_sequences():
+ """Screen/tmux title sequence ESC k hello ST."""
+ segments = list(wcwidth.iter_sequences('\x1bkhello\x1b\\'))
+ assert segments[0] == ('\x1bk', True)
+
+
+def test_ris_indeterminate():
+ """RIS (ESC c) is flagged as indeterminate effect."""
+ assert INDETERMINATE_EFFECT_SEQUENCE.match('\x1bc')
diff --git a/contrib/python/wcwidth/py3/wcwidth/__init__.py b/contrib/python/wcwidth/py3/wcwidth/__init__.py
index 400c8a61935..7c893e9f422 100644
--- a/contrib/python/wcwidth/py3/wcwidth/__init__.py
+++ b/contrib/python/wcwidth/py3/wcwidth/__init__.py
@@ -1,43 +1,52 @@
"""
-Wcwidth module.
+Python 'wcwidth' module.
https://github.com/jquast/wcwidth
"""
-# re-export all functions & definitions, even private ones, from top-level
-# module path, to allow for 'from wcwidth import _private_func'. Of course,
-# user beware that any _private functions or variables not exported by __all__
-# may disappear or change signature at any future version.
+
+# Re-export common and outermost functions & definitions, even a few private
+# ones: some for convenience, others for legacy. Only the items in __all__ are
+# documented as public API.
# local
-from .wcwidth import ZERO_WIDTH # noqa
-from .wcwidth import (WIDE_EASTASIAN,
- AMBIGUOUS_EASTASIAN,
- VS16_NARROW_TO_WIDE,
- clip,
- ljust,
- rjust,
- width,
- center,
- wcwidth,
- wcswidth,
- list_versions,
- iter_sequences,
- strip_sequences,
- _wcmatch_version,
- _wcversion_value)
+from ._clip import clip
+from .align import ljust, rjust, center
+from ._width import width
from .bisearch import bisearch as _bisearch
-from .grapheme import grapheme_boundary_before # noqa
-from .grapheme import iter_graphemes, iter_graphemes_reverse
+from .grapheme import iter_graphemes, iter_graphemes_reverse, grapheme_boundary_before
from .textwrap import SequenceTextWrapper, wrap
+from ._wcswidth import wcswidth
+from .hyperlink import Hyperlink, HyperlinkParams
from .sgr_state import propagate_sgr
+from .table_vs16 import VS16_NARROW_TO_WIDE
+from .table_wide import WIDE_EASTASIAN
+from .table_zero import ZERO_WIDTH
+from .text_sizing import TextSizing, TextSizingParams
+from .table_ambiguous import AMBIGUOUS_EASTASIAN
+from .escape_sequences import iter_sequences, strip_sequences
+from .unicode_versions import list_versions
+
+# Pre-import the legacy submodule so that sys.modules['wcwidth.wcwidth'] is
+# populated during package initialization. This matches the 0.6.0 behavior
+# where ``from .wcwidth import wcwidth`` would have already loaded the
+# submodule. Without this, a later ``import wcwidth.wcwidth`` triggers
+# on-disk file discovery which rebinds wcwidth.wcwidth from the function to
+# the module object.
+#
+# NOTE: this sort order is important for legacy import API compatibility before release 0.7.0
+from . import wcwidth as _wcwidth_module # isort:skip
+from ._wcwidth import wcwidth, _wcmatch_version, _wcversion_value # isort:skip
+
# The __all__ attribute defines the items exported from statement,
# 'from wcwidth import *', but also to say, "This is the public API".
__all__ = ('wcwidth', 'wcswidth', 'width', 'iter_sequences', 'iter_graphemes',
'iter_graphemes_reverse', 'grapheme_boundary_before',
'ljust', 'rjust', 'center', 'wrap', 'clip', 'strip_sequences',
- 'list_versions', 'propagate_sgr')
+ 'list_versions', 'propagate_sgr', 'Hyperlink', 'HyperlinkParams',
+ 'TextSizing', 'TextSizingParams')
# Using 'hatchling', it does not seem to provide the pyproject.toml nicety, "dynamic = ['version']"
# like flit_core, maybe there is some better way but for now we have to duplicate it in both places
-__version__ = '0.6.0'
+# NOTE: the version is hard-coded below; no installed-distribution lookup is performed here
+__version__ = '0.7.0' # don't forget to also update pyproject.toml:version
diff --git a/contrib/python/wcwidth/py3/wcwidth/_clip.py b/contrib/python/wcwidth/py3/wcwidth/_clip.py
new file mode 100644
index 00000000000..df67b636746
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/_clip.py
@@ -0,0 +1,809 @@
+"""This is a python implementation of clip()."""
+from __future__ import annotations
+
+# std imports
+import enum
+from itertools import islice
+
+from typing import Literal, Callable, Optional, NamedTuple
+
+# local
+from ._width import width
+from .grapheme import iter_graphemes
+from .hyperlink import Hyperlink, HyperlinkParams
+from .sgr_state import (_SGR_STATE_DEFAULT,
+ _SGRState,
+ _sgr_state_update,
+ _sgr_state_is_active,
+ _sgr_state_to_sequence)
+from .text_sizing import TextSizing, TextSizingParams
+from .escape_sequences import (_SEQUENCE_CLASSIFY,
+ _HORIZONTAL_CURSOR_MOVEMENT,
+ INDETERMINATE_EFFECT_SEQUENCE)
+
+
+class _HyperlinkAction(enum.Enum):
+ """Outcome of processing an OSC 8 hyperlink unit."""
+
+ NO_CLOSE = enum.auto() # open sequence without matching close
+ EMPTY = enum.auto() # hyperlink with no visible inner text
+ OUTSIDE = enum.auto() # hyperlink entirely outside the clip window
+ VISIBLE = enum.auto() # hyperlink overlaps the clip window
+
+
+class _HyperlinkResult(NamedTuple):
+ """
+ Result of processing an OSC 8 hyperlink.
+
+ Only the fields relevant to each action are populated.
+ """
+
+ action: _HyperlinkAction
+ close_end: int = 0
+ inner_width: int = 0
+ open_seq: str = ''
+ clipped_inner: str = ''
+ close_seq: str = ''
+ clipped_width: int = 0
+ hl_col_end: int = 0
+
+
+def _apply_sgr_wrap(result: str, captured_style: Optional[_SGRState]) -> str:
+ """
+ Apply SGR prefix/suffix around *result*.
+
+ If an SGR state was captured at the first visible character, prefix the result with the
+ corresponding SGR sequence and suffix with a reset if any styles are active.
+ """
+ if captured_style is not None:
+ if prefix := _sgr_state_to_sequence(captured_style):
+ result = prefix + result
+ if _sgr_state_is_active(captured_style):
+ result += '\x1b[0m'
+ return result
+
+
+def _process_hyperlink(
+ text: str,
+ start: int,
+ end: int,
+ fillchar: str,
+ tabsize: int,
+ ambiguous_width: int,
+ control_codes: Literal['parse', 'strict', 'ignore'],
+ *,
+ params: HyperlinkParams,
+ match_end: int,
+ col: int,
+) -> _HyperlinkResult:
+ """
+ Process an OSC 8 hyperlink unit.
+
+ Finds the matching close sequence, measures the inner text width, and determines whether the
+ hyperlink is empty, outside the clip window, or visible (requiring inner-text clipping).
+ """
+ # pylint: disable=too-many-locals,too-many-positional-arguments
+ close_start, close_end = Hyperlink.find_close(text, match_end)
+ if (close_start, close_end) == (-1, -1):
+ return _HyperlinkResult(_HyperlinkAction.NO_CLOSE)
+ inner_text = text[match_end:close_start]
+ inner_width = width(
+ inner_text, control_codes=control_codes,
+ tabsize=tabsize, ambiguous_width=ambiguous_width,
+ )
+
+ if inner_width == 0:
+ return _HyperlinkResult(_HyperlinkAction.EMPTY, close_end=close_end)
+
+ hl_col_end = col + inner_width
+
+ if hl_col_end <= start or col >= end:
+ return _HyperlinkResult(_HyperlinkAction.OUTSIDE, close_end=close_end,
+ inner_width=inner_width)
+
+ inner_clip_start = max(0, start - col)
+ inner_clip_end = end - col
+
+ clipped_inner = clip(
+ inner_text, inner_clip_start, inner_clip_end,
+ fillchar=fillchar, tabsize=tabsize,
+ ambiguous_width=ambiguous_width,
+ propagate_sgr=False,
+ control_codes=control_codes,
+ )
+
+ clipped_width = width(
+ clipped_inner, control_codes=control_codes,
+ tabsize=tabsize, ambiguous_width=ambiguous_width,
+ )
+
+ return _HyperlinkResult(
+ _HyperlinkAction.VISIBLE,
+ close_end=close_end,
+ inner_width=inner_width,
+ open_seq=params.make_open(),
+ clipped_inner=clipped_inner,
+ close_seq=params.make_close(),
+ clipped_width=clipped_width,
+ hl_col_end=hl_col_end,
+ )
+
+
+def _reconstruct_painter(
+ cells: dict[int, tuple[str, int]],
+ sequences: list[tuple[int, int, str]],
+ start: int,
+ end: int,
+ fillchar: str,
+) -> str:
+ """
+ Reconstruct the output string from painter's algorithm state.
+
+ Walks columns left-to-right, interleaving escape sequences and cell content, filling gaps with
+ *fillchar*.
+ """
+ # pylint: disable=too-many-locals
+ # Group and sort sequences by column, preserving insertion order within each.
+ seqs_by_col: dict[int, list[tuple[int, str]]] = {}
+ for col_pos, order, seq_text in sequences:
+ seqs_by_col.setdefault(col_pos, []).append((order, seq_text))
+ for entries in seqs_by_col.values():
+ entries.sort()
+
+ max_cell_col = max(cells.keys()) if cells else -1
+ max_seq_col = max(seqs_by_col.keys()) if seqs_by_col else -1
+ max_col = max(max_cell_col, max_seq_col)
+
+ parts: list[str] = []
+ walk_col = 0
+ col_limit = min(max_col, end)
+ while walk_col <= col_limit:
+ # Emit any sequences anchored at this column.
+ for _, seq_text in seqs_by_col.get(walk_col, ()):
+ parts.append(seq_text)
+
+ if walk_col >= end:
+ walk_col += 1
+ continue
+
+ if walk_col in cells:
+ cell_text, cell_w = cells[walk_col]
+ parts.append(cell_text)
+ walk_col += cell_w
+ else:
+ if start <= walk_col <= max_cell_col:
+ parts.append(fillchar)
+ walk_col += 1
+
+ # Emit sequences anchored beyond the visible region.
+ for c in sorted(seqs_by_col.keys()):
+ if c > col_limit:
+ for _, seq_text in seqs_by_col[c]:
+ parts.append(seq_text)
+
+ return ''.join(parts)
+
+
+def _clip_simple(
+ text: str,
+ start: int,
+ end: int,
+ *,
+ propagate_sgr: bool,
+ ambiguous_width: int,
+ fillchar: str,
+ tabsize: int,
+ strict: bool,
+ control_codes: Literal['parse', 'strict', 'ignore'],
+) -> tuple[str, Optional[_SGRState]]:
+ """
+ Clip text without cursor movement (simple append-to-output path).
+
+ Returns ``(result, captured_style)``. The caller applies SGR wrapping.
+ """
+ # pylint: disable=too-complex,too-many-locals,too-many-branches,too-many-statements
+ # pylint: disable=too-many-nested-blocks
+ # code length and complexity traded for performance, to allow this to be used as a "hot path"
+
+ output: list[str] = []
+ col = 0
+ idx = 0
+ # captured_style is a frozen snapshot of current_style taken at the first
+ # visible character emitted within the clip window (start, end). It stays
+ # None until that point. current_style, by contrast, is continuously
+ # updated by SGR sequences throughout the scan. The snapshot is what the
+ # caller uses to wrap the result in the correct SGR state.
+ #
+ # When propagate_sgr is False, current_style (and therefore captured_style)
+ # remain None, and SGR sequences pass through as literal text.
+ captured_style: Optional[_SGRState] = None
+ current_style = _SGR_STATE_DEFAULT if propagate_sgr else None
+
+ while idx < len(text):
+ char = text[idx]
+
+ # Early exit: past visible region.
+ if col >= end and char not in '\r\x08\t\x1b':
+ if captured_style is not None:
+ break
+ # propagate_sgr is always False here: with propagate_sgr=True,
+ # captured_style is set on the first visible emission in the
+ # clip window and we would have broken above. The skip-ahead
+ # optimization is only needed (and safe) when SGR tracking is off.
+ next_esc = text.find('\x1b', idx + 1)
+ if next_esc == -1:
+ break
+ idx = next_esc
+ continue
+
+ if char == '\x1b':
+ m = _SEQUENCE_CLASSIFY.match(text, idx)
+ if not m:
+ output.append(char)
+ idx += 1
+ continue
+
+ # SGR: update current_style, do not emit.
+ if m.group('sgr_params') is not None and propagate_sgr and current_style is not None:
+ current_style = _sgr_state_update(current_style, m.group())
+ idx = m.end()
+ continue
+
+ # OSC 8 hyperlink.
+ if hl_state := HyperlinkParams.parse(m.group()):
+ r = _process_hyperlink(
+ text, start, end, fillchar, tabsize, ambiguous_width,
+ control_codes,
+ params=hl_state, match_end=m.end(), col=col,
+ )
+ if r.action is _HyperlinkAction.NO_CLOSE:
+ output.append(m.group())
+ idx = m.end()
+ elif r.action is _HyperlinkAction.EMPTY:
+ idx = r.close_end
+ elif r.action is _HyperlinkAction.OUTSIDE:
+ col += r.inner_width
+ idx = r.close_end
+ else:
+ output.append(r.open_seq)
+ output.append(r.clipped_inner)
+ output.append(r.close_seq)
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ col += r.inner_width
+ idx = r.close_end
+ continue
+
+ # OSC 66 Text Sizing.
+ if (ts_meta := m.group('ts_meta')) is not None:
+ ts_text = m.group('ts_text')
+ ts_term = m.group('ts_term')
+ assert ts_text is not None and ts_term is not None
+ ts = TextSizing(
+ TextSizingParams.from_params(ts_meta, control_codes=control_codes),
+ ts_text, ts_term)
+ ts_width = ts.display_width(ambiguous_width)
+
+ if col >= start and col + ts_width <= end:
+ output.append(ts.make_sequence())
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ col += ts_width
+ elif col < end and col + ts_width > start:
+ ts_parts: list[str] = []
+
+ def _ts_write(s: str, _w: int, _col: int) -> None:
+ ts_parts.append(s)
+ col = _text_sizing_clip(
+ ts, col, start, end, fillchar, ambiguous_width,
+ _ts_write)
+ output.extend(ts_parts)
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ else:
+ col += ts_width
+ idx = m.end()
+ continue
+
+ # Indeterminate-effect sequences: raise in strict mode.
+ seq = m.group()
+ if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
+ raise ValueError(
+ f"Indeterminate cursor sequence at position {idx}, "
+ f"{seq!r}"
+ )
+
+ # Any other recognized sequence: preserve as-is.
+ output.append(seq)
+ idx = m.end()
+ continue
+
+ if char == '\t':
+ # Expand tab, filling clip window with spaces.
+ if tabsize > 0:
+ next_tab = col + (tabsize - (col % tabsize))
+ while col < next_tab:
+ if start <= col < end:
+ output.append(' ')
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ col += 1
+ else:
+ output.append('\t')
+ idx += 1
+ continue
+
+ grapheme = next(iter_graphemes(text, start=idx))
+ grapheme_w = width(grapheme, ambiguous_width=ambiguous_width)
+
+ # Emit grapheme or fillchar depending on visibility within clip window.
+ if grapheme_w == 0:
+ if start <= col < end:
+ output.append(grapheme)
+ elif col >= start and col + grapheme_w <= end:
+ output.append(grapheme)
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ elif col < end and col + grapheme_w > start:
+ output.append(fillchar * (min(end, col + grapheme_w) - max(start, col)))
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+
+ col += grapheme_w
+ idx += len(grapheme)
+
+ return ''.join(output), captured_style
+
+
+def _text_sizing_clip(
+ ts: TextSizing,
+ col: int,
+ start: int,
+ end: int,
+ fillchar: str,
+ ambiguous_width: int,
+ write_cells: Callable[[str, int, int], None],
+) -> int:
+ """
+ Emit tokens for a text-sizing (OSC 66) sequence, clipped to (start, end).
+
+ Calls *write_cells(text, width, col)* for each emitted cell or sequence. Returns new column
+ position.
+ """
+ # pylint: disable=too-many-locals,too-many-branches,too-many-positional-arguments,too-complex
+ ts_width = ts.display_width(ambiguous_width)
+
+ # Fully visible: emit entire sequence
+ if col >= start and col + ts_width <= end:
+ write_cells(ts.make_sequence(), ts_width, col)
+ return col + ts_width
+ # Fully outside: just advance column
+ if col >= end or col + ts_width <= start:
+ return col + ts_width
+
+ # Partial overlap: decompose
+ rel_start = max(0, start - col)
+ rel_end = min(end, col + ts_width) - col
+ scale = ts.params.scale
+
+ units: list[tuple[str, int]] = []
+ if ts.params.width > 0:
+ for g in islice(iter_graphemes(ts.text), ts.params.width):
+ units.append((g, scale))
+ for _ in range(ts.params.width - len(units)):
+ units.append(('', scale))
+ else:
+ for g in iter_graphemes(ts.text):
+ units.append((g, width(g, ambiguous_width=ambiguous_width) * scale))
+
+ pending_units: list[tuple[str, int]] = []
+
+ def flush(flush_col: int) -> None:
+ if not pending_units:
+ return
+ texts = [u[0] for u in pending_units]
+ total_w = sum(u[1] for u in pending_units)
+ params = TextSizingParams(
+ scale,
+ len(texts) if ts.params.width > 0 else 0,
+ ts.params.numerator, ts.params.denominator,
+ ts.params.vertical_align, ts.params.horizontal_align)
+ write_cells(
+ TextSizing(params, ''.join(texts), ts.terminator).make_sequence(),
+ total_w,
+ flush_col)
+ pending_units.clear()
+
+ flush_col_pos = col + rel_start
+ unit_pos = 0
+ for unit_text, unit_w in units:
+ unit_end = unit_pos + unit_w
+ if unit_end <= rel_start:
+ unit_pos = unit_end
+ continue
+ if unit_pos >= rel_end:
+ break
+
+ overlap = min(unit_end, rel_end) - max(unit_pos, rel_start)
+ if overlap == unit_w and unit_w > 0:
+ if not pending_units:
+ flush_col_pos = col + max(unit_pos, rel_start)
+ pending_units.append((unit_text, unit_w))
+ else:
+ flush(flush_col_pos)
+ abs_start = col + max(unit_pos, rel_start)
+ for i in range(overlap):
+ write_cells(fillchar, 1, abs_start + i)
+ unit_pos = unit_end
+
+ flush(flush_col_pos)
+ return col + ts_width
+
+
+def _clip_painter(
+ text: str,
+ start: int,
+ end: int,
+ *,
+ propagate_sgr: bool,
+ ambiguous_width: int,
+ fillchar: str,
+ tabsize: int,
+ strict: bool,
+ control_codes: Literal['parse', 'strict', 'ignore'],
+) -> tuple[str, Optional[_SGRState]]:
+ """
+ Clip text with cursor movement (painter's algorithm path).
+
+ Returns ``(result, captured_style)``. The caller applies SGR wrapping.
+ """
+ # pylint: disable=too-complex,too-many-locals,too-many-branches
+ # pylint: disable=too-many-statements,too-many-nested-blocks
+ # code length and complexity traded for performance, to allow this to be used as a "hot path"
+
+ cells: dict[int, tuple[str, int]] = {}
+ hyperlink_cells: set[int] = set()
+ sequences: list[tuple[int, int, str]] = []
+ seq_order = 0
+
+ col = 0
+ idx = 0
+ # captured_style is a frozen snapshot of current_style taken at the first
+ # visible character emitted within the clip window (start, end). It stays
+ # None until that point. current_style, by contrast, is continuously
+ # updated by SGR sequences throughout the scan.
+ #
+ # When propagate_sgr is False, current_style (and therefore captured_style)
+ # remain None, and SGR sequences pass through as literal text.
+ captured_style: Optional[_SGRState] = None
+ current_style = _SGR_STATE_DEFAULT if propagate_sgr else None
+
+ def _write_cells(s: str, w: int, write_col: int,
+ is_hyperlink: bool = False) -> None:
+ """Write *w* cells of text *s* at *write_col*, handling wide-char splitting."""
+ nonlocal captured_style
+ for offset in range(w):
+ src_col = write_col + offset
+ if src_col > 0 and cells.get(src_col - 1, ('', 0))[1] == 2:
+ cells[src_col - 1] = (fillchar, 1)
+ hyperlink_cells.discard(src_col - 1)
+ if cells.get(src_col, ('', 0))[1] == 2:
+ cells[src_col + 1] = (fillchar, 1)
+ hyperlink_cells.discard(src_col + 1)
+ cells.pop(src_col, None)
+ hyperlink_cells.discard(src_col)
+ cells[write_col] = (s, w)
+ if is_hyperlink:
+ for offset in range(w):
+ hyperlink_cells.add(write_col + offset)
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+
+ while idx < len(text):
+ char = text[idx]
+
+ # Early exit: past visible region, SGR captured, and the current char is not ESC (this also stops on \r and \x08).
+ if col >= end and captured_style is not None and char != '\x1b':
+ break
+
+ if char == '\x1b':
+ m = _SEQUENCE_CLASSIFY.match(text, idx)
+ if not m:
+ # Record lone ESC as a zero-width sequence at current column.
+ sequences.append((col, seq_order, char))
+ seq_order += 1
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ idx += 1
+ continue
+
+ # SGR: update current_style, do not emit.
+ if m.group('sgr_params') is not None and propagate_sgr and current_style is not None:
+ current_style = _sgr_state_update(current_style, m.group())
+ idx = m.end()
+ continue
+
+ # OSC 8 hyperlink.
+ if hl_state := HyperlinkParams.parse(m.group()):
+ r = _process_hyperlink(
+ text, start, end, fillchar, tabsize, ambiguous_width,
+ control_codes,
+ params=hl_state, match_end=m.end(), col=col,
+ )
+ if r.action is _HyperlinkAction.NO_CLOSE:
+ sequences.append((col, seq_order, m.group()))
+ seq_order += 1
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ idx = m.end()
+ elif r.action is _HyperlinkAction.EMPTY:
+ idx = r.close_end
+ elif r.action is _HyperlinkAction.OUTSIDE:
+ col += r.inner_width
+ idx = r.close_end
+ else:
+ sequences.append((col, seq_order, r.open_seq))
+ seq_order += 1
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ _write_cells(r.clipped_inner, r.clipped_width, col,
+ is_hyperlink=True)
+ col += r.clipped_width
+ sequences.append((col, seq_order, r.close_seq))
+ seq_order += 1
+ col = r.hl_col_end
+ idx = r.close_end
+ continue
+
+ # OSC 66 Text Sizing.
+ if (ts_meta := m.group('ts_meta')) is not None:
+ ts_text = m.group('ts_text')
+ ts_term = m.group('ts_term')
+ assert ts_text is not None and ts_term is not None
+ ts = TextSizing(
+ TextSizingParams.from_params(ts_meta, control_codes=control_codes),
+ ts_text, ts_term)
+ col = _text_sizing_clip(
+ ts, col, start, end, fillchar, ambiguous_width,
+ _write_cells)
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ idx = m.end()
+ continue
+
+ # Indeterminate-effect sequences: raise in strict mode.
+ seq = m.group()
+ if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
+ raise ValueError(
+ f"Indeterminate cursor sequence at position {idx}, "
+ f"{seq!r}"
+ )
+
+ # Horizontal Position Absolute (CSI n G).
+ if (hpa_n := m.group('hpa_n')) is not None:
+ col = int(hpa_n) - 1 if hpa_n else 0
+ idx = m.end()
+ continue
+
+ # Cursor Forward (CSI n C).
+ if (cforward_n := m.group('cforward_n')) is not None:
+ n_forward = int(cforward_n) if cforward_n else 1
+ move_end = col + n_forward
+ if col < end and move_end > start:
+ for i in range(max(col, start), min(move_end, end)):
+ _write_cells(fillchar, 1, i)
+ col = move_end
+ idx = m.end()
+ continue
+
+ # Cursor Backward (CSI n D).
+ if (cbackward_n := m.group('cbackward_n')) is not None:
+ n_backward = int(cbackward_n) if cbackward_n else 1
+ if strict and n_backward > col:
+ raise ValueError(
+ f"Cursor left movement at position {idx} would move "
+ f"{n_backward} cells left from column {col}, "
+ f"exceeding string start"
+ )
+ col = max(0, col - n_backward)
+ idx = m.end()
+ continue
+
+ # Any other recognized sequence: preserve as-is.
+ sequences.append((col, seq_order, m.group()))
+ seq_order += 1
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ idx = m.end()
+ continue
+
+ # Carriage return.
+ if char == '\r':
+ col = 0
+ idx += 1
+ continue
+
+ # Backspace.
+ if char == '\x08':
+ if col > 0:
+ col -= 1
+ idx += 1
+ continue
+
+ # Tab expansion.
+ if char == '\t':
+ if tabsize > 0:
+ next_tab = col + (tabsize - (col % tabsize))
+ while col < next_tab:
+ if start <= col < end:
+ _write_cells(fillchar, 1, col)
+ col += 1
+ else:
+ sequences.append((col, seq_order, '\t'))
+ seq_order += 1
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ idx += 1
+ continue
+
+ # Grapheme cluster.
+ grapheme = next(iter_graphemes(text, start=idx))
+ grapheme_w = width(grapheme, ambiguous_width=ambiguous_width)
+
+ # Emit grapheme or fillchar depending on visibility within clip window.
+ if grapheme_w == 0:
+ if start <= col < end:
+ sequences.append((col, seq_order, grapheme))
+ seq_order += 1
+ if propagate_sgr and captured_style is None:
+ captured_style = current_style
+ elif col >= start and col + grapheme_w <= end:
+ _write_cells(grapheme, grapheme_w, col)
+ elif col < end and col + grapheme_w > start:
+ clip_start = max(start, col)
+ for offset in range(min(end, col + grapheme_w) - clip_start):
+ _write_cells(fillchar, 1, clip_start + offset)
+
+ col += grapheme_w
+ idx += len(grapheme)
+
+ return _reconstruct_painter(cells, sequences, start, end, fillchar), captured_style
+
+
+def clip(
+ text: str,
+ start: int,
+ end: int,
+ *,
+ fillchar: str = ' ',
+ tabsize: int = 8,
+ ambiguous_width: int = 1,
+ propagate_sgr: bool = True,
+ control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
+ overtyping: Optional[bool] = None,
+) -> str:
+ r"""
+ Clip text to display columns (start, end) while preserving all terminal sequences.
+
+ This function extracts a substring based on visible column positions rather than
+ character indices. Terminal escape sequences are preserved in the output since
+ they have zero display width. If a wide character (width 2) is split at
+ either boundary, it is replaced with ``fillchar``.
+
+ TAB characters (``\t``) are expanded to spaces up to the next tab stop,
+ controlled by the ``tabsize`` parameter. When cursor movement is detected,
+ a "painter's algorithm" is used: cursor movements actively change the write
+ position, allowing cursor-left and carriage return to overwrite previously
+ written cells. It is assumed that ``text`` begins at column 0.
+
+ **OSC 8 hyperlinks** are handled specially: the visible text inside a hyperlink
+ is clipped to the requested column range, and the hyperlink is rebuilt around
+ the clipped text. Empty hyperlinks (those with no remaining visible text after
+ clipping) are removed::
+
+ >>> clip('\x1b]8;;http://example.com\x07Click This link\x1b]8;;\x07', 6, 10)
+ '\x1b]8;;http://example.com\x07This\x1b]8;;\x07'
+
+ :param text: String to clip, may contain terminal escape sequences.
+ :param start: Absolute starting column (inclusive, 0-indexed).
+ :param end: Absolute ending column (exclusive).
+ :param fillchar: Character to use when a wide character must be split at
+ a boundary (default space). Must have display width of 1.
+ :param tabsize: Tab stop width (default 8). Set to 0 to pass tabs through
+ as zero-width (preserved in output but don't advance column position).
+ :param ambiguous_width: Width to use for East Asian Ambiguous (A)
+ characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
+ :param propagate_sgr: If True (default), SGR (terminal styling) sequences
+ are propagated. The result begins with any active style at the start
+ position and ends with a reset sequence if styles are active.
+ :param control_codes: How to handle control characters and sequences:
+
+ - ``'parse'`` (default): Track horizontal cursor movement and clip
+ hyperlink text. Cursor overwrite is always allowed, with best effort
+ results; indeterminate sequences (home, clear, reset, etc.) are
+ preserved as zero-width.
+ - ``'strict'``: Like ``parse``, but raises :exc:`ValueError` on
+ sequences with indeterminate effects (cursor home, clear screen,
+ reset, vertical movement, etc.) matching :func:`width` behavior.
+ Also raises on out-of-bounds horizontal cursor movement.
+ - ``'ignore'``: All control characters are treated as zero-width.
+ Cursor movement is not tracked (fastest path).
+
+ :param overtyping: Whether to use the painter's algorithm for cursor
+ movement (``\b`` backspace, ``\r`` carriage return, and CSI cursor
+ left/right/position sequences). When ``None`` (default), auto-detects
+ by scanning for these characters in *text*. Set to ``False`` for improved
+ performance when the caller knows *text* contains no cursor movement
+ characters. Set to ``True`` to force the painter's algorithm (useful
+ for testing). Has no effect when ``control_codes='ignore'``.
+
+ :returns: Substring of ``text`` spanning display columns (start, end),
+ with all terminal sequences preserved and wide characters at boundaries
+ replaced with ``fillchar``.
+
+ :raises ValueError: If ``control_codes='strict'`` and an indeterminate-effect
+ sequence or out-of-bounds cursor movement is encountered.
+
+ SGR (terminal styling) sequences are propagated by default. The result
+ begins with any active style and ends with a reset::
+
+ >>> clip('\x1b[1;34mHello world\x1b[0m', 6, 11)
+ '\x1b[1;34mworld\x1b[0m'
+
+ Set ``propagate_sgr=False`` to disable this behavior.
+
+ .. versionadded:: 0.3.0
+
+ .. versionchanged:: 0.5.0
+ Added ``propagate_sgr`` parameter (default True).
+
+ .. versionchanged:: 0.7.0
+ Added ``control_codes`` parameter (default 'parse').
+ OSC 8 hyperlink-aware clipping. OSC 66 text sizing protocol support.
+ Added ``overtyping`` parameter (default None, auto-detect).
+
+ Example::
+
+ >>> clip('hello world', 0, 5)
+ 'hello'
+ >>> clip('中文字', 0, 3) # Wide char split at column 3
+ '中 '
+ >>> clip('a\tb', 0, 10) # Tab expanded to spaces
+ 'a b'
+ """
+ start = max(start, 0)
+ if end <= start:
+ return ''
+
+ # Fast path: printable ASCII only.
+ if text.isascii() and text.isprintable():
+ return text[start:end]
+
+ # No escape sequences => no SGR tracking needed.
+ has_esc = '\x1b' in text
+ if propagate_sgr and not has_esc:
+ propagate_sgr = False
+
+ # Determine whether painter's algorithm is needed.
+ if overtyping is None:
+ # Auto-detect: scan for cursor movement characters.
+ overtyping = (
+ control_codes != 'ignore' and
+ ('\x08' in text or '\r' in text or
+ (has_esc and bool(_HORIZONTAL_CURSOR_MOVEMENT.search(text))))
+ )
+ elif overtyping and control_codes == 'ignore':
+ overtyping = False # control_codes='ignore' overrides
+ fn_clip = _clip_painter if overtyping else _clip_simple
+
+ return _apply_sgr_wrap(*fn_clip(
+ text=text,
+ start=start,
+ end=end,
+ propagate_sgr=propagate_sgr,
+ ambiguous_width=ambiguous_width,
+ fillchar=fillchar,
+ tabsize=tabsize,
+ strict=(control_codes == 'strict'),
+ control_codes=control_codes,
+ ))
diff --git a/contrib/python/wcwidth/py3/wcwidth/_constants.py b/contrib/python/wcwidth/py3/wcwidth/_constants.py
new file mode 100644
index 00000000000..7c2b627ceac
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/_constants.py
@@ -0,0 +1,65 @@
+"""Shared data tables and constants for wcwidth.py, _wcwidth.py, and _wcswidth.py."""
+
+# local
+from .table_mc import CATEGORY_MC
+from .table_wide import WIDE_EASTASIAN
+from .table_zero import ZERO_WIDTH
+from .table_grapheme import EXTENDED_PICTOGRAPHIC, GRAPHEME_REGIONAL_INDICATOR
+from .table_ambiguous import AMBIGUOUS_EASTASIAN
+from .unicode_versions import list_versions
+
+__all__ = (
+ "_REGIONAL_INDICATOR_SET",
+ "_ISC_VIRAMA_SET",
+ "_LATEST_VERSION",
+ "_CATEGORY_MC_TABLE",
+ "_EMOJI_ZWJ_SET",
+ "_FITZPATRICK_RANGE",
+ "_ZERO_WIDTH_TABLE",
+ "_WIDE_EASTASIAN_TABLE",
+ "_AMBIGUOUS_TABLE",
+)
+
+_REGIONAL_INDICATOR_SET = frozenset(
+ range(GRAPHEME_REGIONAL_INDICATOR[0][0], GRAPHEME_REGIONAL_INDICATOR[0][1] + 1)
+)
+_ISC_VIRAMA_SET = frozenset((
+ 0x094D, # DEVANAGARI SIGN VIRAMA
+ 0x09CD, # BENGALI SIGN VIRAMA
+ 0x0A4D, # GURMUKHI SIGN VIRAMA
+ 0x0ACD, # GUJARATI SIGN VIRAMA
+ 0x0B4D, # ORIYA SIGN VIRAMA
+ 0x0BCD, # TAMIL SIGN VIRAMA
+ 0x0C4D, # TELUGU SIGN VIRAMA
+ 0x0CCD, # KANNADA SIGN VIRAMA
+ 0x0D4D, # MALAYALAM SIGN VIRAMA
+ 0x0DCA, # SINHALA SIGN AL-LAKUNA
+ 0x1B44, # BALINESE ADEG ADEG
+ 0xA806, # SYLOTI NAGRI SIGN HASANTA
+ 0xA8C4, # SAURASHTRA SIGN VIRAMA
+ 0xA9C0, # JAVANESE PANGKON
+ 0x11046, # BRAHMI VIRAMA
+ 0x110B9, # KAITHI SIGN VIRAMA
+ 0x111C0, # SHARADA SIGN VIRAMA
+ 0x11235, # KHOJKI SIGN VIRAMA
+ 0x1134D, # GRANTHA SIGN VIRAMA
+ 0x11442, # NEWA SIGN VIRAMA
+ 0x114C2, # TIRHUTA SIGN VIRAMA
+ 0x115BF, # SIDDHAM SIGN VIRAMA
+ 0x1163F, # MODI SIGN VIRAMA
+ 0x116B6, # TAKRI SIGN VIRAMA
+ 0x11839, # DOGRA SIGN VIRAMA
+ 0x119E0, # NANDINAGARI SIGN VIRAMA
+ 0x11C3F, # BHAIKSUKI SIGN VIRAMA
+))
+# pylint: disable=invalid-name
+_LATEST_VERSION = list_versions()[-1]
+_CATEGORY_MC_TABLE = CATEGORY_MC[_LATEST_VERSION]
+_EMOJI_ZWJ_SET = frozenset(
+ cp for lo, hi in EXTENDED_PICTOGRAPHIC for cp in range(lo, hi + 1)
+) | _REGIONAL_INDICATOR_SET
+_FITZPATRICK_RANGE = (0x1F3FB, 0x1F3FF)
+
+_ZERO_WIDTH_TABLE = ZERO_WIDTH[_LATEST_VERSION]
+_WIDE_EASTASIAN_TABLE = WIDE_EASTASIAN[_LATEST_VERSION]
+_AMBIGUOUS_TABLE = AMBIGUOUS_EASTASIAN[_LATEST_VERSION]
diff --git a/contrib/python/wcwidth/py3/wcwidth/_wcswidth.py b/contrib/python/wcwidth/py3/wcwidth/_wcswidth.py
new file mode 100644
index 00000000000..eab9b740729
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/_wcswidth.py
@@ -0,0 +1,150 @@
+"""This is a python implementation of wcswidth()."""
+
+from __future__ import annotations
+
+from typing import Optional
+
+# local
+from ._wcwidth import wcwidth
+from .bisearch import bisearch
+from ._constants import (_EMOJI_ZWJ_SET,
+ _ISC_VIRAMA_SET,
+ _CATEGORY_MC_TABLE,
+ _FITZPATRICK_RANGE,
+ _REGIONAL_INDICATOR_SET)
+from .table_vs16 import VS16_NARROW_TO_WIDE
+from .table_grapheme import ISC_CONSONANT
+
+
+def wcswidth(
+    pwcs: str,
+    n: Optional[int] = None,
+    unicode_version: str = 'auto',
+    ambiguous_width: int = 1,
+) -> int:
+    """
+    Given a unicode string, return its printable length on a terminal.
+
+    See :ref:`Specification` for details of cell measurement.
+
+    This implementation differs from Markus Kuhn's original POSIX C implementation, in that this
+    ``wcswidth()`` is grapheme-aware: ZWJ sequences, VS16, regional indicator pairs, emoji
+    modifiers, virama conjuncts and spacing combining marks are measured as clusters, following
+    `Unicode Standard Annex #29`_ (see also :func:`wcwidth.iter_graphemes`). POSIX wcswidth(3) is
+    not grapheme-aware and does not measure many kinds of Emojis or complex scripts correctly.
+
+    :param pwcs: Measure width of given unicode string.
+    :param n: When ``n`` is None (default), return the length of the entire
+        string, otherwise only the first ``n`` characters are measured. A value
+        larger than ``len(pwcs)`` measures the whole string; a negative value
+        measures nothing and returns ``0``.
+
+    :param unicode_version: Ignored. Retained for backwards compatibility.
+
+        .. deprecated:: 0.3.0
+           Only the latest Unicode version is now shipped.
+
+    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
+        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
+    :returns: The width, in cells, needed to display the first ``n`` characters
+        of the unicode string ``pwcs``. Returns ``-1`` for C0 and C1 control
+        characters!
+
+    .. _`Unicode Standard Annex #29`: https://www.unicode.org/reports/tr29/
+    """
+    # pylint: disable=unused-argument,too-many-locals,too-many-statements
+    # pylint: disable=too-complex,too-many-branches,duplicate-code
+    # This function intentionally keeps all logic inline for performance.
+
+    # Fast path: pure ASCII printable strings are always width == length
+    if n is None and pwcs.isascii() and pwcs.isprintable():
+        return len(pwcs)
+
+    # Select wcwidth call pattern for best lru_cache performance
+    _wcwidth = wcwidth if ambiguous_width == 1 else lambda c: wcwidth(c, 'auto', ambiguous_width)
+
+    # Clamp ``n`` to the string length: without this, n > len(pwcs) indexes
+    # past the end of the string and raises IndexError.
+    end = len(pwcs) if n is None else min(n, len(pwcs))
+    total_width = 0
+    idx = 0
+
+    # grapheme-clustering state; -2 / -1 are "nothing measured yet" sentinels
+    last_measured_idx = -2
+    last_measured_ucs = -1
+    last_was_virama = False
+    conjunct_pending = False
+
+    while idx < end:
+        char = pwcs[idx]
+        ucs = ord(char)
+
+        # ZWJ (U+200D)
+        if ucs == 0x200D:
+            if last_was_virama:
+                # keep the virama state so a following consonant can still join
+                idx += 1
+            elif idx + 1 < end:
+                # the ZWJ and the character it joins are both zero-width
+                last_was_virama = False
+                idx += 2
+            else:
+                last_was_virama = False
+                idx += 1
+            continue
+
+        # VS16 (U+FE0F): converts preceding narrow character to wide.
+        if ucs == 0xFE0F and last_measured_idx >= 0:
+            # bisearch() returns 1 or 0, which is exactly the width to add
+            total_width += bisearch(
+                ord(pwcs[last_measured_idx]),
+                VS16_NARROW_TO_WIDE['9.0.0'],
+            )
+            last_measured_idx = -2  # prevent double application
+            idx += 1
+            continue
+
+        # Regional Indicator & Fitzpatrick (both above BMP)
+        if ucs > 0xFFFF:
+            if ucs in _REGIONAL_INDICATOR_SET:
+                # count the run of regional indicators immediately before this one
+                ri_before = 0
+                j = idx - 1
+                while j >= 0 and ord(pwcs[j]) in _REGIONAL_INDICATOR_SET:
+                    ri_before += 1
+                    j -= 1
+                if ri_before % 2 == 1:
+                    # second half of a pair: already counted with the first half
+                    last_measured_ucs = ucs
+                    idx += 1
+                    continue
+            elif (_FITZPATRICK_RANGE[0] <= ucs <= _FITZPATRICK_RANGE[1]
+                  and last_measured_ucs in _EMOJI_ZWJ_SET):
+                # modifier following an emoji base adds no width
+                idx += 1
+                continue
+
+        # Virama conjunct formation
+        if last_was_virama and bisearch(ucs, ISC_CONSONANT):
+            last_measured_idx = idx
+            last_measured_ucs = ucs
+            last_was_virama = False
+            conjunct_pending = True
+            idx += 1
+            continue
+
+        # Normal character: measure with wcwidth
+        w = _wcwidth(char)
+        if w < 0:
+            # C0/C1 control character
+            return -1
+        if w > 0:
+            if conjunct_pending:
+                total_width += 1
+                conjunct_pending = False
+            total_width += w
+            last_measured_idx = idx
+            last_measured_ucs = ucs
+            last_was_virama = False
+        elif last_measured_idx >= 0 and bisearch(ucs, _CATEGORY_MC_TABLE):
+            # Spacing Combining Mark (Mc) following a base character adds 1
+            total_width += 1
+            last_measured_idx = -2
+            last_was_virama = False
+            conjunct_pending = False
+        else:
+            last_was_virama = ucs in _ISC_VIRAMA_SET
+        idx += 1
+
+    # a conjunct still pending at the end of input contributes one cell
+    if conjunct_pending:
+        total_width += 1
+    return total_width
diff --git a/contrib/python/wcwidth/py3/wcwidth/_wcwidth.py b/contrib/python/wcwidth/py3/wcwidth/_wcwidth.py
new file mode 100644
index 00000000000..c055fb788b0
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/_wcwidth.py
@@ -0,0 +1,158 @@
+"""
+This is a python implementation of wcwidth() and wcswidth().
+
+https://github.com/jquast/wcwidth
+
+Derived from Markus Kuhn's C code,
+
+This is an implementation of wcwidth() and wcswidth() (defined in
+IEEE Std 1003.1-2001) for Unicode.
+
+http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
+http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
+
+In fixed-width output devices, Latin characters all occupy a single
+"cell" position of equal width, whereas ideographic CJK characters
+occupy two such cells. Interoperability between terminal-line
+applications and (teletype-style) character terminals using the
+UTF-8 encoding requires agreement on which character should advance
+the cursor by how many cell positions. No established formal
+standards exist at present on which Unicode character shall occupy
+how many cell positions on character terminals. These routines are
+a first attempt of defining such behavior based on simple rules
+applied to data provided by the Unicode Consortium.
+
+For some graphical characters, the Unicode standard explicitly
+defines a character-cell width via the definition of the East Asian
+FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
+In all these cases, there is no ambiguity about which width a
+terminal shall use. For characters in the East Asian Ambiguous (A)
+class, the width choice depends purely on a preference of backward
+compatibility with either historic CJK or Western practice.
+Choosing single-width for these characters is easy to justify as
+the appropriate long-term solution, as the CJK practice of
+displaying these characters as double-width comes from historic
+implementation simplicity (8-bit encoded characters were displayed
+single-width and 16-bit ones double-width, even for Greek,
+Cyrillic, etc.) and not any typographic considerations.
+
+Much less clear is the choice of width for the Not East Asian
+(Neutral) class. Existing practice does not dictate a width for any
+of these characters. It would nevertheless make sense
+typographically to allocate two character cells to characters such
+as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
+represented adequately with a single-width glyph. The following
+routines at present merely assign a single-cell width to all
+neutral characters, in the interest of simplicity. This is not
+entirely satisfactory and should be reconsidered before
+establishing a formal standard in this area. At the moment, the
+decision which Not East Asian (Neutral) characters should be
+represented by double-width glyphs cannot yet be answered by
+applying a simple rule from the Unicode database content. Setting
+up a proper standard for the behavior of UTF-8 character terminals
+will require a careful analysis not only of each Unicode character,
+but also of each presentation form, something the author of these
+routines has avoided to do so far.
+
+http://www.unicode.org/unicode/reports/tr11/
+
+Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+"""
+
+from __future__ import annotations
+
+# std imports
+from functools import lru_cache
+
+# local
+from .bisearch import bisearch
+from ._constants import _LATEST_VERSION, _AMBIGUOUS_TABLE, _ZERO_WIDTH_TABLE, _WIDE_EASTASIAN_TABLE
+
+
+@lru_cache(maxsize=128)
+def _wcversion_value(ver_string: str) -> tuple[int, ...]:  # pragma: no cover
+    """
+    Convert a dotted version string into a tuple of integers.
+
+    .. deprecated:: 0.3.0
+
+        No longer called from within wcwidth; kept only so that external
+        tools relying on it continue to work.
+
+    :param ver_string: Unicode version string, of form ``n.n.n``.
+    :returns: one integer per dot-separated component, ``tuple(int, [...])``.
+    """
+    return tuple(int(component) for component in ver_string.split('.'))
+
+
+@lru_cache(maxsize=8)
+def _wcmatch_version(given_version: str) -> str: # pylint: disable=unused-argument
+ """
+ Return the supported Unicode version level.
+
+ .. deprecated:: 0.3.0
+ This function now always returns the latest version.
+
+ This function is no longer used internally by wcwidth but is retained
+ for API compatibility with external tools.
+
+ :param given_version: Ignored. Any value is accepted for compatibility;
+ because of the ``lru_cache`` decorator it must still be hashable.
+ :returns: The latest unicode version string.
+ """
+ # The argument is deliberately unused: only one table version is available.
+ return _LATEST_VERSION
+
+
+# maxsize=1024: western scripts need ~64 unique codepoints per session, but
+# CJK sessions may use ~2000 of ~3500 common hanzi/kanji. 1024 accommodates
+# heavy CJK use. Performance floor at 32; bisearch is ~100ns per miss.
+
+@lru_cache(maxsize=1024)
+def wcwidth(wc: str, unicode_version: str = 'auto', ambiguous_width: int = 1) -> int:  # pylint: disable=unused-argument
+    r"""
+    Return the number of terminal cells needed to display one Unicode codepoint.
+
+    :param wc: A single Unicode character.
+    :param unicode_version: Ignored. Retained for backwards compatibility.
+
+        .. deprecated:: 0.3.0
+           Only the latest Unicode version is now shipped.
+
+    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
+        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts
+        where ambiguous characters display as double-width. See
+        :ref:`ambiguous_width` for details.
+    :returns: ``0`` when ``wc`` has no printable effect on a terminal (such as
+        NUL '\0' or an empty string), ``-1`` when ``wc`` is a C0 or C1 control
+        character (not printable, or of indeterminate effect), otherwise the
+        number of columns (1 or 2) the character occupies.
+
+    See :ref:`Specification` for details of cell measurement.
+    """
+    # An empty string is measured like NUL.
+    codepoint = ord(wc) if wc else 0
+
+    # Printable ASCII is by far the most common input, so answer it before
+    # any table lookup.
+    if 0x20 <= codepoint <= 0x7e:
+        return 1
+
+    # C0 (excluding NUL), DEL and C1 controls report -1, as POSIX-like callers expect.
+    is_c0_control = 0 < codepoint < 0x20
+    is_del_or_c1_control = 0x7f <= codepoint <= 0x9f
+    if is_c0_control or is_del_or_c1_control:
+        return -1
+
+    if bisearch(codepoint, _ZERO_WIDTH_TABLE):
+        return 0
+
+    # Wide (F/W categories) always occupy two cells; Ambiguous (A) only does
+    # so when the caller asked for ambiguous_width=2.
+    if bisearch(codepoint, _WIDE_EASTASIAN_TABLE) or (
+        ambiguous_width == 2 and bisearch(codepoint, _AMBIGUOUS_TABLE)
+    ):
+        return 2
+
+    return 1
diff --git a/contrib/python/wcwidth/py3/wcwidth/_width.py b/contrib/python/wcwidth/py3/wcwidth/_width.py
new file mode 100644
index 00000000000..b2a44c8373d
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/_width.py
@@ -0,0 +1,339 @@
+"""This is a high-level width() supporting terminal output."""
+
+from typing import Literal
+
+# local
+from ._wcwidth import wcwidth
+from .bisearch import bisearch
+from ._wcswidth import wcswidth
+from ._constants import (_EMOJI_ZWJ_SET,
+ _ISC_VIRAMA_SET,
+ _CATEGORY_MC_TABLE,
+ _FITZPATRICK_RANGE,
+ _REGIONAL_INDICATOR_SET)
+from .table_vs16 import VS16_NARROW_TO_WIDE
+from .text_sizing import TextSizing, TextSizingParams
+from .control_codes import ILLEGAL_CTRL, VERTICAL_CTRL, HORIZONTAL_CTRL, ZERO_WIDTH_CTRL
+from .table_grapheme import ISC_CONSONANT
+from .escape_sequences import (_SEQUENCE_CLASSIFY,
+ TEXT_SIZING_PATTERN,
+ CURSOR_MOVEMENT_SEQUENCE,
+ INDETERMINATE_EFFECT_SEQUENCE,
+ strip_sequences)
+
+# In 'parse' mode, strings longer than this are checked for cursor-movement
+# controls (BS, TAB, CR, cursor sequences); when absent, mode downgrades to
+# 'ignore' to skip character-by-character parsing. The detection scan cost is
+# negligible for long strings but wasted on short ones like labels or headings.
+_WIDTH_FAST_PATH_MIN_LEN = 20
+
+# Translation table to strip C0/C1 control characters for fast 'ignore' mode.
+# Only the third argument of str.maketrans() is used: every character listed
+# there is deleted by str.translate().
+_CONTROL_CHAR_TABLE = str.maketrans('', '', (
+ ''.join(chr(c) for c in range(0x00, 0x20)) + # C0: NUL through US (including tab)
+ '\x7f' + # DEL
+ ''.join(chr(c) for c in range(0x80, 0xa0)) # C1: U+0080-U+009F
+))
+
+
+def _width_ignored_codes(text: str, ambiguous_width: int = 1) -> int:
+    """
+    Measure ``text`` for width() when control_codes='ignore'.
+
+    Escape sequences are removed first, then C0/C1 control characters, and
+    whatever remains is handed to wcswidth().
+    """
+    without_sequences = strip_sequences(text)
+    printable_only = without_sequences.translate(_CONTROL_CHAR_TABLE)
+    return wcswidth(printable_only, ambiguous_width=ambiguous_width)
+
+
+def width(
+    text: str,
+    *,
+    control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
+    tabsize: int = 8,
+    ambiguous_width: int = 1,
+) -> int:
+    r"""
+    Return printable width of text containing many kinds of control codes and sequences.
+
+    Unlike :func:`wcswidth`, this function handles most control characters and many popular terminal
+    output sequences. Never returns -1.
+
+    :param text: String to measure.
+    :param control_codes: How to handle control characters and sequences:
+
+        - ``'parse'`` (default): Track horizontal cursor movement like BS ``\b``, CR ``\r``, TAB
+          ``\t``, cursor left and right movement sequences. Vertical movement (LF, VT, FF) and
+          indeterminate terminal sequences are zero-width. OSC 66 Kitty Text Sizing protocol, OSC 8
+          Hyperlink, and many other kinds of output sequences are parsed for displayed measurements.
+        - ``'strict'``: Like parse, but raises :exc:`ValueError` on control characters with
+          indeterminate results of the screen or cursor, like clear or vertical movement. Generally,
+          these should be handled with a virtual terminal emulator (like 'pyte').
+        - ``'ignore'``: All C0 and C1 control characters and escape sequences are measured as
+          width 0. This is the fastest measurement for text already filtered or known not to contain
+          any kinds of control codes or sequences. TAB ``\t`` is zero-width; to ensure
+          tab expansion, pre-process text using :func:`str.expandtabs`.
+
+    :param tabsize: Tab stop width for ``'parse'`` and ``'strict'`` modes. Default is 8.
+        Must be positive; with a non-positive value TAB does not move the cursor.
+        Has no effect when ``control_codes='ignore'``.
+    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
+        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
+    :returns: Maximum cursor position reached, "extent", accounting for cursor movement sequences
+        present in ``text`` according to given parameters. This represents the rightmost column the
+        cursor reaches. Always a non-negative integer.
+
+    :raises ValueError: If ``control_codes='strict'`` and control characters with indeterminate
+        effects, such as vertical movement or clear sequences are encountered, or on unexpected
+        C0 or C1 control code. Also raised when ``control_codes`` is not one of the valid values.
+
+    .. versionadded:: 0.3.0
+
+    .. versionchanged:: 0.7.0
+       Expanded strict-mode to raise :exc:`ValueError` when cursor-left movement
+       (CSI D) would move beyond the beginning of the string. Previously, cursor-left
+       was silently clamped to column 0 in all modes.
+
+       Support horizontal cursor sequences (``cub``, ``cuf``, ``hpa``). Cursor-left (``cub``) or
+       backspace (``\b``) now overwrites text. ``column_address`` (``hpa``) and carriage return
+       (``\r``) are now parsed, and raise ``ValueError`` when ``control_codes='strict'``.
+
+    Examples::
+
+        >>> width('hello')
+        5
+        >>> width('コンニチハ')
+        10
+        >>> width('\x1b[31mred\x1b[0m')
+        3
+        >>> width('\x1b[31mred\x1b[0m', control_codes='ignore')  # same result (ignored)
+        3
+        >>> width('123\b4')  # backspace overwrites previous cell (outputs '124')
+        3
+        >>> width('abc\t')  # tab caused cursor to move to column 8
+        8
+        >>> width('1\x1b[10C')  # '1' + cursor right 10, cursor ends on column 11
+        11
+        >>> width('1\x1b[10C', control_codes='ignore')  # faster but wrong in this case
+        1
+    """
+    # pylint: disable=too-complex,too-many-branches,too-many-statements,too-many-locals
+    # The numbered sections below could be broken into sub-functions, but for reduced overhead
+    # in consideration of this function a likely "hot path", they are inline, breaking many pylint
+    # complexity rules.
+
+    # Fast path for ASCII printable (no tabs, escapes, or control chars)
+    if text.isascii() and text.isprintable():
+        return len(text)
+
+    # Fast parse: if no horizontal cursor movements are possible, switch to 'ignore' mode.
+    # Only check longer strings - the detection overhead hurts short string performance.
+    if control_codes == 'parse' and len(text) > _WIDTH_FAST_PATH_MIN_LEN:
+        # Check for cursor-affecting control characters
+        if '\b' not in text and '\t' not in text and '\r' not in text:
+            # Check for escape sequences, if none contain cursor movement or
+            # text sizing, downgrade to 'ignore'
+            if '\x1b' not in text or (
+                not CURSOR_MOVEMENT_SEQUENCE.search(text)
+                and not TEXT_SIZING_PATTERN.search(text)
+            ):
+                control_codes = 'ignore'
+
+    # Fast path for ignore mode, useful if you know the text is already free of control codes
+    if control_codes == 'ignore':
+        return _width_ignored_codes(text, ambiguous_width)
+
+    strict = control_codes == 'strict'
+    # Track absolute positions: tab stops need modulo on absolute column, CR resets to 0.
+    # Initialize max_extent to 0 so backward movement (CR, BS) won't yield negative width.
+    current_col = 0
+    max_extent = 0
+    idx = 0
+    text_len = len(text)
+
+    # Select wcwidth call pattern for best lru_cache performance:
+    # - ambiguous_width=1 (default): single-arg calls share cache with direct wcwidth() calls
+    # - ambiguous_width=2: full positional args needed (results differ, separate cache is correct)
+    _wcwidth = wcwidth if ambiguous_width == 1 else lambda c: wcwidth(c, 'auto', ambiguous_width)
+
+    # grapheme-clustering state; -2 / -1 are "nothing measured yet" sentinels
+    last_measured_idx = -2
+    last_measured_ucs = -1
+    last_was_virama = False
+    conjunct_pending = False
+
+    while idx < text_len:
+        char = text[idx]
+
+        # 1. ESC sequences
+        if char == '\x1b':
+            m = _SEQUENCE_CLASSIFY.match(text, idx)
+            if not m:
+                # 1a. Errant ESC or unknown sequence: only the first character is zero-width
+                idx += 1
+            else:
+                seq = m.group()
+                if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
+                    raise ValueError(f"Indeterminate cursor sequence at position {idx}, {seq!r}")
+
+                # 1b. horizontal position absolute (before forward/backward to
+                # avoid other_seq match in _SEQUENCE_CLASSIFY)
+                if (hpa_n := m.group('hpa_n')) is not None:
+                    target_col = int(hpa_n) if hpa_n else 1
+                    if strict:
+                        raise ValueError(
+                            f"Indeterminate horizontal position at position {idx}, "
+                            f"{seq!r} (absolute column unknown)"
+                        )
+                    # HPA is 1-indexed, convert to 0-indexed. An explicit parameter of 0
+                    # must not produce column -1, which would under-measure the text
+                    # that follows, so clamp to the first column.
+                    current_col = max(0, target_col - 1)
+                # 1c. cursor forward, backward
+                elif (cforward_n := m.group('cforward_n')) is not None:
+                    current_col += int(cforward_n) if cforward_n else 1
+                elif (cbackward_n := m.group('cbackward_n')) is not None:
+                    n_backward = int(cbackward_n) if cbackward_n else 1
+                    if strict and n_backward > current_col:
+                        raise ValueError(
+                            f"Cursor left movement at position {idx} would move "
+                            f"{n_backward} cells left from column {current_col}, "
+                            f"exceeding string start"
+                        )
+                    current_col = max(0, current_col - n_backward)
+                # 1d. OSC 66 Text Sizing — has positive display width
+                elif (ts_meta := m.group('ts_meta')) is not None:
+                    ts_text = m.group('ts_text')
+                    ts_term = m.group('ts_term')
+                    assert ts_text is not None and ts_term is not None
+                    text_size = TextSizing(
+                        TextSizingParams.from_params(ts_meta, control_codes=control_codes),
+                        ts_text, ts_term)
+                    current_col += text_size.display_width(ambiguous_width)
+                # 1e. SGR and other zero-width sequences -- no column advance
+                idx = m.end()
+            # Escape sequences break VS16 adjacency: reset last-measured state
+            last_measured_idx = -2
+            last_measured_ucs = -1
+            max_extent = max(max_extent, current_col)
+            continue
+
+        # 2. Vertical or Illegal control characters zero width or error when 'strict'
+        if char in ILLEGAL_CTRL:
+            if strict:
+                raise ValueError(f"Illegal control character {ord(char):#x} at position {idx}")
+            idx += 1
+            last_measured_idx = -2
+            last_measured_ucs = -1
+            continue
+
+        if char in VERTICAL_CTRL:
+            if strict:
+                raise ValueError(f"Vertical movement character {ord(char):#x} at position {idx}")
+            idx += 1
+            last_measured_idx = -2
+            last_measured_ucs = -1
+            continue
+
+        # 3. Horizontal movement characters
+        if char in HORIZONTAL_CTRL:
+            if char == '\t' and tabsize > 0:
+                # advance to the next tab stop
+                current_col += tabsize - (current_col % tabsize)
+            elif char == '\b':
+                if current_col > 0:
+                    current_col -= 1
+            elif char == '\r':
+                if strict:
+                    raise ValueError(
+                        f"Horizontal movement character \\r at position {idx}: "
+                        "indeterminate starting column"
+                    )
+                current_col = 0
+            max_extent = max(max_extent, current_col)
+            idx += 1
+            last_measured_idx = -2
+            last_measured_ucs = -1
+            continue
+
+        # 4. Zero-width control characters
+        if char in ZERO_WIDTH_CTRL:
+            idx += 1
+            last_measured_idx = -2
+            last_measured_ucs = -1
+            continue
+
+        # 5. Inline grapheme-clustering: ZWJ, VS16, Regional Indicators,
+        # Fitzpatrick, Virama conjuncts, Mc, wcwidth
+        ucs = ord(char)
+
+        # ZWJ (U+200D)
+        if ucs == 0x200D:
+            if last_was_virama:
+                # keep the virama state so a following consonant can still join
+                idx += 1
+            elif idx + 1 < text_len:
+                # the ZWJ and the character it joins are both zero-width
+                last_was_virama = False
+                idx += 2
+            else:
+                last_was_virama = False
+                idx += 1
+            continue
+
+        # VS16 (U+FE0F): converts preceding narrow character to wide.
+        if ucs == 0xFE0F and last_measured_idx >= 0:
+            if bisearch(ord(text[last_measured_idx]), VS16_NARROW_TO_WIDE['9.0.0']):
+                current_col += 1
+                max_extent = max(max_extent, current_col)
+            last_measured_idx = -2  # prevent double application
+            idx += 1
+            continue
+
+        # Regional Indicator & Fitzpatrick (both above BMP)
+        if ucs > 0xFFFF:
+            if ucs in _REGIONAL_INDICATOR_SET:
+                # count the run of regional indicators immediately before this one
+                ri_before = 0
+                j = idx - 1
+                while j >= 0 and ord(text[j]) in _REGIONAL_INDICATOR_SET:
+                    ri_before += 1
+                    j -= 1
+                if ri_before % 2 == 1:
+                    # second half of a pair: already counted with the first half
+                    last_measured_ucs = ucs
+                    idx += 1
+                    continue
+            elif (_FITZPATRICK_RANGE[0] <= ucs <= _FITZPATRICK_RANGE[1]
+                  and last_measured_ucs in _EMOJI_ZWJ_SET):
+                # modifier following an emoji base adds no width
+                idx += 1
+                continue
+
+        # Virama conjunct formation
+        if last_was_virama and bisearch(ucs, ISC_CONSONANT):
+            last_measured_idx = idx
+            last_measured_ucs = ucs
+            last_was_virama = False
+            conjunct_pending = True
+            idx += 1
+            continue
+
+        # Normal character: measure with wcwidth
+        w = _wcwidth(char)
+        if w > 0:
+            if conjunct_pending:
+                current_col += 1
+                conjunct_pending = False
+            current_col += w
+            max_extent = max(max_extent, current_col)
+            last_measured_idx = idx
+            last_measured_ucs = ucs
+            last_was_virama = False
+        elif last_measured_idx >= 0 and bisearch(ucs, _CATEGORY_MC_TABLE):
+            # Spacing Combining Mark (Mc) following a base character adds 1
+            current_col += 1
+            max_extent = max(max_extent, current_col)
+            last_measured_idx = -2
+            last_was_virama = False
+            conjunct_pending = False
+        else:
+            last_was_virama = ucs in _ISC_VIRAMA_SET
+        idx += 1
+
+    # a conjunct still pending at the end of input contributes one cell
+    if conjunct_pending:
+        current_col += 1
+        max_extent = max(max_extent, current_col)
+    return max_extent
diff --git a/contrib/python/wcwidth/py3/wcwidth/align.py b/contrib/python/wcwidth/py3/wcwidth/align.py
new file mode 100644
index 00000000000..328454bbe7c
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/align.py
@@ -0,0 +1,136 @@
+"""Python grapheme, emoji, and sequence-aware ljust, rjust, center()."""
+from typing import Literal
+
+# local
+from ._width import width
+
+
+def ljust(
+    text: str,
+    dest_width: int,
+    fillchar: str = ' ',
+    *,
+    control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
+    ambiguous_width: int = 1,
+) -> str:
+    r"""
+    Left-justify text within a field of the given display width.
+
+    :param text: String to justify, may contain terminal sequences.
+    :param dest_width: Total display width of result in terminal cells.
+    :param fillchar: Single character for padding (default space). Must have
+        display width of 1 (not wide, not zero-width, not combining). Unicode
+        characters like ``'·'`` are acceptable. The width is not validated.
+    :param control_codes: How to handle control sequences when measuring.
+        Passed to :func:`width` for measurement.
+    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
+        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
+    :returns: Text padded on the right to reach ``dest_width``; returned
+        unpadded when it is already at least that wide.
+
+    .. versionadded:: 0.3.0
+
+    Example::
+
+        >>> wcwidth.ljust('hi', 5)
+        'hi   '
+        >>> wcwidth.ljust('\x1b[31mhi\x1b[0m', 5)
+        '\x1b[31mhi\x1b[0m   '
+        >>> wcwidth.ljust('\U0001F468\u200D\U0001F469\u200D\U0001F467', 6)
+        '👨‍👩‍👧    '
+    """
+    # Printable ASCII occupies exactly one cell per character, so only other
+    # text needs the full width() measurement.
+    plain_ascii = text.isascii() and text.isprintable()
+    if not plain_ascii:
+        measured = width(text, control_codes=control_codes, ambiguous_width=ambiguous_width)
+    else:
+        measured = len(text)
+    trailing = fillchar * max(0, dest_width - measured)
+    return text + trailing
+
+
+def rjust(
+    text: str,
+    dest_width: int,
+    fillchar: str = ' ',
+    *,
+    control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
+    ambiguous_width: int = 1,
+) -> str:
+    r"""
+    Right-justify text within a field of the given display width.
+
+    :param text: String to justify, may contain terminal sequences.
+    :param dest_width: Total display width of result in terminal cells.
+    :param fillchar: Single character for padding (default space). Must have
+        display width of 1 (not wide, not zero-width, not combining). Unicode
+        characters like ``'·'`` are acceptable. The width is not validated.
+    :param control_codes: How to handle control sequences when measuring.
+        Passed to :func:`width` for measurement.
+    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
+        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
+    :returns: Text padded on the left to reach ``dest_width``; returned
+        unpadded when it is already at least that wide.
+
+    .. versionadded:: 0.3.0
+
+    Example::
+
+        >>> wcwidth.rjust('hi', 5)
+        '   hi'
+        >>> wcwidth.rjust('\x1b[31mhi\x1b[0m', 5)
+        '   \x1b[31mhi\x1b[0m'
+        >>> wcwidth.rjust('\U0001F468\u200D\U0001F469\u200D\U0001F467', 6)
+        '    👨‍👩‍👧'
+    """
+    # Printable ASCII occupies exactly one cell per character, so only other
+    # text needs the full width() measurement.
+    measured = (
+        len(text)
+        if text.isascii() and text.isprintable()
+        else width(text, control_codes=control_codes, ambiguous_width=ambiguous_width)
+    )
+    leading = fillchar * max(0, dest_width - measured)
+    return leading + text
+
+
+def center(
+    text: str,
+    dest_width: int,
+    fillchar: str = ' ',
+    *,
+    control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
+    ambiguous_width: int = 1,
+) -> str:
+    r"""
+    Center text within a field of the given display width.
+
+    :param text: String to center, may contain terminal sequences.
+    :param dest_width: Total display width of result in terminal cells.
+    :param fillchar: Single character for padding (default space). Must have
+        display width of 1 (not wide, not zero-width, not combining). Unicode
+        characters like ``'·'`` are acceptable. The width is not validated.
+    :param control_codes: How to handle control sequences when measuring.
+        Passed to :func:`width` for measurement.
+    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
+        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
+    :returns: Text padded on both sides to reach ``dest_width``; returned
+        unpadded when it is already at least that wide.
+
+    When the padding cannot be split evenly, the extra cell is placed as
+    Python's :meth:`str.center` places it (on the left side when ``dest_width``
+    is odd, on the right side when ``dest_width`` is even).
+    See `the eccentric str.center <https://jazcap53.github.io/pythons-eccentric-strcenter.html>`_.
+
+    .. versionadded:: 0.3.0
+
+    Example::
+
+        >>> wcwidth.center('hi', 6)
+        '  hi  '
+        >>> wcwidth.center('\x1b[31mhi\x1b[0m', 6)
+        '  \x1b[31mhi\x1b[0m  '
+        >>> wcwidth.center('\U0001F468\u200D\U0001F469\u200D\U0001F467', 6)
+        '  👨‍👩‍👧  '
+    """
+    # Printable ASCII occupies exactly one cell per character, so only other
+    # text needs the full width() measurement.
+    plain_ascii = text.isascii() and text.isprintable()
+    if plain_ascii:
+        measured = len(text)
+    else:
+        measured = width(text, control_codes=control_codes, ambiguous_width=ambiguous_width)
+    padding = max(0, dest_width - measured)
+    # matching https://jazcap53.github.io/pythons-eccentric-strcenter.html
+    # the odd cell goes left only when both the padding and dest_width are odd
+    odd_cell_on_left = padding & dest_width & 1
+    on_left = padding // 2 + odd_cell_on_left
+    on_right = padding - on_left
+    return ''.join((fillchar * on_left, text, fillchar * on_right))
diff --git a/contrib/python/wcwidth/py3/wcwidth/bisearch.py b/contrib/python/wcwidth/py3/wcwidth/bisearch.py
index becfe86a9d1..e95c51b8dc9 100644
--- a/contrib/python/wcwidth/py3/wcwidth/bisearch.py
+++ b/contrib/python/wcwidth/py3/wcwidth/bisearch.py
@@ -1,4 +1,5 @@
"""Binary search function for Unicode interval tables."""
+
from __future__ import annotations
@@ -7,8 +8,8 @@ def bisearch(ucs: int, table: tuple[tuple[int, int], ...]) -> int:
Binary search in interval table.
:param ucs: Ordinal value of unicode character.
- :param table: Tuple of starting and ending ranges of ordinal values,
- in form of ``((start, end), ...)``.
+ :param table: Tuple of starting and ending ranges of ordinal values, in form of ``((start, end),
+ ...)``.
:returns: 1 if ordinal value ucs is found within lookup table, else 0.
"""
lbound = 0
diff --git a/contrib/python/wcwidth/py3/wcwidth/escape_sequences.py b/contrib/python/wcwidth/py3/wcwidth/escape_sequences.py
index d4ac6cc36db..77d2b9b3cb1 100644
--- a/contrib/python/wcwidth/py3/wcwidth/escape_sequences.py
+++ b/contrib/python/wcwidth/py3/wcwidth/escape_sequences.py
@@ -5,15 +5,27 @@ This module provides regex patterns for matching terminal escape sequences. All
sequences that begin with ESC (``\x1b``). Before calling re.match with these patterns, callers
should first check that the character at the current position is ESC for optimal performance.
"""
+
# std imports
import re
+import typing
+
+# local
+from .sgr_state import _SGR_PATTERN
+
+# Text Sizing Protocol (OSC 66), https://sw.kovidgoyal.net/kitty/text-sizing-protocol/
+TEXT_SIZING_PATTERN = re.compile(
+ r'\x1b\]66;([^;\x07\x1b]*);([^\x07\x1b]*)(\x07|\x1b\\)'
+)
+
# Zero-width escape sequences (SGR, OSC, CSI, etc.). This table, like INDETERMINATE_EFFECT_SEQUENCE,
# originated from the 'blessed' library.
ZERO_WIDTH_PATTERN = re.compile(
# CSI sequences
r'\x1b\[[\x30-\x3f]*[\x20-\x2f]*[\x40-\x7e]|'
- # OSC sequences
+ # OSC sequences, note that text sizing protocol (OSC 66) is special case in width() and clip(),
+ # and contrary to the variable name, it is positive width.
r'\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)|'
# APC sequences
r'\x1b_[^\x1b\x07]*(?:\x07|\x1b\\)|'
@@ -21,12 +33,16 @@ ZERO_WIDTH_PATTERN = re.compile(
r'\x1bP[^\x1b\x07]*(?:\x07|\x1b\\)|'
# PM sequences
r'\x1b\^[^\x1b\x07]*(?:\x07|\x1b\\)|'
- # Character set designation
+ # Character set designation (subset of nF, handled separately for clarity)
r'\x1b[()].|'
- # Fe sequences
+ # nF sequences: ESC + one or more intermediate bytes (0x20-0x2F) + final byte (0x30-0x7E)
+ r'\x1b[\x20-\x2f]+[\x30-\x7e]|'
+ # Fe sequences (C1 controls)
r'\x1b[\x40-\x5f]|'
- # Fp sequences
- r'\x1b[78=>g]'
+ # Fp sequences (private use)
+ r'\x1b[\x30-\x3f]|'
+ # Fs sequences (independent functions)
+ r'\x1b[\x60-\x7e]'
)
# Cursor right movement: CSI [n] C, parameter may be parsed by width()
@@ -35,6 +51,31 @@ CURSOR_RIGHT_SEQUENCE = re.compile(r'\x1b\[(\d*)C')
# Cursor left movement: CSI [n] D, parameter may be parsed by width()
CURSOR_LEFT_SEQUENCE = re.compile(r'\x1b\[(\d*)D')
+# Horizontal position absolute: CSI [n] G, parameter may be parsed by width()
+CURSOR_HPA_SEQUENCE = re.compile(r'\x1b\[(\d*)G')
+
+# Combined cursor movement: single regex for fast-path detection of any
+# horizontal cursor movement (left, right, hpa). Avoids two separate search()
+# calls in hot-path width() and clip() pre-checks.
+CURSOR_MOVEMENT_SEQUENCE = re.compile(r'\x1b\[(\d*)[CDG]')
+
+# Combined horizontal cursor movement: matches BS, CR, and CSI C/D/G cursor sequences
+# in a single regex pass. Used by clip() to decide between the simple append path
+# and the painter's algorithm.
+_HORIZONTAL_CURSOR_MOVEMENT = re.compile(r'[\x08\r]|\x1b\[(\d*)[CDG]')
+
+# Combined pattern: a single regex that matches any zero-width escape sequence
+# and classifies it via named groups, approximately 2x faster than redundant re.match() calls
+# in clip() and width().
+_SEQUENCE_CLASSIFY = re.compile(
+ _SGR_PATTERN.pattern.replace('(', '(?P<sgr_params>', 1)
+ + '|' + CURSOR_HPA_SEQUENCE.pattern.replace('(', '(?P<hpa_n>', 1)
+ + '|' + CURSOR_RIGHT_SEQUENCE.pattern.replace('(', '(?P<cforward_n>', 1)
+ + '|' + CURSOR_LEFT_SEQUENCE.pattern.replace('(', '(?P<cbackward_n>', 1)
+ + '|' + r'\x1b\]66;(?P<ts_meta>[^;\x07\x1b]*);(?P<ts_text>[^\x07\x1b]*)(?P<ts_term>\x07|\x1b\\)'
+ + '|' + r'(?P<other_seq>(?:' + ZERO_WIDTH_PATTERN.pattern + '))'
+)
+
# Indeterminate effect sequences - raise ValueError in 'strict' mode. The effects of these sequences
# are likely to be undesirable, moving the cursor vertically or to any unknown position, and
# otherwise not managed by the 'width' method of this library.
@@ -47,7 +88,6 @@ INDETERMINATE_EFFECT_SEQUENCE = re.compile(
r'\x1b\[\d+;\d+r', # change_scroll_region
r'\x1b\[\d*K', # erase_in_line (clr_eol, clr_bol)
r'\x1b\[\d*J', # erase_in_display (clr_eos, erase_display)
- r'\x1b\[\d*G', # column_address
r'\x1b\[\d+;\d+H', # cursor_address
r'\x1b\[\d*H', # cursor_home
r'\x1b\[\d*A', # cursor_up
@@ -65,5 +105,90 @@ INDETERMINATE_EFFECT_SEQUENCE = re.compile(
r'\x1b8', # restore_cursor
r'\x1bD', # scroll_forward (index)
r'\x1bM', # scroll_reverse (reverse index)
+ r'\x1bc', # full_reset (RIS)
))
)
+
+
+def iter_sequences(text: str) -> typing.Iterator[typing.Tuple[str, bool]]:
+ r"""
+ Iterate through text, yielding segments with sequence identification.
+
+ This generator yields tuples of ``(segment, is_sequence)`` for each part
+ of the input text, where ``is_sequence`` is ``True`` if the segment is
+ a recognized terminal escape sequence.
+
+ :param text: String to iterate through.
+ :returns: Iterator of (segment, is_sequence) tuples.
+
+ .. versionadded:: 0.3.0
+
+ Example::
+
+ >>> list(iter_sequences('hello'))
+ [('hello', False)]
+ >>> list(iter_sequences('\x1b[31mred'))
+ [('\x1b[31m', True), ('red', False)]
+ >>> list(iter_sequences('\x1b[1m\x1b[31m'))
+ [('\x1b[1m', True), ('\x1b[31m', True)]
+ """
+ idx = 0
+ text_len = len(text)
+ segment_start = 0
+
+ while idx < text_len:
+ char = text[idx]
+
+ if char == '\x1b':
+ # Yield any accumulated non-sequence text
+ if idx > segment_start:
+ yield (text[segment_start:idx], False)
+
+ # Try to match an escape sequence
+ match = ZERO_WIDTH_PATTERN.match(text, idx)
+ if match:
+ yield (match.group(), True)
+ idx = match.end()
+ else:
+ # Lone ESC or unrecognized - yield as sequence anyway
+ yield (char, True)
+ idx += 1
+ segment_start = idx
+ else:
+ idx += 1
+
+ # Yield any remaining text
+ if segment_start < text_len:
+ yield (text[segment_start:], False)
+
+
+def strip_sequences(text: str) -> str:
+ r"""
+ Return text with all terminal escape sequences removed.
+
+ Unknown or incomplete ESC sequences are preserved.
+
+ :param text: String that may contain terminal escape sequences.
+ :returns: The input text with all escape sequences stripped.
+
+ .. versionadded:: 0.3.0
+
+ .. versionchanged:: 0.7.0
+ Inner text of OSC 66 (Text sizing protocol) is preserved.
+
+ Example::
+
+ >>> strip_sequences('\x1b[31mred\x1b[0m')
+ 'red'
+ >>> strip_sequences('hello')
+ 'hello'
+ >>> strip_sequences('\x1b[1m\x1b[31mbold red\x1b[0m text')
+ 'bold red text'
+ >>> strip_sequences('\x1b]66;s=2;hello\x07')
+ 'hello'
+ >>> strip_sequences('\x1b]8;id=34;https://example.com\x1b\\[view]\x1b]8;;\x1b\\')
+ '[view]'
+ """
+ if '\x1b]66;' in text:
+ text = TEXT_SIZING_PATTERN.sub(r'\2', text)
+ return ZERO_WIDTH_PATTERN.sub('', text)
diff --git a/contrib/python/wcwidth/py3/wcwidth/grapheme.py b/contrib/python/wcwidth/py3/wcwidth/grapheme.py
index 7befc92052e..87f61f5f7f1 100644
--- a/contrib/python/wcwidth/py3/wcwidth/grapheme.py
+++ b/contrib/python/wcwidth/py3/wcwidth/grapheme.py
@@ -13,7 +13,7 @@ from __future__ import annotations
from enum import IntEnum
from functools import lru_cache
-from typing import TYPE_CHECKING, NamedTuple
+from typing import TYPE_CHECKING, Optional, NamedTuple
# local
from .bisearch import bisearch as _bisearch
@@ -130,7 +130,7 @@ class BreakResult(NamedTuple):
@lru_cache(maxsize=1024)
-def _simple_break_check(prev_gcb: GCB, curr_gcb: GCB) -> BreakResult | None:
+def _simple_break_check(prev_gcb: GCB, curr_gcb: GCB) -> Optional[BreakResult]:
"""
Check simple GCB-pair-based break rules (cacheable).
@@ -248,7 +248,7 @@ def _should_break(
def iter_graphemes(
unistr: str,
start: int = 0,
- end: int | None = None,
+ end: Optional[int] = None,
) -> Iterator[str]:
r"""
Iterate over grapheme clusters in a Unicode string.
@@ -266,9 +266,9 @@ def iter_graphemes(
>>> list(iter_graphemes('cafe\u0301'))
['c', 'a', 'f', 'e\u0301']
- >>> list(iter_graphemes('\U0001F468\u200D\U0001F469\u200D\U0001F467'))
+ >>> list(iter_graphemes('ok\U0001F468\u200D\U0001F469\u200D\U0001F467'))
['o', 'k', '\U0001F468\u200D\U0001F469\u200D\U0001F467']
- >>> list(iter_graphemes('\U0001F1FA\U0001F1F8'))
+ >>> list(iter_graphemes('ok\U0001F1FA\U0001F1F8'))
['o', 'k', '\U0001F1FA\U0001F1F8']
.. versionadded:: 0.3.0
@@ -390,7 +390,7 @@ def grapheme_boundary_before(unistr: str, pos: int) -> int:
def iter_graphemes_reverse(
unistr: str,
start: int = 0,
- end: int | None = None,
+ end: Optional[int] = None,
) -> Iterator[str]:
r"""
Iterate over grapheme clusters in reverse order (last to first).
diff --git a/contrib/python/wcwidth/py3/wcwidth/hyperlink.py b/contrib/python/wcwidth/py3/wcwidth/hyperlink.py
new file mode 100644
index 00000000000..da7a3aa08f4
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/hyperlink.py
@@ -0,0 +1,142 @@
+"""
+OSC 8 hyperlink parsing and measurement.
+
+.. versionadded:: 0.7.0
+"""
+
+from __future__ import annotations
+
+# std imports
+import re
+
+import typing
+
+# local
+from ._width import width as _width
+from .escape_sequences import _SEQUENCE_CLASSIFY
+
+HYPERLINK_OPEN_RE = re.compile(r'\x1b]8;([^;]*);([^\x07\x1b]*)(\x07|\x1b\\)')
+HYPERLINK_CLOSE_RE = re.compile(r'\x1b]8;;(\x07|\x1b\\)')
+
+
+class HyperlinkParams(typing.NamedTuple):
+ r"""
+ Parsed parameters from an OSC 8 hyperlink open sequence.
+
+ :param url: The hyperlink URL.
+ :param params: Colon-separated metadata string (often empty).
+ :param terminator: Sequence terminator (``\x07`` or ``\x1b\\``).
+ """
+
+ url: str
+ params: str = ''
+ terminator: str = '\x07'
+
+ @classmethod
+ def parse(cls, seq: str) -> HyperlinkParams | None:
+ r"""
+ Parse an OSC 8 open sequence string.
+
+ Returns ``None`` if *seq* is not a valid OSC 8 open.
+
+ Example::
+
+ >>> HyperlinkParams.parse('\x1b]8;;http://example.com\x07')
+ HyperlinkParams(url='http://example.com', params='', terminator='\x07')
+ """
+ m = HYPERLINK_OPEN_RE.match(seq)
+ if m is None:
+ return None
+ return cls(url=m.group(2), params=m.group(1), terminator=m.group(3))
+
+ def make_open(self) -> str:
+ """Generate the OSC 8 open escape sequence."""
+ return f'\x1b]8;{self.params};{self.url}{self.terminator}'
+
+ def make_close(self) -> str:
+ """Generate the OSC 8 close escape sequence."""
+ return f'\x1b]8;;{self.terminator}'
+
+
+class Hyperlink(typing.NamedTuple):
+ """
+ A complete OSC 8 hyperlink with target and inner text.
+
+ :param params: Parsed open sequence parameters.
+ :param text: Inner text between the open and close sequences.
+ """
+
+ params: HyperlinkParams
+ text: str
+
+ @classmethod
+ def find_close(cls, text: str, open_end: int) -> tuple[int, int]:
+ """
+ Find the matching OSC 8 close sequence.
+
+ Searches 'text' starting at 'open_end', the position just past the open
+ sequence. Returns position of close sequence ``(close_start,
+ close_end)`` or ``(-1, -1)`` if not found.
+
+ Per the OSC 8 specification, terminal emulators treat hyperlinks as a
+ state attribute, not as nested HTML anchors. A close sequence closes
+ the current hyperlink regardless of how many open sequences preceded it.
+ """
+ m = HYPERLINK_CLOSE_RE.search(text, open_end)
+ if m is None:
+ return (-1, -1)
+ return (m.start(), m.end())
+
+ def display_width(
+ self,
+ *,
+ control_codes: typing.Literal['parse', 'strict', 'ignore'] = 'parse',
+ tabsize: int = 8,
+ ambiguous_width: int = 1,
+ ) -> int:
+ r"""
+ Measure the display width of the hyperlink's inner text.
+
+ Delegates to :func:`wcwidth.width` with the given parameters.
+
+ Example::
+
+ >>> hl = Hyperlink.parse('\x1b]8;;http://example.com\x07Hello\x1b]8;;\x07', 0)
+ >>> hl.display_width()
+ 5
+ """
+ return _width(
+ self.text,
+ control_codes=control_codes,
+ tabsize=tabsize,
+ ambiguous_width=ambiguous_width,
+ )
+
+ @classmethod
+ def parse(cls, text: str, start: int = 0) -> Hyperlink | None:
+ r"""
+ Parse a complete OSC 8 hyperlink unit from *text* at position *start*.
+
+ Locates the open sequence, finds the matching close, and returns a
+ ``Hyperlink`` containing the parsed parameters and inner text. Returns
+ ``None`` if the text at *start* is not a complete OSC 8 hyperlink.
+
+ Example::
+
+ >>> Hyperlink.parse('\x1b]8;;http://example.com\x07Hello\x1b]8;;\x07')
+ Hyperlink(params=HyperlinkParams(url='http://example.com', ...), text='Hello')
+ """
+ m = _SEQUENCE_CLASSIFY.match(text, start)
+ if m is None:
+ return None
+ params = HyperlinkParams.parse(m.group())
+ if params is None:
+ return None
+ close_start, close_end = cls.find_close(text, m.end())
+ if (close_start, close_end) == (-1, -1):
+ return None
+ return cls(params=params, text=text[m.end():close_start])
+
+ def make_sequence(self) -> str:
+ """Rebuild the complete OSC 8 hyperlink escape sequence."""
+ return self.params.make_open() + self.text + self.params.make_close()
diff --git a/contrib/python/wcwidth/py3/wcwidth/sgr_state.py b/contrib/python/wcwidth/py3/wcwidth/sgr_state.py
index b0c8648437e..8e6e5ccfe97 100644
--- a/contrib/python/wcwidth/py3/wcwidth/sgr_state.py
+++ b/contrib/python/wcwidth/py3/wcwidth/sgr_state.py
@@ -5,6 +5,7 @@ This module provides functions for tracking and propagating terminal styling (bo
etc.) via public API propagate_sgr(), and its dependent functions, cut() and wrap(). It only has
attributes necessary to perform its functions, eg 'RED' and 'BLUE' attributes are not defined.
"""
+
from __future__ import annotations
# std imports
@@ -307,7 +308,7 @@ def propagate_sgr(lines: Sequence[str]) -> list[str]:
['\x1b[31mhello\x1b[0m', '\x1b[31mworld\x1b[0m']
This is useful in cases of making special editors and viewers, and is used for the
- default modes (propagate_sgr=True) of :func:`wcwidth.width` and :func:`wcwidth.clip`.
+ default modes (propagate_sgr=True) of :func:`wcwidth.wrap` and :func:`wcwidth.clip`.
When wrapping and clipping text containing SGR sequences, maybe a previous line enabled the BLUE
color--if we are viewing *only* the line following, we would want the carry over the BLUE color,
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py b/contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py
index e3dc0b1c3de..d2fdd6b8742 100644
--- a/contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py
+++ b/contrib/python/wcwidth/py3/wcwidth/table_ambiguous.py
@@ -1,7 +1,7 @@
"""
Exports AMBIGUOUS_EASTASIAN table keyed by supporting unicode version level.
-This code generated by wcwidth/bin/update-tables.py on 2026-01-18 23:27:15 UTC.
+This code generated by python wcwidth project.
"""
# pylint: disable=duplicate-code
AMBIGUOUS_EASTASIAN = {
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_grapheme.py b/contrib/python/wcwidth/py3/wcwidth/table_grapheme.py
index 42fd19e03df..563792af2b1 100644
--- a/contrib/python/wcwidth/py3/wcwidth/table_grapheme.py
+++ b/contrib/python/wcwidth/py3/wcwidth/table_grapheme.py
@@ -4,7 +4,7 @@ Exports grapheme cluster break property tables for Unicode version 17.0.0.
This module provides lookup tables for Unicode grapheme cluster break properties as defined in UAX
#29: Unicode Text Segmentation.
-This code generated by wcwidth/bin/update-tables.py on 2026-01-29 23:33:42 UTC.
+This code generated by python wcwidth project.
"""
# pylint: disable=duplicate-code
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_mc.py b/contrib/python/wcwidth/py3/wcwidth/table_mc.py
index 7c2e6915920..663e93b7640 100644
--- a/contrib/python/wcwidth/py3/wcwidth/table_mc.py
+++ b/contrib/python/wcwidth/py3/wcwidth/table_mc.py
@@ -1,7 +1,7 @@
"""
Exports CATEGORY_MC table keyed by supporting unicode version level.
-This code generated by wcwidth/bin/update-tables.py on 2026-01-29 00:47:54 UTC.
+This code generated by python wcwidth project.
"""
# pylint: disable=duplicate-code
CATEGORY_MC = {
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_vs16.py b/contrib/python/wcwidth/py3/wcwidth/table_vs16.py
index 70e4a7373ff..9420156e4de 100644
--- a/contrib/python/wcwidth/py3/wcwidth/table_vs16.py
+++ b/contrib/python/wcwidth/py3/wcwidth/table_vs16.py
@@ -1,7 +1,7 @@
"""
Exports VS16_NARROW_TO_WIDE table keyed by supporting unicode version level.
-This code generated by wcwidth/bin/update-tables.py on 2025-09-15 16:57:50 UTC.
+This code generated by python wcwidth project.
"""
# pylint: disable=duplicate-code
VS16_NARROW_TO_WIDE = {
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_wide.py b/contrib/python/wcwidth/py3/wcwidth/table_wide.py
index ed6f48a7322..4ad7bc1e063 100644
--- a/contrib/python/wcwidth/py3/wcwidth/table_wide.py
+++ b/contrib/python/wcwidth/py3/wcwidth/table_wide.py
@@ -1,7 +1,7 @@
"""
Exports WIDE_EASTASIAN table keyed by supporting unicode version level.
-This code generated by wcwidth/bin/update-tables.py on 2026-01-30 00:58:17 UTC.
+This code generated by python wcwidth project.
"""
# pylint: disable=duplicate-code
WIDE_EASTASIAN = {
@@ -72,8 +72,8 @@ WIDE_EASTASIAN = {
(0x0ffe0, 0x0ffe6,), # Fullwidth Cent Sign ..Fullwidth Won Sign
(0x16fe0, 0x16fe3,), # Tangut Iteration Mark ..Old Chinese Iteration Ma
(0x16ff2, 0x16ff6,), # Chinese Small Simplified..Yangqin Sign Slow Two Be
- (0x17000, 0x18cd5,), # (nil) ..Khitan Small Script Char
- (0x18cff, 0x18d1e,), # Khitan Small Script Char..(nil)
+ (0x17000, 0x18cd5,), # Tangut Ideograph-17000 ..Khitan Small Script Char
+ (0x18cff, 0x18d1e,), # Khitan Small Script Char..Tangut Ideograph-18d1e
(0x18d80, 0x18df2,), # Tangut Component-769 ..Tangut Component-883
(0x1aff0, 0x1aff3,), # Katakana Letter Minnan T..Katakana Letter Minnan T
(0x1aff5, 0x1affb,), # Katakana Letter Minnan T..Katakana Letter Minnan N
diff --git a/contrib/python/wcwidth/py3/wcwidth/table_zero.py b/contrib/python/wcwidth/py3/wcwidth/table_zero.py
index c440bfcf156..bee2431ab78 100644
--- a/contrib/python/wcwidth/py3/wcwidth/table_zero.py
+++ b/contrib/python/wcwidth/py3/wcwidth/table_zero.py
@@ -1,7 +1,7 @@
"""
Exports ZERO_WIDTH table keyed by supporting unicode version level.
-This code generated by wcwidth/bin/update-tables.py on 2026-01-30 00:48:24 UTC.
+This code generated by python wcwidth project.
"""
# pylint: disable=duplicate-code
ZERO_WIDTH = {
diff --git a/contrib/python/wcwidth/py3/wcwidth/text_sizing.py b/contrib/python/wcwidth/py3/wcwidth/text_sizing.py
new file mode 100644
index 00000000000..c8e8cb7c8b4
--- /dev/null
+++ b/contrib/python/wcwidth/py3/wcwidth/text_sizing.py
@@ -0,0 +1,200 @@
+r"""
+`kitty text sizing protocol`_ (OSC 66) parsing and measurement.
+
+The kitty text sizing protocol allows terminal apps to explicitly tell
+terminals how many cells text occupies, using the escape sequence::
+
+ ESC ] 66 ; metadata ; text BEL/ST
+
+Metadata is colon-separated ``key=value`` pairs:
+
+- ``s``: scale
+- ``w``: width in cells
+- ``n``: fractional numerator
+- ``d``: fractional denominator
+- ``v``: vertical alignment
+- ``h``: horizontal alignment
+
+Parsing is pretty straight-forward:
+
+- When ``w > 0``, return ``s * w``.
+- Otherwise (``w == 0``), return ``s * wcswidth(inner_text)`` cells.
+
+Numerator, denominator, and alignment codes and values are parsed but otherwise ignored
+and have no effect on measurements made in this library.
+
+.. _`kitty text sizing protocol`: https://sw.kovidgoyal.net/kitty/text-sizing-protocol/
+
+.. versionadded:: 0.7.0
+"""
+
+from __future__ import annotations
+
+# std imports
+import re
+
+import typing
+
+# local
+from ._wcswidth import wcswidth
+
+
+class _FieldMeta(typing.NamedTuple):
+ name: str
+ low: int
+ high: int
+ default: int
+
+
+TEXT_FIELD_MAPPING: dict[str, _FieldMeta] = {
+ 's': _FieldMeta(name='scale', low=1, high=7, default=1),
+ 'w': _FieldMeta(name='width', low=0, high=7, default=0),
+ 'n': _FieldMeta(name='numerator', low=0, high=15, default=0),
+ 'd': _FieldMeta(name='denominator', low=0, high=15, default=0),
+ 'v': _FieldMeta(name='vertical_align', low=0, high=2, default=0),
+ 'h': _FieldMeta(name='horizontal_align', low=0, high=2, default=0)}
+
+
+class TextSizingParams(typing.NamedTuple):
+ """
+ Parsed parameters from a text sizing escape sequence (OSC 66).
+
+ :param scale: Scale factor (1-7). Text occupies ``scale`` rows tall and ``scale * width``
+ columns wide.
+ :param width: Width in cells (0-7). When 0, width is auto-calculated from the inner text.
+ :param numerator: Fractional scaling numerator (0-15).
+ :param denominator: Fractional scaling denominator (0-15).
+ :param vertical_align: Vertical alignment (0=top, 1=bottom, 2=center).
+ :param horizontal_align: Horizontal alignment (0=left, 1=right, 2=center).
+ """
+
+ scale: int = 1
+ width: int = 0
+ numerator: int = 0
+ denominator: int = 0
+ vertical_align: int = 0
+ horizontal_align: int = 0
+
+ def __repr__(self) -> str:
+ """
+ Return a compact representation including only non-default fields.
+
+ This avoids verbose output when most fields are defaults.
+ """
+ # modified to show values only when non-default
+ repr_fmt = ', '.join(f'{field.name}={getattr(self, field.name)}'
+ for field in TEXT_FIELD_MAPPING.values()
+ if getattr(self, field.name) != field.default)
+ return f'{self.__class__.__name__}({repr_fmt})'
+
+ def make_sequence(self) -> str:
+ """Build and return sub-part of an OSC 66 sequence."""
+ parts = []
+ # build string for all known parameters of non-default values
+ for field_key, field in TEXT_FIELD_MAPPING.items():
+ if (val := getattr(self, field.name)) != field.default:
+ parts.append(f'{field_key}={val}')
+ return ':'.join(parts)
+
+ @classmethod
+ def from_params(cls, raw: str, control_codes: str = 'parse') -> TextSizingParams:
+ """
+ Parse colon-separated ``key=value`` metadata string.
+
+ :param raw: Metadata string, e.g. ``'s=2:w=3'``.
+ :param control_codes: 'parse' or 'strict'.
+ :raises ValueError: If ``control_codes='strict'`` and a parameter is missing ``=``, has an
+ unknown key, a non-integer value, or a value outside the allowed range.
+ :returns: Parsed parameters with values clamped to valid ranges.
+ Unknown keys are ignored. Non-integer values use defaults.
+
+ Example::
+
+ >>> TextSizingParams.from_params('s=2:w=3')
+ TextSizingParams(scale=2, width=3)
+ """
+ kwargs: typing.Dict[str, int] = {}
+ if not raw:
+ return cls()
+ for part in raw.split(':'):
+ if '=' not in part:
+ if control_codes == 'strict':
+ raise ValueError(f"Expected '=' in text sizing parameter (key=val), "
+ f"got {part!r} in OSC 66 sequence, {raw!r}")
+ continue
+ key, _eq, val = part.partition('=')
+ field = TEXT_FIELD_MAPPING.get(key)
+ if field is None:
+ if control_codes == 'strict':
+ raise ValueError(f"Unknown text sizing field '{key}' "
+ f"in OSC 66 sequence, {raw!r}")
+ # ignore unknown fields unless 'strict'
+ continue
+ try:
+ value = int(val)
+ except ValueError as exc:
+ if control_codes == 'strict':
+ raise ValueError(f"Illegal text sizing value '{val}' "
+ f"in OSC 66 sequence, {raw!r}: {exc}") from exc
+ # ignore value, uses default value without warning unless 'strict'
+ continue
+ if control_codes == 'strict' and (value > field.high or value < field.low):
+ raise ValueError(f"Out of bounds text sizing value '{val}' "
+ f"in OSC 66 sequence, {raw!r}: "
+ f"allowed range for '{key}' ({field.name}) "
+ f"is {field.low} to {field.high}")
+ kwargs[field.name] = max(field.low, min(field.high, value))
+ return cls(**kwargs)
+
+
+class TextSizing(typing.NamedTuple):
+ """Basic horizontal width measurement for kitty text sizing protocol."""
+
+ params: TextSizingParams
+ text: str
+ terminator: str
+
+ @classmethod
+ def from_match(cls, match: re.Match[str], control_codes: str = 'parse') -> TextSizing:
+ r"""
+ Parse using matching OSC 66 Sequence.
+
+ :param match: match object from :attr:`wcwidth.escape_sequences.TEXT_SIZING_PATTERN`.
+ :param control_codes: 'parse' or 'strict', same meaning as delegated by
+ :func:`wcwidth.width`.
+ :raises ValueError: When ``control_codes='strict'`` for unrecognized, invalid, or out of
+ bounds text sizing parameters.
+ :returns: TextSizing object from parsed sequence
+
+ Example::
+
+ >>> from wcwidth.escape_sequences import TEXT_SIZING_PATTERN
+ >>> TextSizing.from_match(TEXT_SIZING_PATTERN.match('\x1b]66;w=2;XY\x07'))
+ TextSizing(params=TextSizingParams(width=2), text='XY', terminator='\x07')
+ """
+ return cls(params=TextSizingParams.from_params(match.group(1), control_codes=control_codes),
+ text=match.group(2),
+ terminator=match.group(3))
+
+ def display_width(self, ambiguous_width: int = 1) -> int:
+ """
+ Calculate the display width of a text sizing sequence.
+
+ :param ambiguous_width: Width for East Asian Ambiguous characters.
+ :returns: Display width in terminal cells. When ``width > 0``, returns
+ ``params.scale * params.width``. When ``width == 0``, returns
+ ``params.scale * measured_inner_width``.
+
+ .. note:: Fractional scaling (numerator/denominator) does not affect the
+ cell count, it adjusts only the font size within the cells allocated by 'w'.
+ """
+ if self.params.width > 0:
+ return self.params.scale * self.params.width
+ w = wcswidth(self.text, ambiguous_width=ambiguous_width)
+ return self.params.scale * max(0, w)
+
+ def make_sequence(self) -> str:
+ """Build and return complete OSC 66 Terminal Sequence."""
+ return f'\x1b]66;{self.params.make_sequence()};{self.text}{self.terminator}'
diff --git a/contrib/python/wcwidth/py3/wcwidth/textwrap.py b/contrib/python/wcwidth/py3/wcwidth/textwrap.py
index 4582cd5e089..02cc31df73d 100644
--- a/contrib/python/wcwidth/py3/wcwidth/textwrap.py
+++ b/contrib/python/wcwidth/py3/wcwidth/textwrap.py
@@ -4,55 +4,26 @@ Sequence-aware text wrapping functions.
This module provides functions for wrapping text that may contain terminal escape sequences, with
proper handling of Unicode grapheme clusters and character display widths.
"""
+
from __future__ import annotations
# std imports
-import re
import secrets
import textwrap
-from typing import TYPE_CHECKING, NamedTuple
+from typing import TYPE_CHECKING, Optional
# local
-from .wcwidth import width as _width
-from .wcwidth import iter_sequences
+from ._width import width as wcwidth_width
from .grapheme import iter_graphemes
+from .hyperlink import HyperlinkParams
from .sgr_state import propagate_sgr as _propagate_sgr
-from .escape_sequences import ZERO_WIDTH_PATTERN
+from .escape_sequences import ZERO_WIDTH_PATTERN, iter_sequences
if TYPE_CHECKING: # pragma: no cover
from typing import Any, Literal
-class _HyperlinkState(NamedTuple):
- """State for tracking an open OSC 8 hyperlink across line breaks."""
-
- url: str # hyperlink target URL
- params: str # id=xxx and other key=value pairs separated by :
- terminator: str # BEL (\x07) or ST (\x1b\\)
-
-
-# Hyperlink parsing: captures (params, url, terminator)
-_HYPERLINK_OPEN_RE = re.compile(r'\x1b]8;([^;]*);([^\x07\x1b]*)(\x07|\x1b\\)')
-
-
-def _parse_hyperlink_open(seq: str) -> _HyperlinkState | None:
- """Parse OSC 8 open sequence, return state or None."""
- if (m := _HYPERLINK_OPEN_RE.match(seq)):
- return _HyperlinkState(url=m.group(2), params=m.group(1), terminator=m.group(3))
- return None
-
-
-def _make_hyperlink_open(url: str, params: str, terminator: str) -> str:
- """Generate OSC 8 open sequence."""
- return f'\x1b]8;{params};{url}{terminator}'
-
-
-def _make_hyperlink_close(terminator: str) -> str:
- """Generate OSC 8 close sequence."""
- return f'\x1b]8;;{terminator}'
-
-
class SequenceTextWrapper(textwrap.TextWrapper):
"""
Sequence-aware text wrapper extending :class:`textwrap.TextWrapper`.
@@ -99,8 +70,8 @@ class SequenceTextWrapper(textwrap.TextWrapper):
def _width(self, text: str) -> int:
"""Measure text width accounting for sequences."""
- return _width(text, control_codes=self.control_codes, tabsize=self.tabsize,
- ambiguous_width=self.ambiguous_width)
+ return wcwidth_width(text, control_codes=self.control_codes, tabsize=self.tabsize,
+ ambiguous_width=self.ambiguous_width)
def _strip_sequences(self, text: str) -> str:
"""Strip all terminal sequences from text."""
@@ -241,9 +212,9 @@ class SequenceTextWrapper(textwrap.TextWrapper):
lines: list[str] = []
is_first_line = True
- hyperlink_state: _HyperlinkState | None = None
+ hyperlink_state: Optional[HyperlinkParams] = None
# Track the id we're using for the current hyperlink continuation
- current_hyperlink_id: str | None = None
+ current_hyperlink_id: Optional[str] = None
# Arrange in reverse order so items can be efficiently popped
chunks = list(reversed(chunks))
@@ -258,8 +229,11 @@ class SequenceTextWrapper(textwrap.TextWrapper):
# If continuing a hyperlink from previous line, prepend open sequence
if hyperlink_state is not None:
- open_seq = _make_hyperlink_open(
- hyperlink_state.url, hyperlink_state.params, hyperlink_state.terminator)
+ open_seq = HyperlinkParams(
+ url=hyperlink_state.url,
+ params=hyperlink_state.params,
+ terminator=hyperlink_state.terminator,
+ ).make_open()
chunks[-1] = open_seq + chunks[-1]
# Drop leading whitespace (except at very start)
@@ -332,26 +306,33 @@ class SequenceTextWrapper(textwrap.TextWrapper):
if 'id=' in new_state.params:
current_hyperlink_id = new_state.params
elif new_state.params:
- # Prepend id to existing params (per OSC 8 spec, params can have
- # multiple key=value pairs separated by :)
+ # Prepend id to existing params. Per OSC 8 spec, params can have
+ # multiple key=value pairs separated by ':'.
current_hyperlink_id = (
f'id={self._next_hyperlink_id()}:{new_state.params}')
else:
current_hyperlink_id = f'id={self._next_hyperlink_id()}'
- line_content += _make_hyperlink_close(new_state.terminator)
+ line_content += HyperlinkParams(
+ terminator=new_state.terminator, url='').make_close()
# Also need to inject the id into the opening
# sequence if it didn't have one
if 'id=' not in new_state.params:
# Find and replace the original open sequence with one that has id
- old_open = _make_hyperlink_open(
- new_state.url, new_state.params, new_state.terminator)
- new_open = _make_hyperlink_open(
- new_state.url, current_hyperlink_id, new_state.terminator)
+ old_open = HyperlinkParams(
+ url=new_state.url,
+ params=new_state.params,
+ terminator=new_state.terminator,
+ ).make_open()
+ new_open = HyperlinkParams(
+ url=new_state.url,
+ params=current_hyperlink_id,
+ terminator=new_state.terminator,
+ ).make_open()
line_content = line_content.replace(old_open, new_open, 1)
# Update state for next line, using computed id
- hyperlink_state = _HyperlinkState(
+ hyperlink_state = HyperlinkParams(
new_state.url, current_hyperlink_id, new_state.terminator)
else:
hyperlink_state = None
@@ -364,7 +345,7 @@ class SequenceTextWrapper(textwrap.TextWrapper):
lines.append(indent + line_content)
is_first_line = False
else:
- # max_lines reached with remaining content —
+ # max_lines reached with remaining content.
# pop chunks until placeholder fits, then break.
placeholder_w = self._width(self.placeholder)
while current_line:
@@ -375,8 +356,8 @@ class SequenceTextWrapper(textwrap.TextWrapper):
new_state = self._track_hyperlink_state(
line_content, hyperlink_state)
if new_state is not None:
- line_content += _make_hyperlink_close(
- new_state.terminator)
+ line_content += HyperlinkParams(
+ terminator=new_state.terminator, url='').make_close()
lines.append(indent + line_content + self.placeholder)
break
current_width -= self._width(current_line[-1])
@@ -395,7 +376,7 @@ class SequenceTextWrapper(textwrap.TextWrapper):
def _track_hyperlink_state(
self, text: str,
- state: _HyperlinkState | None) -> _HyperlinkState | None:
+ state: Optional[HyperlinkParams]) -> Optional[HyperlinkParams]:
"""
Track hyperlink state through text.
@@ -405,7 +386,7 @@ class SequenceTextWrapper(textwrap.TextWrapper):
"""
for segment, is_seq in iter_sequences(text):
if is_seq:
- parsed_link = _parse_hyperlink_open(segment)
+ parsed_link = HyperlinkParams.parse(segment)
if parsed_link is not None and parsed_link.url: # has URL = open
state = parsed_link
elif segment.startswith(('\x1b]8;;\x1b\\', '\x1b]8;;\x07')): # close
@@ -545,7 +526,7 @@ def wrap(text: str, width: int = 70, *,
break_long_words: bool = True,
break_on_hyphens: bool = True,
drop_whitespace: bool = True,
- max_lines: int | None = None,
+ max_lines: Optional[int] = None,
placeholder: str = ' [...]',
propagate_sgr: bool = True) -> list[str]:
r"""
diff --git a/contrib/python/wcwidth/py3/wcwidth/wcwidth.py b/contrib/python/wcwidth/py3/wcwidth/wcwidth.py
index 98e7a635f69..e4895e99862 100644
--- a/contrib/python/wcwidth/py3/wcwidth/wcwidth.py
+++ b/contrib/python/wcwidth/py3/wcwidth/wcwidth.py
@@ -1,82 +1,38 @@
"""
-This is a python implementation of wcwidth() and wcswidth().
+Legacy compatibility module for wcwidth.wcwidth.
-https://github.com/jquast/wcwidth
+This file contains no new definitions and is provided only for backwards
+compatibility. This module exists solely to support legacy import paths::
-from Markus Kuhn's C code, retrieved from:
-
- http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
-
-This is an implementation of wcwidth() and wcswidth() (defined in
-IEEE Std 1002.1-2001) for Unicode.
-
-http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html
-http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html
-
-In fixed-width output devices, Latin characters all occupy a single
-"cell" position of equal width, whereas ideographic CJK characters
-occupy two such cells. Interoperability between terminal-line
-applications and (teletype-style) character terminals using the
-UTF-8 encoding requires agreement on which character should advance
-the cursor by how many cell positions. No established formal
-standards exist at present on which Unicode character shall occupy
-how many cell positions on character terminals. These routines are
-a first attempt of defining such behavior based on simple rules
-applied to data provided by the Unicode Consortium.
-
-For some graphical characters, the Unicode standard explicitly
-defines a character-cell width via the definition of the East Asian
-FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes.
-In all these cases, there is no ambiguity about which width a
-terminal shall use. For characters in the East Asian Ambiguous (A)
-class, the width choice depends purely on a preference of backward
-compatibility with either historic CJK or Western practice.
-Choosing single-width for these characters is easy to justify as
-the appropriate long-term solution, as the CJK practice of
-displaying these characters as double-width comes from historic
-implementation simplicity (8-bit encoded characters were displayed
-single-width and 16-bit ones double-width, even for Greek,
-Cyrillic, etc.) and not any typographic considerations.
-
-Much less clear is the choice of width for the Not East Asian
-(Neutral) class. Existing practice does not dictate a width for any
-of these characters. It would nevertheless make sense
-typographically to allocate two character cells to characters such
-as for instance EM SPACE or VOLUME INTEGRAL, which cannot be
-represented adequately with a single-width glyph. The following
-routines at present merely assign a single-cell width to all
-neutral characters, in the interest of simplicity. This is not
-entirely satisfactory and should be reconsidered before
-establishing a formal standard in this area. At the moment, the
-decision which Not East Asian (Neutral) characters should be
-represented by double-width glyphs cannot yet be answered by
-applying a simple rule from the Unicode database content. Setting
-up a proper standard for the behavior of UTF-8 character terminals
-will require a careful analysis not only of each Unicode character,
-but also of each presentation form, something the author of these
-routines has avoided to do so far.
-
-http://www.unicode.org/unicode/reports/tr11/
-
-Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
+ from wcwidth.wcwidth import iter_graphemes
+ from wcwidth.wcwidth import _SGR_PATTERN
+ import wcwidth.wcwidth as legacy
"""
-
-from __future__ import annotations
-
-# std imports
-from functools import lru_cache
-
-from typing import TYPE_CHECKING
+# pylint: disable=unused-import
# local
+from ._clip import clip
+from .align import ljust, rjust, center
+from ._width import _CONTROL_CHAR_TABLE, _WIDTH_FAST_PATH_MIN_LEN, width, _width_ignored_codes
+from ._wcwidth import wcwidth, _wcmatch_version, _wcversion_value
from .bisearch import bisearch as _bisearch
from .grapheme import iter_graphemes
from .table_mc import CATEGORY_MC
+from ._wcswidth import wcswidth
from .sgr_state import (_SGR_PATTERN,
_SGR_STATE_DEFAULT,
_sgr_state_update,
_sgr_state_is_active,
_sgr_state_to_sequence)
+from ._constants import (_EMOJI_ZWJ_SET,
+ _ISC_VIRAMA_SET,
+ _LATEST_VERSION,
+ _AMBIGUOUS_TABLE,
+ _ZERO_WIDTH_TABLE,
+ _CATEGORY_MC_TABLE,
+ _FITZPATRICK_RANGE,
+ _WIDE_EASTASIAN_TABLE,
+ _REGIONAL_INDICATOR_SET)
from .table_vs16 import VS16_NARROW_TO_WIDE
from .table_wide import WIDE_EASTASIAN
from .table_zero import ZERO_WIDTH
@@ -86,78 +42,13 @@ from .table_ambiguous import AMBIGUOUS_EASTASIAN
from .escape_sequences import (ZERO_WIDTH_PATTERN,
CURSOR_LEFT_SEQUENCE,
CURSOR_RIGHT_SEQUENCE,
- INDETERMINATE_EFFECT_SEQUENCE)
+ INDETERMINATE_EFFECT_SEQUENCE,
+ iter_sequences,
+ strip_sequences)
from .unicode_versions import list_versions
-if TYPE_CHECKING: # pragma: no cover
- # std imports
- from collections.abc import Iterator
-
- from typing import Literal
-
-# Pre-compute table references for the latest (and only) Unicode version.
-_LATEST_VERSION = list_versions()[-1]
-_ZERO_WIDTH_TABLE = ZERO_WIDTH[_LATEST_VERSION]
-_WIDE_EASTASIAN_TABLE = WIDE_EASTASIAN[_LATEST_VERSION]
-_AMBIGUOUS_TABLE = AMBIGUOUS_EASTASIAN[next(iter(AMBIGUOUS_EASTASIAN))]
-_CATEGORY_MC_TABLE = CATEGORY_MC[_LATEST_VERSION]
-_REGIONAL_INDICATOR_SET = frozenset(
- range(GRAPHEME_REGIONAL_INDICATOR[0][0], GRAPHEME_REGIONAL_INDICATOR[0][1] + 1)
-)
-_EMOJI_ZWJ_SET = frozenset(
- cp for lo, hi in EXTENDED_PICTOGRAPHIC for cp in range(lo, hi + 1)
-) | _REGIONAL_INDICATOR_SET
-_FITZPATRICK_RANGE = (0x1F3FB, 0x1F3FF)
-# Indic_Syllabic_Category=Virama codepoints, from IndicSyllabicCategory.txt.
-# These are structurally tied to their scripts and not expected to change.
-# https://www.unicode.org/Public/UCD/latest/ucd/IndicSyllabicCategory.txt
-_ISC_VIRAMA_SET = frozenset((
- 0x094D, # DEVANAGARI SIGN VIRAMA
- 0x09CD, # BENGALI SIGN VIRAMA
- 0x0A4D, # GURMUKHI SIGN VIRAMA
- 0x0ACD, # GUJARATI SIGN VIRAMA
- 0x0B4D, # ORIYA SIGN VIRAMA
- 0x0BCD, # TAMIL SIGN VIRAMA
- 0x0C4D, # TELUGU SIGN VIRAMA
- 0x0CCD, # KANNADA SIGN VIRAMA
- 0x0D4D, # MALAYALAM SIGN VIRAMA
- 0x0DCA, # SINHALA SIGN AL-LAKUNA
- 0x1B44, # BALINESE ADEG ADEG
- 0xA806, # SYLOTI NAGRI SIGN HASANTA
- 0xA8C4, # SAURASHTRA SIGN VIRAMA
- 0xA9C0, # JAVANESE PANGKON
- 0x11046, # BRAHMI VIRAMA
- 0x110B9, # KAITHI SIGN VIRAMA
- 0x111C0, # SHARADA SIGN VIRAMA
- 0x11235, # KHOJKI SIGN VIRAMA
- 0x1134D, # GRANTHA SIGN VIRAMA
- 0x11442, # NEWA SIGN VIRAMA
- 0x114C2, # TIRHUTA SIGN VIRAMA
- 0x115BF, # SIDDHAM SIGN VIRAMA
- 0x1163F, # MODI SIGN VIRAMA
- 0x116B6, # TAKRI SIGN VIRAMA
- 0x11839, # DOGRA SIGN VIRAMA
- 0x119E0, # NANDINAGARI SIGN VIRAMA
- 0x11C3F, # BHAIKSUKI SIGN VIRAMA
-))
_ISC_CONSONANT_TABLE = ISC_CONSONANT
-# In 'parse' mode, strings longer than this are checked for cursor-movement
-# controls (BS, TAB, CR, cursor sequences); when absent, mode downgrades to
-# 'ignore' to skip character-by-character parsing. The detection scan cost is
-# negligible for long strings but wasted on short ones like labels or headings.
-_WIDTH_FAST_PATH_MIN_LEN = 20
-
-# Translation table to strip C0/C1 control characters for fast 'ignore' mode.
-_CONTROL_CHAR_TABLE = str.maketrans('', '', (
- ''.join(chr(c) for c in range(0x00, 0x20)) + # C0: NUL through US (including tab)
- '\x7f' + # DEL
- ''.join(chr(c) for c in range(0x80, 0xa0)) # C1: U+0080-U+009F
-))
-
-# Unlike wcwidth.__all__, wcwidth.wcwidth.__all__ is NOT for the purpose of defining a public API,
-# or what we prefer to be imported with statement, "from wcwidth.wcwidth import *". Explicitly
-# re-export imports here for no other reason than to satisfy the type checkers (mypy). Yak shavings.
__all__ = (
'ZERO_WIDTH',
'WIDE_EASTASIAN',
@@ -176,855 +67,3 @@ __all__ = (
'_wcmatch_version',
'_wcversion_value',
)
-
-
-# maxsize=1024: western scripts need ~64 unique codepoints per session, but
-# CJK sessions may use ~2000 of ~3500 common hanzi/kanji. 1024 accommodates
-# heavy CJK use. Performance floor at 32; bisearch is ~100ns per miss.
-
-@lru_cache(maxsize=1024)
-def wcwidth(wc: str, unicode_version: str = 'auto', ambiguous_width: int = 1) -> int: # pylint: disable=unused-argument
- r"""
- Given one Unicode codepoint, return its printable length on a terminal.
-
- :param wc: A single Unicode character.
- :param unicode_version: Ignored. Retained for backwards compatibility.
-
- .. deprecated:: 0.3.0
- Only the latest Unicode version is now shipped.
-
- :param ambiguous_width: Width to use for East Asian Ambiguous (A)
- characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts
- where ambiguous characters display as double-width. See
- :ref:`ambiguous_width` for details.
- :returns: The width, in cells, necessary to display the character of
- Unicode string character, ``wc``. Returns 0 if the ``wc`` argument has
- no printable effect on a terminal (such as NUL '\0'), -1 if ``wc`` is
- not printable, or has an indeterminate effect on the terminal, such as
- a control character. Otherwise, the number of column positions the
- character occupies on a graphic terminal (1 or 2) is returned.
-
- See :ref:`Specification` for details of cell measurement.
- """
- ucs = ord(wc) if wc else 0
-
- # small optimization: early return of 1 for printable ASCII, this provides
- # approximately 40% performance improvement for mostly-ascii documents, with
- # less than 1% impact to others.
- if 32 <= ucs < 0x7f:
- return 1
-
- # C0/C1 control characters are -1 for compatibility with POSIX-like calls
- if ucs and ucs < 32 or 0x07F <= ucs < 0x0A0:
- return -1
-
- # Zero width
- if _bisearch(ucs, _ZERO_WIDTH_TABLE):
- return 0
-
- # Wide (F/W categories)
- if _bisearch(ucs, _WIDE_EASTASIAN_TABLE):
- return 2
-
- # Ambiguous width (A category) - only when ambiguous_width=2
- if ambiguous_width == 2 and _bisearch(ucs, _AMBIGUOUS_TABLE):
- return 2
-
- return 1
-
-
-def wcswidth(
- pwcs: str,
- n: int | None = None,
- unicode_version: str = 'auto',
- ambiguous_width: int = 1,
-) -> int:
- """
- Given a unicode string, return its printable length on a terminal.
-
- :param pwcs: Measure width of given unicode string.
- :param n: When ``n`` is None (default), return the length of the entire
- string, otherwise only the first ``n`` characters are measured.
-
- Better to use string slicing capability, ``wcswidth(pwcs[:n])``, instead,
- for performance. This argument is a holdover from the POSIX function for
- matching signatures. Be careful that ``n`` is at grapheme boundaries.
-
- :param unicode_version: Ignored. Retained for backwards compatibility.
-
- .. deprecated:: 0.3.0
- Only the latest Unicode version is now shipped.
-
- :param ambiguous_width: Width to use for East Asian Ambiguous (A)
- characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
- :returns: The width, in cells, needed to display the first ``n`` characters
- of the unicode string ``pwcs``. Returns ``-1`` for C0 and C1 control
- characters!
-
- See :ref:`Specification` for details of cell measurement.
- """
- # pylint: disable=unused-argument,too-many-locals,too-many-statements
- # pylint: disable=too-complex,too-many-branches
- # This function intentionally kept long without delegating functions to reduce function calls in
- # "hot path", the overhead per-character adds up.
-
- # Fast path: pure ASCII printable strings are always width == length
- if n is None and pwcs.isascii() and pwcs.isprintable():
- return len(pwcs)
-
- # Select wcwidth call pattern for best lru_cache performance:
- # - ambiguous_width=1 (default): single-arg calls share cache with direct wcwidth() calls
- # - ambiguous_width=2: full positional args needed (results differ, separate cache is correct)
- _wcwidth = wcwidth if ambiguous_width == 1 else lambda c: wcwidth(c, 'auto', ambiguous_width)
-
- end = len(pwcs) if n is None else n
- total_width = 0
- idx = 0
- last_measured_idx = -2 # Track index of last measured char for VS16
- last_measured_ucs = -1 # Codepoint of last measured char (for deferred emoji check)
- last_was_virama = False # Virama conjunct formation state
- conjunct_pending = False # Deferred +1 for bare conjuncts (no trailing Mc)
- while idx < end:
- char = pwcs[idx]
- ucs = ord(char)
- if ucs == 0x200D:
- if last_was_virama:
- # ZWJ after virama requests explicit half-form rendering but
- # does not change cell count — consume ZWJ only, let the next
- # consonant be handled by the virama conjunct rule.
- idx += 1
- elif idx + 1 < end:
- # Emoji ZWJ: skip next character unconditionally.
- idx += 2
- last_was_virama = False
- else:
- idx += 1
- last_was_virama = False
- continue
- if ucs == 0xFE0F and last_measured_idx >= 0:
- # VS16 following a measured character: add 1 if that character is
- # known to be converted from narrow to wide by VS16.
- total_width += _bisearch(ord(pwcs[last_measured_idx]),
- VS16_NARROW_TO_WIDE["9.0.0"])
- last_measured_idx = -2 # Prevent double application
- # VS16 preserves emoji context: last_measured_ucs stays as the base
- idx += 1
- continue
- # Regional Indicator & Fitzpatrick: both above BMP (U+1F1E6+)
- if ucs > 0xFFFF:
- if ucs in _REGIONAL_INDICATOR_SET:
- # Lazy RI pairing: count preceding consecutive RIs only when the last one is
- # received, because RI's are received so rarely its better than per-loop tracking of
- # 'last char was an RI'.
- ri_before = 0
- j = idx - 1
- while j >= 0 and ord(pwcs[j]) in _REGIONAL_INDICATOR_SET:
- ri_before += 1
- j -= 1
- if ri_before % 2 == 1:
- # Second RI in pair: contributes 0 (pair = one 2-cell flag) using an even-or-odd
- # check to determine, 'CAUS' would be two flags, but 'CAU' would be 1 flag
- # and wide 'U'.
- idx += 1
- last_measured_ucs = ucs
- continue
- # First or unpaired RI: measured normally (width 2 from table)
- # Fitzpatrick modifier: zero-width when following emoji base
- elif (_FITZPATRICK_RANGE[0] <= ucs <= _FITZPATRICK_RANGE[1]
- and last_measured_ucs in _EMOJI_ZWJ_SET):
- idx += 1
- continue
- # Virama conjunct formation: consonant following virama contributes 0 width.
- # See https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category
- if last_was_virama and _bisearch(ucs, _ISC_CONSONANT_TABLE):
- last_measured_idx = idx
- last_measured_ucs = ucs
- last_was_virama = False
- conjunct_pending = True
- idx += 1
- continue
- wcw = _wcwidth(char)
- if wcw < 0:
- # early return -1 on C0 and C1 control characters
- return wcw
- if wcw > 0:
- if conjunct_pending:
- total_width += 1
- conjunct_pending = False
- last_measured_idx = idx
- last_measured_ucs = ucs
- last_was_virama = False
- elif last_measured_idx >= 0 and _bisearch(ucs, _CATEGORY_MC_TABLE):
- # Spacing Combining Mark (Mc) following a base character adds 1
- wcw = 1
- last_measured_idx = -2
- last_was_virama = False
- conjunct_pending = False
- else:
- last_was_virama = ucs in _ISC_VIRAMA_SET
- total_width += wcw
- idx += 1
- if conjunct_pending:
- total_width += 1
- return total_width
-
-
-# NOTE: _wcversion_value and _wcmatch_version are no longer used internally
-# by wcwidth since version 0.5.0 (only the latest Unicode version is shipped).
-#
-# They are retained for API compatibility with external tools like ucs-detect
-# that may use these private functions.
-
-
-@lru_cache(maxsize=128)
-def _wcversion_value(ver_string: str) -> tuple[int, ...]: # pragma: no cover
- """
- Integer-mapped value of given dotted version string.
-
- .. deprecated:: 0.3.0
-
- This function is no longer used internally by wcwidth but is retained
- for API compatibility with external tools.
-
- :param ver_string: Unicode version string, of form ``n.n.n``.
- :returns: tuple of digit tuples, ``tuple(int, [...])``.
- """
- retval = tuple(map(int, (ver_string.split('.'))))
- return retval
-
-
-@lru_cache(maxsize=8)
-def _wcmatch_version(given_version: str) -> str: # pylint: disable=unused-argument
- """
- Return the supported Unicode version level.
-
- .. deprecated:: 0.3.0
- This function now always returns the latest version.
-
- This function is no longer used internally by wcwidth but is retained
- for API compatibility with external tools.
-
- :param given_version: Ignored. Any value is accepted for compatibility.
- :returns: The latest unicode version string.
- """
- return _LATEST_VERSION
-
-
-def iter_sequences(text: str) -> Iterator[tuple[str, bool]]:
- r"""
- Iterate through text, yielding segments with sequence identification.
-
- This generator yields tuples of ``(segment, is_sequence)`` for each part
- of the input text, where ``is_sequence`` is ``True`` if the segment is
- a recognized terminal escape sequence.
-
- :param text: String to iterate through.
- :returns: Iterator of (segment, is_sequence) tuples.
-
- .. versionadded:: 0.3.0
-
- Example::
-
- >>> list(iter_sequences('hello'))
- [('hello', False)]
- >>> list(iter_sequences('\x1b[31mred'))
- [('\x1b[31m', True), ('red', False)]
- >>> list(iter_sequences('\x1b[1m\x1b[31m'))
- [('\x1b[1m', True), ('\x1b[31m', True)]
- """
- idx = 0
- text_len = len(text)
- segment_start = 0
-
- while idx < text_len:
- char = text[idx]
-
- if char == '\x1b':
- # Yield any accumulated non-sequence text
- if idx > segment_start:
- yield (text[segment_start:idx], False)
-
- # Try to match an escape sequence
- match = ZERO_WIDTH_PATTERN.match(text, idx)
- if match:
- yield (match.group(), True)
- idx = match.end()
- else:
- # Lone ESC or unrecognized - yield as sequence anyway
- yield (char, True)
- idx += 1
- segment_start = idx
- else:
- idx += 1
-
- # Yield any remaining text
- if segment_start < text_len:
- yield (text[segment_start:], False)
-
-
-def _width_ignored_codes(text: str, ambiguous_width: int = 1) -> int:
- """
- Fast path for width() with control_codes='ignore'.
-
- Strips escape sequences and control characters, then measures remaining text.
- """
- return wcswidth(
- strip_sequences(text).translate(_CONTROL_CHAR_TABLE),
- ambiguous_width=ambiguous_width
- )
-
-
-def width(
- text: str,
- *,
- control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
- tabsize: int = 8,
- ambiguous_width: int = 1,
-) -> int:
- r"""
- Return printable width of text containing many kinds of control codes and sequences.
-
- Unlike :func:`wcswidth`, this function handles most control characters and many popular terminal
- output sequences. Never returns -1.
-
- :param text: String to measure.
- :param control_codes: How to handle control characters and sequences:
-
- - ``'parse'`` (default): Track horizontal cursor movement from BS ``\b``, CR ``\r``, TAB
- ``\t``, and cursor left and right movement sequences. Vertical movement (LF, VT, FF) and
- indeterminate sequences are zero-width. Never raises.
- - ``'strict'``: Like parse, but raises :exc:`ValueError` on control characters with
- indeterminate results of the screen or cursor, like clear or vertical movement. Generally,
- these should be handled with a virtual terminal emulator (like 'pyte').
- - ``'ignore'``: All C0 and C1 control characters and escape sequences are measured as
- width 0. This is the fastest measurement for text already filtered or known not to contain
- any kinds of control codes or sequences. TAB ``\t`` is zero-width; for tab expansion,
- pre-process: ``text.replace('\t', ' ' * 8)``.
-
- :param tabsize: Tab stop width for ``'parse'`` and ``'strict'`` modes. Default is 8.
- Must be positive. Has no effect when ``control_codes='ignore'``.
- :param ambiguous_width: Width to use for East Asian Ambiguous (A)
- characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
- :returns: Maximum cursor position reached, "extent", accounting for cursor movement sequences
- present in ``text`` according to given parameters. This represents the rightmost column the
- cursor reaches. Always a non-negative integer.
-
- :raises ValueError: If ``control_codes='strict'`` and control characters with indeterminate
- effects, such as vertical movement or clear sequences are encountered, or on unexpected
- C0 or C1 control code. Also raised when ``control_codes`` is not one of the valid values.
-
- .. versionadded:: 0.3.0
-
- Examples::
-
- >>> width('hello')
- 5
- >>> width('コンニチハ')
- 10
- >>> width('\x1b[31mred\x1b[0m')
- 3
- >>> width('\x1b[31mred\x1b[0m', control_codes='ignore') # same result (ignored)
- 3
- >>> width('123\b4') # backspace overwrites previous cell (outputs '124')
- 3
- >>> width('abc\t') # tab caused cursor to move to column 8
- 8
- >>> width('1\x1b[10C') # '1' + cursor right 10, cursor ends on column 11
- 11
- >>> width('1\x1b[10C', control_codes='ignore') # faster but wrong in this case
- 1
- """
- # pylint: disable=too-complex,too-many-branches,too-many-statements,too-many-locals
- # This could be broken into sub-functions (#1, #3, and 6 especially), but for reduced overhead
- # considering this function is a likely "hot path", they are inlined, breaking many of our
- # complexity rules.
-
- # Fast path for ASCII printable (no tabs, escapes, or control chars)
- if text.isascii() and text.isprintable():
- return len(text)
-
- # Fast parse: if no horizontal cursor movements are possible, switch to 'ignore' mode.
- # Only check for longer strings - the detection overhead hurts short string performance.
- if control_codes == 'parse' and len(text) > _WIDTH_FAST_PATH_MIN_LEN:
- # Check for cursor-affecting control characters
- if '\b' not in text and '\t' not in text and '\r' not in text:
- # Check for escape sequences - if none, or only non-cursor-movement sequences
- if '\x1b' not in text or (
- not CURSOR_RIGHT_SEQUENCE.search(text) and
- not CURSOR_LEFT_SEQUENCE.search(text)
- ):
- control_codes = 'ignore'
-
- # Fast path for ignore mode -- this is useful if you know the text is already "clean"
- if control_codes == 'ignore':
- return _width_ignored_codes(text, ambiguous_width)
-
- strict = control_codes == 'strict'
- # Track absolute positions: tab stops need modulo on absolute column, CR resets to 0.
- # Initialize max_extent to 0 so backward movement (CR, BS) won't yield negative width.
- current_col = 0
- max_extent = 0
- idx = 0
- last_measured_idx = -2 # Track index of last measured char for VS16; -2 can never match idx-1
- last_measured_ucs = -1 # Codepoint of last measured char (for deferred emoji check)
- last_was_virama = False # Virama conjunct formation state
- conjunct_pending = False # Deferred +1 for bare conjuncts (no trailing Mc)
- text_len = len(text)
-
- # Select wcwidth call pattern for best lru_cache performance:
- # - ambiguous_width=1 (default): single-arg calls share cache with direct wcwidth() calls
- # - ambiguous_width=2: full positional args needed (results differ, separate cache is correct)
- _wcwidth = wcwidth if ambiguous_width == 1 else lambda c: wcwidth(c, 'auto', ambiguous_width)
-
- while idx < text_len:
- char = text[idx]
-
- # 1. Handle ESC sequences
- if char == '\x1b':
- match = ZERO_WIDTH_PATTERN.match(text, idx)
- if match:
- seq = match.group()
- if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
- raise ValueError(f"Indeterminate cursor sequence at position {idx}")
- # Apply cursor movement
- right = CURSOR_RIGHT_SEQUENCE.match(seq)
- if right:
- current_col += int(right.group(1) or 1)
- else:
- left = CURSOR_LEFT_SEQUENCE.match(seq)
- if left:
- current_col = max(0, current_col - int(left.group(1) or 1))
- idx = match.end()
- else:
- idx += 1
- max_extent = max(max_extent, current_col)
- continue
-
- # 2. Handle illegal and vertical control characters (zero width, error in strict)
- if char in ILLEGAL_CTRL:
- if strict:
- raise ValueError(f"Illegal control character {ord(char):#x} at position {idx}")
- idx += 1
- continue
-
- if char in VERTICAL_CTRL:
- if strict:
- raise ValueError(f"Vertical movement character {ord(char):#x} at position {idx}")
- idx += 1
- continue
-
- # 3. Handle horizontal movement characters
- if char in HORIZONTAL_CTRL:
- if char == '\x09' and tabsize > 0: # Tab
- current_col += tabsize - (current_col % tabsize)
- elif char == '\x08': # Backspace
- if current_col > 0:
- current_col -= 1
- elif char == '\x0d': # Carriage return
- current_col = 0
- max_extent = max(max_extent, current_col)
- idx += 1
- continue
-
- # 4. Handle ZWJ
- if char == '\u200D':
- if last_was_virama:
- # ZWJ after virama requests explicit half-form rendering but
- # does not change cell count — consume ZWJ only, let the next
- # consonant be handled by the virama conjunct rule.
- idx += 1
- elif idx + 1 < text_len:
- # Emoji ZWJ: skip next character unconditionally.
- idx += 2
- last_was_virama = False
- else:
- idx += 1
- last_was_virama = False
- continue
-
- # 5. Handle other zero-width characters (control chars)
- if char in ZERO_WIDTH_CTRL:
- idx += 1
- continue
-
- ucs = ord(char)
-
- # 6. Handle VS16: converts preceding narrow character to wide
- if ucs == 0xFE0F:
- if last_measured_idx == idx - 1:
- if _bisearch(ord(text[last_measured_idx]), VS16_NARROW_TO_WIDE["9.0.0"]):
- current_col += 1
- max_extent = max(max_extent, current_col)
- # VS16 preserves emoji context: last_measured_ucs stays as the base
- idx += 1
- continue
-
- # 6b. Regional Indicator & Fitzpatrick: both above BMP (U+1F1E6+)
- if ucs > 0xFFFF:
- if ucs in _REGIONAL_INDICATOR_SET:
- # Lazy RI pairing: count preceding consecutive RIs
- ri_before = 0
- j = idx - 1
- while j >= 0 and ord(text[j]) in _REGIONAL_INDICATOR_SET:
- ri_before += 1
- j -= 1
- if ri_before % 2 == 1:
- last_measured_ucs = ucs
- idx += 1
- continue
- # 6c. Fitzpatrick modifier: zero-width when following emoji base
- elif (_FITZPATRICK_RANGE[0] <= ucs <= _FITZPATRICK_RANGE[1]
- and last_measured_ucs in _EMOJI_ZWJ_SET):
- idx += 1
- continue
-
- # 7. Virama conjunct formation: consonant following virama contributes 0 width.
- # See https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category
- if last_was_virama and _bisearch(ucs, _ISC_CONSONANT_TABLE):
- last_measured_idx = idx
- last_measured_ucs = ucs
- last_was_virama = False
- conjunct_pending = True
- idx += 1
- continue
-
- # 8. Normal characters: measure with wcwidth
- w = _wcwidth(char)
- if w > 0:
- if conjunct_pending:
- current_col += 1
- conjunct_pending = False
- current_col += w
- max_extent = max(max_extent, current_col)
- last_measured_idx = idx
- last_measured_ucs = ucs
- last_was_virama = False
- elif last_measured_idx >= 0 and _bisearch(ucs, _CATEGORY_MC_TABLE):
- # Spacing Combining Mark (Mc) following a base character adds 1
- current_col += 1
- max_extent = max(max_extent, current_col)
- last_measured_idx = -2
- last_was_virama = False
- conjunct_pending = False
- else:
- last_was_virama = ucs in _ISC_VIRAMA_SET
- idx += 1
-
- if conjunct_pending:
- current_col += 1
- max_extent = max(max_extent, current_col)
- return max_extent
-
-
-def ljust(
- text: str,
- dest_width: int,
- fillchar: str = ' ',
- *,
- control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
- ambiguous_width: int = 1,
-) -> str:
- r"""
- Return text left-justified in a string of given display width.
-
- :param text: String to justify, may contain terminal sequences.
- :param dest_width: Total display width of result in terminal cells.
- :param fillchar: Single character for padding (default space). Must have
- display width of 1 (not wide, not zero-width, not combining). Unicode
- characters like ``'·'`` are acceptable. The width is not validated.
- :param control_codes: How to handle control sequences when measuring.
- Passed to :func:`width` for measurement.
- :param ambiguous_width: Width to use for East Asian Ambiguous (A)
- characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
- :returns: Text padded on the right to reach ``dest_width``.
-
- .. versionadded:: 0.3.0
-
- Example::
-
- >>> wcwidth.ljust('hi', 5)
- 'hi '
- >>> wcwidth.ljust('\x1b[31mhi\x1b[0m', 5)
- '\x1b[31mhi\x1b[0m '
- >>> wcwidth.ljust('\U0001F468\u200D\U0001F469\u200D\U0001F467', 6)
- '👨‍👩‍👧 '
- """
- if text.isascii() and text.isprintable():
- text_width = len(text)
- else:
- text_width = width(text, control_codes=control_codes, ambiguous_width=ambiguous_width)
- padding_cells = max(0, dest_width - text_width)
- return text + fillchar * padding_cells
-
-
-def rjust(
- text: str,
- dest_width: int,
- fillchar: str = ' ',
- *,
- control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
- ambiguous_width: int = 1,
-) -> str:
- r"""
- Return text right-justified in a string of given display width.
-
- :param text: String to justify, may contain terminal sequences.
- :param dest_width: Total display width of result in terminal cells.
- :param fillchar: Single character for padding (default space). Must have
- display width of 1 (not wide, not zero-width, not combining). Unicode
- characters like ``'·'`` are acceptable. The width is not validated.
- :param control_codes: How to handle control sequences when measuring.
- Passed to :func:`width` for measurement.
- :param ambiguous_width: Width to use for East Asian Ambiguous (A)
- characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
- :returns: Text padded on the left to reach ``dest_width``.
-
- .. versionadded:: 0.3.0
-
- Example::
-
- >>> wcwidth.rjust('hi', 5)
- ' hi'
- >>> wcwidth.rjust('\x1b[31mhi\x1b[0m', 5)
- ' \x1b[31mhi\x1b[0m'
- >>> wcwidth.rjust('\U0001F468\u200D\U0001F469\u200D\U0001F467', 6)
- ' 👨‍👩‍👧'
- """
- if text.isascii() and text.isprintable():
- text_width = len(text)
- else:
- text_width = width(text, control_codes=control_codes, ambiguous_width=ambiguous_width)
- padding_cells = max(0, dest_width - text_width)
- return fillchar * padding_cells + text
-
-
-def center(
- text: str,
- dest_width: int,
- fillchar: str = ' ',
- *,
- control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
- ambiguous_width: int = 1,
-) -> str:
- r"""
- Return text centered in a string of given display width.
-
- :param text: String to center, may contain terminal sequences.
- :param dest_width: Total display width of result in terminal cells.
- :param fillchar: Single character for padding (default space). Must have
- display width of 1 (not wide, not zero-width, not combining). Unicode
- characters like ``'·'`` are acceptable. The width is not validated.
- :param control_codes: How to handle control sequences when measuring.
- Passed to :func:`width` for measurement.
- :param ambiguous_width: Width to use for East Asian Ambiguous (A)
- characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
- :returns: Text padded on both sides to reach ``dest_width``.
-
- For odd-width padding, the extra cell goes on the right (matching
- Python's :meth:`str.center` behavior).
-
- .. versionadded:: 0.3.0
-
- Example::
-
- >>> wcwidth.center('hi', 6)
- ' hi '
- >>> wcwidth.center('\x1b[31mhi\x1b[0m', 6)
- ' \x1b[31mhi\x1b[0m '
- >>> wcwidth.center('\U0001F468\u200D\U0001F469\u200D\U0001F467', 6)
- ' 👨‍👩‍👧 '
- """
- if text.isascii() and text.isprintable():
- text_width = len(text)
- else:
- text_width = width(text, control_codes=control_codes, ambiguous_width=ambiguous_width)
- total_padding = max(0, dest_width - text_width)
- # matching https://jazcap53.github.io/pythons-eccentric-strcenter.html
- left_pad = total_padding // 2 + (total_padding & dest_width & 1)
- right_pad = total_padding - left_pad
- return fillchar * left_pad + text + fillchar * right_pad
-
-
-def strip_sequences(text: str) -> str:
- r"""
- Return text with all terminal escape sequences removed.
-
- Unknown or incomplete ESC sequences are preserved.
-
- :param text: String that may contain terminal escape sequences.
- :returns: The input text with all escape sequences stripped.
-
- .. versionadded:: 0.3.0
-
- Example::
-
- >>> strip_sequences('\x1b[31mred\x1b[0m')
- 'red'
- >>> strip_sequences('hello')
- 'hello'
- >>> strip_sequences('\x1b[1m\x1b[31mbold red\x1b[0m text')
- 'bold red text'
- """
- return ZERO_WIDTH_PATTERN.sub('', text)
-
-
-def clip(
- text: str,
- start: int,
- end: int,
- *,
- fillchar: str = ' ',
- tabsize: int = 8,
- ambiguous_width: int = 1,
- propagate_sgr: bool = True,
-) -> str:
- r"""
- Clip text to display columns ``(start, end)`` while preserving all terminal sequences.
-
- This function extracts a substring based on visible column positions rather than
- character indices. Terminal escape sequences are preserved in the output since
- they have zero display width. If a wide character (width 2) would be split at
- either boundary, it is replaced with ``fillchar``.
-
- TAB characters (``\t``) are expanded to spaces up to the next tab stop,
- controlled by the ``tabsize`` parameter.
-
- Other cursor movement characters (backspace, carriage return) and cursor
- movement sequences are passed through unchanged as zero-width.
-
- :param text: String to clip, may contain terminal escape sequences.
- :param start: Absolute starting column (inclusive, 0-indexed).
- :param end: Absolute ending column (exclusive).
- :param fillchar: Character to use when a wide character must be split at
- a boundary (default space). Must have display width of 1.
- :param tabsize: Tab stop width (default 8). Set to 0 to pass tabs through
- as zero-width (preserved in output but don't advance column position).
- :param ambiguous_width: Width to use for East Asian Ambiguous (A)
- characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
- :param propagate_sgr: If True (default), SGR (terminal styling) sequences
- are propagated. The result begins with any active style at the start
- position and ends with a reset sequence if styles are active.
- :returns: Substring of ``text`` spanning display columns ``(start, end)``,
- with all terminal sequences preserved and wide characters at boundaries
- replaced with ``fillchar``.
-
- SGR (terminal styling) sequences are propagated by default. The result
- begins with any active style and ends with a reset::
-
- >>> clip('\x1b[1;34mHello world\x1b[0m', 6, 11)
- '\x1b[1;34mworld\x1b[0m'
-
- Set ``propagate_sgr=False`` to disable this behavior.
-
- .. versionadded:: 0.3.0
-
- .. versionchanged:: 0.5.0
- Added ``propagate_sgr`` parameter (default True).
-
- Example::
-
- >>> clip('hello world', 0, 5)
- 'hello'
- >>> clip('中文字', 0, 3) # Wide char split at column 3
- '中 '
- >>> clip('a\tb', 0, 10) # Tab expanded to spaces
- 'a b'
- """
- # pylint: disable=too-complex,too-many-locals,too-many-branches,too-many-statements,too-many-nested-blocks
- # This is a hot path: the whole scan stays in one function, with no delegate helpers,
- # accepting the extra complexity in exchange for better Python performance.
- start = max(start, 0)  # a negative start is clamped to column 0
- if end <= start:  # empty or inverted window
- return ''
-
- # Fast path: printable ASCII only (no tabs, escape sequences, or wide or zero-width chars)
- if text.isascii() and text.isprintable():
- return text[start:end]  # one character is one column here, so a plain slice is exact
-
- # Fast path: no escape sequences means no SGR tracking needed
- if propagate_sgr and '\x1b' not in text:
- propagate_sgr = False
-
- # SGR tracking state (only when propagate_sgr=True)
- sgr_at_clip_start = None # state when first visible cell emitted (None = not yet; stays None when propagate_sgr is off)
- if propagate_sgr:
- sgr = _SGR_STATE_DEFAULT # running SGR state, updated by each SGR sequence; bound and read only under propagate_sgr
-
- output: list[str] = []
- col = 0
- idx = 0
-
- while idx < len(text):
- char = text[idx]
-
- # Early exit: past the visible region, SGR captured, current char is not ESC (never taken when propagate_sgr is off)
- if col >= end and sgr_at_clip_start is not None and char != '\x1b':
- break  # the rest of text, including any later escape sequences, is not examined
-
- # Handle escape sequences
- if char == '\x1b' and (match := ZERO_WIDTH_PATTERN.match(text, idx)):
- seq = match.group()
- if propagate_sgr and _SGR_PATTERN.match(seq):
- # Update SGR state; will be applied as prefix when visible content starts
- sgr = _sgr_state_update(sgr, seq)  # NOTE(review): seq is not appended; once sgr_at_clip_start is set, later updates are never read - confirm intended
- else:
- # Non-SGR sequences always preserved
- output.append(seq)
- idx = match.end()  # resume right after the matched sequence
- continue
-
- # Handle bare ESC (not a valid sequence)
- if char == '\x1b':
- output.append(char)
- idx += 1
- continue
-
- # TAB expansion
- if char == '\t':
- if tabsize > 0:
- next_tab = col + (tabsize - (col % tabsize))  # column of the next tab stop past col
- while col < next_tab:  # one cell at a time, so only cells inside the window emit a space
- if start <= col < end:
- output.append(' ')
- if propagate_sgr and sgr_at_clip_start is None:
- sgr_at_clip_start = sgr
- col += 1
- else:
- output.append(char)  # tabsize <= 0: the tab is kept verbatim and col is not advanced
- idx += 1
- continue
-
- # Grapheme clustering for everything else
- grapheme = next(iter_graphemes(text, start=idx))  # only the first cluster yielded from idx is consumed
- w = width(grapheme, ambiguous_width=ambiguous_width)
-
- if w == 0:  # zero-width cluster: never advances col
- if start <= col < end:  # kept only while the cursor is inside the window
- output.append(grapheme)
- elif col >= start and col + w <= end:
- # Fully visible
- output.append(grapheme)
- if propagate_sgr and sgr_at_clip_start is None:
- sgr_at_clip_start = sgr
- col += w
- elif col < end and col + w > start:
- # Partially visible (wide char at boundary): one fillchar per cell that overlaps the window
- output.append(fillchar * (min(end, col + w) - max(start, col)))
- if propagate_sgr and sgr_at_clip_start is None:
- sgr_at_clip_start = sgr
- col += w
- else:  # entirely outside the window: only the column advances
- col += w
-
- idx += len(grapheme)  # a cluster may span several code points
-
- result = ''.join(output)
-
- # Apply SGR prefix/suffix (skipped when propagate_sgr is off or no visible cell was emitted)
- if sgr_at_clip_start is not None:
- if prefix := _sgr_state_to_sequence(sgr_at_clip_start):
- result = prefix + result
- if _sgr_state_is_active(sgr_at_clip_start):  # decided from the state at clip start, not the final state
- result += '\x1b[0m'
-
- return result
diff --git a/contrib/python/wcwidth/py3/ya.make b/contrib/python/wcwidth/py3/ya.make
index c50d8fe4296..510c3f49de0 100644
--- a/contrib/python/wcwidth/py3/ya.make
+++ b/contrib/python/wcwidth/py3/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(0.6.0)
+VERSION(0.7.0)
LICENSE(MIT)
@@ -11,10 +11,17 @@ NO_LINT()
PY_SRCS(
TOP_LEVEL
wcwidth/__init__.py
+ wcwidth/_clip.py
+ wcwidth/_constants.py
+ wcwidth/_wcswidth.py
+ wcwidth/_wcwidth.py
+ wcwidth/_width.py
+ wcwidth/align.py
wcwidth/bisearch.py
wcwidth/control_codes.py
wcwidth/escape_sequences.py
wcwidth/grapheme.py
+ wcwidth/hyperlink.py
wcwidth/sgr_state.py
wcwidth/table_ambiguous.py
wcwidth/table_grapheme.py
@@ -22,6 +29,7 @@ PY_SRCS(
wcwidth/table_vs16.py
wcwidth/table_wide.py
wcwidth/table_zero.py
+ wcwidth/text_sizing.py
wcwidth/textwrap.py
wcwidth/unicode_versions.py
wcwidth/wcwidth.py