diff options
| author | robot-piglet <[email protected]> | 2026-02-11 10:55:54 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2026-02-11 11:24:08 +0300 |
| commit | 9b5f29efa00bba424cd32471a95ececc583fe046 (patch) | |
| tree | 58936c6fc2147c49fc2a4aec657f63fb6f789336 | |
| parent | df75a44af0e3c0cfce907e22f61d6c91fc3bbc39 (diff) | |
Intermediate changes
commit_hash:721c786fcb8a37574bec0881ae2194859f790fae
18 files changed, 474 insertions, 71 deletions
diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA index 9b0ea14cc8e..fefdd240c7d 100644 --- a/contrib/python/hypothesis/py3/.dist-info/METADATA +++ b/contrib/python/hypothesis/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: hypothesis -Version: 6.151.0 +Version: 6.151.2 Summary: The property-based testing library for Python Author-email: "David R. MacIver and Zac Hatfield-Dodds" <[email protected]> License-Expression: MPL-2.0 @@ -61,7 +61,7 @@ Provides-Extra: redis Requires-Dist: redis>=3.0.0; extra == "redis" Provides-Extra: crosshair Requires-Dist: hypothesis-crosshair>=0.0.27; extra == "crosshair" -Requires-Dist: crosshair-tool>=0.0.101; extra == "crosshair" +Requires-Dist: crosshair-tool>=0.0.102; extra == "crosshair" Provides-Extra: zoneinfo Requires-Dist: tzdata>=2025.3; (sys_platform == "win32" or sys_platform == "emscripten") and extra == "zoneinfo" Provides-Extra: django @@ -71,7 +71,7 @@ Requires-Dist: watchdog>=4.0.0; extra == "watchdog" Provides-Extra: all Requires-Dist: black>=20.8b0; extra == "all" Requires-Dist: click>=7.0; extra == "all" -Requires-Dist: crosshair-tool>=0.0.101; extra == "all" +Requires-Dist: crosshair-tool>=0.0.102; extra == "all" Requires-Dist: django>=4.2; extra == "all" Requires-Dist: dpcontracts>=0.4; extra == "all" Requires-Dist: hypothesis-crosshair>=0.0.27; extra == "all" diff --git a/contrib/python/hypothesis/py3/hypothesis/database.py b/contrib/python/hypothesis/py3/hypothesis/database.py index 4fbddd29021..eae2f579cb0 100644 --- a/contrib/python/hypothesis/py3/hypothesis/database.py +++ b/contrib/python/hypothesis/py3/hypothesis/database.py @@ -580,7 +580,9 @@ class DirectoryBasedExampleDatabase(ExampleDatabase): _metakeys_hash = self._metakeys_hash _broadcast_change = self._broadcast_change - class Handler(FileSystemEventHandler): + class Handler( + FileSystemEventHandler + ): # pragma: no cover # skipped in test_database.py for now def 
on_created(_self, event: FileCreatedEvent | DirCreatedEvent) -> None: # we only registered for the file creation event assert not isinstance(event, DirCreatedEvent) @@ -1274,7 +1276,7 @@ def _choices_from_bytes(buffer: bytes, /) -> tuple[ChoiceT, ...]: parts.append(bool(size)) continue if size == 0b11111: - (offset, size) = _unpack_uleb128(buffer[idx:]) + offset, size = _unpack_uleb128(buffer[idx:]) idx += offset chunk = buffer[idx : idx + size] idx += size diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py index 531797c7ab0..22a0e7ec8f7 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py @@ -9,13 +9,12 @@ # obtain one at https://mozilla.org/MPL/2.0/. import importlib -import inspect import math import threading import time from collections import defaultdict from collections.abc import Callable, Generator, Sequence -from contextlib import AbstractContextManager, contextmanager, nullcontext, suppress +from contextlib import AbstractContextManager, contextmanager, nullcontext from dataclasses import dataclass, field from datetime import timedelta from enum import Enum @@ -70,7 +69,6 @@ from hypothesis.internal.escalation import InterestingOrigin from hypothesis.internal.healthcheck import fail_health_check from hypothesis.internal.observability import Observation, with_observability_callback from hypothesis.reporting import base_report, report, verbose_report -from hypothesis.utils.deprecation import note_deprecation # In most cases, the following constants are all Final. 
However, we do allow users # to monkeypatch all of these variables, which means we cannot annotate them as @@ -241,24 +239,8 @@ class DiscardObserver(DataObserver): def realize_choices(data: ConjectureData, *, for_failure: bool) -> None: - # backwards-compatibility with backends without for_failure, can remove - # in a few months - kwargs = {} - if for_failure: - if "for_failure" in inspect.signature(data.provider.realize).parameters: - kwargs["for_failure"] = True - else: - note_deprecation( - f"{type(data.provider).__qualname__}.realize does not have the " - "for_failure parameter. This will be an error in future versions " - "of Hypothesis. (If you installed this backend from a separate " - "package, upgrading that package may help).", - has_codemod=False, - since="2025-05-07", - ) - for node in data.nodes: - value = data.provider.realize(node.value, **kwargs) + value = data.provider.realize(node.value, for_failure=for_failure) expected_type = { "string": str, "float": float, @@ -275,7 +257,7 @@ def realize_choices(data: ConjectureData, *, for_failure: bool) -> None: constraints = cast( ChoiceConstraintsT, { - k: data.provider.realize(v, **kwargs) + k: data.provider.realize(v, for_failure=for_failure) for k, v in node.constraints.items() }, ) @@ -522,12 +504,9 @@ class ConjectureRunner: self.call_count += 1 interrupted = False - try: - self.__stoppable_test_function(data) - except KeyboardInterrupt: - interrupted = True - raise - except BackendCannotProceed as exc: + def _backend_cannot_proceed( + exc: BackendCannotProceed, data: ConjectureData + ) -> None: if exc.scope in ("verified", "exhausted"): self._switch_to_hypothesis_provider = True if exc.scope == "verified": @@ -553,16 +532,35 @@ class ConjectureRunner: # But we check self.valid_examples == 0 to determine whether to raise # Unsatisfiable, and that would throw this check off. 
self.invalid_examples += 1 + data.cannot_proceed_scope = exc.scope + # this fiddly bit of control flow is to work around `return` being + # disallowed in `finally` blocks as of python 3.14. Otherwise, we would + # just return in the _backend_cannot_proceed branch. + finally_early_return = False + + try: + self.__stoppable_test_function(data) + except KeyboardInterrupt: + interrupted = True + raise + except BackendCannotProceed as exc: + _backend_cannot_proceed(exc, data) # skip the post-test-case tracking; we're pretending this never happened interrupted = True - data.cannot_proceed_scope = exc.scope data.freeze() return except BaseException: data.freeze() if self.settings.backend != "hypothesis": - realize_choices(data, for_failure=True) + try: + realize_choices(data, for_failure=True) + except BackendCannotProceed as exc: + _backend_cannot_proceed(exc, data) + # skip the post-test-case tracking; we're pretending this + # never happened + interrupted = True + return self.save_choices(data.choices) raise finally: @@ -573,22 +571,35 @@ class ConjectureRunner: data.freeze() if self.settings.backend != "hypothesis": - realize_choices(data, for_failure=data.status is Status.INTERESTING) + try: + realize_choices( + data, for_failure=data.status is Status.INTERESTING + ) + except BackendCannotProceed as exc: + _backend_cannot_proceed(exc, data) + finally_early_return = True - call_stats: CallStats = { - "status": data.status.name.lower(), - "runtime": data.finish_time - data.start_time, - "drawtime": math.fsum(data.draw_times.values()), - "gctime": data.gc_finish_time - data.gc_start_time, - "events": sorted( - k if v == "" else f"{k}: {v}" for k, v in data.events.items() - ), - } - self.stats_per_test_case.append(call_stats) + if not finally_early_return: + call_stats: CallStats = { + "status": data.status.name.lower(), + "runtime": data.finish_time - data.start_time, + "drawtime": math.fsum(data.draw_times.values()), + "gctime": data.gc_finish_time - 
data.gc_start_time, + "events": sorted( + k if v == "" else f"{k}: {v}" + for k, v in data.events.items() + ), + } + self.stats_per_test_case.append(call_stats) - self._cache(data) - if data.misaligned_at is not None: # pragma: no branch # coverage bug? - self.misaligned_count += 1 + self._cache(data) + if ( + data.misaligned_at is not None + ): # pragma: no branch # coverage bug? + self.misaligned_count += 1 + + if finally_early_return: + return self.debug_data(data) @@ -1214,8 +1225,7 @@ class ConjectureRunner: # a novel prefix, ask the backend for an input. if not self.using_hypothesis_backend: data = self.new_conjecture_data([]) - with suppress(BackendCannotProceed): - self.test_function(data) + self.test_function(data) continue self._current_phase = "generate" @@ -1409,7 +1419,7 @@ class ConjectureRunner: # case (1): duplicate the choices in start1:start2. attempt = data.choices[:start2] + data.choices[start1:] else: - (start, end) = self.random.choice([(start1, end1), (start2, end2)]) + start, end = self.random.choice([(start1, end1), (start2, end2)]) replacement = data.choices[start:end] # We attempt to replace both the examples with # whichever choice we made. Note that this might end diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py index 14064ed985e..f8e51a5d5d4 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py @@ -755,7 +755,7 @@ class HypothesisProvider(PrimitiveProvider): # split constants into two pools, so we still have a good chance to draw # global constants even if there are many local constants. 
- (global_constants, local_constants) = CONSTANTS_CACHE[key] + global_constants, local_constants = CONSTANTS_CACHE[key] constants_lists = ([global_constants] if global_constants else []) + ( [local_constants] if local_constants else [] ) diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py index ccf10bfaaa6..a6c6132bb7b 100644 --- a/contrib/python/hypothesis/py3/hypothesis/version.py +++ b/contrib/python/hypothesis/py3/hypothesis/version.py @@ -8,5 +8,5 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -__version_info__ = (6, 151, 0) +__version_info__ = (6, 151, 2) __version__ = ".".join(map(str, __version_info__)) diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make index 60d4b15be6b..422507d0d73 100644 --- a/contrib/python/hypothesis/py3/ya.make +++ b/contrib/python/hypothesis/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(6.151.0) +VERSION(6.151.2) LICENSE(MPL-2.0) diff --git a/contrib/python/wcwidth/py3/.dist-info/METADATA b/contrib/python/wcwidth/py3/.dist-info/METADATA index de002938d9d..c80ecb6d722 100644 --- a/contrib/python/wcwidth/py3/.dist-info/METADATA +++ b/contrib/python/wcwidth/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.4 Name: wcwidth -Version: 0.3.5 +Version: 0.4.0 Summary: Measures the displayed width of unicode strings in a terminal Project-URL: Homepage, https://github.com/jquast/wcwidth Author-email: Jeff Quast <[email protected]> @@ -78,8 +78,10 @@ Text-justification is solved by the grapheme and sequence-aware functions `ljust of the same names. The iterator functions `iter_graphemes()`_ and `iter_sequences()`_ allow for careful navigation of -grapheme and terminal control sequence boundaries. The `clip()`_ function extracts substrings by -display column positions, and `strip_sequences()`_ removes terminal escape sequences from text. 
+grapheme and terminal control sequence boundaries. `iter_graphemes_reverse()`_, and +`grapheme_boundary_before()`_ are useful for editing and searching of complex unicode. The +`clip()`_ function extracts substrings by display column positions, and `strip_sequences()`_ removes +terminal escape sequences from text altogether. Discrepancies ------------- @@ -472,6 +474,10 @@ languages. History ======= +0.4.0 *2026-01-25* + * **New** Functions `iter_graphemes_reverse()`_, `grapheme_boundary_before()`_. + * **Bugfix** OSC Hyperlinks should not be broken by ``wrap()`` + 0.3.5 *2026-01-24* * **Bugfix** packaging of 0.3.4 contains a failing test. @@ -690,6 +696,8 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c:: .. _`wcswidth()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.wcswidth .. _`width()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.width .. _`iter_graphemes()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_graphemes +.. _`iter_graphemes_reverse()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.iter_graphemes_reverse +.. _`grapheme_boundary_before()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.grapheme_boundary_before .. _`ljust()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.ljust .. _`rjust()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.rjust .. 
_`center()`: https://wcwidth.readthedocs.io/en/latest/api.html#wcwidth.center diff --git a/contrib/python/wcwidth/py3/tests/test_benchmarks.py b/contrib/python/wcwidth/py3/tests/test_benchmarks.py index 5c929f7ec37..6e0ffadcc45 100644 --- a/contrib/python/wcwidth/py3/tests/test_benchmarks.py +++ b/contrib/python/wcwidth/py3/tests/test_benchmarks.py @@ -1,6 +1,7 @@ """Performance benchmarks for wcwidth module.""" # std imports import os +import unicodedata # local import wcwidth @@ -54,6 +55,31 @@ def test_wcswidth_emoji_sequence(benchmark): benchmark(wcwidth.wcswidth, text) +# NFC vs NFD comparison - text with combining marks +DIACRITICS_COMPOSED = 'café résumé naïve ' * 100 +DIACRITICS_DECOMPOSED = unicodedata.normalize('NFD', DIACRITICS_COMPOSED) + + +def test_wcswidth_composed(benchmark): + """Benchmark wcswidth() with NFC-composed text.""" + benchmark(wcwidth.wcswidth, DIACRITICS_COMPOSED) + + +def test_wcswidth_decomposed(benchmark): + """Benchmark wcswidth() with NFD-decomposed text.""" + benchmark(wcwidth.wcswidth, DIACRITICS_DECOMPOSED) + + +def test_width_composed(benchmark): + """Benchmark width() with NFC-composed text.""" + benchmark(wcwidth.width, DIACRITICS_COMPOSED) + + +def test_width_decomposed(benchmark): + """Benchmark width() with NFD-decomposed text.""" + benchmark(wcwidth.width, DIACRITICS_DECOMPOSED) + + def test_width_ascii(benchmark): """Benchmark width() with ASCII string.""" benchmark(wcwidth.width, 'hello world') @@ -88,6 +114,36 @@ def test_iter_graphemes_combining(benchmark): benchmark(lambda: list(wcwidth.iter_graphemes(text))) +def test_grapheme_boundary_before_short(benchmark): + """Benchmark grapheme_boundary_before() near start of short string.""" + text = 'Hello 👋🏻!' + benchmark(wcwidth.grapheme_boundary_before, text, 8) + + +def test_grapheme_boundary_before_long_end(benchmark): + """Benchmark grapheme_boundary_before() near end of long line.""" + text = 'x' * 95 + '👨\u200d👩\u200d👧!' 
+ benchmark(wcwidth.grapheme_boundary_before, text, 100) + + +def test_grapheme_boundary_before_long_mid(benchmark): + """Benchmark grapheme_boundary_before() in middle of long line.""" + text = 'x' * 50 + '👨\u200d👩\u200d👧' + 'y' * 50 + benchmark(wcwidth.grapheme_boundary_before, text, 55) + + +def test_iter_graphemes_reverse_short(benchmark): + """Benchmark iter_graphemes_reverse() with short string.""" + text = 'café\u0301 🇫🇷!' + benchmark(lambda: list(wcwidth.iter_graphemes_reverse(text))) + + +def test_iter_graphemes_reverse_long(benchmark): + """Benchmark iter_graphemes_reverse() with long string.""" + text = 'The quick brown 🦊 jumps over the lazy 🐕. ' * 5 + benchmark(lambda: list(wcwidth.iter_graphemes_reverse(text))) + + def test_ljust_ascii(benchmark): """Benchmark ljust() with ASCII string.""" benchmark(wcwidth.ljust, 'hello', 20) diff --git a/contrib/python/wcwidth/py3/tests/test_grapheme.py b/contrib/python/wcwidth/py3/tests/test_grapheme.py index f344ad32fde..d2cfa86c1c8 100644 --- a/contrib/python/wcwidth/py3/tests/test_grapheme.py +++ b/contrib/python/wcwidth/py3/tests/test_grapheme.py @@ -6,7 +6,7 @@ import os import pytest # local -from wcwidth import iter_graphemes +from wcwidth import iter_graphemes, iter_graphemes_reverse, grapheme_boundary_before try: chr(0x2fffe) @@ -145,3 +145,112 @@ def test_wide_unicode_graphemes(input_str, expected): def test_unicode_grapheme_break_test(input_str, expected): """Validate against official Unicode GraphemeBreakTest.txt.""" assert list(iter_graphemes(input_str)) == expected + + +# Prepend: Arabic Number Sign +PREPEND_CHAR = '\u0600' +# Multiple combining marks: e + acute + grave +MULTI_COMBINE = 'e\u0301\u0300' + + +# grapheme_boundary_before(text, pos) returns start of grapheme cluster before pos. 
+# (text, pos, expected): pos=search from here, expected=where cluster starts +@pytest.mark.parametrize(("text", "pos", "expected"), [ + # 'abc': 0=a, 1=b, 2=c + ('abc', 3, 2), # from end -> 'c' at 2 + ('abc', 2, 1), # from 'c' -> 'b' at 1 + ('abc', 1, 0), # from 'b' -> 'a' at 0 + # 'a\r\nb': CRLF is one cluster (GB3) + ('a\r\nb', 3, 1), # from 'b' -> '\r\n' at 1 + # 'café': e + combining acute is one cluster (GB9) + ('cafe\u0301', 5, 3), # from end -> 'é' at 3 + ('cafe\u0301', 4, 3), # from acute -> still 'é' at 3 + # Multiple combining marks: e + acute + grave (GB9) + ('a' + MULTI_COMBINE + 'b', 4, 1), # from 'b' -> e+marks at 1 + # Prepend + char is one cluster (GB9b) + (PREPEND_CHAR + 'a', 2, 0), # whole cluster + # Prepend + Control: control breaks (GB4) + (PREPEND_CHAR + '\n', 2, 1), # '\n' separate at 1 + # C1 control (NEL, 0x85) stops backward scan in _find_cluster_start (GB4) + ('X\x85\u0301', 3, 2), +]) +def test_grapheme_boundary_before_basic(text, pos, expected): + """Basic grapheme_boundary_before tests.""" + assert grapheme_boundary_before(text, pos) == expected + + +@pytest.mark.skipif(NARROW_ONLY, reason="requires wide Unicode") +@pytest.mark.parametrize(("text", "pos", "expected"), [ + # 'Hi 👋🏻!': 0=H,1=i,2=space,3=wave,4=skin,5=!; wave+skin is one cluster + ('Hi \U0001F44B\U0001F3FB!', 6, 5), # from end -> '!' at 5 + ('Hi \U0001F44B\U0001F3FB!', 5, 3), # from '!' 
-> wave+skin at 3 + ('Hi \U0001F44B\U0001F3FB!', 3, 2), # from wave -> space at 2 + # 'a🇺🇸b': 0=a,1-2=flag,3=b; flag is one cluster (GB12/13) + ('a' + FLAG_US + 'b', 4, 3), # from end -> 'b' at 3 + ('a' + FLAG_US + 'b', 3, 1), # from 'b' -> flag at 1 + # Three RIs (🇺🇸🇦): flag + solo RI + (FLAG_US + RI_A, 3, 2), # from end -> solo RI at 2 + (FLAG_US + RI_A, 2, 0), # from solo -> flag at 0 + # 'a👨‍👩‍👧b': 0=a,1-5=family,6=b; ZWJ sequence is one cluster (GB11) + ('a' + FAMILY + 'b', 7, 6), # from end -> 'b' at 6 + ('a' + FAMILY + 'b', 6, 1), # from 'b' -> family at 1 +]) +def test_grapheme_boundary_before_unicode(text, pos, expected): + """grapheme_boundary_before with emoji and wide Unicode.""" + assert grapheme_boundary_before(text, pos) == expected + + +@pytest.mark.parametrize(("input_str", "expected"), [ + ('', []), + ('abc', ['c', 'b', 'a']), + # café with combining mark mixed with CRLF + ('cafe\u0301\r\nok', ['k', 'o', '\r\n', 'e\u0301', 'f', 'a', 'c']), +]) +def test_iter_graphemes_reverse_basic(input_str, expected): + """Basic iter_graphemes_reverse tests.""" + assert list(iter_graphemes_reverse(input_str)) == expected + + +@pytest.mark.skipif(NARROW_ONLY, reason="requires wide Unicode") +@pytest.mark.parametrize(("input_str", "expected"), [ + # Multiple emoji types in one string + ('cafe\u0301 ' + WAVE_SKIN + ' ' + FLAG_US + '!', + ['!', FLAG_US, ' ', WAVE_SKIN, ' ', 'e\u0301', 'f', 'a', 'c']), + # Two families + (FAMILY + FAMILY, [FAMILY, FAMILY]), + # Flag + solo RI + text + ('Hi' + FLAG_US + RI_A + '!', ['!', RI_A, FLAG_US, 'i', 'H']), +]) +def test_iter_graphemes_reverse_unicode(input_str, expected): + """iter_graphemes_reverse with wide Unicode.""" + assert list(iter_graphemes_reverse(input_str)) == expected + + +@pytest.mark.skipif(NARROW_ONLY, reason="requires wide Unicode") +@pytest.mark.parametrize(("input_str", "expected"), read_grapheme_break_test()) +def test_grapheme_roundtrip_consistency(input_str, expected): + """Forward and reverse iteration produce identical boundaries.""" 
+ forward = list(iter_graphemes(input_str)) + reverse = list(iter_graphemes_reverse(input_str))[::-1] + assert forward == reverse + + +def test_grapheme_boundary_before_edge_cases(): + """Edge cases for grapheme_boundary_before.""" + assert grapheme_boundary_before('abc', 0) == 0 + assert grapheme_boundary_before('abc', 100) == 2 # pos > len clamps + assert grapheme_boundary_before('', 0) == 0 + + +def test_iter_graphemes_reverse_edge_cases(): + """Edge cases for iter_graphemes_reverse.""" + assert list(iter_graphemes_reverse('abcdef', start=2, end=5)) == ['e', 'd', 'c'] + assert list(iter_graphemes_reverse('abc', start=0, end=100)) == ['c', 'b', 'a'] + assert not list(iter_graphemes_reverse('abc', start=5)) + assert not list(iter_graphemes_reverse('abc', start=2, end=2)) + # PREPEND + char is one grapheme (GB9b), so start=1 yields nothing (won't split) + assert not list(iter_graphemes_reverse(PREPEND_CHAR + 'a', start=1)) + # But start=0 yields the full grapheme + assert list(iter_graphemes_reverse(PREPEND_CHAR + 'a', start=0)) == [PREPEND_CHAR + 'a'] + # Negative start is clamped to 0 + assert list(iter_graphemes_reverse('abc', start=-5)) == ['c', 'b', 'a'] diff --git a/contrib/python/wcwidth/py3/tests/test_textwrap.py b/contrib/python/wcwidth/py3/tests/test_textwrap.py index c2f28bffe1a..fc15f1917f9 100644 --- a/contrib/python/wcwidth/py3/tests/test_textwrap.py +++ b/contrib/python/wcwidth/py3/tests/test_textwrap.py @@ -12,6 +12,7 @@ from wcwidth import iter_sequences from wcwidth.textwrap import SequenceTextWrapper, wrap SGR_RED = '\x1b[31m' +SGR_BLUE = '\x1b[34m' SGR_BOLD = '\x1b[1m' SGR_RESET = '\x1b[0m' ATTRS = ('\x1b[31m', '\x1b[34m', '\x1b[4m', '\x1b[7m', '\x1b[41m', '\x1b[37m', '\x1b[107m') @@ -203,7 +204,7 @@ SEQUENCE_CASES = [ # Empty/adjacent sequences (f'{SGR_RED}{SGR_RESET}', 10, [f'{SGR_RED}{SGR_RESET}']), (f'hello {SGR_RED}{SGR_RESET}world', 6, ['hello', f'{SGR_RED}{SGR_RESET}world']), - # OSC hyperlinks + # OSC hyperlinks (with space separator) 
(f'{OSC_HYPERLINK} text', 5, [OSC_HYPERLINK, 'text']), # CSI cursor sequences (f'{CSI_CURSOR}text here', 10, [f'{CSI_CURSOR}text', 'here']), @@ -262,3 +263,69 @@ TABSIZE_WIDE_CASES = [ def test_wrap_tabsize_wide_chars(text, w, tabsize, expected): """Verify tabsize respects wide character column positions.""" assert wrap(text, w, tabsize=tabsize) == expected + + +OSC_START_ST = '\x1b]8;;http://example.com\x1b\\' +OSC_END_ST = '\x1b]8;;\x1b\\' +OSC_START_BEL = '\x1b]8;;http://example.com\x07' +OSC_END_BEL = '\x1b]8;;\x07' + +HYPERLINK_WORD_BOUNDARY_CASES = [ + ( # standard, ST-variant, + f'{OSC_START_ST}link{OSC_END_ST}more', + 5, + [f'{OSC_START_ST}link{OSC_END_ST}', 'more'], + ), + ( # BEL-variant, + f'{OSC_START_BEL}link{OSC_END_BEL}more', + 5, + [f'{OSC_START_BEL}link{OSC_END_BEL}', 'more'], + ), + ( # hyperlink breaks after word, 'prefix', + f'prefix{OSC_START_ST}link{OSC_END_ST}', + 6, + ['prefix', f'{OSC_START_ST}link{OSC_END_ST}'], + ), + ( + f'prefix{OSC_START_BEL}link{OSC_END_BEL}', + 6, + ['prefix', f'{OSC_START_BEL}link{OSC_END_BEL}'], + ), + ( # hyperlink breaks before following, 'suffix', + f'prefix{OSC_START_ST}link{OSC_END_ST}suffix', + 6, + ['prefix', f'{OSC_START_ST}link{OSC_END_ST}', 'suffix'], + ), + ( + f'prefix{OSC_START_BEL}link{OSC_END_BEL}suffix', + 6, + ['prefix', f'{OSC_START_BEL}link{OSC_END_BEL}', 'suffix'], + ), + ( # hyperlink *surrounded* by SGR attributes + f'foo {SGR_RED}{OSC_START_ST}link{OSC_END_ST}{SGR_RESET} bar', + 6, + ['foo', f'{SGR_RED}{OSC_START_ST}link{OSC_END_ST}{SGR_RESET}', 'bar'], + ), + ( + f'foo {SGR_RED}{OSC_START_BEL}link{OSC_END_BEL}{SGR_RESET} bar', + 6, + ['foo', f'{SGR_RED}{OSC_START_BEL}link{OSC_END_BEL}{SGR_RESET}', 'bar'], + ), + ( # hyperlink *containing* SGR attributes + f'foo {OSC_START_ST}{SGR_RED}link{SGR_RESET}{OSC_END_ST} bar', + 6, + ['foo', f'{OSC_START_ST}{SGR_RED}link{SGR_RESET}{OSC_END_ST}', 'bar'], + ), + ( + f'foo {OSC_START_BEL}{SGR_RED}link{SGR_RESET}{OSC_END_BEL} bar', + 6, + ['foo', 
f'{OSC_START_BEL}{SGR_RED}link{SGR_RESET}{OSC_END_BEL}', 'bar'], + ), +] + + +@pytest.mark.parametrize('text,w,expected', HYPERLINK_WORD_BOUNDARY_CASES) +def test_wrap_hyperlink_word_boundary(text, w, expected): + """OSC hyperlink sequences should act as word boundaries.""" + result = wrap(text, w) + assert result == expected diff --git a/contrib/python/wcwidth/py3/wcwidth/__init__.py b/contrib/python/wcwidth/py3/wcwidth/__init__.py index ed29279dcb8..03279ff863b 100644 --- a/contrib/python/wcwidth/py3/wcwidth/__init__.py +++ b/contrib/python/wcwidth/py3/wcwidth/__init__.py @@ -26,16 +26,18 @@ from .wcwidth import (WIDE_EASTASIAN, _wcmatch_version, _wcversion_value) from .bisearch import bisearch as _bisearch -from .grapheme import iter_graphemes # noqa +from .grapheme import grapheme_boundary_before # noqa +from .grapheme import iter_graphemes, iter_graphemes_reverse from .textwrap import SequenceTextWrapper, wrap # The __all__ attribute defines the items exported from statement, # 'from wcwidth import *', but also to say, "This is the public API". __all__ = ('wcwidth', 'wcswidth', 'width', 'iter_sequences', 'iter_graphemes', + 'iter_graphemes_reverse', 'grapheme_boundary_before', 'ljust', 'rjust', 'center', 'wrap', 'clip', 'strip_sequences', 'list_versions') # We also used pkg_resources to load unicode version tables from version.json, # generated by bin/update-tables.py, but some environments are unable to # import pkg_resources for one reason or another, yikes! -__version__ = '0.3.5' +__version__ = '0.4.0' diff --git a/contrib/python/wcwidth/py3/wcwidth/grapheme.py b/contrib/python/wcwidth/py3/wcwidth/grapheme.py index 1a83668b066..63713b9070d 100644 --- a/contrib/python/wcwidth/py3/wcwidth/grapheme.py +++ b/contrib/python/wcwidth/py3/wcwidth/grapheme.py @@ -36,6 +36,10 @@ if TYPE_CHECKING: # pragma: no cover # std imports from collections.abc import Iterator +# Maximum backward scan distance when finding grapheme cluster boundaries. 
+# Covers all known Unicode grapheme clusters with margin; longer sequences are pathological. +MAX_GRAPHEME_SCAN = 32 + class GCB(IntEnum): """Grapheme Cluster Break property values.""" @@ -304,3 +308,118 @@ def iter_graphemes( # Yield the final cluster yield unistr[cluster_start:end] + + +def _find_cluster_start(text: str, pos: int) -> int: + """ + Find the start of the grapheme cluster containing the character before pos. + + Scans backwards from pos to find a safe starting point, then iterates forward using standard + break rules to find the actual cluster boundary. + + :param text: The Unicode string. + :param pos: Position to search before (exclusive). + :returns: Start position of the grapheme cluster. + """ + target_cp = ord(text[pos - 1]) + + # GB3: CR x LF - LF after CR is part of same cluster + if target_cp == 0x0A and pos >= 2 and text[pos - 2] == '\r': + return pos - 2 + + # Fast path: ASCII (except LF) starts its own cluster + if target_cp < 0x80: + # GB9b: Check for preceding PREPEND (rare: Arabic/Brahmic) + if pos >= 2 and target_cp >= 0x20: + prev_cp = ord(text[pos - 2]) + if prev_cp >= 0x80 and _grapheme_cluster_break(prev_cp) == GCB.PREPEND: + return _find_cluster_start(text, pos - 1) + return pos - 1 + + # Scan backward to find a safe starting point + safe_start = pos - 1 + while safe_start > 0 and (pos - safe_start) < MAX_GRAPHEME_SCAN: + cp = ord(text[safe_start]) + if 0x20 <= cp < 0x80: # ASCII always starts a cluster + break + if _grapheme_cluster_break(cp) == GCB.CONTROL: # GB4 + break + safe_start -= 1 + + # Verify forward to find the actual cluster boundary + cluster_start = safe_start + left_gcb = _grapheme_cluster_break(ord(text[safe_start])) + ri_count = 1 if left_gcb == GCB.REGIONAL_INDICATOR else 0 + + for i in range(safe_start + 1, pos): + right_gcb = _grapheme_cluster_break(ord(text[i])) + result = _should_break(left_gcb, right_gcb, text, i, ri_count) + ri_count = result.ri_count + if result.should_break: + cluster_start = i + 
left_gcb = right_gcb + + return cluster_start + + +def grapheme_boundary_before(unistr: str, pos: int) -> int: + r""" + Find the grapheme cluster boundary immediately before a position. + + :param unistr: The Unicode string to search. + :param pos: Position in the string (0 < pos <= len(unistr)). + :returns: Start index of the grapheme cluster containing the character at pos-1. + + Example:: + + >>> grapheme_boundary_before('Hello \U0001F44B\U0001F3FB', 8) + 6 + >>> grapheme_boundary_before('a\r\nb', 3) + 1 + + .. versionadded:: 0.3.6 + """ + if pos <= 0: + return 0 + return _find_cluster_start(unistr, min(pos, len(unistr))) + + +def iter_graphemes_reverse( + unistr: str, + start: int = 0, + end: int | None = None, +) -> Iterator[str]: + r""" + Iterate over grapheme clusters in reverse order (last to first). + + :param unistr: The Unicode string to segment. + :param start: Starting index (default 0). + :param end: Ending index (default len(unistr)). + :yields: Grapheme cluster substrings in reverse order. + + Example:: + + >>> list(iter_graphemes_reverse('cafe\u0301')) + ['e\u0301', 'f', 'a', 'c'] + + .. 
versionadded:: 0.3.6 + """ + if not unistr: + return + + length = len(unistr) + + end = length if end is None else min(end, length) + start = max(start, 0) + + if start >= end or start >= length: + return + + pos = end + while pos > start: + cluster_start = _find_cluster_start(unistr, pos) + # Don't yield partial graphemes that extend before start + if cluster_start < start: + break + yield unistr[cluster_start:pos] + pos = cluster_start diff --git a/contrib/python/wcwidth/py3/wcwidth/textwrap.py b/contrib/python/wcwidth/py3/wcwidth/textwrap.py index 8b91d6ff9ce..41d89a3dba0 100644 --- a/contrib/python/wcwidth/py3/wcwidth/textwrap.py +++ b/contrib/python/wcwidth/py3/wcwidth/textwrap.py @@ -34,6 +34,9 @@ class SequenceTextWrapper(textwrap.TextWrapper): The key difference from the blessed implementation is the addition of grapheme cluster support via :func:`~.iter_graphemes`, providing width calculation for ZWJ emoji sequences, VS-16 emojis and variations, regional indicator flags, and combining characters. + + OSC hyperlink sequences are treated as word boundaries, ensuring that text adjacent to + hyperlinks wraps correctly without breaking the hyperlink structure. """ def __init__(self, width: int = 70, *, @@ -77,17 +80,25 @@ class SequenceTextWrapper(textwrap.TextWrapper): return ''.join(result) def _split(self, text: str) -> list[str]: # pylint: disable=too-many-locals - """ + r""" Sequence-aware variant of :meth:`textwrap.TextWrapper._split`. This method ensures that terminal escape sequences don't interfere with the text splitting logic, particularly for hyphen-based word breaking. It builds a position mapping from stripped text to original text, calls the parent's _split on stripped text, then maps chunks back. 
+ + OSC hyperlink sequences are treated as word boundaries:: + + >>> wrap('foo \x1b]8;;https://example.com\x07link\x1b]8;;\x07 bar', 6) + ['foo', '\x1b]8;;https://example.com\x07link\x1b]8;;\x07', 'bar'] + + Both BEL (``\x07``) and ST (``\x1b\\``) terminators are supported. """ # pylint: disable=too-many-locals,too-many-branches # Build a mapping from stripped text positions to original text positions. - # We track where each character ENDS so that sequences between characters + # + # Track where each character ENDS so that sequences between characters # attach to the following text (not preceding text). This ensures sequences # aren't lost when whitespace is dropped. # @@ -95,16 +106,32 @@ class SequenceTextWrapper(textwrap.TextWrapper): char_end: list[int] = [] stripped_text = '' original_pos = 0 + prev_was_hyperlink_close = False for segment, is_seq in iter_sequences(text): if not is_seq: + # Conditionally insert space after hyperlink close to force word boundary + if prev_was_hyperlink_close and segment and not segment[0].isspace(): + stripped_text += ' ' + char_end.append(original_pos) for char in segment: original_pos += 1 char_end.append(original_pos) stripped_text += char + prev_was_hyperlink_close = False else: + # Conditionally insert space before OSC sequences to artificially create word + # boundary, but *not* before hyperlink close sequences, to ensure hyperlink is + # terminated on the same line. 
+ is_hyperlink_close = segment.startswith(('\x1b]8;;\x1b\\', '\x1b]8;;\x07')) + if (segment.startswith('\x1b]') and stripped_text and not + stripped_text[-1].isspace()): + if not is_hyperlink_close: + stripped_text += ' ' + char_end.append(original_pos) # Escape sequences advance position but don't add to stripped text original_pos += len(segment) + prev_was_hyperlink_close = is_hyperlink_close # Add sentinel for final position char_end.append(original_pos) @@ -137,7 +164,9 @@ class SequenceTextWrapper(textwrap.TextWrapper): end_orig = char_end[stripped_pos + chunk_len - 1] # Extract the corresponding portion from the original text - result.append(text[start_orig:end_orig]) + # Skip empty chunks (from virtual spaces inserted at OSC boundaries) + if start_orig != end_orig: + result.append(text[start_orig:end_orig]) stripped_pos += chunk_len return result @@ -303,8 +332,8 @@ class SequenceTextWrapper(textwrap.TextWrapper): idx = match.end() continue - # Get grapheme - grapheme = next(iter_graphemes(text[idx:])) + # Get grapheme (use start= to avoid slice allocation) + grapheme = next(iter_graphemes(text, start=idx)) grapheme_width = self._width(grapheme) if width_so_far + grapheme_width > max_width: diff --git a/contrib/python/wcwidth/py3/ya.make b/contrib/python/wcwidth/py3/ya.make index 36c1c8b8ee2..542d8f80c0f 100644 --- a/contrib/python/wcwidth/py3/ya.make +++ b/contrib/python/wcwidth/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(0.3.5) +VERSION(0.4.0) LICENSE(MIT) diff --git a/contrib/python/ydb/py3/.dist-info/METADATA b/contrib/python/ydb/py3/.dist-info/METADATA index 58443d25933..65593309207 100644 --- a/contrib/python/ydb/py3/.dist-info/METADATA +++ b/contrib/python/ydb/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: ydb -Version: 3.26.3 +Version: 3.26.4 Summary: YDB Python SDK Home-page: http://github.com/ydb-platform/ydb-python-sdk Author: Yandex LLC diff --git a/contrib/python/ydb/py3/ya.make b/contrib/python/ydb/py3/ya.make index 
01514a1a60c..1fe3532c5ff 100644 --- a/contrib/python/ydb/py3/ya.make +++ b/contrib/python/ydb/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(3.26.3) +VERSION(3.26.4) LICENSE(Apache-2.0) diff --git a/contrib/python/ydb/py3/ydb/aio/query/pool.py b/contrib/python/ydb/py3/ydb/aio/query/pool.py index 13cb4610412..7561a21bc76 100644 --- a/contrib/python/ydb/py3/ydb/aio/query/pool.py +++ b/contrib/python/ydb/py3/ydb/aio/query/pool.py @@ -107,7 +107,8 @@ class QuerySessionPool: try: session = await self._create_new_session() except Exception as e: - logger.error("Failed to create new session") + # TODO: this exception could be retried via retrier, so no need to log error here. Probably we should retry this right in create_new_session method. + logger.warning("Failed to create new session") self._current_size -= 1 raise e diff --git a/contrib/python/ydb/py3/ydb/ydb_version.py b/contrib/python/ydb/py3/ydb/ydb_version.py index 9cc98233183..1b6a705b715 100644 --- a/contrib/python/ydb/py3/ydb/ydb_version.py +++ b/contrib/python/ydb/py3/ydb/ydb_version.py @@ -1 +1 @@ -VERSION = "3.26.3" +VERSION = "3.26.4" |
