diff options
| author | robot-piglet <[email protected]> | 2025-08-05 17:17:44 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2025-08-05 17:45:07 +0300 |
| commit | cf6ff7ef6b9295d66cf3be9908ea6dc829cdfde0 (patch) | |
| tree | 80681d3cee1effe8992f2ffb1067eb947f62aa9d /contrib/python/hypothesis | |
| parent | cd649dea60f5d85081c37347c3369f689a93f07e (diff) | |
Intermediate changes
commit_hash:b3eacf07b53448e0238e608b705a8978584bfe91
Diffstat (limited to 'contrib/python/hypothesis')
34 files changed, 1049 insertions, 710 deletions
diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA index f4d09309a39..d39f7ab465b 100644 --- a/contrib/python/hypothesis/py3/.dist-info/METADATA +++ b/contrib/python/hypothesis/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: hypothesis -Version: 6.115.1 +Version: 6.120.0 Summary: A library for property-based testing Home-page: https://hypothesis.works Author: David R. MacIver and Zac Hatfield-Dodds @@ -40,10 +40,10 @@ Requires-Dist: exceptiongroup>=1.0.0; python_version < "3.11" Provides-Extra: all Requires-Dist: black>=19.10b0; extra == "all" Requires-Dist: click>=7.0; extra == "all" -Requires-Dist: crosshair-tool>=0.0.74; extra == "all" +Requires-Dist: crosshair-tool>=0.0.77; extra == "all" Requires-Dist: django>=4.2; extra == "all" Requires-Dist: dpcontracts>=0.4; extra == "all" -Requires-Dist: hypothesis-crosshair>=0.0.16; extra == "all" +Requires-Dist: hypothesis-crosshair>=0.0.18; extra == "all" Requires-Dist: lark>=0.10.1; extra == "all" Requires-Dist: libcst>=0.3.16; extra == "all" Requires-Dist: numpy>=1.19.3; extra == "all" @@ -61,8 +61,8 @@ Requires-Dist: rich>=9.0.0; extra == "cli" Provides-Extra: codemods Requires-Dist: libcst>=0.3.16; extra == "codemods" Provides-Extra: crosshair -Requires-Dist: hypothesis-crosshair>=0.0.16; extra == "crosshair" -Requires-Dist: crosshair-tool>=0.0.74; extra == "crosshair" +Requires-Dist: hypothesis-crosshair>=0.0.18; extra == "crosshair" +Requires-Dist: crosshair-tool>=0.0.77; extra == "crosshair" Provides-Extra: dateutil Requires-Dist: python-dateutil>=1.4; extra == "dateutil" Provides-Extra: django diff --git a/contrib/python/hypothesis/py3/hypothesis/_settings.py b/contrib/python/hypothesis/py3/hypothesis/_settings.py index 2f18ff920fe..f4c77ca420d 100644 --- a/contrib/python/hypothesis/py3/hypothesis/_settings.py +++ b/contrib/python/hypothesis/py3/hypothesis/_settings.py @@ -63,6 +63,7 @@ class settingsProperty: from hypothesis.database import ExampleDatabase result = ExampleDatabase(not_set) + assert result is not not_set return result except KeyError: raise AttributeError(self.name) from None @@ -96,7 +97,7 @@ class settingsMeta(type): v = default_variable.value if v is not None: return v - if hasattr(settings, "_current_profile"): + if getattr(settings, "_current_profile", None) is not None: settings.load_profile(settings._current_profile) assert default_variable.value is not None return default_variable.value @@ -131,6 +132,7 @@ class settings(metaclass=settingsMeta): __definitions_are_locked = False _profiles: ClassVar[dict[str, "settings"]] = {} __module__ = "hypothesis" + _current_profile = None def __getattr__(self, name): if name in all_settings: @@ -315,9 +317,15 @@ class settings(metaclass=settingsMeta): :class:`~hypothesis.settings`: optional ``parent`` settings, and keyword arguments for each setting that will be set differently to parent (or settings.default, if parent is None). + + If you register a profile that has already been defined and that profile + is the currently loaded profile, the new changes will take effect immediately, + and do not require reloading the profile. """ check_type(str, name, "name") settings._profiles[name] = settings(parent=parent, **kwargs) + if settings._current_profile == name: + settings.load_profile(name) @staticmethod def get_profile(name: str) -> "settings": @@ -407,6 +415,8 @@ This allows you to `check for regressions and look for bugs :ref:`separate settings profiles <settings_profiles>` - for example running quick deterministic tests on every commit, and a longer non-deterministic nightly testing run. + +By default when running on CI, this will be set to True. """, ) @@ -682,6 +692,8 @@ errors (but will not necessarily be if close to the deadline, to allow some variability in test run time). Set this to ``None`` to disable this behaviour entirely. + +By default when running on CI, this will be set to None. """, ) @@ -694,13 +706,11 @@ def is_in_ci() -> bool: settings._define_setting( "print_blob", - default=is_in_ci(), - show_default=False, + default=False, options=(True, False), description=""" If set to ``True``, Hypothesis will print code for failing examples that can be used with :func:`@reproduce_failure <hypothesis.reproduce_failure>` to reproduce the failing example. -The default is ``True`` if the ``CI`` or ``TF_BUILD`` env vars are set, ``False`` otherwise. """, ) @@ -750,6 +760,23 @@ def note_deprecation( settings.register_profile("default", settings()) settings.load_profile("default") + +assert settings.default is not None + +CI = settings( + derandomize=True, + deadline=None, + database=None, + print_blob=True, + suppress_health_check=[HealthCheck.too_slow], +) + +settings.register_profile("ci", CI) + + +if is_in_ci(): + settings.load_profile("ci") + assert settings.default is not None diff --git a/contrib/python/hypothesis/py3/hypothesis/control.py b/contrib/python/hypothesis/py3/hypothesis/control.py index f36ceb676b8..9457b368899 100644 --- a/contrib/python/hypothesis/py3/hypothesis/control.py +++ b/contrib/python/hypothesis/py3/hypothesis/control.py @@ -149,10 +149,10 @@ class BuildContext: arg_labels = {} kwargs = {} for k, s in kwarg_strategies.items(): - start_idx = self.data.index + start_idx = len(self.data.ir_nodes) with deprecate_random_in_strategy("from {}={!r}", k, s) as check: obj = check(self.data.draw(s, observe_as=f"generate:{k}")) - end_idx = self.data.index + end_idx = len(self.data.ir_nodes) kwargs[k] = obj # This high up the stack, we can't see or really do much with the conjecture diff --git a/contrib/python/hypothesis/py3/hypothesis/core.py b/contrib/python/hypothesis/py3/hypothesis/core.py index 9a1a19c1e29..34a274a6eaf 100644 --- a/contrib/python/hypothesis/py3/hypothesis/core.py +++ b/contrib/python/hypothesis/py3/hypothesis/core.py @@ -9,7 +9,6 @@ # obtain one at https://mozilla.org/MPL/2.0/. """This module provides the core primitives of Hypothesis, such as given.""" - import base64 import contextlib import datetime @@ -24,7 +23,7 @@ import unittest import warnings import zlib from collections import defaultdict -from collections.abc import Coroutine, Hashable +from collections.abc import Coroutine, Generator, Hashable from functools import partial from random import Random from typing import ( @@ -58,6 +57,7 @@ from hypothesis.errors import ( FlakyFailure, FlakyReplay, Found, + Frozen, HypothesisException, HypothesisWarning, InvalidArgument, @@ -477,9 +477,6 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s fragments_reported = [] empty_data = ConjectureData.for_buffer(b"") try: - bits = ", ".join(nicerepr(x) for x in arguments) + ", ".join( - f"{k}={nicerepr(v)}" for k, v in example_kwargs.items() - ) execute_example = partial( state.execute_once, empty_data, @@ -492,7 +489,9 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s execute_example() else: # @example(...).xfail(...) - + bits = ", ".join(nicerepr(x) for x in arguments) + ", ".join( + f"{k}={nicerepr(v)}" for k, v in example_kwargs.items() + ) try: execute_example() except failure_exceptions_to_catch() as err: @@ -735,6 +734,69 @@ def get_executor(runner): return default_executor +def unwrap_markers_from_group() -> Generator[None, None, None]: + # This function is a crude solution, a better way of resolving it would probably + # be to rewrite a bunch of exception handlers to use except*. + T = TypeVar("T", bound=BaseException) + + def _flatten_group(excgroup: BaseExceptionGroup[T]) -> list[T]: + found_exceptions: list[T] = [] + for exc in excgroup.exceptions: + if isinstance(exc, BaseExceptionGroup): + found_exceptions.extend(_flatten_group(exc)) + else: + found_exceptions.append(exc) + return found_exceptions + + try: + yield + except BaseExceptionGroup as excgroup: + frozen_exceptions, non_frozen_exceptions = excgroup.split(Frozen) + + # group only contains Frozen, reraise the group + # it doesn't matter what we raise, since any exceptions get disregarded + # and reraised as StopTest if data got frozen. + if non_frozen_exceptions is None: + raise + # in all other cases they are discarded + + # Can RewindRecursive end up in this group? + _, user_exceptions = non_frozen_exceptions.split( + lambda e: isinstance(e, (StopTest, HypothesisException)) + ) + + # this might contain marker exceptions, or internal errors, but not frozen. + if user_exceptions is not None: + raise + + # single marker exception - reraise it + flattened_non_frozen_exceptions: list[BaseException] = _flatten_group( + non_frozen_exceptions + ) + if len(flattened_non_frozen_exceptions) == 1: + e = flattened_non_frozen_exceptions[0] + # preserve the cause of the original exception to not hinder debugging + # note that __context__ is still lost though + raise e from e.__cause__ + + # multiple marker exceptions. If we re-raise the whole group we break + # a bunch of logic so ....? + stoptests, non_stoptests = non_frozen_exceptions.split(StopTest) + + # TODO: stoptest+hypothesisexception ...? Is it possible? If so, what do? + + if non_stoptests: + # TODO: multiple marker exceptions is easy to produce, but the logic in the + # engine does not handle it... so we just reraise the first one for now. + e = _flatten_group(non_stoptests)[0] + raise e from e.__cause__ + assert stoptests is not None + + # multiple stoptests: raising the one with the lowest testcounter + raise min(_flatten_group(stoptests), key=lambda s_e: s_e.testcounter) + + class StateForActualGivenExecution: def __init__(self, stuff, test, settings, random, wrapped_test): self.test_runner = get_executor(stuff.selfy) @@ -808,7 +870,7 @@ class StateForActualGivenExecution: @proxies(self.test) def test(*args, **kwargs): - with ensure_free_stackframes(): + with unwrap_markers_from_group(), ensure_free_stackframes(): return self.test(*args, **kwargs) else: @@ -820,7 +882,7 @@ class StateForActualGivenExecution: arg_gctime = gc_cumulative_time() start = time.perf_counter() try: - with ensure_free_stackframes(): + with unwrap_markers_from_group(), ensure_free_stackframes(): result = self.test(*args, **kwargs) finally: finish = time.perf_counter() @@ -864,15 +926,6 @@ class StateForActualGivenExecution: if expected_failure is not None: nonlocal text_repr text_repr = repr_call(test, args, kwargs) - if text_repr in self.xfail_example_reprs: - warnings.warn( - f"We generated {text_repr}, which seems identical " - "to one of your `@example(...).xfail()` cases. " - "Revise the strategy to avoid this overlap?", - HypothesisWarning, - # Checked in test_generating_xfailed_examples_warns! - stacklevel=6, - ) if print_example or current_verbosity() >= Verbosity.verbose: printer = RepresentationPrinter(context=context) @@ -1002,18 +1055,17 @@ class StateForActualGivenExecution: """ trace: Trace = set() try: - if self._should_trace() and Tracer.can_trace(): # pragma: no cover - # This is in fact covered by our *non-coverage* tests, but due to the - # settrace() contention *not* by our coverage tests. Ah well. - with Tracer() as tracer: - try: - result = self.execute_once(data) - if data.status == Status.VALID: - self.explain_traces[None].add(frozenset(tracer.branches)) - finally: - trace = tracer.branches - else: - result = self.execute_once(data) + with Tracer(should_trace=self._should_trace()) as tracer: + try: + result = self.execute_once(data) + if ( + data.status == Status.VALID and tracer.branches + ): # pragma: no cover + # This is in fact covered by our *non-coverage* tests, but due + # to the settrace() contention *not* by our coverage tests. + self.explain_traces[None].add(frozenset(tracer.branches)) + finally: + trace = tracer.branches if result is not None: fail_health_check( self.settings, @@ -1221,6 +1273,7 @@ class StateForActualGivenExecution: ran_example.slice_comments = falsifying_example.slice_comments tb = None origin = None + assert info is not None assert info._expected_exception is not None try: with with_reporter(fragments.append): @@ -1289,7 +1342,7 @@ class StateForActualGivenExecution: "coverage": None, # Not recorded when we're replaying the MFE "metadata": { "traceback": tb, - "predicates": ran_example._observability_predicates, + "predicates": dict(ran_example._observability_predicates), **_system_metadata(), }, } @@ -1564,23 +1617,23 @@ def given( "to ensure that each example is run in a separate " "database transaction." ) - if settings.database is not None: - nonlocal prev_self - # Check selfy really is self (not e.g. a mock) before we health-check - cur_self = ( - stuff.selfy - if getattr(type(stuff.selfy), test.__name__, None) is wrapped_test - else None + + nonlocal prev_self + # Check selfy really is self (not e.g. a mock) before we health-check + cur_self = ( + stuff.selfy + if getattr(type(stuff.selfy), test.__name__, None) is wrapped_test + else None + ) + if prev_self is Unset: + prev_self = cur_self + elif cur_self is not prev_self: + msg = ( + f"The method {test.__qualname__} was called from multiple " + "different executors. This may lead to flaky tests and " + "nonreproducible errors when replaying from database." ) - if prev_self is Unset: - prev_self = cur_self - elif cur_self is not prev_self: - msg = ( - f"The method {test.__qualname__} was called from multiple " - "different executors. This may lead to flaky tests and " - "nonreproducible errors when replaying from database." - ) - fail_health_check(settings, msg, HealthCheck.differing_executors) + fail_health_check(settings, msg, HealthCheck.differing_executors) state = StateForActualGivenExecution( stuff, test, settings, random, wrapped_test @@ -1675,7 +1728,6 @@ def given( # The exception caught here should either be an actual test # failure (or BaseExceptionGroup), or some kind of fatal error # that caused the engine to stop. - generated_seed = wrapped_test._hypothesis_internal_use_generated_seed with local_settings(settings): if not (state.failed_normally or generated_seed is None): @@ -1733,7 +1785,7 @@ def given( state = StateForActualGivenExecution( stuff, test, settings, random, wrapped_test ) - digest = function_digest(test) + database_key = function_digest(test) + b".secondary" # We track the minimal-so-far example for each distinct origin, so # that we track log-n instead of n examples for long runs. In particular # it means that we saturate for common errors in long runs instead of @@ -1759,7 +1811,7 @@ def given( if settings.database is not None and ( known is None or sort_key(buffer) <= sort_key(known) ): - settings.database.save(digest, buffer) + settings.database.save(database_key, buffer) minimal_failures[data.interesting_origin] = buffer raise return bytes(data.buffer) diff --git a/contrib/python/hypothesis/py3/hypothesis/database.py b/contrib/python/hypothesis/py3/hypothesis/database.py index 72b6d8251df..d30ffbf40de 100644 --- a/contrib/python/hypothesis/py3/hypothesis/database.py +++ b/contrib/python/hypothesis/py3/hypothesis/database.py @@ -12,6 +12,7 @@ import abc import binascii import json import os +import struct import sys import warnings from collections.abc import Iterable @@ -27,6 +28,7 @@ from zipfile import BadZipFile, ZipFile from hypothesis.configuration import storage_directory from hypothesis.errors import HypothesisException, HypothesisWarning +from hypothesis.internal.conjecture.data import IRType from hypothesis.utils.conventions import not_set __all__ = [ @@ -671,3 +673,75 @@ class GitHubArtifactDatabase(ExampleDatabase): def delete(self, key: bytes, value: bytes) -> None: raise RuntimeError(self._read_only_message) + + +def ir_to_bytes(ir: Iterable[IRType], /) -> bytes: + """Serialize a list of IR elements to a bytestring. Inverts ir_from_bytes.""" + # We use a custom serialization format for this, which might seem crazy - but our + # data is a flat sequence of elements, and standard tools like protobuf or msgpack + # don't deal well with e.g. nonstandard bit-pattern-NaNs, or invalid-utf8 unicode. + # + # We simply encode each element with a metadata byte, if needed a uint16 size, and + # then the payload bytes. For booleans, the payload is inlined into the metadata. + parts = [] + for elem in ir: + if isinstance(elem, bool): + # `000_0000v` - tag zero, low bit payload. + parts.append(b"\1" if elem else b"\0") + continue + + # `tag_ssss [uint16 size?] [payload]` + if isinstance(elem, float): + tag = 1 << 5 + elem = struct.pack("!d", elem) + elif isinstance(elem, int): + tag = 2 << 5 + elem = elem.to_bytes(1 + elem.bit_length() // 8, "big", signed=True) + elif isinstance(elem, bytes): + tag = 3 << 5 + else: + assert isinstance(elem, str) + tag = 4 << 5 + elem = elem.encode(errors="surrogatepass") + + size = len(elem) + if size < 0b11111: + parts.append((tag | size).to_bytes(1, "big")) + else: + parts.append((tag | 0b11111).to_bytes(1, "big")) + parts.append(struct.pack("!H", size)) + parts.append(elem) + + return b"".join(parts) + + +def ir_from_bytes(buffer: bytes, /) -> list[IRType]: + """Deserialize a bytestring to a list of IR elements. Inverts ir_to_bytes.""" + # See above for an explanation of the format. + parts: list[IRType] = [] + idx = 0 + while idx < len(buffer): + tag = buffer[idx] >> 5 + size = buffer[idx] & 0b11111 + idx += 1 + + if tag == 0: + parts.append(bool(size)) + continue + if size == 0b11111: + (size,) = struct.unpack_from("!H", buffer, offset=idx) + idx += 2 + chunk = buffer[idx : idx + size] + idx += size + + if tag == 1: + assert size == 8, "expected float64" + parts.extend(struct.unpack("!d", chunk)) + elif tag == 2: + parts.append(int.from_bytes(chunk, "big", signed=True)) + elif tag == 3: + parts.append(chunk) + else: + assert tag == 4 + parts.append(chunk.decode(errors="surrogatepass")) + return parts diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/dateutil.py b/contrib/python/hypothesis/py3/hypothesis/extra/dateutil.py index 810d0477a24..14d1003666c 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/dateutil.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/dateutil.py @@ -17,6 +17,10 @@ This module provides :pypi:`dateutil <python-dateutil>` timezones. You can use this strategy to make :func:`~hypothesis.strategies.datetimes` and :func:`~hypothesis.strategies.times` produce timezone-aware values. + +.. tip:: + Consider using the stdlib :mod:`zoneinfo` module, via + :func:`st.timezones() <hypothesis.strategies.timezones>`. """ import datetime as dt diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/numpy.py b/contrib/python/hypothesis/py3/hypothesis/extra/numpy.py index a94471769ba..34f8278c249 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/numpy.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/numpy.py @@ -531,7 +531,7 @@ def arrays( lambda s: arrays(dtype, s, elements=elements, fill=fill, unique=unique) ) # From here on, we're only dealing with values and it's relatively simple. - dtype = np.dtype(dtype) # type: ignore[arg-type,assignment] + dtype = np.dtype(dtype) # type: ignore[arg-type] assert isinstance(dtype, np.dtype) # help mypy out a bit... if elements is None or isinstance(elements, Mapping): if dtype.kind in ("m", "M") and "[" not in dtype.str: diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/pytz.py b/contrib/python/hypothesis/py3/hypothesis/extra/pytz.py index 5ac18907f24..aac9f0c8939 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/pytz.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/pytz.py @@ -15,9 +15,14 @@ hypothesis[pytz] This module provides :pypi:`pytz` timezones. -You can use this strategy to make -:py:func:`hypothesis.strategies.datetimes` and -:py:func:`hypothesis.strategies.times` produce timezone-aware values. +If you are unable to use the stdlib :mod:`zoneinfo` module, e.g. via the +:func:`hypothesis.strategies.timezones` strategy, you can use this +strategy with :py:func:`hypothesis.strategies.datetimes` and +:py:func:`hypothesis.strategies.times` to produce timezone-aware values. + +.. deprecated:: :mod:`zoneinfo` was added + we intend to remove ``hypothesis.extra.pytz``, after libraries + such as Pandas and Django complete their own migrations. """ import datetime as dt @@ -37,8 +42,12 @@ def timezones() -> st.SearchStrategy[dt.tzinfo]: """Any timezone in the Olsen database, as a pytz tzinfo object. This strategy minimises to UTC, or the smallest possible fixed - offset, and is designed for use with - :py:func:`hypothesis.strategies.datetimes`. + offset, and is designed for use with :func:`hypothesis.strategies.datetimes`. + + .. tip:: + Prefer the :func:`hypothesis.strategies.timezones` strategy, which uses + the stdlib :mod:`zoneinfo` module and avoids `the many footguns in pytz + <https://blog.ganssle.io/articles/2018/03/pytz-fastest-footgun.html>`__. """ all_timezones = [pytz.timezone(tz) for tz in pytz.all_timezones] # Some timezones have always had a constant offset from UTC. This makes diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/charmap.py b/contrib/python/hypothesis/py3/hypothesis/internal/charmap.py index 8a1e98ef180..bf6bb59fb2d 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/charmap.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/charmap.py @@ -15,18 +15,64 @@ import os import sys import tempfile import unicodedata +from collections.abc import Iterable from functools import lru_cache +from pathlib import Path +from typing import TYPE_CHECKING, Literal, Optional from hypothesis.configuration import storage_directory from hypothesis.control import _current_build_context from hypothesis.errors import InvalidArgument -from hypothesis.internal.intervalsets import IntervalSet +from hypothesis.internal.intervalsets import IntervalSet, IntervalsT -intervals = tuple[tuple[int, int], ...] -cache_type = dict[tuple[tuple[str, ...], int, int, intervals], IntervalSet] +if TYPE_CHECKING: + from typing import TypeAlias +# See https://en.wikipedia.org/wiki/Unicode_character_property#General_Category +CategoryName: "TypeAlias" = Literal[ + "L", # Letter + "Lu", # Letter, uppercase + "Ll", # Letter, lowercase + "Lt", # Letter, titlecase + "Lm", # Letter, modifier + "Lo", # Letter, other + "M", # Mark + "Mn", # Mark, nonspacing + "Mc", # Mark, spacing combining + "Me", # Mark, enclosing + "N", # Number + "Nd", # Number, decimal digit + "Nl", # Number, letter + "No", # Number, other + "P", # Punctuation + "Pc", # Punctuation, connector + "Pd", # Punctuation, dash + "Ps", # Punctuation, open + "Pe", # Punctuation, close + "Pi", # Punctuation, initial quote + "Pf", # Punctuation, final quote + "Po", # Punctuation, other + "S", # Symbol + "Sm", # Symbol, math + "Sc", # Symbol, currency + "Sk", # Symbol, modifier + "So", # Symbol, other + "Z", # Separator + "Zs", # Separator, space + "Zl", # Separator, line + "Zp", # Separator, paragraph + "C", # Other + "Cc", # Other, control + "Cf", # Other, format + "Cs", # Other, surrogate + "Co", # Other, private use + "Cn", # Other, not assigned +] +Categories: "TypeAlias" = Iterable[CategoryName] +CategoriesTuple: "TypeAlias" = tuple[CategoryName, ...] -def charmap_file(fname="charmap"): + +def charmap_file(fname: str = "charmap") -> Path: return storage_directory( "unicode_data", unicodedata.unidata_version, f"{fname}.json.gz" ) @@ -35,7 +81,7 @@ def charmap_file(fname="charmap"): _charmap = None -def charmap(): +def charmap() -> dict[CategoryName, IntervalsT]: """Return a dict that maps a Unicode category, to a tuple of 2-tuples covering the codepoint intervals for characters in that category. @@ -49,8 +95,8 @@ def charmap(): if _charmap is None: f = charmap_file() try: - with gzip.GzipFile(f, "rb") as i: - tmp_charmap = dict(json.load(i)) + with gzip.GzipFile(f, "rb") as d: + tmp_charmap = dict(json.load(d)) except Exception: # This loop is reduced to using only local variables for performance; @@ -63,9 +109,9 @@ def charmap(): for i in range(1, sys.maxunicode + 1): cat = category(chr(i)) if cat != last_cat: - tmp_charmap.setdefault(last_cat, []).append([last_start, i - 1]) + tmp_charmap.setdefault(last_cat, []).append((last_start, i - 1)) last_cat, last_start = cat, i - tmp_charmap.setdefault(last_cat, []).append([last_start, sys.maxunicode]) + tmp_charmap.setdefault(last_cat, []).append((last_start, sys.maxunicode)) try: # Write the Unicode table atomically @@ -135,10 +181,10 @@ def intervals_from_codec(codec_name: str) -> IntervalSet: # pragma: no cover return res -_categories = None +_categories: Optional[Categories] = None -def categories(): +def categories() -> Categories: """Return a tuple of Unicode categories in a normalised order. >>> categories() # doctest: +ELLIPSIS @@ -147,15 +193,16 @@ def categories(): global _categories if _categories is None: cm = charmap() - _categories = sorted(cm.keys(), key=lambda c: len(cm[c])) - _categories.remove("Cc") # Other, Control - _categories.remove("Cs") # Other, Surrogate - _categories.append("Cc") - _categories.append("Cs") - return tuple(_categories) + categories = sorted(cm.keys(), key=lambda c: len(cm[c])) + categories.remove("Cc") # Other, Control + categories.remove("Cs") # Other, Surrogate + categories.append("Cc") + categories.append("Cs") + _categories = tuple(categories) + return _categories -def as_general_categories(cats, name="cats"): +def as_general_categories(cats: Categories, name: str = "cats") -> CategoriesTuple: """Return a tuple of Unicode categories in a normalised order. This function expands one-letter designations of a major class to include @@ -170,8 +217,6 @@ def as_general_categories(cats, name="cats"): If the collection ``cats`` includes any elements that do not represent a major class or a class with subclass, a deprecation warning is raised. """ - if cats is None: - return None major_classes = ("L", "M", "N", "P", "S", "Z", "C") cs = categories() out = set(cats) @@ -186,10 +231,10 @@ def as_general_categories(cats, name="cats"): return tuple(c for c in cs if c in out) -category_index_cache = {(): ()} +category_index_cache: dict[frozenset[CategoryName], IntervalsT] = {frozenset(): ()} -def _category_key(cats): +def _category_key(cats: Optional[Iterable[str]]) -> CategoriesTuple: """Return a normalised tuple of all Unicode categories that are in `include`, but not in `exclude`. @@ -205,7 +250,7 @@ def _category_key(cats): return tuple(c for c in cs if c in cats) -def _query_for_key(key): +def _query_for_key(key: Categories) -> IntervalsT: """Return a tuple of codepoint intervals covering characters that match one or more categories in the tuple of categories `key`. @@ -214,10 +259,13 @@ def _query_for_key(key): >>> _query_for_key(('Zl', 'Zp', 'Co')) ((8232, 8233), (57344, 63743), (983040, 1048573), (1048576, 1114109)) """ + key = tuple(key) + # ignore ordering on the cache key to increase potential cache hits. + cache_key = frozenset(key) context = _current_build_context.value if context is None or not context.data.provider.avoid_realization: try: - return category_index_cache[key] + return category_index_cache[cache_key] except KeyError: pass elif not key: # pragma: no cover # only on alternative backends @@ -231,21 +279,23 @@ def _query_for_key(key): ) assert isinstance(result, IntervalSet) if context is None or not context.data.provider.avoid_realization: - category_index_cache[key] = result.intervals + category_index_cache[cache_key] = result.intervals return result.intervals -limited_category_index_cache: cache_type = {} +limited_category_index_cache: dict[ + tuple[CategoriesTuple, int, int, IntervalsT, IntervalsT], IntervalSet +] = {} def query( *, - categories=None, - min_codepoint=None, - max_codepoint=None, - include_characters="", - exclude_characters="", -): + categories: Optional[Categories] = None, + min_codepoint: Optional[int] = None, + max_codepoint: Optional[int] = None, + include_characters: str = "", + exclude_characters: str = "", +) -> IntervalSet: """Return a tuple of intervals covering the codepoints for all characters that meet the criteria. diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/compat.py b/contrib/python/hypothesis/py3/hypothesis/internal/compat.py index baa9284bce1..7d9bbe99bfe 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/compat.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/compat.py @@ -36,6 +36,8 @@ if typing.TYPE_CHECKING: # pragma: no cover TypedDict as TypedDict, override as override, ) + + from hypothesis.internal.conjecture.engine import ConjectureRunner else: # In order to use NotRequired, we need the version of TypedDict included in Python 3.11+. if sys.version_info[:2] >= (3, 11): @@ -129,7 +131,7 @@ def _hint_and_args(x): return (x, *get_args(x)) -def get_type_hints(thing): +def get_type_hints(thing: object) -> dict[str, Any]: """Like the typing version, but tries harder and never errors. Tries harder: if the thing to inspect is a class but typing.get_type_hints @@ -237,7 +239,7 @@ except AttributeError: # pragma: no cover bit_count = lambda self: sum(extract_bits(abs(self))) -def bad_django_TestCase(runner): +def bad_django_TestCase(runner: Optional["ConjectureRunner"]) -> bool: if runner is None or "django.test" not in sys.modules: return False else: # pragma: no cover diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py index 3664d767296..b818719b318 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py @@ -58,6 +58,7 @@ from hypothesis.internal.floats import ( sign_aware_lte, ) from hypothesis.internal.intervalsets import IntervalSet +from hypothesis.reporting import debug_report if TYPE_CHECKING: from typing import TypeAlias @@ -88,7 +89,7 @@ T = TypeVar("T") class IntegerKWargs(TypedDict): min_value: Optional[int] max_value: Optional[int] - weights: Optional[Sequence[float]] + weights: Optional[dict[int, float]] shrink_towards: int @@ -356,8 +357,6 @@ class ExampleProperty: self.block(self.block_count) self.block_count += 1 elif record == IR_NODE_RECORD: - data = self.examples.ir_nodes[self.ir_node_count] - self.ir_node(data) self.ir_node_count += 1 elif record >= START_EXAMPLE_RECORD: self.__push(record - START_EXAMPLE_RECORD) @@ -399,9 +398,6 @@ class ExampleProperty: index of the example and ``discarded`` being ``True`` if ``stop_example`` was called with ``discard=True``.""" - def ir_node(self, node: "IRNode") -> None: - """Called when an ir node is drawn.""" - def finish(self) -> Any: return self.result @@ -457,23 +453,8 @@ class ExampleRecord: def freeze(self) -> None: self.__index_of_labels = None - def record_ir_draw( - self, - ir_type: IRTypeName, - value: IRType, - *, - kwargs: IRKWargsType, - was_forced: bool, - ) -> None: + def record_ir_draw(self) -> None: self.trail.append(IR_NODE_RECORD) - node = IRNode( - ir_type=ir_type, - value=value, - kwargs=kwargs, - was_forced=was_forced, - index=len(self.ir_nodes), - ) - self.ir_nodes.append(node) def start_example(self, label: int) -> None: assert self.__index_of_labels is not None @@ -507,7 +488,6 @@ class Examples: def __init__(self, record: ExampleRecord, blocks: "Blocks") -> None: self.trail = record.trail - self.ir_nodes = record.ir_nodes self.labels = record.labels self.__length = self.trail.count( STOP_EXAMPLE_DISCARD_RECORD @@ -569,7 +549,7 @@ class Examples: class _discarded(ExampleProperty): def begin(self) -> None: - self.result: "set[int]" = set() + self.result: set[int] = set() def finish(self) -> frozenset[int]: return frozenset(self.result) @@ -583,7 +563,7 @@ class Examples: class _trivial(ExampleProperty): def begin(self) -> None: self.nontrivial = IntList.of_length(len(self.examples)) - self.result: "set[int]" = set() + self.result: set[int] = set() def block(self, i: int) -> None: if not self.examples.blocks.trivial(i): @@ -617,15 +597,6 @@ class Examples: depths: IntList = calculated_example_property(_depths) - class _ir_tree_nodes(ExampleProperty): - def begin(self) -> None: - self.result = [] - - def ir_node(self, ir_node: "IRNode") -> None: - self.result.append(ir_node) - - ir_tree_nodes: "list[IRNode]" = calculated_example_property(_ir_tree_nodes) - class _label_indices(ExampleProperty): def start_example(self, i: int, label_index: int) -> None: self.result[i] = label_index @@ -971,7 +942,8 @@ class IRNode: ) -> "IRNode": # we may want to allow this combination in the future, but for now it's # a footgun. - assert not self.was_forced, "modifying a forced node doesn't make sense" + if self.was_forced: + assert with_value is None, "modifying a forced node doesn't make sense" # explicitly not copying index. node indices are only assigned via # ExampleRecord. This prevents footguns with relying on stale indices # after copying. @@ -998,6 +970,11 @@ class IRNode: min_value = self.kwargs["min_value"] max_value = self.kwargs["max_value"] + # shrink_towards is not respected for unbounded integers. (though + # probably it should be?) + if min_value is None and max_value is None: + return self.value == 0 + if min_value is not None: shrink_towards = max(min_value, shrink_towards) if max_value is not None: @@ -1029,6 +1006,9 @@ class IRNode: # also not incorrect to be conservative here. return False if self.ir_type == "boolean": + p = self.kwargs["p"] + if p == 1.0: + return True return self.value is False if self.ir_type == "string": # smallest size and contains only the smallest-in-shrink-order character. @@ -1067,6 +1047,15 @@ class IRNode: return f"{self.ir_type} {self.value!r}{forced_marker} {self.kwargs!r}" [email protected](slots=True) +class NodeTemplate: + type: Literal["simplest"] = attr.ib() + size: int = attr.ib() + + def __attrs_post_init__(self) -> None: + assert self.size > 0 + + def ir_value_permitted(value, ir_type, kwargs): if ir_type == "integer": min_value = kwargs["min_value"] @@ -1108,6 +1097,19 @@ def ir_value_permitted(value, ir_type, kwargs): raise NotImplementedError(f"unhandled type {type(value)} of ir value {value}") +def ir_size(ir: Iterable[IRType]) -> int: + from hypothesis.database import ir_to_bytes + + return len(ir_to_bytes(ir)) + + +def ir_size_nodes(nodes: Iterable[Union[IRNode, NodeTemplate]]) -> int: + size = 0 + for node in nodes: + size += node.size if isinstance(node, NodeTemplate) else ir_size([node.value]) + return size + + def ir_value_key(ir_type, v): if ir_type == "float": return float_to_int(v) @@ -1159,6 +1161,7 @@ class ConjectureResult: # If we consider blocks or examples in equality checks, multiple semantically equal # results get stored in e.g. the pareto front. blocks: Blocks = attr.ib(eq=False) + ir_nodes: tuple[IRNode, ...] = attr.ib(eq=False, repr=False) output: str = attr.ib() extra_information: Optional[ExtraInformation] = attr.ib() has_discards: bool = attr.ib() @@ -1179,6 +1182,10 @@ class ConjectureResult: def as_result(self) -> "ConjectureResult": return self + @property + def choices(self) -> tuple[IRType, ...]: + return tuple(node.value for node in self.ir_nodes) + # Masks for masking off the first byte of an n-bit buffer. # The appropriate mask is stored at position n % 8. @@ -1287,7 +1294,7 @@ class PrimitiveProvider(abc.ABC): max_value: Optional[int] = None, *, # weights are for choosing an element index from a bounded range - weights: Optional[Sequence[float]] = None, + weights: Optional[dict[int, float]] = None, shrink_towards: int = 0, forced: Optional[int] = None, fake_forced: bool = False, @@ -1456,8 +1463,7 @@ class HypothesisProvider(PrimitiveProvider): min_value: Optional[int] = None, max_value: Optional[int] = None, *, - # weights are for choosing an element index from a bounded range - weights: Optional[Sequence[float]] = None, + weights: Optional[dict[int, float]] = None, shrink_towards: int = 0, forced: Optional[int] = None, fake_forced: bool = False, @@ -1475,22 +1481,31 @@ class HypothesisProvider(PrimitiveProvider): assert min_value is not None assert max_value is not None - sampler = Sampler(weights, observe=False) - gap = max_value - shrink_towards - - forced_idx = None - if forced is not None: - if forced >= shrink_towards: - forced_idx = forced - shrink_towards - else: - forced_idx = shrink_towards + gap - forced - idx = sampler.sample(self._cd, forced=forced_idx, fake_forced=fake_forced) + # format of weights is a mapping of ints to p, where sum(p) < 1. + # The remaining probability mass is uniformly distributed over + # *all* ints (not just the unmapped ones; this is somewhat undesirable, + # but simplifies things). + # + # We assert that sum(p) is strictly less than 1 because it simplifies + # handling forced values when we can force into the unmapped probability + # mass. We should eventually remove this restriction. + sampler = Sampler( + [1 - sum(weights.values()), *weights.values()], observe=False + ) + # if we're forcing, it's easiest to force into the unmapped probability + # mass and then force the drawn value after. + idx = sampler.sample( + self._cd, forced=None if forced is None else 0, fake_forced=fake_forced + ) - # For range -2..2, interpret idx = 0..4 as [0, 1, 2, -1, -2] - if idx <= gap: - return shrink_towards + idx - else: - return shrink_towards - (idx - gap) + return self._draw_bounded_integer( + min_value, + max_value, + # implicit reliance on dicts being sorted for determinism + forced=forced if idx == 0 else list(weights)[idx - 1], + center=shrink_towards, + fake_forced=fake_forced, + ) if min_value is None and max_value is None: return self._draw_unbounded_integer(forced=forced, fake_forced=fake_forced) @@ -1977,18 +1992,22 @@ class ConjectureData: @classmethod def for_ir_tree( cls, - ir_tree_prefix: list[IRNode], + ir_tree_prefix: Sequence[Union[IRNode, NodeTemplate]], *, observer: Optional[DataObserver] = None, provider: Union[type, PrimitiveProvider] = HypothesisProvider, max_length: Optional[int] = None, + random: Optional[Random] = None, ) -> "ConjectureData": from hypothesis.internal.conjecture.engine import BUFFER_SIZE return cls( - max_length=BUFFER_SIZE if max_length is None else max_length, + max_length=BUFFER_SIZE, + max_length_ir=( + ir_size_nodes(ir_tree_prefix) if max_length is None else max_length + ), prefix=b"", - random=None, + random=random, ir_tree_prefix=ir_tree_prefix, observer=observer, provider=provider, @@ -2002,14 +2021,18 @@ class ConjectureData: random: Optional[Random], observer: Optional[DataObserver] = None, provider: Union[type, PrimitiveProvider] = HypothesisProvider, - ir_tree_prefix: Optional[list[IRNode]] = None, + ir_tree_prefix: Optional[Sequence[Union[IRNode, NodeTemplate]]] = None, + max_length_ir: Optional[int] = None, ) -> None: + from hypothesis.internal.conjecture.engine import BUFFER_SIZE_IR + if observer is None: observer = DataObserver() assert isinstance(observer, DataObserver) self._bytes_drawn = 0 self.observer = observer self.max_length = max_length + self.max_length_ir = BUFFER_SIZE_IR if max_length_ir is None else max_length_ir self.is_find = False self.overdraw = 0 self.__prefix = bytes(prefix) @@ -2021,6 +2044,8 @@ class ConjectureData: self.blocks = Blocks(self) self.buffer: "Union[bytes, bytearray]" = bytearray() self.index = 0 + self.length_ir = 0 + self.index_ir = 0 self.output = "" self.status = Status.VALID self.frozen = False @@ -2074,9 +2099,9 @@ class ConjectureData: self.extra_information = ExtraInformation() - self.ir_tree_nodes = ir_tree_prefix + self.ir_prefix = ir_tree_prefix + self.ir_nodes: tuple[IRNode, ...] = () self.misaligned_at: Optional[MisalignedAt] = None - self._node_index = 0 self.start_example(TOP_LABEL) def __repr__(self) -> str: @@ -2086,6 +2111,10 @@ class ConjectureData: ", frozen" if self.frozen else "", ) + @property + def choices(self) -> tuple[IRType, ...]: + return tuple(node.value for node in self.ir_nodes) + # A bit of explanation of the `observe` and `fake_forced` arguments in our # draw_* functions. # @@ -2111,13 +2140,65 @@ class ConjectureData: # value to be returned, but we don't want to treat that block as fixed for # e.g. the shrinker. + def _draw(self, ir_type, kwargs, *, observe, forced, fake_forced): + # this is somewhat redundant with the length > max_length check at the + # end of the function, but avoids trying to use a null self.random when + # drawing past the node of a ConjectureData.for_ir_tree data. + if self.length_ir == self.max_length_ir: + debug_report(f"overrun because hit {self.max_length_ir=}") + self.mark_overrun() + + if self.ir_prefix is not None and observe: + if self.index_ir < len(self.ir_prefix): + node_value = self._pop_ir_tree_node(ir_type, kwargs, forced=forced) + else: + try: + (node_value, _buf) = ir_to_buffer( + ir_type, kwargs, forced=forced, random=self.__random + ) + except StopTest: + debug_report("overrun because ir_to_buffer overran") + self.mark_overrun() + + if forced is None: + forced = node_value + fake_forced = True + + value = getattr(self.provider, f"draw_{ir_type}")( + **kwargs, forced=forced, fake_forced=fake_forced + ) + + if observe: + was_forced = forced is not None and not fake_forced + getattr(self.observer, f"draw_{ir_type}")( + value, kwargs=kwargs, was_forced=was_forced + ) + size = ir_size([value]) + if self.length_ir + size > self.max_length_ir: + debug_report( + f"overrun because {self.length_ir=} + {size=} > {self.max_length_ir=}" + ) + self.mark_overrun() + + node = IRNode( + ir_type=ir_type, + value=value, + kwargs=kwargs, + was_forced=was_forced, + index=len(self.ir_nodes), + ) + self.__example_record.record_ir_draw() + self.ir_nodes += (node,) + self.length_ir += size + + return value + def draw_integer( self, min_value: Optional[int] = None, max_value: Optional[int] = None, *, - # weights are for choosing an element index from a bounded range - weights: Optional[Sequence[float]] = None, + weights: Optional[dict[int, float]] = None, shrink_towards: int = 0, forced: Optional[int] = None, fake_forced: bool = False, @@ -2127,9 +2208,14 @@ class ConjectureData: if weights is not None: assert min_value is not None assert max_value is not None - width = max_value - min_value + 1 - assert width <= 255 # arbitrary practical limit - assert len(weights) == width + assert len(weights) <= 255 # arbitrary practical limit + # We can and should eventually support total weights. But this + # complicates shrinking as we can no longer assume we can force + # a value to the unmapped probability mass if that mass might be 0. + assert sum(weights.values()) < 1 + # similarly, things get simpler if we assume every value is possible. + # we'll want to drop this restriction eventually. + assert all(w != 0 for w in weights.values()) if forced is not None and (min_value is None or max_value is None): # We draw `forced=forced - shrink_towards` here internally, after clamping. @@ -2157,28 +2243,9 @@ class ConjectureData: "shrink_towards": shrink_towards, }, ) - - if self.ir_tree_nodes is not None and observe: - node_value = self._pop_ir_tree_node("integer", kwargs, forced=forced) - if forced is None: - assert isinstance(node_value, int) - forced = node_value - fake_forced = True - - value = self.provider.draw_integer( - **kwargs, forced=forced, fake_forced=fake_forced + return self._draw( + "integer", kwargs, observe=observe, forced=forced, fake_forced=fake_forced ) - if observe: - self.observer.draw_integer( - value, kwargs=kwargs, was_forced=forced is not None and not fake_forced - ) - self.__example_record.record_ir_draw( - "integer", - value, - kwargs=kwargs, - was_forced=forced is not None and not fake_forced, - ) - return value def draw_float( self, @@ -2214,28 +2281,9 @@ class ConjectureData: "smallest_nonzero_magnitude": smallest_nonzero_magnitude, }, ) - - if self.ir_tree_nodes is not None and observe: - node_value = self._pop_ir_tree_node("float", kwargs, forced=forced) - if forced is None: - assert isinstance(node_value, float) - forced = node_value - fake_forced = True - - value = self.provider.draw_float( - **kwargs, forced=forced, fake_forced=fake_forced + return self._draw( + "float", kwargs, observe=observe, forced=forced, fake_forced=fake_forced ) - if observe: - self.observer.draw_float( - value, kwargs=kwargs, was_forced=forced is not None and not fake_forced - ) - self.__example_record.record_ir_draw( - "float", - value, - kwargs=kwargs, - was_forced=forced is not None and not fake_forced, - ) - return value def draw_string( self, @@ -2258,27 +2306,9 @@ class ConjectureData: "max_size": max_size, }, ) - if self.ir_tree_nodes is not None and observe: - node_value = self._pop_ir_tree_node("string", kwargs, forced=forced) - if forced is None: - assert isinstance(node_value, str) - forced = node_value - fake_forced = True - - value = self.provider.draw_string( - **kwargs, forced=forced, fake_forced=fake_forced + return self._draw( + "string", kwargs, observe=observe, forced=forced, fake_forced=fake_forced ) - if observe: - self.observer.draw_string( - value, kwargs=kwargs, was_forced=forced is not None and not fake_forced - ) - self.__example_record.record_ir_draw( - "string", - value, - kwargs=kwargs, - was_forced=forced is not None and not fake_forced, - ) - return value def draw_bytes( self, @@ -2295,28 +2325,9 @@ class ConjectureData: kwargs: BytesKWargs = self._pooled_kwargs( "bytes", {"min_size": min_size, "max_size": max_size} ) - - if self.ir_tree_nodes is not None and observe: - node_value = self._pop_ir_tree_node("bytes", kwargs, forced=forced) - if forced is None: - assert isinstance(node_value, bytes) - forced = node_value - fake_forced = True - - value = self.provider.draw_bytes( - **kwargs, forced=forced, fake_forced=fake_forced + return self._draw( + "bytes", kwargs, observe=observe, forced=forced, fake_forced=fake_forced ) - if observe: - self.observer.draw_bytes( - value, kwargs=kwargs, was_forced=forced is not None and not fake_forced - ) - self.__example_record.record_ir_draw( - "bytes", - value, - kwargs=kwargs, - was_forced=forced is not None and not fake_forced, - ) - return value def draw_boolean( self, @@ -2336,28 +2347,9 @@ class ConjectureData: assert (forced is not False) or p < (1 - eps) kwargs: BooleanKWargs = self._pooled_kwargs("boolean", {"p": p}) - - if self.ir_tree_nodes is not None and observe: - node_value = self._pop_ir_tree_node("boolean", kwargs, forced=forced) - if forced is None: - assert isinstance(node_value, bool) - forced = node_value - fake_forced = True - - value = self.provider.draw_boolean( - **kwargs, forced=forced, fake_forced=fake_forced + return self._draw( + "boolean", kwargs, observe=observe, forced=forced, fake_forced=fake_forced ) - if observe: - self.observer.draw_boolean( - value, kwargs=kwargs, was_forced=forced is not None and not fake_forced - ) - self.__example_record.record_ir_draw( - "boolean", - value, - kwargs=kwargs, - was_forced=forced is not None and not fake_forced, - ) - return value def _pooled_kwargs(self, ir_type, kwargs): """Memoize common dictionary objects to reduce memory pressure.""" @@ -2365,18 +2357,7 @@ class ConjectureData: if self.provider.avoid_realization: return kwargs - key = [] - for k, v in kwargs.items(): - if ir_type == "float" and k in ["min_value", "max_value"]: - # handle -0.0 vs 0.0, etc. - v = float_to_int(v) - elif ir_type == "integer" and k == "weights": - # make hashable - v = v if v is None else tuple(v) - key.append((k, v)) - - key = (ir_type, *sorted(key)) - + key = (ir_type, *ir_kwargs_key(ir_type, kwargs)) try: return POOLED_KWARGS_CACHE[key] except KeyError: @@ -2388,12 +2369,30 @@ class ConjectureData: ) -> IRType: from hypothesis.internal.conjecture.engine import BUFFER_SIZE - assert self.ir_tree_nodes is not None + assert self.ir_prefix is not None + # checked in _draw + assert self.index_ir < len(self.ir_prefix) - if self._node_index == len(self.ir_tree_nodes): - self.mark_overrun() + node = self.ir_prefix[self.index_ir] + if isinstance(node, NodeTemplate): + assert node.size >= 0 + # node templates have to be at the end for now, since it's not immediately + # apparent how to handle overruning a node template while generating a single + # node if the alternative is not "the entire data is an overrun". + assert self.index_ir == len(self.ir_prefix) - 1 + if node.type == "simplest": + try: + value = buffer_to_ir(ir_type, kwargs, buffer=bytes(BUFFER_SIZE)) + except StopTest: + self.mark_overrun() + else: + raise NotImplementedError + + node.size -= ir_size([value]) + if node.size < 0: + self.mark_overrun() + return value - node = self.ir_tree_nodes[self._node_index] value = node.value # If we're trying to: # * draw a different ir type at the same location @@ -2418,11 +2417,11 @@ class ConjectureData: ): # only track first misalignment for now. if self.misaligned_at is None: - self.misaligned_at = (self._node_index, ir_type, kwargs, forced) - (_value, buffer) = ir_to_buffer( - node.ir_type, node.kwargs, forced=node.value - ) + self.misaligned_at = (self.index_ir, ir_type, kwargs, forced) try: + (_value, buffer) = ir_to_buffer( + node.ir_type, node.kwargs, forced=node.value + ) value = buffer_to_ir( ir_type, kwargs, buffer=buffer + bytes(BUFFER_SIZE - len(buffer)) ) @@ -2434,7 +2433,7 @@ class ConjectureData: # buffer_to_ir(ir_type, kwargs, buffer=bytes(BUFFER_SIZE)) self.mark_overrun() - self._node_index += 1 + self.index_ir += 1 return value def as_result(self) -> Union[ConjectureResult, _Overrun]: @@ -2450,6 +2449,7 @@ class ConjectureData: interesting_origin=self.interesting_origin, buffer=self.buffer, examples=self.examples, + ir_nodes=self.ir_nodes, blocks=self.blocks, output=self.output, extra_information=( @@ -2485,6 +2485,9 @@ class ConjectureData: label: Optional[int] = None, observe_as: Optional[str] = None, ) -> "Ex": + from hypothesis.internal.observability import TESTCASE_CALLBACKS + from hypothesis.strategies._internal.utils import to_jsonable + if self.is_find and not strategy.supports_find: raise InvalidArgument( f"Cannot use strategy {strategy!r} within a call to find " @@ -2521,15 +2524,21 @@ class ConjectureData: try: strategy.validate() try: - return strategy.do_draw(self) + v = strategy.do_draw(self) finally: # Subtract the time spent in GC to avoid overcounting, as it is # accounted for at the overall example level. in_gctime = gc_cumulative_time() - gc_start_time self.draw_times[key] = time.perf_counter() - start_time - in_gctime except Exception as err: - add_note(err, f"while generating {key[9:]!r} from {strategy!r}") + add_note( + err, + f"while generating {key.removeprefix('generate:')!r} from {strategy!r}", + ) raise + if TESTCASE_CALLBACKS: + self._observability_args[key] = to_jsonable(v) + return v finally: self.stop_example() diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py index 5ec38c2edf4..98eafedb255 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py @@ -174,18 +174,13 @@ def compute_max_children(ir_type, kwargs): if ir_type == "integer": min_value = kwargs["min_value"] max_value = kwargs["max_value"] - weights = kwargs["weights"] if min_value is None and max_value is None: # full 128 bit range. return 2**128 - 1 if min_value is not None and max_value is not None: # count between min/max value. - n = max_value - min_value + 1 - # remove any values with a zero probability of being drawn (weight=0). - if weights is not None: - n -= sum(weight == 0 for weight in weights) - return n + return max_value - min_value + 1 # hard case: only one bound was specified. Here we probe either upwards # or downwards with our full 128 bit generation, but only half of these @@ -267,21 +262,13 @@ def all_children(ir_type, kwargs): if ir_type == "integer": min_value = kwargs["min_value"] max_value = kwargs["max_value"] - weights = kwargs["weights"] if min_value is None and max_value is None: # full 128 bit range. yield from range(-(2**127) + 1, 2**127 - 1) elif min_value is not None and max_value is not None: - if weights is None: - yield from range(min_value, max_value + 1) - else: - # skip any values with a corresponding weight of 0 (can never be drawn). - for weight, n in zip(weights, range(min_value, max_value + 1)): - if weight == 0: - continue - yield n + yield from range(min_value, max_value + 1) else: assert (min_value is None) ^ (max_value is None) # hard case: only one bound was specified. Here we probe in 128 bits @@ -723,12 +710,15 @@ class DataTree: for it to be uniform at random, but previous attempts to do that have proven too expensive. """ + from hypothesis.internal.conjecture.data import IRNode assert not self.is_exhausted - novel_prefix = bytearray() + novel_prefix = [] - def append_buf(buf): - novel_prefix.extend(buf) + def append_node(node): + if node.ir_type == "float": + node.value = int_to_float(node.value) + novel_prefix.append(node) current_node = self.root while True: @@ -737,18 +727,17 @@ class DataTree: zip(current_node.ir_types, current_node.kwargs, current_node.values) ): if i in current_node.forced: - if ir_type == "float": - value = int_to_float(value) - (_value, buf) = self._draw( - ir_type, kwargs, forced=value, random=random + append_node( + IRNode( + ir_type=ir_type, value=value, kwargs=kwargs, was_forced=True + ) ) - append_buf(buf) else: attempts = 0 while True: if attempts <= 10: try: - (v, buf) = self._draw(ir_type, kwargs, random=random) + node = self._draw(ir_type, kwargs, random=random) except StopTest: # pragma: no cover # it is possible that drawing from a fresh data can # overrun BUFFER_SIZE, due to eg unlucky rejection sampling @@ -756,24 +745,24 @@ class DataTree: attempts += 1 continue else: - (v, buf) = self._draw_from_cache( + node = self._draw_from_cache( ir_type, kwargs, key=id(current_node), random=random ) - if v != value: - append_buf(buf) + if node.value != value: + append_node(node) break attempts += 1 self._reject_child( - ir_type, kwargs, child=v, key=id(current_node) + ir_type, kwargs, child=node.value, key=id(current_node) ) # We've now found a value that is allowed to # vary, so what follows is not fixed. - return bytes(novel_prefix) + return tuple(novel_prefix) else: assert not isinstance(current_node.transition, (Conclusion, Killed)) if current_node.transition is None: - return bytes(novel_prefix) + return tuple(novel_prefix) branch = current_node.transition assert isinstance(branch, Branch) @@ -781,28 +770,28 @@ class DataTree: while True: if attempts <= 10: try: - (v, buf) = self._draw( + node = self._draw( branch.ir_type, branch.kwargs, random=random ) except StopTest: # pragma: no cover attempts += 1 continue else: - (v, buf) = self._draw_from_cache( + node = self._draw_from_cache( branch.ir_type, branch.kwargs, key=id(branch), random=random ) try: - child = branch.children[v] + child = branch.children[node.value] except KeyError: - append_buf(buf) - return bytes(novel_prefix) + append_node(node) + return tuple(novel_prefix) if not child.is_exhausted: - append_buf(buf) + append_node(node) current_node = child break attempts += 1 self._reject_child( - branch.ir_type, branch.kwargs, child=v, key=id(branch) + branch.ir_type, branch.kwargs, child=node.value, key=id(branch) ) # We don't expect this assertion to ever fire, but coverage @@ -814,19 +803,17 @@ class DataTree: or any(not v.is_exhausted for v in branch.children.values()) ) - def rewrite(self, buffer): + def rewrite(self, nodes): """Use previously seen ConjectureData objects to return a tuple of - the rewritten buffer and the status we would get from running that - buffer with the test function. If the status cannot be predicted + the rewritten choice sequence and the status we would get from running + that with the test function. If the status cannot be predicted from the existing values it will be None.""" - buffer = bytes(buffer) - - data = ConjectureData.for_buffer(buffer) + data = ConjectureData.for_ir_tree(nodes) try: self.simulate_test_function(data) - return (data.buffer, data.status) + return (data.ir_nodes, data.status) except PreviouslyUnseenBehaviour: - return (buffer, None) + return (nodes, None) def simulate_test_function(self, data): """Run a simulated version of the test function recorded by @@ -877,10 +864,10 @@ class DataTree: def new_observer(self): return TreeRecordingObserver(self) - def _draw(self, ir_type, kwargs, *, random, forced=None): - from hypothesis.internal.conjecture.data import ir_to_buffer + def _draw(self, ir_type, kwargs, *, random): + from hypothesis.internal.conjecture.data import IRNode, ir_to_buffer - (value, buf) = ir_to_buffer(ir_type, kwargs, forced=forced, random=random) + (value, buf) = ir_to_buffer(ir_type, kwargs, random=random) # using floats as keys into branch.children breaks things, because # e.g. hash(0.0) == hash(-0.0) would collide as keys when they are # in fact distinct child branches. @@ -891,7 +878,7 @@ class DataTree: # buffer), and converting between the two forms as appropriate. if ir_type == "float": value = float_to_int(value) - return (value, buf) + return IRNode(ir_type=ir_type, value=value, kwargs=kwargs, was_forced=False) def _get_children_cache(self, ir_type, kwargs, *, key): # cache the state of the children generator per node/branch (passed as @@ -912,6 +899,8 @@ class DataTree: return self._children_cache[key] def _draw_from_cache(self, ir_type, kwargs, *, key, random): + from hypothesis.internal.conjecture.data import IRNode + (generator, children, rejected) = self._get_children_cache( ir_type, kwargs, key=key ) @@ -931,11 +920,8 @@ class DataTree: if len(children) >= 100: break - forced = random.choice(children) - if ir_type == "float": - forced = int_to_float(forced) - (value, buf) = self._draw(ir_type, kwargs, forced=forced, random=random) - return (value, buf) + value = random.choice(children) + return IRNode(ir_type=ir_type, value=value, kwargs=kwargs, was_forced=True) def _reject_child(self, ir_type, kwargs, *, child, key): (_generator, children, rejected) = self._get_children_cache( diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py index 7cf4e72ed85..2b182ed6750 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py @@ -13,7 +13,7 @@ import math import textwrap import time from collections import defaultdict -from collections.abc import Generator +from collections.abc import Generator, Sequence from contextlib import contextmanager, suppress from datetime import timedelta from enum import Enum @@ -44,29 +44,24 @@ from hypothesis.errors import ( StopTest, ) from hypothesis.internal.cache import LRUReusedCache -from hypothesis.internal.compat import ( - NotRequired, - TypeAlias, - TypedDict, - ceil, - int_from_bytes, - override, -) +from hypothesis.internal.compat import NotRequired, TypeAlias, TypedDict, ceil, override from hypothesis.internal.conjecture.data import ( AVAILABLE_PROVIDERS, ConjectureData, ConjectureResult, DataObserver, - Example, HypothesisProvider, InterestingOrigin, IRKWargsType, IRNode, + NodeTemplate, Overrun, PrimitiveProvider, Status, _Overrun, ir_kwargs_key, + ir_size, + ir_size_nodes, ir_value_key, ) from hypothesis.internal.conjecture.datatree import ( @@ -74,7 +69,10 @@ from hypothesis.internal.conjecture.datatree import ( PreviouslyUnseenBehaviour, TreeRecordingObserver, ) -from hypothesis.internal.conjecture.junkdrawer import clamp, ensure_free_stackframes +from hypothesis.internal.conjecture.junkdrawer import ( + ensure_free_stackframes, + startswith, +) from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key from hypothesis.internal.healthcheck import fail_health_check @@ -85,6 +83,7 @@ CACHE_SIZE: Final[int] = 10000 MUTATION_POOL_SIZE: Final[int] = 100 MIN_TEST_CALLS: Final[int] = 10 BUFFER_SIZE: Final[int] = 8 * 1024 +BUFFER_SIZE_IR: Final[int] = 8 * 1024 # If the shrinking phase takes more than five minutes, abort it early and print # a warning. Many CI systems will kill a build after around ten minutes with @@ -201,6 +200,17 @@ StatisticsDict = TypedDict( ) +def truncate_nodes_to_size(nodes: Sequence[IRNode], size: int) -> tuple[IRNode, ...]: + s = 0 + i = 0 + for node in nodes: + s += ir_size([node.value]) + if s > size: + break + i += 1 + return tuple(nodes[:i]) + + class ConjectureRunner: def __init__( self, @@ -266,6 +276,8 @@ class ConjectureRunner: self.__data_cache = LRUReusedCache(CACHE_SIZE) self.__data_cache_ir = LRUReusedCache(CACHE_SIZE) + self.reused_previously_shrunk_test_case = False + self.__pending_call_explanation: Optional[str] = None self._switch_to_hypothesis_provider: bool = False @@ -330,12 +342,12 @@ class ConjectureRunner: def _cache_key_ir( self, *, - nodes: Optional[list[IRNode]] = None, + nodes: Optional[Sequence[IRNode]] = None, data: Union[ConjectureData, ConjectureResult, None] = None, ) -> tuple[tuple[Any, ...], ...]: assert (nodes is not None) ^ (data is not None) if data is not None: - nodes = data.examples.ir_tree_nodes + nodes = data.ir_nodes assert nodes is not None # intentionally drop was_forced from equality here, because the was_forced @@ -368,18 +380,34 @@ class ConjectureRunner: # smallest buffer via forced=. The overhead here is small because almost # all interesting data are ir-based via the shrinker (and that overhead # will tend towards zero as we move generation to the ir). - if data.ir_tree_nodes is not None or data.status < Status.INTERESTING: + if data.ir_prefix is not None or data.status < Status.INTERESTING: key = self._cache_key_ir(data=data) self.__data_cache_ir[key] = result def cached_test_function_ir( - self, nodes: list[IRNode], *, error_on_discard: bool = False + self, + nodes: Sequence[Union[IRNode, NodeTemplate]], + *, + error_on_discard: bool = False, + extend: int = 0, ) -> Union[ConjectureResult, _Overrun]: - key = self._cache_key_ir(nodes=nodes) - try: - return self.__data_cache_ir[key] - except KeyError: - pass + # node templates represent a not-yet-filled hole and therefore cannot + # be cached or retrieved from the cache. + if not any(isinstance(node, NodeTemplate) for node in nodes): + # this type cast is validated by the isinstance check above (ie, there + # are no NodeTemplate elements). + nodes = cast(Sequence[IRNode], nodes) + key = self._cache_key_ir(nodes=nodes) + try: + cached = self.__data_cache_ir[key] + # if we have a cached overrun for this key, but we're allowing extensions + # of the nodes, it could in fact run to a valid data if we try. + if extend == 0 or cached.status is not Status.OVERRUN: + return cached + except KeyError: + pass + + max_length = min(BUFFER_SIZE_IR, ir_size_nodes(nodes) + extend) # explicitly use a no-op DataObserver here instead of a TreeRecordingObserver. # The reason is we don't expect simulate_test_function to explore new choices @@ -396,19 +424,27 @@ class ConjectureRunner: trial_observer = DiscardObserver() try: - trial_data = self.new_conjecture_data_ir(nodes, observer=trial_observer) + trial_data = self.new_conjecture_data_ir( + nodes, observer=trial_observer, max_length=max_length + ) self.tree.simulate_test_function(trial_data) except PreviouslyUnseenBehaviour: pass else: trial_data.freeze() key = self._cache_key_ir(data=trial_data) + if trial_data.status is Status.OVERRUN: + # if we simulated to an overrun, then we our result is certainly + # an overrun; no need to consult the cache. (and we store this result + # for simulation-less lookup later). + self.__data_cache_ir[key] = Overrun + return Overrun try: return self.__data_cache_ir[key] except KeyError: pass - data = self.new_conjecture_data_ir(nodes) + data = self.new_conjecture_data_ir(nodes, max_length=max_length) # note that calling test_function caches `data` for us, for both an ir # tree key and a buffer key. self.test_function(data) @@ -458,7 +494,7 @@ class ConjectureRunner: } self.stats_per_test_case.append(call_stats) if self.settings.backend != "hypothesis": - for node in data.examples.ir_tree_nodes: + for node in data.ir_nodes: value = data.provider.realize(node.value) expected_type = { "string": str, @@ -533,7 +569,7 @@ class ConjectureRunner: initial_traceback = getattr( data.extra_information, "_expected_traceback", None ) - data = ConjectureData.for_ir_tree(data.examples.ir_tree_nodes) + data = ConjectureData.for_ir_tree(data.ir_nodes) self.__stoppable_test_function(data) data.freeze() # TODO: Convert to FlakyFailure on the way out. Should same-origin @@ -622,7 +658,7 @@ class ConjectureRunner: def on_pareto_evict(self, data: ConjectureData) -> None: self.settings.database.delete(self.pareto_key, data.buffer) - def generate_novel_prefix(self) -> bytes: + def generate_novel_prefix(self) -> tuple[IRNode, ...]: """Uses the tree to proactively generate a starting sequence of bytes that we haven't explored yet for this test. @@ -744,34 +780,14 @@ class ConjectureRunner: if not self.report_debug_info: return - stack: list[Ls] = [[]] - - def go(ex: Example) -> None: - if ex.length == 0: - return - if len(ex.children) == 0: - stack[-1].append(int_from_bytes(data.buffer[ex.start : ex.end])) - else: - node: Ls = [] - stack.append(node) - - for v in ex.children: - go(v) - stack.pop() - if len(node) == 1: - stack[-1].extend(node) - else: - stack[-1].append(node) - - go(data.examples[0]) - assert len(stack) == 1 - status = repr(data.status) - if data.status == Status.INTERESTING: status = f"{status} ({data.interesting_origin!r})" - self.debug(f"{data.index} bytes {stack[0]!r} -> {status}, {data.output}") + self.debug( + f"{len(data.choices)} choices {data.choices} -> {status}" + f"{', ' + data.output if data.output else ''}" + ) def run(self) -> None: with local_settings(self.settings): @@ -825,6 +841,7 @@ class ConjectureRunner: ) factor = 0.1 if (Phase.generate in self.settings.phases) else 1 desired_size = max(2, ceil(factor * self.settings.max_examples)) + primary_corpus_size = len(corpus) if len(corpus) < desired_size: extra_corpus = list(self.settings.database.fetch(self.secondary_key)) @@ -838,11 +855,29 @@ class ConjectureRunner: extra.sort(key=sort_key) corpus.extend(extra) - for existing in corpus: + # We want a fast path where every primary entry in the database was + # interesting. + found_interesting_in_primary = False + all_interesting_in_primary_were_exact = True + + for i, existing in enumerate(corpus): + if i >= primary_corpus_size and found_interesting_in_primary: + break data = self.cached_test_function(existing, extend=BUFFER_SIZE) if data.status != Status.INTERESTING: self.settings.database.delete(self.database_key, existing) self.settings.database.delete(self.secondary_key, existing) + else: + if i < primary_corpus_size: + found_interesting_in_primary = True + assert not isinstance(data, _Overrun) + if existing != data.buffer: + all_interesting_in_primary_were_exact = False + if not self.settings.report_multiple_bugs: + break + if found_interesting_in_primary: + if all_interesting_in_primary_were_exact: + self.reused_previously_shrunk_test_case = True # Because self.database is not None (because self.has_existing_examples()) # and self.database_key is not None (because we fetched using it above), @@ -984,9 +1019,14 @@ class ConjectureRunner: # because any fixed size might be too small, and any size based # on the strategy in general can fall afoul of strategies that # have very different sizes for different prefixes. - small_example_cap = clamp(10, self.settings.max_examples // 10, 50) - - optimise_at = max(self.settings.max_examples // 2, small_example_cap + 1) + # + # We previously set a minimum value of 10 on small_example_cap, with the + # reasoning of avoiding flaky health checks. However, some users set a + # low max_examples for performance. A hard lower bound in this case biases + # the distribution towards small (and less powerful) examples. Flaky + # and loud health checks are better than silent performance degradation. + small_example_cap = min(self.settings.max_examples // 10, 50) + optimise_at = max(self.settings.max_examples // 2, small_example_cap + 1, 10) ran_optimisations = False while self.should_generate_more(): @@ -1005,15 +1045,17 @@ class ConjectureRunner: # it is possible, if unlikely, to generate a > BUFFER_SIZE novel prefix, # as nodes in the novel tree may be variable sized due to eg integer # probe retries. - prefix = prefix[:BUFFER_SIZE] + prefix = truncate_nodes_to_size(prefix, BUFFER_SIZE_IR) if ( self.valid_examples <= small_example_cap and self.call_count <= 5 * small_example_cap and not self.interesting_examples and consecutive_zero_extend_is_invalid < 5 ): - minimal_example = self.cached_test_function( - prefix + bytes(BUFFER_SIZE - len(prefix)) + prefix_size = ir_size_nodes(prefix) + minimal_example = self.cached_test_function_ir( + prefix + + (NodeTemplate("simplest", size=BUFFER_SIZE_IR - prefix_size),) ) if minimal_example.status < Status.VALID: @@ -1023,12 +1065,11 @@ class ConjectureRunner: # Status.OVERRUN, which guarantees that the minimal_example is a # ConjectureResult object. assert isinstance(minimal_example, ConjectureResult) - consecutive_zero_extend_is_invalid = 0 - - minimal_extension = len(minimal_example.buffer) - len(prefix) - - max_length = min(len(prefix) + minimal_extension * 10, BUFFER_SIZE) + minimal_extension = ( + ir_size_nodes(minimal_example.ir_nodes) - prefix_size + ) + max_length = min(prefix_size + minimal_extension * 10, BUFFER_SIZE_IR) # We could end up in a situation where even though the prefix was # novel when we generated it, because we've now tried zero extending @@ -1038,10 +1079,7 @@ class ConjectureRunner: # running the test function for real here. If however we encounter # some novel behaviour, we try again with the real test function, # starting from the new novel prefix that has discovered. - - trial_data = self.new_conjecture_data( - prefix=prefix, max_length=max_length - ) + trial_data = self.new_conjecture_data_ir(prefix, max_length=max_length) try: self.tree.simulate_test_function(trial_data) continue @@ -1059,17 +1097,16 @@ class ConjectureRunner: if not self.should_generate_more(): break - prefix = trial_data.buffer + prefix = trial_data.ir_nodes else: - max_length = BUFFER_SIZE - - data = self.new_conjecture_data(prefix=prefix, max_length=max_length) + max_length = BUFFER_SIZE_IR + data = self.new_conjecture_data_ir(prefix, max_length=max_length) self.test_function(data) if ( - data.status == Status.OVERRUN - and max_length < BUFFER_SIZE + data.status is Status.OVERRUN + and max_length < BUFFER_SIZE_IR and "invalid because" not in data.events ): data.events["invalid because"] = ( @@ -1146,7 +1183,7 @@ class ConjectureRunner: (start1, end1), (start2, end2) = (start2, end2), (start1, end1) assert end1 <= start2 - nodes = data.examples.ir_tree_nodes + nodes = data.ir_nodes (start, end) = self.random.choice([(start1, end1), (start2, end2)]) replacement = nodes[start:end] @@ -1241,6 +1278,11 @@ class ConjectureRunner: self._switch_to_hypothesis_provider = True with self._log_phase_statistics("reuse"): self.reuse_existing_examples() + # Fast path for development: If the database gave us interesting + # examples from the previously stored primary key, don't try + # shrinking it again as it's unlikely to work. + if self.reused_previously_shrunk_test_case: + self.exit_with(ExitReason.finished) # ...but we should use the supplied provider when generating... self._switch_to_hypothesis_provider = False with self._log_phase_statistics("generate"): @@ -1259,7 +1301,7 @@ class ConjectureRunner: def new_conjecture_data_ir( self, - ir_tree_prefix: list[IRNode], + ir_tree_prefix: Sequence[Union[IRNode, NodeTemplate]], *, observer: Optional[DataObserver] = None, max_length: Optional[int] = None, @@ -1272,7 +1314,11 @@ class ConjectureRunner: observer = DataObserver() return ConjectureData.for_ir_tree( - ir_tree_prefix, observer=observer, provider=provider, max_length=max_length + ir_tree_prefix, + observer=observer, + provider=provider, + max_length=max_length, + random=self.random, ) def new_conjecture_data( @@ -1318,7 +1364,7 @@ class ConjectureRunner: self.interesting_examples.values(), key=lambda d: sort_key(d.buffer) ): assert prev_data.status == Status.INTERESTING - data = self.new_conjecture_data_ir(prev_data.examples.ir_tree_nodes) + data = self.new_conjecture_data_ir(prev_data.ir_nodes) self.test_function(data) if data.status != Status.INTERESTING: self.exit_with(ExitReason.flaky) @@ -1334,7 +1380,7 @@ class ConjectureRunner: ), key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))), ) - self.debug(f"Shrinking {target!r}") + self.debug(f"Shrinking {target!r}: {data.choices}") if not self.settings.report_multiple_bugs: # If multi-bug reporting is disabled, we shrink our currently-minimal @@ -1411,7 +1457,7 @@ class ConjectureRunner: buffer: Union[bytes, bytearray], *, extend: int = 0, - ) -> Union[ConjectureResult, _Overrun]: + ) -> Union[ConjectureResult, _Overrun]: # pragma: no cover # removing function soon """Checks the tree to see if we've tested this buffer, and returns the previous result if we have. @@ -1488,15 +1534,17 @@ class ConjectureRunner: self.__data_cache[buffer] = result return result - def passing_buffers(self, prefix: bytes = b"") -> frozenset[bytes]: - """Return a collection of bytestrings which cause the test to pass. - + def passing_choice_sequences( + self, prefix: Sequence[IRNode] = () + ) -> frozenset[bytes]: + """Return a collection of choice sequence nodes which cause the test to pass. Optionally restrict this by a certain prefix, which is useful for explain mode. """ return frozenset( - buf - for buf in self.__data_cache - if buf.startswith(prefix) and self.__data_cache[buf].status == Status.VALID + result.ir_nodes + for key in self.__data_cache_ir + if (result := self.__data_cache_ir[key]).status is Status.VALID + and startswith(result.ir_nodes, prefix) ) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py index 2c46a952239..51ced29bd94 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py @@ -468,3 +468,15 @@ def gc_cumulative_time() -> float: _gc_initialized = True return _gc_cumulative_time + + +def startswith(l1: Sequence[T], l2: Sequence[T]) -> bool: + if len(l1) < len(l2): + return False + return all(v1 == v2 for v1, v2 in zip(l1[: len(l2)], l2)) + + +def endswith(l1: Sequence[T], l2: Sequence[T]) -> bool: + if len(l1) < len(l2): + return False + return all(v1 == v2 for v1, v2 in zip(l1[-len(l2) :], l2)) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/optimiser.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/optimiser.py index a8f4478453e..324d19c5715 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/optimiser.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/optimiser.py @@ -11,8 +11,16 @@ from typing import Union from hypothesis.internal.compat import int_from_bytes, int_to_bytes -from hypothesis.internal.conjecture.data import ConjectureResult, Status, _Overrun -from hypothesis.internal.conjecture.engine import BUFFER_SIZE, ConjectureRunner +from hypothesis.internal.conjecture.data import ( + ConjectureResult, + IRType, + Status, + _Overrun, + bits_to_bytes, + ir_size_nodes, + ir_value_permitted, +) +from hypothesis.internal.conjecture.engine import BUFFER_SIZE_IR, ConjectureRunner from hypothesis.internal.conjecture.junkdrawer import find_integer from hypothesis.internal.conjecture.pareto import NO_SCORE @@ -75,9 +83,9 @@ class Optimiser: return True assert score == self.current_score # We allow transitions that leave the score unchanged as long as they - # don't increase the buffer size. This gives us a certain amount of + # don't increase the number of nodes. This gives us a certain amount of # freedom for lateral moves that will take us out of local maxima. - if len(data.buffer) <= len(self.current_data.buffer): + if len(data.ir_nodes) <= len(self.current_data.ir_nodes): self.current_data = data return True return False @@ -88,92 +96,121 @@ class Optimiser: a data object and returns an index to an example where we should focus our efforts.""" - blocks_examined = set() + nodes_examined = set() prev = None - i = len(self.current_data.blocks) - 1 + i = len(self.current_data.ir_nodes) - 1 while i >= 0 and self.improvements <= self.max_improvements: if prev is not self.current_data: - i = len(self.current_data.blocks) - 1 + i = len(self.current_data.ir_nodes) - 1 prev = self.current_data - if i in blocks_examined: + if i in nodes_examined: i -= 1 continue - blocks_examined.add(i) - data = self.current_data - block = data.blocks[i] - prefix = data.buffer[: block.start] + nodes_examined.add(i) + node = self.current_data.ir_nodes[i] + assert node.index is not None + # we can only (sensibly & easily) define hill climbing for + # numeric-style nodes. It's not clear hill-climbing a string is + # useful, for instance. + if node.ir_type not in {"integer", "float", "bytes", "boolean"}: + continue - existing = data.buffer[block.start : block.end] - existing_as_int = int_from_bytes(existing) - max_int_value = (256 ** len(existing)) - 1 + def attempt_replace(k: int) -> bool: + """ + Try replacing the current node in the current best test case + with a value which is "k times larger", where the exact notion + of "larger" depends on the ir_type. - if existing_as_int == max_int_value: - continue + Note that we use the *current* best and not the one we started with. + This helps ensure that if we luck into a good draw when making + random choices we get to keep the good bits. + """ + # we don't want to infinitely drive up an unbounded score. + if abs(k) > 2**20: + return False + + node = self.current_data.ir_nodes[i] + assert node.index is not None + if node.was_forced: + return False # pragma: no cover + + new_value: IRType + if node.ir_type in {"integer", "float"}: + assert isinstance(node.value, (int, float)) + new_value = node.value + k + elif node.ir_type == "boolean": + assert isinstance(node.value, bool) + if abs(k) > 1: + return False + if k == -1: + new_value = False + if k == 1: + new_value = True + if k == 0: # pragma: no cover + new_value = node.value + else: + assert node.ir_type == "bytes" + assert isinstance(node.value, bytes) + v = int_from_bytes(node.value) + # can't go below zero for bytes + if v + k < 0: + return False + v += k + # allow adding k to increase the number of bytes. we don't want + # to decrease so that b"01" doesn't turn into b"1". + size = max(len(node.value), bits_to_bytes(v.bit_length())) + new_value = int_to_bytes(v, size) - def attempt_replace(v: int) -> bool: - """Try replacing the current block in the current best test case - with an integer of value i. Note that we use the *current* - best and not the one we started with. This helps ensure that - if we luck into a good draw when making random choices we get - to keep the good bits.""" - if v < 0 or v > max_int_value: + if not ir_value_permitted(new_value, node.ir_type, node.kwargs): return False - v_as_bytes = int_to_bytes(v, len(existing)) - # We make a couple attempts at replacement. This only matters - # if we end up growing the buffer - otherwise we exit the loop - # early - but in the event that there *is* some randomized - # component we want to give it a couple of tries to succeed. for _ in range(3): - attempt = self.engine.cached_test_function( - prefix - + v_as_bytes - + self.current_data.buffer[block.end :] - + bytes(BUFFER_SIZE), + nodes = self.current_data.ir_nodes + attempt_nodes = ( + nodes[: node.index] + + (node.copy(with_value=new_value),) + + nodes[node.index + 1 :] + ) + attempt = self.engine.cached_test_function_ir( + attempt_nodes, + extend=BUFFER_SIZE_IR - ir_size_nodes(attempt_nodes), ) if self.consider_new_data(attempt): return True - if attempt.status == Status.OVERRUN: + if attempt.status is Status.OVERRUN: return False assert isinstance(attempt, ConjectureResult) - if len(attempt.buffer) == len(self.current_data.buffer): + if len(attempt.ir_nodes) == len(self.current_data.ir_nodes): return False - for i, ex in enumerate(self.current_data.examples): - if ex.start >= block.end: + for j, ex in enumerate(self.current_data.examples): + if ex.ir_start >= node.index + 1: break # pragma: no cover - if ex.end <= block.start: + if ex.ir_end <= node.index: continue - ex_attempt = attempt.examples[i] - if ex.length == ex_attempt.length: + ex_attempt = attempt.examples[j] + if ex.ir_length == ex_attempt.ir_length: continue # pragma: no cover - replacement = attempt.buffer[ex_attempt.start : ex_attempt.end] + replacement = attempt.ir_nodes[ + ex_attempt.ir_start : ex_attempt.ir_end + ] if self.consider_new_data( - self.engine.cached_test_function( - prefix + self.engine.cached_test_function_ir( + nodes[: node.index] + replacement - + self.current_data.buffer[ex.end :] + + self.current_data.ir_nodes[ex.ir_end :] ) ): return True return False - # We unconditionally scan both upwards and downwards. The reason - # for this is that we allow "lateral" moves that don't increase the - # score but instead leave it constant. All else being equal we'd - # like to leave the test case closer to shrunk, so afterwards we - # try lowering the value towards zero even if we've just raised it. - - if not attempt_replace(max_int_value): - find_integer(lambda k: attempt_replace(k + existing_as_int)) - - existing = self.current_data.buffer[block.start : block.end] - existing_as_int = int_from_bytes(existing) - if not attempt_replace(0): - find_integer(lambda k: attempt_replace(existing_as_int - k)) + # we don't know whether a target score increases or decreases with + # respect to the value of some node, so try both directions. + find_integer(lambda k: attempt_replace(k)) + find_integer(lambda k: attempt_replace(-k)) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py index a1a34249638..0e940d47b42 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py @@ -9,6 +9,7 @@ # obtain one at https://mozilla.org/MPL/2.0/. from collections import defaultdict +from collections.abc import Sequence from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union import attr @@ -22,13 +23,20 @@ from hypothesis.internal.conjecture.choicetree import ( from hypothesis.internal.conjecture.data import ( ConjectureData, ConjectureResult, + IRNode, Status, - bits_to_bytes, + ir_size_nodes, + ir_to_buffer, ir_value_equal, ir_value_key, ir_value_permitted, ) -from hypothesis.internal.conjecture.junkdrawer import find_integer, replace_all +from hypothesis.internal.conjecture.junkdrawer import ( + endswith, + find_integer, + replace_all, + startswith, +) from hypothesis.internal.conjecture.shrinking import ( Bytes, Float, @@ -386,12 +394,9 @@ class Shrinker: self.check_calls() return result - def consider_new_tree(self, tree): + def consider_new_tree(self, tree: Sequence[IRNode]) -> bool: tree = tree[: len(self.nodes)] - def startswith(t1, t2): - return t1[: len(t2)] == t2 - if startswith(tree, self.nodes): return True @@ -408,7 +413,9 @@ class Shrinker: buffer = bytes(buffer) return buffer.startswith(self.buffer) or self.incorporate_new_buffer(buffer) - def incorporate_new_buffer(self, buffer): + def incorporate_new_buffer( + self, buffer + ): # pragma: no cover # removing function soon """Either runs the test function on this buffer and returns True if that changed the shrink_target, or determines that doing so would be useless and returns False without running it.""" @@ -538,20 +545,21 @@ class Shrinker: self.explain() def explain(self): + from hypothesis.internal.conjecture.engine import BUFFER_SIZE_IR + if not self.should_explain or not self.shrink_target.arg_slices: return - from hypothesis.internal.conjecture.engine import BUFFER_SIZE self.max_stall = 1e999 shrink_target = self.shrink_target - buffer = shrink_target.buffer + nodes = self.nodes chunks = defaultdict(list) # Before we start running experiments, let's check for known inputs which would # make them redundant. The shrinking process means that we've already tried many # variations on the minimal example, so this can save a lot of time. - seen_passing_buffers = self.engine.passing_buffers( - prefix=buffer[: min(self.shrink_target.arg_slices)[0]] + seen_passing_seq = self.engine.passing_choice_sequences( + prefix=self.nodes[: min(self.shrink_target.arg_slices)[0]] ) # Now that we've shrunk to a minimal failing example, it's time to try @@ -563,8 +571,8 @@ class Shrinker: # Check for any previous examples that match the prefix and suffix, # so we can skip if we found a passing example while shrinking. if any( - seen.startswith(buffer[:start]) and seen.endswith(buffer[end:]) - for seen in seen_passing_buffers + startswith(seen, nodes[:start]) and endswith(seen, nodes[end:]) + for seen in seen_passing_seq ): continue @@ -579,47 +587,55 @@ class Shrinker: # stop early if we're seeing mostly invalid examples break # pragma: no cover - buf_attempt_fixed = bytearray(buffer) - buf_attempt_fixed[start:end] = [ - self.random.randint(0, 255) for _ in range(end - start) - ] - result = self.engine.cached_test_function( - buf_attempt_fixed, extend=BUFFER_SIZE - len(buf_attempt_fixed) + # replace start:end with random values + replacement = [] + for i in range(start, end): + node = nodes[i] + if not node.was_forced: + (value, _buf) = ir_to_buffer( + node.ir_type, node.kwargs, random=self.random + ) + node = node.copy(with_value=value) + replacement.append(node) + + attempt = nodes[:start] + tuple(replacement) + nodes[end:] + result = self.engine.cached_test_function_ir( + attempt, extend=BUFFER_SIZE_IR - ir_size_nodes(attempt) ) # Turns out this was a variable-length part, so grab the infix... - if result.status == Status.OVERRUN: + if result.status is Status.OVERRUN: continue # pragma: no cover # flakily covered if not ( - len(buf_attempt_fixed) == len(result.buffer) - and result.buffer.endswith(buffer[end:]) + len(attempt) == len(result.ir_nodes) + and endswith(result.ir_nodes, nodes[end:]) ): for ex, res in zip(shrink_target.examples, result.examples): - assert ex.start == res.start - assert ex.start <= start + assert ex.ir_start == res.ir_start + assert ex.ir_start <= start assert ex.label == res.label - if start == ex.start and end == ex.end: - res_end = res.end + if start == ex.ir_start and end == ex.ir_end: + res_end = res.ir_end break else: raise NotImplementedError("Expected matching prefixes") - buf_attempt_fixed = ( - buffer[:start] + result.buffer[start:res_end] + buffer[end:] + attempt = ( + nodes[:start] + result.ir_nodes[start:res_end] + nodes[end:] ) - chunks[(start, end)].append(result.buffer[start:res_end]) - result = self.engine.cached_test_function(buf_attempt_fixed) + chunks[(start, end)].append(result.ir_nodes[start:res_end]) + result = self.engine.cached_test_function_ir(attempt) - if result.status == Status.OVERRUN: + if result.status is Status.OVERRUN: continue # pragma: no cover # flakily covered else: - chunks[(start, end)].append(result.buffer[start:end]) + chunks[(start, end)].append(result.ir_nodes[start:end]) if shrink_target is not self.shrink_target: # pragma: no cover # If we've shrunk further without meaning to, bail out. self.shrink_target.slice_comments.clear() return - if result.status == Status.VALID: + if result.status is Status.VALID: # The test passed, indicating that this param can't vary freely. # However, it's really hard to write a simple and reliable covering # test, because of our `seen_passing_buffers` check above. @@ -638,15 +654,15 @@ class Shrinker: chunks_by_start_index = sorted(chunks.items()) for _ in range(500): # pragma: no branch # no-branch here because we don't coverage-test the abort-at-500 logic. - new_buf = bytearray() + new_nodes = [] prev_end = 0 for (start, end), ls in chunks_by_start_index: assert prev_end <= start < end, "these chunks must be nonoverlapping" - new_buf.extend(buffer[prev_end:start]) - new_buf.extend(self.random.choice(ls)) + new_nodes.extend(nodes[prev_end:start]) + new_nodes.extend(self.random.choice(ls)) prev_end = end - result = self.engine.cached_test_function(new_buf) + result = self.engine.cached_test_function_ir(new_nodes) # This *can't* be a shrink because none of the components were. assert shrink_target is self.shrink_target @@ -681,7 +697,7 @@ class Shrinker: "reorder_examples", "minimize_duplicated_nodes", "minimize_individual_nodes", - "redistribute_block_pairs", + "redistribute_integer_pairs", "lower_blocks_together", ] ) @@ -796,7 +812,11 @@ class Shrinker: @property def nodes(self): - return self.shrink_target.examples.ir_tree_nodes + return self.shrink_target.ir_nodes + + @property + def choices(self): + return self.shrink_target.choices @property def examples(self): @@ -936,7 +956,7 @@ class Shrinker: def consider(n, sign): return self.consider_new_tree( replace_all( - st.examples.ir_tree_nodes, + st.ir_nodes, [ offset_node(node, sign * (n + v)) for node, v in zip(changed, ints) @@ -964,8 +984,8 @@ class Shrinker: prev_target = self.__last_checked_changed_at new_target = self.shrink_target assert prev_target is not new_target - prev_nodes = prev_target.examples.ir_tree_nodes - new_nodes = new_target.examples.ir_tree_nodes + prev_nodes = prev_target.ir_nodes + new_nodes = new_target.ir_nodes assert sort_key(new_target.buffer) < sort_key(prev_target.buffer) if len(prev_nodes) != len(new_nodes) or any( @@ -1078,7 +1098,9 @@ class Shrinker: # if the size *increased*, we would have to guess what to pad with # in order to try fixing up this attempt. Just give up. if node.kwargs["min_size"] <= attempt_kwargs["min_size"]: - return False + # attempts which increase min_size tend to overrun rather than + # be misaligned, making a covering case difficult. + return False # pragma: no cover # the size decreased in our attempt. Try again, but replace with # the min_size that we would have gotten, and truncate the value # to that size by removing any elements past min_size. @@ -1095,7 +1117,7 @@ class Shrinker: + initial_attempt[node.index :] ) - lost_nodes = len(self.nodes) - len(attempt.examples.ir_tree_nodes) + lost_nodes = len(self.nodes) - len(attempt.ir_nodes) if lost_nodes <= 0: return False @@ -1227,42 +1249,32 @@ class Shrinker: self.minimize_nodes(nodes) @defines_shrink_pass() - def redistribute_block_pairs(self, chooser): + def redistribute_integer_pairs(self, chooser): """If there is a sum of generated integers that we need their sum to exceed some bound, lowering one of them requires raising the other. This pass enables that.""" + # TODO_SHRINK let's extend this to floats as well. - node = chooser.choose( + # look for a pair of nodes (node1, node2) which are both integers and + # aren't separated by too many other nodes. We'll decrease node1 and + # increase node2 (note that the other way around doesn't make sense as + # it's strictly worse in the ordering). + node1 = chooser.choose( self.nodes, lambda node: node.ir_type == "integer" and not node.trivial ) + node2 = chooser.choose( + self.nodes, + lambda node: node.ir_type == "integer" + # Note that it's fine for node2 to be trivial, because we're going to + # explicitly make it *not* trivial by adding to its value. + and not node.was_forced + # to avoid quadratic behavior, scan ahead only a small amount for + # the related node. + and node1.index < node.index <= node1.index + 4, + ) - # The preconditions for this pass are that the two integer draws are only - # separated by non-integer nodes, and have the same size value in bytes. - # - # This isn't particularly principled. For instance, this wouldn't reduce - # e.g. @given(integers(), integers(), integers()) where the sum property - # involves the first and last integers. - # - # A better approach may be choosing *two* such integer nodes arbitrarily - # from the list, instead of conditionally scanning forward. - - for j in range(node.index + 1, len(self.nodes)): - next_node = self.nodes[j] - if next_node.ir_type == "integer" and bits_to_bytes( - node.value.bit_length() - ) == bits_to_bytes(next_node.value.bit_length()): - break - else: - return - - if next_node.was_forced: - # avoid modifying a forced node. Note that it's fine for next_node - # to be trivial, because we're going to explicitly make it *not* - # trivial by adding to its value. - return - - m = node.value - n = next_node.value + m = node1.value + n = node2.value def boost(k): if k > m: @@ -1272,11 +1284,11 @@ class Shrinker: next_node_value = n + k return self.consider_new_tree( - self.nodes[: node.index] - + [node.copy(with_value=node_value)] - + self.nodes[node.index + 1 : next_node.index] - + [next_node.copy(with_value=next_node_value)] - + self.nodes[next_node.index + 1 :] + self.nodes[: node1.index] + + (node1.copy(with_value=node_value),) + + self.nodes[node1.index + 1 : node2.index] + + (node2.copy(with_value=next_node_value),) + + self.nodes[node2.index + 1 :] ) find_integer(boost) @@ -1424,15 +1436,15 @@ class Shrinker: lowered = ( self.nodes[: node.index] - + [node.copy(with_value=node.value - 1)] + + (node.copy(with_value=node.value - 1),) + self.nodes[node.index + 1 :] ) attempt = self.cached_test_function_ir(lowered) if ( attempt is None or attempt.status < Status.VALID - or len(attempt.examples.ir_tree_nodes) == len(self.nodes) - or len(attempt.examples.ir_tree_nodes) == node.index + 1 + or len(attempt.ir_nodes) == len(self.nodes) + or len(attempt.ir_nodes) == node.index + 1 ): # no point in trying our size-dependency-logic if our attempt at # lowering the node resulted in: @@ -1507,14 +1519,12 @@ class Shrinker: range(len(examples)), lambda indices: self.consider_new_tree( replace_all( - st.examples.ir_nodes, + st.ir_nodes, [ ( u, v, - st.examples.ir_nodes[ - examples[i].ir_start : examples[i].ir_end - ], + st.ir_nodes[examples[i].ir_start : examples[i].ir_end], ) for (u, v), i in zip(endpoints, indices) ], @@ -1538,9 +1548,9 @@ class Shrinker: Returns True if this successfully changes the underlying shrink target, else False. """ - if i + len(description) > len(original.examples.ir_tree_nodes) or i < 0: + if i + len(description) > len(original.ir_nodes) or i < 0: return False - attempt = list(original.examples.ir_tree_nodes) + attempt = list(original.ir_nodes) for _ in range(repeats): for k, command in reversed(list(enumerate(description))): j = i + k diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py index c08cb7e094e..06f49ce3fea 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py @@ -160,9 +160,10 @@ class Sampler: table[small.pop()][2] = zero self.table: "list[tuple[int, int, float]]" = [] - for base, alternate, alternate_chance in table: # type: ignore + for base, alternate, alternate_chance in table: assert isinstance(base, int) assert isinstance(alternate, int) or alternate is None + assert alternate_chance is not None if alternate is None: self.table.append((base, base, alternate_chance)) elif alternate < base: diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/entropy.py b/contrib/python/hypothesis/py3/hypothesis/internal/entropy.py index 21d17c465d3..d7ce1463bdd 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/entropy.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/entropy.py @@ -13,9 +13,9 @@ import gc import random import sys import warnings -from collections.abc import Hashable +from collections.abc import Generator, Hashable from itertools import count -from typing import TYPE_CHECKING, Any, Callable +from typing import TYPE_CHECKING, Any, Callable, Optional from weakref import WeakValueDictionary import hypothesis.core @@ -28,9 +28,9 @@ if TYPE_CHECKING: # we can't use this at runtime until from_type supports # protocols -- breaks ghostwriter tests class RandomLike(Protocol): - seed: Callable[..., Any] - getstate: Callable[[], Any] - setstate: Callable[..., Any] + def seed(self, *args: Any, **kwargs: Any) -> Any: ... + def getstate(self, *args: Any, **kwargs: Any) -> Any: ... + def setstate(self, *args: Any, **kwargs: Any) -> Any: ... else: # pragma: no cover RandomLike = random.Random @@ -39,11 +39,13 @@ else: # pragma: no cover # with their respective Random instances even as new ones are registered and old # ones go out of scope and get garbage collected. Keys are ascending integers. _RKEY = count() -RANDOMS_TO_MANAGE: WeakValueDictionary = WeakValueDictionary({next(_RKEY): random}) +RANDOMS_TO_MANAGE: WeakValueDictionary[int, RandomLike] = WeakValueDictionary( + {next(_RKEY): random} +) class NumpyRandomWrapper: - def __init__(self): + def __init__(self) -> None: assert "numpy" in sys.modules # This class provides a shim that matches the numpy to stdlib random, # and lets us avoid importing Numpy until it's already in use. @@ -54,7 +56,7 @@ class NumpyRandomWrapper: self.setstate = numpy.random.set_state -NP_RANDOM = None +NP_RANDOM: Optional[RandomLike] = None if not (PYPY or GRAALPY): @@ -160,7 +162,7 @@ def get_seeder_and_restorer( """ assert isinstance(seed, int) assert 0 <= seed < 2**32 - states: dict = {} + states: dict[int, object] = {} if "numpy" in sys.modules: global NP_RANDOM @@ -168,13 +170,13 @@ def get_seeder_and_restorer( # Protect this from garbage-collection by adding it to global scope NP_RANDOM = RANDOMS_TO_MANAGE[next(_RKEY)] = NumpyRandomWrapper() - def seed_all(): + def seed_all() -> None: assert not states for k, r in RANDOMS_TO_MANAGE.items(): states[k] = r.getstate() r.seed(seed) - def restore_all(): + def restore_all() -> None: for k, state in states.items(): r = RANDOMS_TO_MANAGE.get(k) if r is not None: # i.e., hasn't been garbage-collected @@ -185,7 +187,7 @@ def get_seeder_and_restorer( @contextlib.contextmanager -def deterministic_PRNG(seed=0): +def deterministic_PRNG(seed: int = 0) -> Generator[None, None, None]: """Context manager that handles random.seed without polluting global state. See issue #1255 and PR #1295 for details and motivation - in short, diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/escalation.py b/contrib/python/hypothesis/py3/hypothesis/internal/escalation.py index 0fa64541671..26907c4d061 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/escalation.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/escalation.py @@ -38,7 +38,10 @@ def belongs_to(package): except KeyError: pass try: - Path(filepath).resolve().relative_to(root) + if not filepath.startswith("<frozen "): + Path(filepath).resolve().relative_to(root) + else: + raise ValueError result = True except Exception: result = False diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/filtering.py b/contrib/python/hypothesis/py3/hypothesis/internal/filtering.py index f10ff8cfcd7..f06002dcedb 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/filtering.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/filtering.py @@ -282,7 +282,7 @@ def get_numeric_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: def get_integer_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: - kwargs, predicate = get_numeric_predicate_bounds(predicate) # type: ignore + kwargs, predicate = get_numeric_predicate_bounds(predicate) if "min_value" in kwargs: if kwargs["min_value"] == -math.inf: @@ -310,7 +310,7 @@ def get_integer_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: def get_float_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: - kwargs, predicate = get_numeric_predicate_bounds(predicate) # type: ignore + kwargs, predicate = get_numeric_predicate_bounds(predicate) if "min_value" in kwargs: min_value = kwargs["min_value"] diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py b/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py index 47abae66ea9..b42360acda7 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py @@ -8,12 +8,28 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -from typing import Union +from collections.abc import Iterable, Sequence +from typing import TYPE_CHECKING, Union, cast, final +if TYPE_CHECKING: + from typing import TypeAlias + from typing_extensions import Self + +IntervalsT: "TypeAlias" = tuple[tuple[int, int], ...] + + +# @final makes mypy happy with the Self return annotations. We otherwise run +# afoul of: +# > You should not use Self as the return annotation if the method is not +# > guaranteed to return an instance of a subclass when the class is subclassed +# > https://docs.python.org/3/library/typing.html#typing.Self + + +@final class IntervalSet: @classmethod - def from_string(cls, s): + def from_string(cls, s: str) -> "Self": """Return a tuple of intervals, covering the codepoints of characters in `s`. >>> IntervalSet.from_string('abcdef0123456789') @@ -22,23 +38,29 @@ class IntervalSet: x = cls((ord(c), ord(c)) for c in sorted(s)) return x.union(x) - def __init__(self, intervals=()): - self.intervals = tuple(intervals) - self.offsets = [0] + def __init__(self, intervals: Iterable[Sequence[int]] = ()) -> None: + self.intervals: IntervalsT = cast( + IntervalsT, tuple(tuple(v) for v in intervals) + ) + # cast above is validated by this length assertion. check here instead of + # before to not exhaust generators before we create intervals from it + assert all(len(v) == 2 for v in self.intervals) + + self.offsets: list[int] = [0] for u, v in self.intervals: self.offsets.append(self.offsets[-1] + v - u + 1) self.size = self.offsets.pop() self._idx_of_zero = self.index_above(ord("0")) self._idx_of_Z = min(self.index_above(ord("Z")), len(self) - 1) - def __len__(self): + def __len__(self) -> int: return self.size - def __iter__(self): + def __iter__(self) -> Iterable[int]: for u, v in self.intervals: yield from range(u, v + 1) - def __getitem__(self, i): + def __getitem__(self, i: int) -> int: if i < 0: i = self.size + i if i < 0 or i >= self.size: @@ -69,7 +91,7 @@ class IntervalSet: assert 0 <= elem <= 0x10FFFF return any(start <= elem <= end for start, end in self.intervals) - def __repr__(self): + def __repr__(self) -> str: return f"IntervalSet({self.intervals!r})" def index(self, value: int) -> int: @@ -90,22 +112,22 @@ class IntervalSet: return offset + (value - u) return self.size - def __or__(self, other): + def __or__(self, other: "Self") -> "Self": return self.union(other) - def __sub__(self, other): + def __sub__(self, other: "Self") -> "Self": return self.difference(other) - def __and__(self, other): + def __and__(self, other: "Self") -> "Self": return self.intersection(other) - def __eq__(self, other): + def __eq__(self, other: object) -> bool: return isinstance(other, IntervalSet) and (other.intervals == self.intervals) - def __hash__(self): + def __hash__(self) -> int: return hash(self.intervals) - def union(self, other): + def union(self, other: "Self") -> "Self": """Merge two sequences of intervals into a single tuple of intervals. Any integer bounded by `x` or `y` is also bounded by the result. @@ -141,7 +163,7 @@ class IntervalSet: result.append((u, v)) return IntervalSet(result) - def difference(self, other): + def difference(self, other: "Self") -> "Self": """Set difference for lists of intervals. That is, returns a list of intervals that bounds all values bounded by x that are not also bounded by y. x and y are expected to be in sorted order. @@ -158,7 +180,7 @@ class IntervalSet: x = list(map(list, x)) i = 0 j = 0 - result = [] + result: list[Iterable[int]] = [] while i < len(x) and j < len(y): # Iterate in parallel over x and y. j stays pointing at the smallest # interval in the left hand side that could still overlap with some @@ -218,7 +240,7 @@ class IntervalSet: result.extend(x[i:]) return IntervalSet(map(tuple, result)) - def intersection(self, other): + def intersection(self, other: "Self") -> "Self": """Set intersection for lists of intervals.""" assert isinstance(other, type(self)), other intervals = [] diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/observability.py b/contrib/python/hypothesis/py3/hypothesis/internal/observability.py index 2ad2d4dd5a2..40fd2cf7e91 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/observability.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/observability.py @@ -63,7 +63,9 @@ def make_testcase( }[data.status], "status_reason": status_reason, "representation": string_repr, - "arguments": arguments or {}, + "arguments": { + k.removeprefix("generate:"): v for k, v in (arguments or {}).items() + }, "how_generated": how_generated, # iid, mutation, etc. "features": { **{ @@ -74,7 +76,7 @@ def make_testcase( "timing": timing, "metadata": { "traceback": getattr(data.extra_information, "_expected_traceback", None), - "predicates": data._observability_predicates, + "predicates": dict(data._observability_predicates), "backend": backend_metadata or {}, **_system_metadata(), }, diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py b/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py index 5586e428089..e00cc6edcff 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py @@ -54,11 +54,12 @@ if sys.version_info[:2] >= (3, 12): class Tracer: """A super-simple branch coverage tracer.""" - __slots__ = ("branches", "_previous_location") + __slots__ = ("branches", "_previous_location", "_should_trace") - def __init__(self) -> None: + def __init__(self, *, should_trace: bool) -> None: self.branches: Trace = set() self._previous_location: Optional[Location] = None + self._should_trace = should_trace and self.can_trace() @staticmethod def can_trace() -> bool: @@ -75,7 +76,6 @@ class Tracer: if event == "call": return self.trace elif event == "line": - # manual inlining of self.trace_line for performance. fname = frame.f_code.co_filename if should_trace_file(fname): current_location = (fname, frame.f_lineno) @@ -86,13 +86,18 @@ class Tracer: def trace_line(self, code: types.CodeType, line_number: int) -> None: fname = code.co_filename - if should_trace_file(fname): - current_location = (fname, line_number) - self.branches.add((self._previous_location, current_location)) - self._previous_location = current_location + if not should_trace_file(fname): + # this function is only called on 3.12+, but we want to avoid an + # assertion to that effect for performance. + return sys.monitoring.DISABLE # type: ignore + + current_location = (fname, line_number) + self.branches.add((self._previous_location, current_location)) + self._previous_location = current_location def __enter__(self): - assert self.can_trace() # caller checks in core.py + if not self._should_trace: + return self if sys.version_info[:2] < (3, 12): sys.settrace(self.trace) @@ -107,6 +112,9 @@ class Tracer: return self def __exit__(self, *args, **kwargs): + if not self._should_trace: + return + if sys.version_info[:2] < (3, 12): sys.settrace(None) return diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/validation.py b/contrib/python/hypothesis/py3/hypothesis/internal/validation.py index da7dfbb9376..9cf9df57fcb 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/validation.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/validation.py @@ -11,13 +11,14 @@ import decimal import math from numbers import Rational, Real +from typing import Union from hypothesis.errors import InvalidArgument from hypothesis.internal.coverage import check_function @check_function -def check_type(typ, arg, name): +def check_type(typ: Union[type, tuple[type, ...]], arg: object, name: str) -> None: if not isinstance(arg, typ): if isinstance(typ, tuple): assert len(typ) >= 2, "Use bare type instead of len-1 tuple" diff --git a/contrib/python/hypothesis/py3/hypothesis/provisional.py b/contrib/python/hypothesis/py3/hypothesis/provisional.py index 06e829f3c42..7f43762a30e 100644 --- a/contrib/python/hypothesis/py3/hypothesis/provisional.py +++ b/contrib/python/hypothesis/py3/hypothesis/provisional.py @@ -166,13 +166,18 @@ _url_fragments_strategy = ( @defines_strategy(force_reusable_values=True) def urls() -> st.SearchStrategy[str]: - """A strategy for :rfc:`3986`, generating http/https URLs.""" + """A strategy for :rfc:`3986`, generating http/https URLs. + + The generated URLs could, at least in theory, be passed to an HTTP client + and fetched. + + """ def url_encode(s: str) -> str: return "".join(c if c in URL_SAFE_CHARACTERS else "%%%02X" % ord(c) for c in s) schemes = st.sampled_from(["http", "https"]) - ports = st.integers(min_value=0, max_value=2**16 - 1).map(":{}".format) + ports = st.integers(min_value=1, max_value=2**16 - 1).map(":{}".format) paths = st.lists(st.text(string.printable).map(url_encode)).map("/".join) return st.builds( diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py index e652116ed5c..9991bc6b08f 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py @@ -36,6 +36,7 @@ from typing import ( Protocol, TypeVar, Union, + cast, get_args, get_origin, overload, @@ -63,6 +64,8 @@ from hypothesis.errors import ( ) from hypothesis.internal.cathetus import cathetus from hypothesis.internal.charmap import ( + Categories, + CategoryName, as_general_categories, categories as all_categories, ) @@ -82,7 +85,6 @@ from hypothesis.internal.conjecture.utils import ( ) from hypothesis.internal.entropy import get_seeder_and_restorer from hypothesis.internal.floats import float_of -from hypothesis.internal.observability import TESTCASE_CALLBACKS from hypothesis.internal.reflection import ( define_function_signature, get_pretty_function_description, @@ -136,21 +138,15 @@ from hypothesis.strategies._internal.strings import ( TextStrategy, _check_is_single_character, ) -from hypothesis.strategies._internal.utils import ( - cacheable, - defines_strategy, - to_jsonable, -) +from hypothesis.strategies._internal.utils import cacheable, defines_strategy from hypothesis.utils.conventions import not_set from hypothesis.vendor.pretty import RepresentationPrinter if sys.version_info >= (3, 10): from types import EllipsisType as EllipsisType - from typing import TypeAlias as TypeAlias elif typing.TYPE_CHECKING: # pragma: no cover from builtins import ellipsis as EllipsisType - from typing_extensions import TypeAlias else: EllipsisType = type(Ellipsis) # pragma: no cover @@ -560,48 +556,6 @@ def dictionaries( ).map(dict_class) -# See https://en.wikipedia.org/wiki/Unicode_character_property#General_Category -CategoryName: "TypeAlias" = Literal[ - "L", # Letter - "Lu", # Letter, uppercase - "Ll", # Letter, lowercase - "Lt", # Letter, titlecase - "Lm", # Letter, modifier - "Lo", # Letter, other - "M", # Mark - "Mn", # Mark, nonspacing - "Mc", # Mark, spacing combining - "Me", # Mark, enclosing - "N", # Number - "Nd", # Number, decimal digit - "Nl", # Number, letter - "No", # Number, other - "P", # Punctuation - "Pc", # Punctuation, connector - "Pd", # Punctuation, dash - "Ps", # Punctuation, open - "Pe", # Punctuation, close - "Pi", # Punctuation, initial quote - "Pf", # Punctuation, final quote - "Po", # Punctuation, other - "S", # Symbol - "Sm", # Symbol, math - "Sc", # Symbol, currency - "Sk", # Symbol, modifier - "So", # Symbol, other - "Z", # Separator - "Zs", # Separator, space - "Zl", # Separator, line - "Zp", # Separator, paragraph - "C", # Other - "Cc", # Other, control - "Cf", # Other, format - "Cs", # Other, surrogate - "Co", # Other, private use - "Cn", # Other, not assigned -] - - @cacheable @defines_strategy(force_reusable_values=True) def characters( @@ -670,7 +624,7 @@ def characters( check_valid_size(min_codepoint, "min_codepoint") check_valid_size(max_codepoint, "max_codepoint") check_valid_interval(min_codepoint, max_codepoint, "min_codepoint", "max_codepoint") - + categories = cast(Optional[Categories], categories) if categories is not None and exclude_categories is not None: raise InvalidArgument( f"Pass at most one of {categories=} and {exclude_categories=} - " @@ -717,8 +671,12 @@ def characters( f"Characters {sorted(overlap)!r} are present in both " f"{include_characters=} and {exclude_characters=}" ) - categories = as_general_categories(categories, "categories") - exclude_categories = as_general_categories(exclude_categories, "exclude_categories") + if categories is not None: + categories = as_general_categories(categories, "categories") + if exclude_categories is not None: + exclude_categories = as_general_categories( + exclude_categories, "exclude_categories" + ) if categories is not None and not categories and not include_characters: raise InvalidArgument( "When `categories` is an empty collection and there are " @@ -2155,16 +2113,14 @@ class DataObject: def draw(self, strategy: SearchStrategy[Ex], label: Any = None) -> Ex: check_strategy(strategy, "strategy") self.count += 1 - printer = RepresentationPrinter(context=current_build_context()) - desc = f"Draw {self.count}{'' if label is None else f' ({label})'}: " + desc = f"Draw {self.count}{'' if label is None else f' ({label})'}" with deprecate_random_in_strategy("{}from {!r}", desc, strategy): result = self.conjecture_data.draw(strategy, observe_as=f"generate:{desc}") - if TESTCASE_CALLBACKS: - self.conjecture_data._observability_args[desc] = to_jsonable(result) # optimization to avoid needless printer.pretty if should_note(): - printer.text(desc) + printer = RepresentationPrinter(context=current_build_context()) + printer.text(f"{desc}: ") printer.pretty(result) note(printer.getvalue()) return result diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/ipaddress.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/ipaddress.py index 75aaaba8d7c..0f5fb1bcccc 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/ipaddress.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/ipaddress.py @@ -115,4 +115,4 @@ def ip_addresses( if v not in (None, network.version): raise InvalidArgument(f"{v=} is incompatible with {network=}") addr_type = IPv4Address if network.version == 4 else IPv6Address - return integers(int(network[0]), int(network[-1])).map(addr_type) # type: ignore + return integers(int(network[0]), int(network[-1])).map(addr_type) diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/numbers.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/numbers.py index 033577f2c86..2e4bf01732f 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/numbers.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/numbers.py @@ -66,24 +66,21 @@ class IntegersStrategy(SearchStrategy): def do_draw(self, data): # For bounded integers, make the bounds and near-bounds more likely. - forced = None + weights = None if ( self.end is not None and self.start is not None and self.end - self.start > 127 ): - bits = data.draw_integer(0, 127) - forced = { - 122: self.start, - 123: self.start, - 124: self.end, - 125: self.end, - 126: self.start + 1, - 127: self.end - 1, - }.get(bits) + weights = { + self.start: (2 / 128), + self.start + 1: (1 / 128), + self.end - 1: (1 / 128), + self.end: (2 / 128), + } return data.draw_integer( - min_value=self.start, max_value=self.end, forced=forced + min_value=self.start, max_value=self.end, weights=weights ) def filter(self, condition): diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py index c040f748a5e..0d8c8ca10ee 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py @@ -121,20 +121,21 @@ def recursive_property(name, default): mapping = {} sentinel = object() - hit_recursion = [False] + hit_recursion = False # For a first pass we do a direct recursive calculation of the # property, but we block recursively visiting a value in the # computation of its property: When that happens, we simply # note that it happened and return the default value. def recur(strat): + nonlocal hit_recursion try: return forced_value(strat) except AttributeError: pass result = mapping.get(strat, sentinel) if result is calculating: - hit_recursion[0] = True + hit_recursion = True return default elif result is sentinel: mapping[strat] = calculating @@ -150,7 +151,7 @@ def recursive_property(name, default): # a more careful fixed point calculation to get the exact # values. Hopefully our mapping is still pretty good and it # won't take a large number of updates to reach a fixed point. - if hit_recursion[0]: + if hit_recursion: needs_update = set(mapping) # We track which strategies use which in the course of @@ -334,7 +335,10 @@ class SearchStrategy(Generic[Ex]): @given(self) @settings( database=None, - max_examples=100, + # generate only a few examples at a time to avoid slow interactivity + # for large strategies. The overhead of @given is very small relative + # to generation, so a small batch size is fine. + max_examples=10, deadline=None, verbosity=Verbosity.quiet, phases=(Phase.generate,), diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py index 6dea780c95e..4fd48291367 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py @@ -822,9 +822,7 @@ def register(type_, fallback=None, *, module=typing): @register("Type") @register("Type", module=typing_extensions) def resolve_Type(thing): - if getattr(thing, "__args__", None) is None: - return st.just(type) - elif get_args(thing) == (): # pragma: no cover + if getattr(thing, "__args__", None) is None or get_args(thing) == (): return _fallback_type_strategy args = (thing.__args__[0],) if is_a_union(args[0]): diff --git a/contrib/python/hypothesis/py3/hypothesis/vendor/pretty.py b/contrib/python/hypothesis/py3/hypothesis/vendor/pretty.py index 2fad6e1ff09..0f0326c435b 100644 --- a/contrib/python/hypothesis/py3/hypothesis/vendor/pretty.py +++ b/contrib/python/hypothesis/py3/hypothesis/vendor/pretty.py @@ -186,6 +186,16 @@ class RepresentationPrinter: pass else: return printer(obj, self, cycle) + + # Look for the _repr_pretty_ method which allows users + # to define custom pretty printing. + # Some objects automatically create any requested + # attribute. Try to ignore most of them by checking for + # callability. + pretty_method = _safe_getattr(obj, "_repr_pretty_", None) + if callable(pretty_method): + return pretty_method(self, cycle) + # Next walk the mro and check for either: # 1) a registered printer # 2) a _repr_pretty_ method @@ -206,14 +216,6 @@ class RepresentationPrinter: self.type_pprinters[cls] = printer return printer(obj, self, cycle) else: - # Finally look for special method names. - # Some objects automatically create any requested - # attribute. Try to ignore most of them by checking for - # callability. - if "_repr_pretty_" in cls.__dict__: - meth = cls._repr_pretty_ - if callable(meth): - return meth(obj, self, cycle) if hasattr(cls, "__attrs_attrs__"): return pprint_fields( obj, @@ -582,6 +584,10 @@ def _seq_pprinter_factory(start, end, basetype): return inner +def get_class_name(cls): + return _safe_getattr(cls, "__qualname__", cls.__name__) + + def _set_pprinter_factory(start, end, basetype): """Factory that returns a pprint function useful for sets and frozensets.""" @@ -600,7 +606,7 @@ def _set_pprinter_factory(start, end, basetype): return p.text(start + "..." + end) if not obj: # Special case. - p.text(basetype.__name__ + "()") + p.text(get_class_name(basetype) + "()") else: step = len(start) with p.group(step, start, end): @@ -733,7 +739,7 @@ def _repr_pprint(obj, p, cycle): def pprint_fields(obj, p, cycle, fields): - name = obj.__class__.__name__ + name = get_class_name(obj.__class__) if cycle: return p.text(f"{name}(...)") with p.group(1, name + "(", ")"): @@ -879,7 +885,7 @@ def _repr_dataframe(obj, p, cycle): # pragma: no cover def _repr_enum(obj, p, cycle): - tname = type(obj).__name__ + tname = get_class_name(type(obj)) if isinstance(obj, Flag): p.text( " | ".join(f"{tname}.{x.name}" for x in type(obj) if x & obj == x) diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py index 0624381d901..7595a898e0c 100644 --- a/contrib/python/hypothesis/py3/hypothesis/version.py +++ b/contrib/python/hypothesis/py3/hypothesis/version.py @@ -8,5 +8,5 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -__version_info__ = (6, 115, 1) +__version_info__ = (6, 120, 0) __version__ = ".".join(map(str, __version_info__)) diff --git a/contrib/python/hypothesis/py3/patches/01-fix-crash-with-pydebug.patch b/contrib/python/hypothesis/py3/patches/01-fix-crash-with-pydebug.patch new file mode 100644 index 00000000000..967eb845f85 --- /dev/null +++ b/contrib/python/hypothesis/py3/patches/01-fix-crash-with-pydebug.patch @@ -0,0 +1,14 @@ +--- contrib/python/hypothesis/py3/hypothesis/internal/escalation.py (index) ++++ contrib/python/hypothesis/py3/hypothesis/internal/escalation.py (working tree) +@@ -38,7 +38,10 @@ def belongs_to(package): + except KeyError: + pass + try: +- Path(filepath).resolve().relative_to(root) ++ if not filepath.startswith("<frozen "): ++ Path(filepath).resolve().relative_to(root) ++ else: ++ raise ValueError + result = True + except Exception: + result = False diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make index 8e7bcbe8124..ada2d6d8e20 100644 --- a/contrib/python/hypothesis/py3/ya.make +++ b/contrib/python/hypothesis/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(6.115.1) +VERSION(6.120.0) LICENSE(MPL-2.0) |
