diff options
| author | robot-piglet <[email protected]> | 2025-08-05 18:36:21 +0300 |
|---|---|---|
| committer | robot-piglet <[email protected]> | 2025-08-05 18:47:18 +0300 |
| commit | bbe9a42800ab38491470180c0c49e254f4b4a76f (patch) | |
| tree | fb6363f900a223d673eabac6970929b216d5ef6e /contrib/python/hypothesis | |
| parent | cf6ff7ef6b9295d66cf3be9908ea6dc829cdfde0 (diff) | |
Intermediate changes
commit_hash:2189b71c0a4b3b757ca05f91178afda3697bfab0
Diffstat (limited to 'contrib/python/hypothesis')
77 files changed, 5903 insertions, 4389 deletions
diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA index d39f7ab465b..228af8a8c57 100644 --- a/contrib/python/hypothesis/py3/.dist-info/METADATA +++ b/contrib/python/hypothesis/py3/.dist-info/METADATA @@ -1,21 +1,19 @@ -Metadata-Version: 2.1 +Metadata-Version: 2.4 Name: hypothesis -Version: 6.120.0 +Version: 6.130.13 Summary: A library for property-based testing -Home-page: https://hypothesis.works -Author: David R. MacIver and Zac Hatfield-Dodds -Author-email: [email protected] -License: MPL-2.0 -Project-URL: Source, https://github.com/HypothesisWorks/hypothesis/tree/master/hypothesis-python -Project-URL: Changelog, https://hypothesis.readthedocs.io/en/latest/changes.html -Project-URL: Documentation, https://hypothesis.readthedocs.io -Project-URL: Issues, https://github.com/HypothesisWorks/hypothesis/issues -Keywords: python testing fuzzing property-based-testing +Author-email: "David R. MacIver and Zac Hatfield-Dodds" <[email protected]> +License-Expression: MPL-2.0 +Project-URL: homepage, https://hypothesis.works +Project-URL: source, https://github.com/HypothesisWorks/hypothesis +Project-URL: changelog, https://hypothesis.readthedocs.io/en/latest/changelog.html +Project-URL: documentation, https://hypothesis.readthedocs.io +Project-URL: issues, https://github.com/HypothesisWorks/hypothesis/issues +Keywords: python,testing,fuzzing,property-based-testing Classifier: Development Status :: 5 - Production/Stable Classifier: Framework :: Hypothesis Classifier: Framework :: Pytest Classifier: Intended Audience :: Developers -Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) Classifier: Operating System :: Unix Classifier: Operating System :: POSIX Classifier: Operating System :: Microsoft :: Windows @@ -26,51 +24,30 @@ Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: 
Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 Classifier: Programming Language :: Python :: Implementation :: CPython Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Topic :: Education :: Testing Classifier: Topic :: Software Development :: Testing Classifier: Typing :: Typed Requires-Python: >=3.9 -Description-Content-Type: text/x-rst +Description-Content-Type: text/markdown License-File: LICENSE.txt Requires-Dist: attrs>=22.2.0 -Requires-Dist: sortedcontainers<3.0.0,>=2.1.0 Requires-Dist: exceptiongroup>=1.0.0; python_version < "3.11" -Provides-Extra: all -Requires-Dist: black>=19.10b0; extra == "all" -Requires-Dist: click>=7.0; extra == "all" -Requires-Dist: crosshair-tool>=0.0.77; extra == "all" -Requires-Dist: django>=4.2; extra == "all" -Requires-Dist: dpcontracts>=0.4; extra == "all" -Requires-Dist: hypothesis-crosshair>=0.0.18; extra == "all" -Requires-Dist: lark>=0.10.1; extra == "all" -Requires-Dist: libcst>=0.3.16; extra == "all" -Requires-Dist: numpy>=1.19.3; extra == "all" -Requires-Dist: pandas>=1.1; extra == "all" -Requires-Dist: pytest>=4.6; extra == "all" -Requires-Dist: python-dateutil>=1.4; extra == "all" -Requires-Dist: pytz>=2014.1; extra == "all" -Requires-Dist: redis>=3.0.0; extra == "all" -Requires-Dist: rich>=9.0.0; extra == "all" -Requires-Dist: tzdata>=2024.2; (sys_platform == "win32" or sys_platform == "emscripten") and extra == "all" +Requires-Dist: sortedcontainers<3.0.0,>=2.1.0 Provides-Extra: cli Requires-Dist: click>=7.0; extra == "cli" Requires-Dist: black>=19.10b0; extra == "cli" Requires-Dist: rich>=9.0.0; extra == "cli" Provides-Extra: codemods Requires-Dist: libcst>=0.3.16; extra == "codemods" -Provides-Extra: crosshair -Requires-Dist: hypothesis-crosshair>=0.0.18; extra == "crosshair" -Requires-Dist: crosshair-tool>=0.0.77; extra == "crosshair" -Provides-Extra: dateutil -Requires-Dist: python-dateutil>=1.4; extra == "dateutil" -Provides-Extra: django 
-Requires-Dist: django>=4.2; extra == "django" -Provides-Extra: dpcontracts -Requires-Dist: dpcontracts>=0.4; extra == "dpcontracts" Provides-Extra: ghostwriter Requires-Dist: black>=19.10b0; extra == "ghostwriter" +Provides-Extra: pytz +Requires-Dist: pytz>=2014.1; extra == "pytz" +Provides-Extra: dateutil +Requires-Dist: python-dateutil>=1.4; extra == "dateutil" Provides-Extra: lark Requires-Dist: lark>=0.10.1; extra == "lark" Provides-Extra: numpy @@ -79,69 +56,83 @@ Provides-Extra: pandas Requires-Dist: pandas>=1.1; extra == "pandas" Provides-Extra: pytest Requires-Dist: pytest>=4.6; extra == "pytest" -Provides-Extra: pytz -Requires-Dist: pytz>=2014.1; extra == "pytz" +Provides-Extra: dpcontracts +Requires-Dist: dpcontracts>=0.4; extra == "dpcontracts" Provides-Extra: redis Requires-Dist: redis>=3.0.0; extra == "redis" +Provides-Extra: crosshair +Requires-Dist: hypothesis-crosshair>=0.0.20; extra == "crosshair" +Requires-Dist: crosshair-tool>=0.0.85; extra == "crosshair" Provides-Extra: zoneinfo -Requires-Dist: tzdata>=2024.2; (sys_platform == "win32" or sys_platform == "emscripten") and extra == "zoneinfo" - -========== -Hypothesis -========== - -Hypothesis is an advanced testing library for Python. It lets you write tests which -are parametrized by a source of examples, and then generates simple and comprehensible -examples that make your tests fail. This lets you find more bugs in your code with less -work. - -e.g. - -.. 
code-block:: python - - @given(st.lists(st.floats(allow_nan=False, allow_infinity=False), min_size=1)) - def test_mean(xs): - assert min(xs) <= mean(xs) <= max(xs) +Requires-Dist: tzdata>=2025.2; (sys_platform == "win32" or sys_platform == "emscripten") and extra == "zoneinfo" +Provides-Extra: django +Requires-Dist: django>=4.2; extra == "django" +Provides-Extra: watchdog +Requires-Dist: watchdog>=4.0.0; extra == "watchdog" +Provides-Extra: all +Requires-Dist: black>=19.10b0; extra == "all" +Requires-Dist: click>=7.0; extra == "all" +Requires-Dist: crosshair-tool>=0.0.85; extra == "all" +Requires-Dist: django>=4.2; extra == "all" +Requires-Dist: dpcontracts>=0.4; extra == "all" +Requires-Dist: hypothesis-crosshair>=0.0.20; extra == "all" +Requires-Dist: lark>=0.10.1; extra == "all" +Requires-Dist: libcst>=0.3.16; extra == "all" +Requires-Dist: numpy>=1.19.3; extra == "all" +Requires-Dist: pandas>=1.1; extra == "all" +Requires-Dist: pytest>=4.6; extra == "all" +Requires-Dist: python-dateutil>=1.4; extra == "all" +Requires-Dist: pytz>=2014.1; extra == "all" +Requires-Dist: redis>=3.0.0; extra == "all" +Requires-Dist: rich>=9.0.0; extra == "all" +Requires-Dist: tzdata>=2025.2; (sys_platform == "win32" or sys_platform == "emscripten") and extra == "all" +Requires-Dist: watchdog>=4.0.0; extra == "all" +Dynamic: license-file -.. code-block:: +<div align="center"> + <img src="https://raw.githubusercontent.com/HypothesisWorks/hypothesis/master/brand/dragonfly-rainbow.svg" width="300"> +</div> - Falsifying example: test_mean( - xs=[1.7976321109618856e+308, 6.102390043022755e+303] - ) +# Hypothesis -Hypothesis is extremely practical and advances the state of the art of -unit testing by some way. It's easy to use, stable, and powerful. If -you're not using Hypothesis to test your project then you're missing out. 
+* [Website](https://hypothesis.works/) +* [Documentation](https://hypothesis.readthedocs.io/en/latest/) +* [Source code](https://github.com/hypothesisWorks/hypothesis/) +* [Contributing](https://github.com/HypothesisWorks/hypothesis/blob/master/CONTRIBUTING.rst) +* [Community](https://hypothesis.readthedocs.io/en/latest/community.html) ------------------------- -Quick Start/Installation ------------------------- +Hypothesis is the property-based testing library for Python. With Hypothesis, you write tests which should pass for all inputs in whatever range you describe, and let Hypothesis randomly choose which of those inputs to check - including edge cases you might not have thought about. For example: -If you just want to get started: +```python +from hypothesis import given, strategies as st -.. code-block:: - pip install hypothesis +@given(st.lists(st.integers())) +def test_matches_builtin(ls): + assert sorted(ls) == my_sort(ls) +``` +This randomized testing can catch bugs and edge cases that you didn't think of and wouldn't have found. In addition, when Hypothesis does find a bug, it doesn't just report any failing example — it reports the simplest possible one. This makes property-based tests a powerful tool for debugging, as well as testing. ------------------ -Links of interest ------------------ +For instance, -The main Hypothesis site is at `hypothesis.works <https://hypothesis.works/>`_, and contains a lot -of good introductory and explanatory material. +```python +def my_sort(ls): + return sorted(set(ls)) +``` -Extensive documentation and examples of usage are `available at readthedocs <https://hypothesis.readthedocs.io/en/latest/>`_. +fails with the simplest possible failing example: -If you want to talk to people about using Hypothesis, `we have both an IRC channel -and a mailing list <https://hypothesis.readthedocs.io/en/latest/community.html>`_. 
+``` +Falsifying example: test_matches_builtin(ls=[0, 0]) +``` -If you want to receive occasional updates about Hypothesis, including useful tips and tricks, there's a -`TinyLetter mailing list to sign up for them <https://tinyletter.com/DRMacIver/>`_. +### Installation -If you want to contribute to Hypothesis, `instructions are here <https://github.com/HypothesisWorks/hypothesis-python/blob/master/CONTRIBUTING.rst>`_. +To install Hypothesis: -If you want to hear from people who are already using Hypothesis, some of them `have written -about it <https://hypothesis.readthedocs.io/en/latest/endorsements.html>`_. +``` +pip install hypothesis +``` -If you want to create a downstream package of Hypothesis, please read `these guidelines for packagers <https://hypothesis.readthedocs.io/en/latest/packaging.html>`_. +There are also [optional extras available](https://hypothesis.readthedocs.io/en/latest/extras.html). diff --git a/contrib/python/hypothesis/py3/README.md b/contrib/python/hypothesis/py3/README.md new file mode 100644 index 00000000000..fa3229ac71f --- /dev/null +++ b/contrib/python/hypothesis/py3/README.md @@ -0,0 +1 @@ +The Hypothesis python readme has moved to [the main readme](../README.md)! diff --git a/contrib/python/hypothesis/py3/README.rst b/contrib/python/hypothesis/py3/README.rst deleted file mode 100644 index 65419d8b70d..00000000000 --- a/contrib/python/hypothesis/py3/README.rst +++ /dev/null @@ -1,59 +0,0 @@ -========== -Hypothesis -========== - -Hypothesis is an advanced testing library for Python. It lets you write tests which -are parametrized by a source of examples, and then generates simple and comprehensible -examples that make your tests fail. This lets you find more bugs in your code with less -work. - -e.g. - -.. code-block:: python - - @given(st.lists(st.floats(allow_nan=False, allow_infinity=False), min_size=1)) - def test_mean(xs): - assert min(xs) <= mean(xs) <= max(xs) - -.. 
code-block:: - - Falsifying example: test_mean( - xs=[1.7976321109618856e+308, 6.102390043022755e+303] - ) - -Hypothesis is extremely practical and advances the state of the art of -unit testing by some way. It's easy to use, stable, and powerful. If -you're not using Hypothesis to test your project then you're missing out. - ------------------------- -Quick Start/Installation ------------------------- - -If you just want to get started: - -.. code-block:: - - pip install hypothesis - - ------------------ -Links of interest ------------------ - -The main Hypothesis site is at `hypothesis.works <https://hypothesis.works/>`_, and contains a lot -of good introductory and explanatory material. - -Extensive documentation and examples of usage are `available at readthedocs <https://hypothesis.readthedocs.io/en/latest/>`_. - -If you want to talk to people about using Hypothesis, `we have both an IRC channel -and a mailing list <https://hypothesis.readthedocs.io/en/latest/community.html>`_. - -If you want to receive occasional updates about Hypothesis, including useful tips and tricks, there's a -`TinyLetter mailing list to sign up for them <https://tinyletter.com/DRMacIver/>`_. - -If you want to contribute to Hypothesis, `instructions are here <https://github.com/HypothesisWorks/hypothesis-python/blob/master/CONTRIBUTING.rst>`_. - -If you want to hear from people who are already using Hypothesis, some of them `have written -about it <https://hypothesis.readthedocs.io/en/latest/endorsements.html>`_. - -If you want to create a downstream package of Hypothesis, please read `these guidelines for packagers <https://hypothesis.readthedocs.io/en/latest/packaging.html>`_. 
diff --git a/contrib/python/hypothesis/py3/_hypothesis_pytestplugin.py b/contrib/python/hypothesis/py3/_hypothesis_pytestplugin.py index 9df7817c68c..98c70b08de0 100644 --- a/contrib/python/hypothesis/py3/_hypothesis_pytestplugin.py +++ b/contrib/python/hypothesis/py3/_hypothesis_pytestplugin.py @@ -24,6 +24,7 @@ import json import os import sys import warnings +from fnmatch import fnmatch from inspect import signature import _hypothesis_globals @@ -444,6 +445,26 @@ else: _orig_call = fixtures.FixtureFunctionMarker.__call__ fixtures.FixtureFunctionMarker.__call__ = _ban_given_call # type: ignore + if int(pytest.__version__.split(".")[0]) >= 7: # pragma: no branch + # Hook has had this signature since Pytest 7.0, so skip on older versions + + def pytest_ignore_collect(collection_path, config): + # Detect, warn about, and mitigate certain misconfigurations; + # this is mostly educational but can also speed up collection. + if ( + (name := collection_path.name) == ".hypothesis" + and collection_path.is_dir() + and not any(fnmatch(name, p) for p in config.getini("norecursedirs")) + ): + warnings.warn( + "Skipping collection of '.hypothesis' directory - this usually " + "means you've explicitly set the `norecursedirs` pytest config " + "option, replacing rather than extending the default ignores.", + stacklevel=1, + ) + return True + return None # let other hooks decide + def load(): """Required for `pluggy` to load a plugin from setuptools entrypoints.""" diff --git a/contrib/python/hypothesis/py3/hypothesis/__init__.py b/contrib/python/hypothesis/py3/hypothesis/__init__.py index cfb55119f74..0b62ac8b1dd 100644 --- a/contrib/python/hypothesis/py3/hypothesis/__init__.py +++ b/contrib/python/hypothesis/py3/hypothesis/__init__.py @@ -36,6 +36,8 @@ __all__ = [ "HealthCheck", "Phase", "Verbosity", + "__version__", + "__version_info__", "assume", "currently_in_test_context", "event", @@ -50,8 +52,6 @@ __all__ = [ "seed", "settings", "target", - "__version__", - 
"__version_info__", ] run() diff --git a/contrib/python/hypothesis/py3/hypothesis/_settings.py b/contrib/python/hypothesis/py3/hypothesis/_settings.py index f4c77ca420d..ba26925c7dd 100644 --- a/contrib/python/hypothesis/py3/hypothesis/_settings.py +++ b/contrib/python/hypothesis/py3/hypothesis/_settings.py @@ -19,9 +19,18 @@ import datetime import inspect import os import warnings -from collections.abc import Collection +from collections.abc import Collection, Generator, Sequence from enum import Enum, EnumMeta, IntEnum, unique -from typing import TYPE_CHECKING, Any, ClassVar, Optional, TypeVar, Union +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + NoReturn, + Optional, + TypeVar, + Union, +) import attr @@ -30,23 +39,27 @@ from hypothesis.errors import ( InvalidArgument, InvalidState, ) +from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS from hypothesis.internal.reflection import get_pretty_function_description from hypothesis.internal.validation import check_type, try_convert from hypothesis.utils.conventions import not_set from hypothesis.utils.dynamicvariables import DynamicVariable if TYPE_CHECKING: + from typing import TypeAlias + from hypothesis.database import ExampleDatabase __all__ = ["settings"] +ValidatorT: "TypeAlias" = Callable[[Any], object] all_settings: dict[str, "Setting"] = {} T = TypeVar("T") class settingsProperty: - def __init__(self, name, show_default): + def __init__(self, name: str, *, show_default: bool) -> None: self.name = name self.show_default = show_default @@ -60,9 +73,9 @@ class settingsProperty: # you can change the storage directory and it will be reflected # in the default database. 
if self.name == "database" and result is not_set: - from hypothesis.database import ExampleDatabase + from hypothesis.database import _db_for_path - result = ExampleDatabase(not_set) + result = _db_for_path(not_set) assert result is not not_set return result except KeyError: @@ -85,7 +98,7 @@ class settingsProperty: return f"{description}\n\ndefault value: ``{default}``" -default_variable = DynamicVariable(None) +default_variable = DynamicVariable[Optional["settings"]](None) class settingsMeta(type): @@ -93,19 +106,20 @@ class settingsMeta(type): super().__init__(*args, **kwargs) @property - def default(cls): + def default(cls) -> Optional["settings"]: v = default_variable.value if v is not None: return v if getattr(settings, "_current_profile", None) is not None: + assert settings._current_profile is not None settings.load_profile(settings._current_profile) assert default_variable.value is not None return default_variable.value - def _assign_default_internal(cls, value): + def _assign_default_internal(cls, value: "settings") -> None: default_variable.value = value - def __setattr__(cls, name, value): + def __setattr__(cls, name: str, value: object) -> None: if name == "default": raise AttributeError( "Cannot assign to the property settings.default - " @@ -118,7 +132,7 @@ class settingsMeta(type): "settings with settings.load_profile, or use @settings(...) " "to decorate your test instead." ) - return super().__setattr__(name, value) + super().__setattr__(name, value) class settings(metaclass=settingsMeta): @@ -233,14 +247,14 @@ class settings(metaclass=settingsMeta): @classmethod def _define_setting( cls, - name, - description, + name: str, + description: str, *, - default, - options=None, - validator=None, - show_default=True, - ): + default: object, + options: Optional[Sequence[object]] = None, + validator: Optional[ValidatorT] = None, + show_default: bool = True, + ) -> None: """Add a new setting. 
- name is the name of the property that will be used to access the @@ -273,18 +287,16 @@ class settings(metaclass=settingsMeta): default=default, validator=validator, ) - setattr(settings, name, settingsProperty(name, show_default)) + setattr(settings, name, settingsProperty(name, show_default=show_default)) @classmethod - def lock_further_definitions(cls): + def lock_further_definitions(cls) -> None: settings.__definitions_are_locked = True - def __setattr__(self, name, value): + def __setattr__(self, name: str, value: object) -> NoReturn: raise AttributeError("settings objects are immutable") - def __repr__(self): - from hypothesis.internal.conjecture.data import AVAILABLE_PROVIDERS - + def __repr__(self) -> str: bits = sorted( f"{name}={getattr(self, name)!r}" for name in all_settings @@ -292,7 +304,7 @@ class settings(metaclass=settingsMeta): ) return "settings({})".format(", ".join(bits)) - def show_changed(self): + def show_changed(self) -> str: bits = [] for name, setting in all_settings.items(): value = getattr(self, name) @@ -350,20 +362,20 @@ class settings(metaclass=settingsMeta): @contextlib.contextmanager -def local_settings(s): +def local_settings(s: settings) -> Generator[settings, None, None]: with default_variable.with_value(s): yield s @attr.s() class Setting: - name = attr.ib() - description = attr.ib() - default = attr.ib() - validator = attr.ib() + name: str = attr.ib() + description: str = attr.ib() + default: object = attr.ib() + validator: ValidatorT = attr.ib() -def _max_examples_validator(x): +def _max_examples_validator(x: int) -> int: check_type(int, x, name="max_examples") if x < 1: raise InvalidArgument( @@ -393,7 +405,7 @@ running time against the chance of missing a bug. If you are writing one-off tests, running tens of thousands of examples is quite reasonable as Hypothesis may miss uncommon bugs with default settings. 
For very complex code, we have observed Hypothesis finding novel bugs after -*several million* examples while testing :pypi:`SymPy`. +*several million* examples while testing :pypi:`SymPy <sympy>`. If you are running more than 100k examples for a test, consider using our :ref:`integration for coverage-guided fuzzing <fuzz_one_input>` - it really shines when given minutes or hours to run. @@ -421,15 +433,14 @@ By default when running on CI, this will be set to True. ) -def _validate_database(db): +def _validate_database(db: "ExampleDatabase") -> "ExampleDatabase": from hypothesis.database import ExampleDatabase if db is None or isinstance(db, ExampleDatabase): return db raise InvalidArgument( "Arguments to the database setting must be None or an instance of " - f"ExampleDatabase. Try passing database=ExampleDatabase({db!r}), or " - "construct and use one of the specific subclasses in " + "ExampleDatabase. Try using one of the specific subclasses in " "hypothesis.database" ) @@ -443,7 +454,7 @@ An instance of :class:`~hypothesis.database.ExampleDatabase` that will be used to save examples to and load previous examples from. May be ``None`` in which case no storage will be used. -See the :doc:`example database documentation <database>` for a list of built-in +See the :ref:`example database documentation <database>` for a list of built-in example database implementations, and how to define custom implementations. """, validator=_validate_database, @@ -459,7 +470,7 @@ class Phase(IntEnum): shrink = 4 #: controls whether examples will be shrunk. explain = 5 #: controls whether Hypothesis attempts to explain test failures. - def __repr__(self): + def __repr__(self) -> str: return f"Phase.{self.name}" @@ -476,7 +487,7 @@ class HealthCheck(Enum, metaclass=HealthCheckMeta): Each member of this enum is a type of health check to suppress. 
""" - def __repr__(self): + def __repr__(self) -> str: return f"{self.__class__.__name__}.{self.name}" @classmethod @@ -502,7 +513,7 @@ class HealthCheck(Enum, metaclass=HealthCheckMeta): filter_too_much = 2 """Check for when the test is filtering out too many examples, either - through use of :func:`~hypothesis.assume()` or :ref:`filter() <filtering>`, + through use of :func:`~hypothesis.assume()` or |strategy.filter|, or occasionally for Hypothesis internal reasons.""" too_slow = 3 @@ -549,15 +560,53 @@ class HealthCheck(Enum, metaclass=HealthCheckMeta): subclasses, or to refactor so that :func:`@given <hypothesis.given>` is specified on leaf subclasses.""" + nested_given = 11 + """Checks if :func:`@given <hypothesis.given>` is used inside another + :func:`@given <hypothesis.given>`. This results in quadratic generation and + shrinking behavior, and can usually be expressed more cleanly by using + :func:`~hypothesis.strategies.data` to replace the inner + :func:`@given <hypothesis.given>`. + + Nesting @given can be appropriate if you set appropriate limits for the + quadratic behavior and cannot easily reexpress the inner function with + :func:`~hypothesis.strategies.data`. To suppress this health check, set + ``suppress_health_check=[HealthCheck.nested_given]`` on the outer + :func:`@given <hypothesis.given>`. Setting it on the inner + :func:`@given <hypothesis.given>` has no effect. If you have more than one + level of nesting, add a suppression for this health check to every + :func:`@given <hypothesis.given>` except the innermost one. + """ + @unique class Verbosity(IntEnum): + """Verbosity levels for |@settings|.""" + quiet = 0 + """ + Hypothesis will not print any output, not even the final falsifying example. + """ + normal = 1 + """ + Standard verbosity. Hypothesis will print the falsifying example, alongside + any notes made with |note| (only for the falsfying example). + """ + verbose = 2 + """ + Increased verbosity. 
In addition to everything in |Verbosity.normal|, Hypothesis + will print each example as it tries it, as well as any notes made with |note| + for every example. Hypothesis will also print shrinking attempts. + """ + debug = 3 + """ + Even more verbosity. Useful for debugging Hypothesis internals. You probably + don't want this. + """ - def __repr__(self): + def __repr__(self) -> str: return f"Verbosity.{self.name}" @@ -569,7 +618,7 @@ settings._define_setting( ) -def _validate_phases(phases): +def _validate_phases(phases: Sequence[Phase]) -> Sequence[Phase]: phases = tuple(phases) for a in phases: if not isinstance(a, Phase): @@ -588,7 +637,7 @@ settings._define_setting( ) -def _validate_stateful_step_count(x): +def _validate_stateful_step_count(x: int) -> int: check_type(int, x, name="stateful_step_count") if x < 1: raise InvalidArgument(f"stateful_step_count={x!r} must be at least one.") @@ -646,12 +695,14 @@ settings._define_setting( class duration(datetime.timedelta): """A timedelta specifically measured in milliseconds.""" - def __repr__(self): + def __repr__(self) -> str: ms = self.total_seconds() * 1000 return f"timedelta(milliseconds={int(ms) if ms == int(ms) else ms!r})" -def _validate_deadline(x): +def _validate_deadline( + x: Union[int, float, datetime.timedelta, None], +) -> Optional[duration]: if x is None: return x invalid_deadline_error = InvalidArgument( @@ -715,9 +766,7 @@ If set to ``True``, Hypothesis will print code for failing examples that can be ) -def _backend_validator(value): - from hypothesis.internal.conjecture.data import AVAILABLE_PROVIDERS - +def _backend_validator(value: str) -> str: if value not in AVAILABLE_PROVIDERS: if value == "crosshair": # pragma: no cover install = '`pip install "hypothesis[crosshair]"` and try again.' 
diff --git a/contrib/python/hypothesis/py3/hypothesis/control.py b/contrib/python/hypothesis/py3/hypothesis/control.py index 9457b368899..9e76b443fe1 100644 --- a/contrib/python/hypothesis/py3/hypothesis/control.py +++ b/contrib/python/hypothesis/py3/hypothesis/control.py @@ -12,8 +12,9 @@ import inspect import math import random from collections import defaultdict +from collections.abc import Sequence from contextlib import contextmanager -from typing import Any, NoReturn, Union +from typing import Any, Callable, NoReturn, Optional, Union from weakref import WeakKeyDictionary from hypothesis import Verbosity, settings @@ -26,7 +27,7 @@ from hypothesis.internal.reflection import get_pretty_function_description from hypothesis.internal.validation import check_type from hypothesis.reporting import report, verbose_report from hypothesis.utils.dynamicvariables import DynamicVariable -from hypothesis.vendor.pretty import IDKey, pretty +from hypothesis.vendor.pretty import IDKey, PrettyPrintFunction, pretty def _calling_function_location(what: str, frame: Any) -> str: @@ -71,12 +72,12 @@ def assume(condition: object) -> bool: return True -_current_build_context = DynamicVariable(None) +_current_build_context = DynamicVariable[Optional["BuildContext"]](None) def currently_in_test_context() -> bool: """Return ``True`` if the calling code is currently running inside an - :func:`@given <hypothesis.given>` or :doc:`stateful <stateful>` test, + :func:`@given <hypothesis.given>` or :ref:`stateful <stateful>` test, ``False`` otherwise. 
This is useful for third-party integrations and assertion helpers which @@ -126,21 +127,34 @@ def deprecate_random_in_strategy(fmt, *args): class BuildContext: - def __init__(self, data, *, is_final=False, close_on_capture=True): - assert isinstance(data, ConjectureData) + def __init__( + self, + data: ConjectureData, + *, + is_final: bool = False, + ) -> None: self.data = data - self.tasks = [] + self.tasks: list[Callable[[], Any]] = [] self.is_final = is_final - self.close_on_capture = close_on_capture - self.close_on_del = False + # Use defaultdict(list) here to handle the possibility of having multiple # functions registered for the same object (due to caching, small ints, etc). # The printer will discard duplicates which return different representations. - self.known_object_printers = defaultdict(list) + self.known_object_printers: dict[IDKey, list[PrettyPrintFunction]] = ( + defaultdict(list) + ) - def record_call(self, obj, func, args, kwargs): + def record_call( + self, + obj: object, + func: object, + args: Sequence[object], + kwargs: dict[str, object], + ) -> None: self.known_object_printers[IDKey(obj)].append( - lambda obj, p, cycle, *, _func=func: p.maybe_repr_known_object_as_call( + # _func=func prevents mypy from inferring lambda type. Would need + # paramspec I think - not worth it. 
+ lambda obj, p, cycle, *, _func=func: p.maybe_repr_known_object_as_call( # type: ignore obj, cycle, get_pretty_function_description(_func), args, kwargs ) ) @@ -149,10 +163,10 @@ class BuildContext: arg_labels = {} kwargs = {} for k, s in kwarg_strategies.items(): - start_idx = len(self.data.ir_nodes) + start_idx = len(self.data.nodes) with deprecate_random_in_strategy("from {}={!r}", k, s) as check: obj = check(self.data.draw(s, observe_as=f"generate:{k}")) - end_idx = len(self.data.ir_nodes) + end_idx = len(self.data.nodes) kwargs[k] = obj # This high up the stack, we can't see or really do much with the conjecture diff --git a/contrib/python/hypothesis/py3/hypothesis/core.py b/contrib/python/hypothesis/py3/hypothesis/core.py index 34a274a6eaf..0498522d7ce 100644 --- a/contrib/python/hypothesis/py3/hypothesis/core.py +++ b/contrib/python/hypothesis/py3/hypothesis/core.py @@ -23,8 +23,9 @@ import unittest import warnings import zlib from collections import defaultdict -from collections.abc import Coroutine, Generator, Hashable +from collections.abc import Coroutine, Generator, Hashable, Iterable, Sequence from functools import partial +from inspect import Parameter from random import Random from typing import ( TYPE_CHECKING, @@ -45,10 +46,12 @@ from hypothesis._settings import ( HealthCheck, Phase, Verbosity, + all_settings, local_settings, settings as Settings, ) -from hypothesis.control import BuildContext +from hypothesis.control import BuildContext, currently_in_test_context +from hypothesis.database import choices_from_bytes, choices_to_bytes from hypothesis.errors import ( BackendCannotProceed, DeadlineExceeded, @@ -74,16 +77,17 @@ from hypothesis.internal.compat import ( get_type_hints, int_from_bytes, ) -from hypothesis.internal.conjecture.data import ( - ConjectureData, - PrimitiveProvider, - Status, -) +from hypothesis.internal.conjecture.choice import ChoiceT +from hypothesis.internal.conjecture.data import ConjectureData, Status from 
hypothesis.internal.conjecture.engine import BUFFER_SIZE, ConjectureRunner from hypothesis.internal.conjecture.junkdrawer import ( ensure_free_stackframes, gc_cumulative_time, ) +from hypothesis.internal.conjecture.providers import ( + BytestringProvider, + PrimitiveProvider, +) from hypothesis.internal.conjecture.shrinker import sort_key from hypothesis.internal.entropy import deterministic_PRNG from hypothesis.internal.escalation import ( @@ -318,27 +322,28 @@ def reproduce_failure(version: str, blob: bytes) -> Callable[[TestFunc], TestFun return accept -def encode_failure(buffer): - buffer = bytes(buffer) - compressed = zlib.compress(buffer) - if len(compressed) < len(buffer): - buffer = b"\1" + compressed +def encode_failure(choices: Iterable[ChoiceT]) -> bytes: + blob = choices_to_bytes(choices) + compressed = zlib.compress(blob) + if len(compressed) < len(blob): + blob = b"\1" + compressed else: - buffer = b"\0" + buffer - return base64.b64encode(buffer) + blob = b"\0" + blob + return base64.b64encode(blob) -def decode_failure(blob): +def decode_failure(blob: bytes) -> Sequence[ChoiceT]: try: - buffer = base64.b64decode(blob) + decoded = base64.b64decode(blob) except Exception: raise InvalidArgument(f"Invalid base64 encoded string: {blob!r}") from None - prefix = buffer[:1] + + prefix = decoded[:1] if prefix == b"\0": - return buffer[1:] + decoded = decoded[1:] elif prefix == b"\1": try: - return zlib.decompress(buffer[1:]) + decoded = zlib.decompress(decoded[1:]) except zlib.error as err: raise InvalidArgument( f"Invalid zlib compression for blob {blob!r}" @@ -348,6 +353,12 @@ def decode_failure(blob): f"Could not decode blob {blob!r}: Invalid start byte {prefix!r}" ) + choices = choices_from_bytes(decoded) + if choices is None: + raise InvalidArgument(f"Invalid serialized choice sequence for blob {blob!r}") + + return choices + def _invalid(message, *, exc=InvalidArgument, test, given_kwargs): @impersonate(test) @@ -405,9 +416,12 @@ def 
is_invalid_test(test, original_sig, given_arguments, given_kwargs): ] if extra_kwargs and (params == [] or params[-1].kind is not params[-1].VAR_KEYWORD): arg = extra_kwargs[0] + extra = "" + if arg in all_settings: + extra = f". Did you mean @settings({arg}={given_kwargs[arg]!r})?" return invalid( f"{test.__name__}() got an unexpected keyword argument {arg!r}, " - f"from `{arg}={given_kwargs[arg]!r}` in @given" + f"from `{arg}={given_kwargs[arg]!r}` in @given{extra}" ) if any(p.default is not p.empty for p in params): return invalid("Cannot apply @given to a function with defaults.") @@ -475,7 +489,7 @@ def execute_explicit_examples(state, wrapped_test, arguments, kwargs, original_s with local_settings(state.settings): fragments_reported = [] - empty_data = ConjectureData.for_buffer(b"") + empty_data = ConjectureData.for_choices([]) try: execute_example = partial( state.execute_once, @@ -620,7 +634,13 @@ class Stuff: given_kwargs: dict = attr.ib(factory=dict) -def process_arguments_to_given(wrapped_test, arguments, kwargs, given_kwargs, params): +def process_arguments_to_given( + wrapped_test: Any, + arguments: Sequence[object], + kwargs: dict[str, object], + given_kwargs: dict[str, SearchStrategy], + params: dict[str, Parameter], +) -> tuple[Sequence[object], dict[str, object], Stuff]: selfy = None arguments, kwargs = convert_positional_arguments(wrapped_test, arguments, kwargs) @@ -674,7 +694,7 @@ def skip_exceptions_to_reraise(): return tuple(sorted(exceptions, key=str)) -def failure_exceptions_to_catch(): +def failure_exceptions_to_catch() -> tuple[type[BaseException], ...]: """Return a tuple of exceptions meaning 'this test has failed', to catch. 
This is intended to cover most common test runners; if you would @@ -947,6 +967,7 @@ class StateForActualGivenExecution: if (0, 0) in context.data.slice_comments else None ), + avoid_realization=data.provider.avoid_realization, ) report(printer.getvalue()) @@ -963,11 +984,12 @@ class StateForActualGivenExecution: if (0, 0) in context.data.slice_comments else None ), + avoid_realization=data.provider.avoid_realization, ) self._string_repr = printer.getvalue() data._observability_arguments = { - **dict(enumerate(map(to_jsonable, args))), - **{k: to_jsonable(v) for k, v in kwargs.items()}, + k: to_jsonable(v, avoid_realization=data.provider.avoid_realization) + for k, v in [*enumerate(args), *kwargs.items()] } try: @@ -975,8 +997,9 @@ class StateForActualGivenExecution: except TypeError as e: # If we sampled from a sequence of strategies, AND failed with a # TypeError, *AND that exception mentions SearchStrategy*, add a note: - if "SearchStrategy" in str(e) and hasattr( - data, "_sampled_from_all_strategies_elements_message" + if ( + "SearchStrategy" in str(e) + and data._sampled_from_all_strategies_elements_message is not None ): msg, format_arg = data._sampled_from_all_strategies_elements_message add_note(e, msg.format(format_arg)) @@ -987,16 +1010,18 @@ class StateForActualGivenExecution: # self.test_runner can include the execute_example method, or setup/teardown # _example, so it's important to get the PRNG and build context in place first. - with local_settings(self.settings): - with deterministic_PRNG(): - with BuildContext(data, is_final=is_final) as context: - # providers may throw in per_case_context_fn, and we'd like - # `result` to still be set in these cases. - result = None - with data.provider.per_test_case_context_manager(): - # Run the test function once, via the executor hook. - # In most cases this will delegate straight to `run(data)`. 
- result = self.test_runner(data, run) + with ( + local_settings(self.settings), + deterministic_PRNG(), + BuildContext(data, is_final=is_final) as context, + ): + # providers may throw in per_case_context_fn, and we'd like + # `result` to still be set in these cases. + result = None + with data.provider.per_test_case_context_manager(): + # Run the test function once, via the executor hook. + # In most cases this will delegate straight to `run(data)`. + result = self.test_runner(data, run) # If a failure was expected, it should have been raised already, so # instead raise an appropriate diagnostic error. @@ -1034,15 +1059,17 @@ class StateForActualGivenExecution: def _flaky_replay_to_failure( self, err: FlakyReplay, context: BaseException ) -> FlakyFailure: + # Note that in the mark_interesting case, _context_ itself + # is part of err._interesting_examples - but it's not in + # _runner.interesting_examples - this is fine, as the context + # (i.e., immediate exception) is appended. interesting_examples = [ self._runner.interesting_examples[io] for io in err._interesting_origins - if io - ] - exceptions = [ - ie.extra_information._expected_exception for ie in interesting_examples + if io in self._runner.interesting_examples ] - exceptions.append(context) # the offending assume (or whatever) + exceptions = [ie.expected_exception for ie in interesting_examples] + exceptions.append(context) # the immediate exception return FlakyFailure(err.reason, exceptions) def _execute_once_for_engine(self, data: ConjectureData) -> None: @@ -1097,7 +1124,23 @@ class StateForActualGivenExecution: # If an unhandled (i.e., non-Hypothesis) error was raised by # Hypothesis-internal code, re-raise it as a fatal error instead # of treating it as a test failure. 
- filepath = traceback.extract_tb(e.__traceback__)[-1][0] + if isinstance(e, BaseExceptionGroup) and len(e.exceptions) == 1: + # When a naked exception is implicitly wrapped in an ExceptionGroup + # due to a re-raising "except*", the ExceptionGroup is constructed in + # the caller's stack frame (see #4183). This workaround is specifically + # for implicit wrapping of naked exceptions by "except*", since explicit + # raising of ExceptionGroup gets the proper traceback in the first place + # - there's no need to handle hierarchical groups here, at least if no + # such implicit wrapping happens inside hypothesis code (we only care + # about the hypothesis-or-not distinction). + # + # 01-25-2025: this was patched to give the correct + # stacktrace in cpython https://github.com/python/cpython/issues/128799. + # can remove once python3.11 is EOL. + tb = e.exceptions[0].__traceback__ or e.__traceback__ + else: + tb = e.__traceback__ + filepath = traceback.extract_tb(tb)[-1][0] if is_hypothesis_file(filepath) and not isinstance(e, HypothesisException): raise @@ -1111,10 +1154,10 @@ class StateForActualGivenExecution: # engine that this test run was interesting. This is the normal # path for test runs that fail. tb = get_trimmed_traceback() - info = data.extra_information - info._expected_traceback = format_exception(e, tb) # type: ignore - info._expected_exception = e # type: ignore - verbose_report(info._expected_traceback) # type: ignore + data.expected_traceback = format_exception(e, tb) + data.expected_exception = e + assert data.expected_traceback is not None # for mypy + verbose_report(data.expected_traceback) self.failed_normally = True @@ -1125,7 +1168,11 @@ class StateForActualGivenExecution: if interesting_origin[0] == DeadlineExceeded: self.failed_due_to_deadline = True self.explain_traces.clear() - data.mark_interesting(interesting_origin) # type: ignore # mypy bug? 
+ try: + data.mark_interesting(interesting_origin) + except FlakyReplay as err: + raise self._flaky_replay_to_failure(err, e) from None + finally: # Conditional here so we can save some time constructing the payload; in # other cases (without coverage) it's cheap enough to do that regardless. @@ -1222,13 +1269,38 @@ class StateForActualGivenExecution: if runner.interesting_examples: self.falsifying_examples = sorted( runner.interesting_examples.values(), - key=lambda d: sort_key(d.buffer), + key=lambda d: sort_key(d.nodes), reverse=True, ) else: if runner.valid_examples == 0: + explanations = [] + # use a somewhat arbitrary cutoff to avoid recommending spurious + # fixes. + # eg, a few invalid examples from internal filters when the + # problem is the user generating large inputs, or a + # few overruns during internal mutation when the problem is + # impossible user filters/assumes. + if runner.invalid_examples > min(20, runner.call_count // 5): + explanations.append( + f"{runner.invalid_examples} of {runner.call_count} " + "examples failed a .filter() or assume() condition. Try " + "making your filters or assumes less strict, or rewrite " + "using strategy parameters: " + "st.integers().filter(lambda x: x > 0) fails less often " + "(that is, never) when rewritten as st.integers(min_value=1)." + ) + if runner.overrun_examples > min(20, runner.call_count // 5): + explanations.append( + f"{runner.overrun_examples} of {runner.call_count} " + "examples were too large to finish generating; try " + "reducing the typical size of your inputs?" + ) rep = get_pretty_function_description(self.test) - raise Unsatisfiable(f"Unable to satisfy assumptions of {rep}") + raise Unsatisfiable( + f"Unable to satisfy assumptions of {rep}. " + f"{' Also, '.join(explanations)}" + ) # If we have not traced executions, warn about that now (but only when # we'd expect to do so reliably, i.e. 
on CPython>=3.12) @@ -1264,17 +1336,16 @@ class StateForActualGivenExecution: explanations = explanatory_lines(self.explain_traces, self.settings) for falsifying_example in self.falsifying_examples: - info = falsifying_example.extra_information fragments = [] - ran_example = runner.new_conjecture_data_for_buffer( - falsifying_example.buffer + ran_example = runner.new_conjecture_data( + falsifying_example.choices, max_choices=len(falsifying_example.choices) ) ran_example.slice_comments = falsifying_example.slice_comments tb = None origin = None - assert info is not None - assert info._expected_exception is not None + assert falsifying_example.expected_exception is not None + assert falsifying_example.expected_traceback is not None try: with with_reporter(fragments.append): self.execute_once( @@ -1282,8 +1353,8 @@ class StateForActualGivenExecution: print_example=not self.is_find, is_final=True, expected_failure=( - info._expected_exception, - info._expected_traceback, + falsifying_example.expected_exception, + falsifying_example.expected_traceback, ), ) except StopTest as e: @@ -1299,7 +1370,8 @@ class StateForActualGivenExecution: "Inconsistent results: An example failed on the " "first run but now succeeds (or fails with another " "error, or is for some reason not runnable).", - [info._expected_exception or e], # (note: e is a BaseException) + # (note: e is a BaseException) + [falsifying_example.expected_exception or e], ) errors_to_report.append((fragments, err)) except UnsatisfiedAssumption as e: # pragma: no cover # ironically flaky @@ -1353,7 +1425,7 @@ class StateForActualGivenExecution: fragments.append( "\nYou can reproduce this example by temporarily adding " "@reproduce_failure(%r, %r) as a decorator on your test case" - % (__version__, encode_failure(falsifying_example.buffer)) + % (__version__, encode_failure(falsifying_example.choices)) ) # Mostly useful for ``find`` and ensuring that objects that # hold on to a reference to ``data`` know that it's now 
been @@ -1500,6 +1572,15 @@ def given( This is the main entry point to Hypothesis. """ + if currently_in_test_context(): + fail_health_check( + Settings(), + "Nesting @given tests results in quadratic generation and shrinking " + "behavior and can usually be more cleanly expressed by replacing the " + "inner function with an st.data() parameter on the outer @given.", + HealthCheck.nested_given, + ) + def run_test_as_given(test): if inspect.isclass(test): # Provide a meaningful error to users, instead of exceptions from @@ -1656,7 +1737,7 @@ def given( ) try: state.execute_once( - ConjectureData.for_buffer(decode_failure(failure)), + ConjectureData.for_choices(decode_failure(failure)), print_example=True, is_final=True, ) @@ -1793,28 +1874,34 @@ def given( minimal_failures: dict = {} def fuzz_one_input( - buffer: Union[bytes, bytearray, memoryview, BinaryIO] + buffer: Union[bytes, bytearray, memoryview, BinaryIO], ) -> Optional[bytes]: # This inner part is all that the fuzzer will actually run, # so we keep it as small and as fast as possible. 
if isinstance(buffer, io.IOBase): buffer = buffer.read(BUFFER_SIZE) assert isinstance(buffer, (bytes, bytearray, memoryview)) - data = ConjectureData.for_buffer(buffer) + data = ConjectureData( + random=None, + provider=BytestringProvider, + provider_kw={"bytestring": buffer}, + ) try: state.execute_once(data) except (StopTest, UnsatisfiedAssumption): return None except BaseException: - buffer = bytes(data.buffer) known = minimal_failures.get(data.interesting_origin) if settings.database is not None and ( - known is None or sort_key(buffer) <= sort_key(known) + known is None or sort_key(data.nodes) <= sort_key(known) ): - settings.database.save(database_key, buffer) - minimal_failures[data.interesting_origin] = buffer + settings.database.save( + database_key, choices_to_bytes(data.choices) + ) + minimal_failures[data.interesting_origin] = data.nodes raise - return bytes(data.buffer) + assert isinstance(data.provider, BytestringProvider) + return bytes(data.provider.drawn) fuzz_one_input.__doc__ = HypothesisHandle.fuzz_one_input.__doc__ return fuzz_one_input diff --git a/contrib/python/hypothesis/py3/hypothesis/database.py b/contrib/python/hypothesis/py3/hypothesis/database.py index d30ffbf40de..53d5e14684f 100644 --- a/contrib/python/hypothesis/py3/hypothesis/database.py +++ b/contrib/python/hypothesis/py3/hypothesis/database.py @@ -9,39 +9,65 @@ # obtain one at https://mozilla.org/MPL/2.0/. 
import abc -import binascii import json import os import struct import sys +import tempfile import warnings +import weakref from collections.abc import Iterable from datetime import datetime, timedelta, timezone from functools import lru_cache from hashlib import sha384 -from os import getenv +from os import PathLike, getenv from pathlib import Path, PurePath -from typing import Optional +from queue import Queue +from threading import Thread +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ClassVar, + Literal, + Optional, + Union, + cast, +) from urllib.error import HTTPError, URLError from urllib.request import Request, urlopen from zipfile import BadZipFile, ZipFile +from hypothesis._settings import note_deprecation from hypothesis.configuration import storage_directory from hypothesis.errors import HypothesisException, HypothesisWarning -from hypothesis.internal.conjecture.data import IRType -from hypothesis.utils.conventions import not_set +from hypothesis.internal.conjecture.choice import ChoiceT +from hypothesis.utils.conventions import UniqueIdentifier, not_set __all__ = [ "DirectoryBasedExampleDatabase", "ExampleDatabase", + "GitHubArtifactDatabase", "InMemoryExampleDatabase", "MultiplexedDatabase", "ReadOnlyDatabase", - "GitHubArtifactDatabase", ] +if TYPE_CHECKING: + from typing import TypeAlias + + from watchdog.observers.api import BaseObserver + +StrPathT: "TypeAlias" = Union[str, PathLike[str]] +SaveDataT: "TypeAlias" = tuple[bytes, bytes] # key, value +DeleteDataT: "TypeAlias" = tuple[bytes, Optional[bytes]] # key, value +ListenerEventT: "TypeAlias" = Union[ + tuple[Literal["save"], SaveDataT], tuple[Literal["delete"], DeleteDataT] +] +ListenerT: "TypeAlias" = Callable[[ListenerEventT], Any] + -def _usable_dir(path: os.PathLike) -> bool: +def _usable_dir(path: StrPathT) -> bool: """ Returns True if the desired path can be used as database path because either the directory exists and can be used, or its root directory can @@ -57,7 +83,9 @@ 
def _usable_dir(path: os.PathLike) -> bool: return False -def _db_for_path(path=None): +def _db_for_path( + path: Optional[Union[StrPathT, UniqueIdentifier, Literal[":memory:"]]] = None, +) -> "ExampleDatabase": if path is not_set: if os.getenv("HYPOTHESIS_DATABASE_FILE") is not None: # pragma: no cover raise HypothesisException( @@ -78,12 +106,22 @@ def _db_for_path(path=None): return InMemoryExampleDatabase() if path in (None, ":memory:"): return InMemoryExampleDatabase() + path = cast(StrPathT, path) return DirectoryBasedExampleDatabase(path) class _EDMeta(abc.ABCMeta): - def __call__(self, *args, **kwargs): + def __call__(self, *args: Any, **kwargs: Any) -> "ExampleDatabase": if self is ExampleDatabase: + note_deprecation( + "Creating a database using the abstract ExampleDatabase() class " + "is deprecated. Prefer using a concrete subclass, like " + "InMemoryExampleDatabase() or DirectoryBasedExampleDatabase(path). " + 'In particular, the special string ExampleDatabase(":memory:") ' + "should be replaced by InMemoryExampleDatabase().", + since="2025-04-07", + has_codemod=False, + ) return _db_for_path(*args, **kwargs) return super().__call__(*args, **kwargs) @@ -110,9 +148,12 @@ class ExampleDatabase(metaclass=_EDMeta): """An abstract base class for storing examples in Hypothesis' internal format. An ExampleDatabase maps each ``bytes`` key to many distinct ``bytes`` - values, like a ``Mapping[bytes, AbstractSet[bytes]]``. + values, like a ``Mapping[bytes, set[bytes]]``. """ + def __init__(self) -> None: + self._listeners: list[ListenerT] = [] + @abc.abstractmethod def save(self, key: bytes, value: bytes) -> None: """Save ``value`` under ``key``. 
@@ -148,6 +189,78 @@ class ExampleDatabase(metaclass=_EDMeta): self.delete(src, value) self.save(dest, value) + def add_listener(self, f: ListenerT, /) -> None: + """Add a change listener.""" + had_listeners = bool(self._listeners) + self._listeners.append(f) + if not had_listeners: + self._start_listening() + + def remove_listener(self, f: ListenerT, /) -> None: + """ + Remove a change listener. If the listener is not present, silently do + nothing. + """ + if f not in self._listeners: + return + self._listeners.remove(f) + if not self._listeners: + self._stop_listening() + + def clear_listeners(self) -> None: + """Remove all change listeners.""" + had_listeners = bool(self._listeners) + self._listeners.clear() + if had_listeners: + self._stop_listening() + + def _broadcast_change(self, event: ListenerEventT) -> None: + """ + Called when a value has been either added to or deleted from a key in + the underlying database store. event_type is one of "save" or "delete". + + ``value`` may be ``None`` for ``event_type == "delete"``, which indicates + we don't know what value was deleted from the database. + + Note that you should not assume you are the only reference to the underlying + database store. For example, if two DirectoryBasedExampleDatabase reference + the same directory, _broadcast_change should be called whenever a file is + added or removed from the directory, even if that database was not responsible + for changing the file. + """ + for listener in self._listeners: + listener(event) + + def _start_listening(self) -> None: + """ + Called when the database adds a change listener, and did not previously + have any change listeners. Intended to allow databases to wait to start + expensive listening operations until necessary. + + _start_listening and _stop_listening are guaranteed to alternate, so you + do not need to handle the case of multiple consecutive _start_listening + calls without an intermediate _stop_listening call. 
+ """ + warnings.warn( + f"{self.__class__} does not support listening for changes", + HypothesisWarning, + stacklevel=4, + ) + + def _stop_listening(self) -> None: + """ + Called whenever no change listeners remain on the database. + + _stop_listening and _start_listening are guaranteed to alternate, so you + do not need to handle the case of multiple consecutive _stop_listening + calls without an intermediate _start_listening call. + """ + warnings.warn( + f"{self.__class__} does not support stopping listening for changes", + HypothesisWarning, + stacklevel=4, + ) + class InMemoryExampleDatabase(ExampleDatabase): """A non-persistent example database, implemented in terms of a dict of sets. @@ -157,8 +270,9 @@ class InMemoryExampleDatabase(ExampleDatabase): does not persist between runs we do not recommend it for general use. """ - def __init__(self): - self.data = {} + def __init__(self) -> None: + super().__init__() + self.data: dict[bytes, set[bytes]] = {} def __repr__(self) -> str: return f"InMemoryExampleDatabase({self.data!r})" @@ -167,13 +281,34 @@ class InMemoryExampleDatabase(ExampleDatabase): yield from self.data.get(key, ()) def save(self, key: bytes, value: bytes) -> None: - self.data.setdefault(key, set()).add(bytes(value)) + value = bytes(value) + values = self.data.setdefault(key, set()) + changed = value not in values + values.add(value) + + if changed: + self._broadcast_change(("save", (key, value))) def delete(self, key: bytes, value: bytes) -> None: - self.data.get(key, set()).discard(bytes(value)) + value = bytes(value) + values = self.data.get(key, set()) + changed = value in values + values.discard(value) + + if changed: + self._broadcast_change(("delete", (key, value))) + + def _start_listening(self) -> None: + # declare compatibility with the listener api, but do the actual + # implementation in .delete and .save, since we know we are the only + # writer to .data. 
+ pass + + def _stop_listening(self) -> None: + pass -def _hash(key): +def _hash(key: bytes) -> str: return sha384(key).hexdigest()[:16] @@ -196,9 +331,16 @@ class DirectoryBasedExampleDatabase(ExampleDatabase): the :class:`~hypothesis.database.MultiplexedDatabase` helper. """ - def __init__(self, path: os.PathLike) -> None: + # we keep a database entry of the full values of all the database keys. + # currently only used for inverse mapping of hash -> key in change listening. + _metakeys_name: ClassVar[bytes] = b".hypothesis-keys" + _metakeys_hash: ClassVar[str] = _hash(_metakeys_name) + + def __init__(self, path: StrPathT) -> None: + super().__init__() self.path = Path(path) self.keypaths: dict[bytes, Path] = {} + self._observer: BaseObserver | None = None def __repr__(self) -> str: return f"DirectoryBasedExampleDatabase({self.path!r})" @@ -211,7 +353,7 @@ class DirectoryBasedExampleDatabase(ExampleDatabase): self.keypaths[key] = self.path / _hash(key) return self.keypaths[key] - def _value_path(self, key, value): + def _value_path(self, key: bytes, value: bytes) -> Path: return self._key_path(key) / _hash(value) def fetch(self, key: bytes) -> Iterable[bytes]: @@ -225,21 +367,32 @@ class DirectoryBasedExampleDatabase(ExampleDatabase): pass def save(self, key: bytes, value: bytes) -> None: + key_path = self._key_path(key) + if key_path.name != self._metakeys_hash: + # add this key to our meta entry of all keys - taking care to avoid + # infinite recursion. + self.save(self._metakeys_name, key) + # Note: we attempt to create the dir in question now. We # already checked for permissions, but there can still be other issues, # e.g. the disk is full, or permissions might have been changed. 
try: - self._key_path(key).mkdir(exist_ok=True, parents=True) + key_path.mkdir(exist_ok=True, parents=True) path = self._value_path(key, value) if not path.exists(): - suffix = binascii.hexlify(os.urandom(16)).decode("ascii") - tmpname = path.with_suffix(f"{path.suffix}.{suffix}") - tmpname.write_bytes(value) + # to mimic an atomic write, create and write in a temporary + # directory, and only move to the final path after. This avoids + # any intermediate state where the file is created (and empty) + # but not yet written to. + fd, tmpname = tempfile.mkstemp() + tmppath = Path(tmpname) + os.write(fd, value) + os.close(fd) try: - tmpname.rename(path) + tmppath.rename(path) except OSError: # pragma: no cover - tmpname.unlink() - assert not tmpname.exists() + tmppath.unlink() + assert not tmppath.exists() except OSError: # pragma: no cover pass @@ -247,11 +400,16 @@ class DirectoryBasedExampleDatabase(ExampleDatabase): if src == dest: self.save(src, value) return + + src_path = self._value_path(src, value) + dest_path = self._value_path(dest, value) + # if the dest key path does not exist, os.renames will create it for us, + # and we will never track its creation in the meta keys entry. Do so now. + if not self._key_path(dest).exists(): + self.save(self._metakeys_name, dest) + try: - os.renames( - self._value_path(src, value), - self._value_path(dest, value), - ) + os.renames(src_path, dest_path) except OSError: self.delete(src, value) self.save(dest, value) @@ -262,6 +420,114 @@ class DirectoryBasedExampleDatabase(ExampleDatabase): except OSError: pass + def _start_listening(self) -> None: + try: + from watchdog.events import ( + DirCreatedEvent, + DirDeletedEvent, + DirMovedEvent, + FileCreatedEvent, + FileDeletedEvent, + FileMovedEvent, + FileSystemEventHandler, + ) + from watchdog.observers import Observer + except ImportError: + warnings.warn( + f"listening for changes in a {self.__class__.__name__} " + "requires the watchdog library. 
To install, run " + "`pip install hypothesis[watchdog]`", + HypothesisWarning, + stacklevel=4, + ) + return + + hash_to_key = {_hash(key): key for key in self.fetch(self._metakeys_name)} + _metakeys_hash = self._metakeys_hash + _broadcast_change = self._broadcast_change + + class Handler(FileSystemEventHandler): + def on_created( + _self, event: Union[FileCreatedEvent, DirCreatedEvent] + ) -> None: + # we only registered for the file creation event + assert not isinstance(event, DirCreatedEvent) + # watchdog events are only bytes if we passed a byte path to + # .schedule + assert isinstance(event.src_path, str) + + value_path = Path(event.src_path) + # the parent dir represents the key, and its name is the key hash + key_hash = value_path.parent.name + + if key_hash == _metakeys_hash: + hash_to_key[value_path.name] = value_path.read_bytes() + return + + key = hash_to_key.get(key_hash) + if key is None: # pragma: no cover + # we didn't recognize this key. This shouldn't ever happen, + # but some race condition trickery might cause this. 
+ return + + try: + value = value_path.read_bytes() + except OSError: # pragma: no cover + return + + _broadcast_change(("save", (key, value))) + + def on_deleted( + self, event: Union[FileDeletedEvent, DirDeletedEvent] + ) -> None: + assert not isinstance(event, DirDeletedEvent) + assert isinstance(event.src_path, str) + + value_path = Path(event.src_path) + key = hash_to_key.get(value_path.parent.name) + if key is None: # pragma: no cover + return + + _broadcast_change(("delete", (key, None))) + + def on_moved(self, event: Union[FileMovedEvent, DirMovedEvent]) -> None: + assert not isinstance(event, DirMovedEvent) + assert isinstance(event.src_path, str) + assert isinstance(event.dest_path, str) + + src_path = Path(event.src_path) + dest_path = Path(event.dest_path) + k1 = hash_to_key.get(src_path.parent.name) + k2 = hash_to_key.get(dest_path.parent.name) + + if k1 is None or k2 is None: # pragma: no cover + return + + try: + value = dest_path.read_bytes() + except OSError: # pragma: no cover + return + + _broadcast_change(("delete", (k1, value))) + _broadcast_change(("save", (k2, value))) + + self._observer = Observer() + self._observer.schedule( + Handler(), + # remove type: ignore when released + # https://github.com/gorakhargosh/watchdog/pull/1096 + self.path, # type: ignore + recursive=True, + event_filter=[FileCreatedEvent, FileDeletedEvent, FileMovedEvent], + ) + self._observer.start() + + def _stop_listening(self) -> None: + assert self._observer is not None + self._observer.stop() + self._observer.join() + self._observer = None + class ReadOnlyDatabase(ExampleDatabase): """A wrapper to make the given database read-only. 
@@ -275,6 +541,7 @@ class ReadOnlyDatabase(ExampleDatabase): """ def __init__(self, db: ExampleDatabase) -> None: + super().__init__() assert isinstance(db, ExampleDatabase) self._wrapped = db @@ -290,6 +557,13 @@ class ReadOnlyDatabase(ExampleDatabase): def delete(self, key: bytes, value: bytes) -> None: pass + def _start_listening(self) -> None: + # we're read only, so there are no changes to broadcast. + pass + + def _stop_listening(self) -> None: + pass + class MultiplexedDatabase(ExampleDatabase): """A wrapper around multiple databases. @@ -318,6 +592,7 @@ class MultiplexedDatabase(ExampleDatabase): """ def __init__(self, *dbs: ExampleDatabase) -> None: + super().__init__() assert all(isinstance(db, ExampleDatabase) for db in dbs) self._wrapped = dbs @@ -344,6 +619,14 @@ class MultiplexedDatabase(ExampleDatabase): for db in self._wrapped: db.move(src, dest, value) + def _start_listening(self) -> None: + for db in self._wrapped: + db.add_listener(self._broadcast_change) + + def _stop_listening(self) -> None: + for db in self._wrapped: + db.remove_listener(self._broadcast_change) + class GitHubArtifactDatabase(ExampleDatabase): """ @@ -358,7 +641,7 @@ class GitHubArtifactDatabase(ExampleDatabase): .. note:: You must provide ``GITHUB_TOKEN`` as an environment variable. In CI, Github Actions provides this automatically, but it needs to be set manually for local usage. In a developer machine, - this would usually be a `Personal Access Token <https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token>`_. + this would usually be a `Personal Access Token <https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens>`_. If the repository is private, it's necessary for the token to have ``repo`` scope in the case of a classic token, or ``actions:read`` in the case of a fine-grained token. @@ -388,7 +671,7 @@ class GitHubArtifactDatabase(ExampleDatabase): .. 
code-block:: yaml - name: Download example database - uses: dawidd6/[email protected] + uses: dawidd6/action-download-artifact@v9 with: name: hypothesis-example-db path: .hypothesis/examples @@ -421,8 +704,9 @@ class GitHubArtifactDatabase(ExampleDatabase): repo: str, artifact_name: str = "hypothesis-example-db", cache_timeout: timedelta = timedelta(days=1), - path: Optional[os.PathLike] = None, + path: Optional[StrPathT] = None, ): + super().__init__() self.owner = owner self.repo = repo self.artifact_name = artifact_name @@ -675,8 +959,99 @@ class GitHubArtifactDatabase(ExampleDatabase): raise RuntimeError(self._read_only_message) -def ir_to_bytes(ir: Iterable[IRType], /) -> bytes: - """Serialize a list of IR elements to a bytestring. Inverts ir_from_bytes.""" +class BackgroundWriteDatabase(ExampleDatabase): + """A wrapper which defers writes on the given database to a background thread. + + Calls to :meth:`~hypothesis.database.ExampleDatabase.fetch` wait for any + enqueued writes to finish before fetching from the database. + """ + + def __init__(self, db: ExampleDatabase) -> None: + super().__init__() + self._db = db + self._queue: Queue[tuple[str, tuple[bytes, ...]]] = Queue() + self._thread = Thread(target=self._worker, daemon=True) + self._thread.start() + # avoid an unbounded timeout during gc. 0.1 should be plenty for most + # use cases. + weakref.finalize(self, self._join, 0.1) + + def __repr__(self) -> str: + return f"BackgroundWriteDatabase({self._db!r})" + + def _worker(self) -> None: + while True: + method, args = self._queue.get() + getattr(self._db, method)(*args) + self._queue.task_done() + + def _join(self, timeout: Optional[float] = None) -> None: + # copy of Queue.join with a timeout. 
https://bugs.python.org/issue9634 + with self._queue.all_tasks_done: + while self._queue.unfinished_tasks: + self._queue.all_tasks_done.wait(timeout) + + def fetch(self, key: bytes) -> Iterable[bytes]: + self._join() + return self._db.fetch(key) + + def save(self, key: bytes, value: bytes) -> None: + self._queue.put(("save", (key, value))) + + def delete(self, key: bytes, value: bytes) -> None: + self._queue.put(("delete", (key, value))) + + def move(self, src: bytes, dest: bytes, value: bytes) -> None: + self._queue.put(("move", (src, dest, value))) + + def _start_listening(self) -> None: + self._db.add_listener(self._broadcast_change) + + def _stop_listening(self) -> None: + self._db.remove_listener(self._broadcast_change) + + +def _pack_uleb128(value: int) -> bytes: + """ + Serialize an integer into variable-length bytes. For each byte, the first 7 + bits represent (part of) the integer, while the last bit indicates whether the + integer continues into the next byte. + + https://en.wikipedia.org/wiki/LEB128 + """ + parts = bytearray() + assert value >= 0 + while True: + # chop off 7 bits + byte = value & ((1 << 7) - 1) + value >>= 7 + # set the continuation bit if we have more left + if value: + byte |= 1 << 7 + + parts.append(byte) + if not value: + break + return bytes(parts) + + +def _unpack_uleb128(buffer: bytes) -> tuple[int, int]: + """ + Inverts _pack_uleb128, and also returns the index at which at which we stopped + reading. + """ + value = 0 + for i, byte in enumerate(buffer): + n = byte & ((1 << 7) - 1) + value |= n << (i * 7) + + if not byte >> 7: + break + return (i + 1, value) + + +def choices_to_bytes(ir: Iterable[ChoiceT], /) -> bytes: + """Serialize a list of IR elements to a bytestring. Inverts choices_from_bytes.""" # We use a custom serialization format for this, which might seem crazy - but our # data is a flat sequence of elements, and standard tools like protobuf or msgpack # don't deal well with e.g. 
nonstandard bit-pattern-NaNs, or invalid-utf8 unicode. @@ -709,16 +1084,15 @@ def ir_to_bytes(ir: Iterable[IRType], /) -> bytes: parts.append((tag | size).to_bytes(1, "big")) else: parts.append((tag | 0b11111).to_bytes(1, "big")) - parts.append(struct.pack("!H", size)) + parts.append(_pack_uleb128(size)) parts.append(elem) return b"".join(parts) -def ir_from_bytes(buffer: bytes, /) -> list[IRType]: - """Deserialize a bytestring to a list of IR elements. Inverts ir_to_bytes.""" +def _choices_from_bytes(buffer: bytes, /) -> tuple[ChoiceT, ...]: # See above for an explanation of the format. - parts: list[IRType] = [] + parts: list[ChoiceT] = [] idx = 0 while idx < len(buffer): tag = buffer[idx] >> 5 @@ -729,8 +1103,8 @@ def ir_from_bytes(buffer: bytes, /) -> list[IRType]: parts.append(bool(size)) continue if size == 0b11111: - (size,) = struct.unpack_from("!H", buffer, offset=idx) - idx += 2 + (offset, size) = _unpack_uleb128(buffer[idx:]) + idx += offset chunk = buffer[idx : idx + size] idx += size @@ -744,4 +1118,19 @@ def ir_from_bytes(buffer: bytes, /) -> list[IRType]: else: assert tag == 4 parts.append(chunk.decode(errors="surrogatepass")) - return parts + return tuple(parts) + + +def choices_from_bytes(buffer: bytes, /) -> Optional[tuple[ChoiceT, ...]]: + """ + Deserialize a bytestring to a tuple of choices. Inverts choices_to_bytes. + + Returns None if the given bytestring is not a valid serialization of choice + sequences. + """ + try: + return _choices_from_bytes(buffer) + except Exception: + # deserialization error, eg because our format changed or someone put junk + # data in the db. + return None diff --git a/contrib/python/hypothesis/py3/hypothesis/entry_points.py b/contrib/python/hypothesis/py3/hypothesis/entry_points.py index 82a010da423..4a68af7c436 100644 --- a/contrib/python/hypothesis/py3/hypothesis/entry_points.py +++ b/contrib/python/hypothesis/py3/hypothesis/entry_points.py @@ -17,11 +17,13 @@ your package. 
import importlib.metadata import os +from collections.abc import Generator, Sequence +from importlib.metadata import EntryPoint -def get_entry_points(): +def get_entry_points() -> Generator[EntryPoint, None, None]: try: - eps = importlib.metadata.entry_points(group="hypothesis") + eps: Sequence[EntryPoint] = importlib.metadata.entry_points(group="hypothesis") except TypeError: # pragma: no cover # Load-time selection requires Python >= 3.10. See also # https://importlib-metadata.readthedocs.io/en/latest/using.html @@ -29,7 +31,7 @@ def get_entry_points(): yield from eps -def run(): +def run() -> None: if not os.environ.get("HYPOTHESIS_NO_PLUGINS"): for entry in get_entry_points(): # pragma: no cover hook = entry.load() diff --git a/contrib/python/hypothesis/py3/hypothesis/errors.py b/contrib/python/hypothesis/py3/hypothesis/errors.py index 3adae78b6be..18e71f5928d 100644 --- a/contrib/python/hypothesis/py3/hypothesis/errors.py +++ b/contrib/python/hypothesis/py3/hypothesis/errors.py @@ -8,7 +8,8 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -from typing import Literal +from datetime import timedelta +from typing import Any, Literal, Optional from hypothesis.internal.compat import ExceptionGroup @@ -27,7 +28,7 @@ class UnsatisfiedAssumption(HypothesisException): If you're seeing this error something has gone wrong. """ - def __init__(self, reason=None): + def __init__(self, reason: Optional[str] = None) -> None: self.reason = reason @@ -38,7 +39,7 @@ class NoSuchExample(HypothesisException): unable to find one. 
""" - def __init__(self, condition_string, extra=""): + def __init__(self, condition_string: str, extra: str = "") -> None: super().__init__(f"No examples found of condition {condition_string}{extra}") @@ -55,6 +56,10 @@ class Unsatisfiable(_Trimmable): """ +class ChoiceTooLarge(HypothesisException): + """An internal error raised by choice_from_index.""" + + class Flaky(_Trimmable): """Base class for indeterministic failures. Usually one of the more specific subclasses (FlakyFailure or FlakyStrategyDefinition) is raised.""" @@ -179,7 +184,7 @@ class Frozen(HypothesisException): after freeze() has been called.""" -def __getattr__(name): +def __getattr__(name: str) -> Any: if name == "MultipleFailures": from hypothesis._settings import note_deprecation from hypothesis.internal.compat import BaseExceptionGroup @@ -199,7 +204,7 @@ def __getattr__(name): class DeadlineExceeded(_Trimmable): """Raised when an individual test body has taken too long to run.""" - def __init__(self, runtime, deadline): + def __init__(self, runtime: timedelta, deadline: timedelta) -> None: super().__init__( "Test took %.2fms, which exceeds the deadline of %.2fms" % (runtime.total_seconds() * 1000, deadline.total_seconds() * 1000) @@ -207,7 +212,9 @@ class DeadlineExceeded(_Trimmable): self.runtime = runtime self.deadline = deadline - def __reduce__(self): + def __reduce__( + self, + ) -> tuple[type["DeadlineExceeded"], tuple[timedelta, timedelta]]: return (type(self), (self.runtime, self.deadline)) @@ -241,6 +248,9 @@ class SmallSearchSpaceWarning(HypothesisWarning): in a meaningful way, for example by only creating default instances.""" +CannotProceedScopeT = Literal["verified", "exhausted", "discard_test_case", "other"] + + class BackendCannotProceed(HypothesisException): """UNSTABLE API @@ -266,9 +276,5 @@ class BackendCannotProceed(HypothesisException): this backend. 
""" - def __init__( - self, - scope: Literal["verified", "exhausted", "discard_test_case", "other"] = "other", - /, - ) -> None: + def __init__(self, scope: CannotProceedScopeT = "other", /) -> None: self.scope = scope diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/_array_helpers.py b/contrib/python/hypothesis/py3/hypothesis/extra/_array_helpers.py index 681d55e8f22..78e821a2473 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/_array_helpers.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/_array_helpers.py @@ -21,18 +21,18 @@ from hypothesis.utils.conventions import UniqueIdentifier, not_set __all__ = [ "NDIM_MAX", - "Shape", - "BroadcastableShapes", "BasicIndex", - "check_argument", - "order_check", - "check_valid_dims", + "BasicIndexStrategy", + "BroadcastableShapes", + "MutuallyBroadcastableShapesStrategy", + "Shape", "array_shapes", - "valid_tuple_axes", "broadcastable_shapes", + "check_argument", + "check_valid_dims", "mutually_broadcastable_shapes", - "MutuallyBroadcastableShapesStrategy", - "BasicIndexStrategy", + "order_check", + "valid_tuple_axes", ] diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py b/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py index f3a7ecc1557..8c192c0069c 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/array_api.py @@ -63,10 +63,10 @@ __all__ = [ ] -RELEASED_VERSIONS = ("2021.12", "2022.12", "2023.12") +RELEASED_VERSIONS = ("2021.12", "2022.12", "2023.12", "2024.12") NOMINAL_VERSIONS = (*RELEASED_VERSIONS, "draft") assert sorted(NOMINAL_VERSIONS) == list(NOMINAL_VERSIONS) # sanity check -NominalVersion = Literal["2021.12", "2022.12", "2023.12", "draft"] +NominalVersion = Literal["2021.12", "2022.12", "2023.12", "2024.12", "draft"] assert get_args(NominalVersion) == NOMINAL_VERSIONS # sanity check @@ -499,9 +499,6 @@ def _arrays( >>> xps.arrays(xp, xp.int8, 3, elements={"min_value": 
10}).example() Array([125, 13, 79], dtype=int8) - Refer to :doc:`What you can generate and how <data>` for passing - your own elements strategy. - .. code-block:: pycon >>> xps.arrays(xp, xp.float32, 3, elements=floats(0, 1, width=32)).example() diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/cli.py b/contrib/python/hypothesis/py3/hypothesis/extra/cli.py index 2e5ea5265eb..20c7a79e41b 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/cli.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/cli.py @@ -9,12 +9,6 @@ # obtain one at https://mozilla.org/MPL/2.0/. """ -.. _hypothesis-cli: - ----------------- -hypothesis[cli] ----------------- - :: $ hypothesis --help @@ -30,7 +24,7 @@ hypothesis[cli] write `hypothesis write` writes property-based tests for you! This module requires the :pypi:`click` package, and provides Hypothesis' command-line -interface, for e.g. :doc:`'ghostwriting' tests <ghostwriter>` via the terminal. +interface, for e.g. :ref:`'ghostwriting' tests <ghostwriter>` via the terminal. It's also where `HypoFuzz <https://hypofuzz.com/>`__ adds the :command:`hypothesis fuzz` command (`learn more about that here <https://hypofuzz.com/docs/quickstart.html>`__). """ diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/codemods.py b/contrib/python/hypothesis/py3/hypothesis/extra/codemods.py index 00e6ad30b1d..f39ad34d640 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/codemods.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/codemods.py @@ -9,12 +9,6 @@ # obtain one at https://mozilla.org/MPL/2.0/. """ -.. _codemods: - --------------------- -hypothesis[codemods] --------------------- - This module provides codemods based on the :pypi:`LibCST` library, which can both detect *and automatically fix* issues with code that uses Hypothesis, including upgrading from deprecated features to our recommended style. 
diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/django/__init__.py b/contrib/python/hypothesis/py3/hypothesis/extra/django/__init__.py index 39e6e0182e8..cb49b7a5096 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/django/__init__.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/django/__init__.py @@ -24,7 +24,7 @@ __all__ = [ "TestCase", "TransactionTestCase", "from_field", + "from_form", "from_model", "register_field_strategy", - "from_form", ] diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/dpcontracts.py b/contrib/python/hypothesis/py3/hypothesis/extra/dpcontracts.py index bf2a267240c..7c2f0843687 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/dpcontracts.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/dpcontracts.py @@ -9,10 +9,6 @@ # obtain one at https://mozilla.org/MPL/2.0/. """ ------------------------ -hypothesis[dpcontracts] ------------------------ - This module provides tools for working with the :pypi:`dpcontracts` library, because `combining contracts and property-based testing works really well <https://hillelwayne.com/talks/beyond-unit-tests/>`_. diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/ghostwriter.py b/contrib/python/hypothesis/py3/hypothesis/extra/ghostwriter.py index d1936894ff9..2008a3d3090 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/ghostwriter.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/ghostwriter.py @@ -67,7 +67,7 @@ generally do their best to write you a useful test. You can also use Legal questions? While the ghostwriter fragments and logic is under the MPL-2.0 license like the rest of Hypothesis, the *output* from the ghostwriter is made available under the `Creative Commons Zero (CC0) - <https://creativecommons.org/share-your-work/public-domain/cc0/>`__ + <https://creativecommons.org/public-domain/cc0/>`__ public domain dedication, so you can use it without any restrictions. 
""" @@ -648,7 +648,7 @@ def _imports_for_strategy(strategy): for f in strategy.flat_conditions: imports |= _imports_for_object(f) if isinstance(strategy, FlatMapStrategy): - imports |= _imports_for_strategy(strategy.flatmapped_strategy) + imports |= _imports_for_strategy(strategy.base) imports |= _imports_for_object(strategy.expand) # recurse through one_of to handle e.g. from_type(Optional[Foo]) diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/lark.py b/contrib/python/hypothesis/py3/hypothesis/extra/lark.py index 9b26519a6e7..04d01812ac4 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/lark.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/lark.py @@ -9,17 +9,13 @@ # obtain one at https://mozilla.org/MPL/2.0/. """ ----------------- -hypothesis[lark] ----------------- - This extra can be used to generate strings matching any context-free grammar, using the `Lark parser library <https://github.com/lark-parser/lark>`_. It currently only supports Lark's native EBNF syntax, but we plan to extend this to support other common syntaxes such as ANTLR and :rfc:`5234` ABNF. Lark already `supports loading grammars -<https://lark-parser.readthedocs.io/en/latest/nearley.html>`_ +<https://lark-parser.readthedocs.io/en/stable/tools.html#importing-grammars-from-nearley-js>`_ from `nearley.js <https://nearley.js.org/>`_, so you may not have to write your own at all. 
""" @@ -28,10 +24,13 @@ from inspect import signature from typing import Optional import lark -from lark.grammar import NonTerminal, Terminal +from lark.grammar import NonTerminal, Rule, Symbol, Terminal +from lark.lark import Lark +from lark.lexer import TerminalDef from hypothesis import strategies as st from hypothesis.errors import InvalidArgument +from hypothesis.internal.conjecture.data import ConjectureData from hypothesis.internal.conjecture.utils import calc_label_from_name from hypothesis.internal.validation import check_type from hypothesis.strategies._internal.regex import IncompatibleWithAlphabet @@ -40,7 +39,9 @@ from hypothesis.strategies._internal.utils import cacheable, defines_strategy __all__ = ["from_lark"] -def get_terminal_names(terminals, rules, ignore_names): +def get_terminal_names( + terminals: list[TerminalDef], rules: list[Rule], ignore_names: list[str] +) -> set[str]: """Get names of all terminals in the grammar. The arguments are the results of calling ``Lark.grammar.compile()``, @@ -60,13 +61,15 @@ class LarkStrategy(st.SearchStrategy): See ``from_lark`` for details. """ - def __init__(self, grammar, start, explicit, alphabet): + def __init__( + self, + grammar: Lark, + start: Optional[str], + explicit: dict[str, st.SearchStrategy[str]], + alphabet: st.SearchStrategy[str], + ) -> None: assert isinstance(grammar, lark.lark.Lark) - if start is None: - start = grammar.options.start - if not isinstance(start, list): - start = [start] - self.grammar = grammar + start: list[str] = grammar.options.start if start is None else [start] # This is a total hack, but working around the changes is a nicer user # experience than breaking for anyone who doesn't instantly update their @@ -76,19 +79,18 @@ class LarkStrategy(st.SearchStrategy): terminals, rules, ignore_names = grammar.grammar.compile(start, ()) elif "start" in compile_args: # pragma: no cover # Support lark <= 0.10.0, without the terminals_to_keep argument. 
- terminals, rules, ignore_names = grammar.grammar.compile(start) + terminals, rules, ignore_names = grammar.grammar.compile(start) # type: ignore else: # pragma: no cover # This branch is to support lark <= 0.7.1, without the start argument. - terminals, rules, ignore_names = grammar.grammar.compile() + terminals, rules, ignore_names = grammar.grammar.compile() # type: ignore - self.names_to_symbols = {} + self.names_to_symbols: dict[str, Symbol] = {} for r in rules: - t = r.origin - self.names_to_symbols[t.name] = t + self.names_to_symbols[r.origin.name] = r.origin disallowed = set() - self.terminal_strategies = {} + self.terminal_strategies: dict[str, st.SearchStrategy[str]] = {} for t in terminals: self.names_to_symbols[t.name] = Terminal(t.name) s = st.from_regex(t.pattern.to_regexp(), fullmatch=True, alphabet=alphabet) @@ -119,7 +121,8 @@ class LarkStrategy(st.SearchStrategy): ) self.terminal_strategies.update(explicit) - nonterminals = {} + # can in fact contain any symbol, despite its name. 
+ nonterminals: dict[str, list[tuple[Symbol, ...]]] = {} for rule in rules: if disallowed.isdisjoint(r.name for r in rule.expansion): @@ -149,15 +152,15 @@ class LarkStrategy(st.SearchStrategy): k: st.sampled_from(sorted(v, key=len)) for k, v in nonterminals.items() } - self.__rule_labels = {} + self.__rule_labels: dict[str, int] = {} - def do_draw(self, data): - state = [] + def do_draw(self, data: ConjectureData) -> str: + state: list[str] = [] start = data.draw(self.start) self.draw_symbol(data, start, state) return "".join(state) - def rule_label(self, name): + def rule_label(self, name: str) -> int: try: return self.__rule_labels[name] except KeyError: @@ -165,20 +168,25 @@ class LarkStrategy(st.SearchStrategy): name, calc_label_from_name(f"LARK:{name}") ) - def draw_symbol(self, data, symbol, draw_state): + def draw_symbol( + self, + data: ConjectureData, + symbol: Symbol, + draw_state: list[str], + ) -> None: if isinstance(symbol, Terminal): strategy = self.terminal_strategies[symbol.name] draw_state.append(data.draw(strategy)) else: assert isinstance(symbol, NonTerminal) - data.start_example(self.rule_label(symbol.name)) + data.start_span(self.rule_label(symbol.name)) expansion = data.draw(self.nonterminal_strategies[symbol.name]) for e in expansion: self.draw_symbol(data, e, draw_state) self.gen_ignore(data, draw_state) - data.stop_example() + data.stop_span() - def gen_ignore(self, data, draw_state): + def gen_ignore(self, data: ConjectureData, draw_state: list[str]) -> None: if self.ignored_symbols and data.draw_boolean(1 / 4): emit = data.draw(st.sampled_from(self.ignored_symbols)) self.draw_symbol(data, emit, draw_state) diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/numpy.py b/contrib/python/hypothesis/py3/hypothesis/extra/numpy.py index 34f8278c249..1ab322aec89 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/numpy.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/numpy.py @@ -68,26 +68,26 @@ _SupportsArray = 
_try_import("numpy._typing._array_like", "_SupportsArray") __all__ = [ "BroadcastableShapes", - "from_dtype", - "arrays", + "array_dtypes", "array_shapes", - "scalar_dtypes", + "arrays", + "basic_indices", "boolean_dtypes", - "unsigned_integer_dtypes", - "integer_dtypes", - "floating_dtypes", + "broadcastable_shapes", + "byte_string_dtypes", "complex_number_dtypes", "datetime64_dtypes", + "floating_dtypes", + "from_dtype", + "integer_array_indices", + "integer_dtypes", + "mutually_broadcastable_shapes", + "nested_dtypes", + "scalar_dtypes", "timedelta64_dtypes", - "byte_string_dtypes", "unicode_string_dtypes", - "array_dtypes", - "nested_dtypes", + "unsigned_integer_dtypes", "valid_tuple_axes", - "broadcastable_shapes", - "mutually_broadcastable_shapes", - "basic_indices", - "integer_array_indices", ] TIME_RESOLUTIONS = tuple("Y M D h m s ms us ns ps fs as".split()) @@ -590,6 +590,7 @@ def defines_dtype_strategy(strat: T) -> T: @defines_dtype_strategy def boolean_dtypes() -> st.SearchStrategy["np.dtype[np.bool_]"]: + """Return a strategy for boolean dtypes.""" return st.just("?") # type: ignore[arg-type] @@ -1196,7 +1197,9 @@ def integer_array_indices( shape: Shape, *, result_shape: st.SearchStrategy[Shape] = array_shapes(), - dtype: "np.dtype[I] | np.dtype[np.signedinteger[Any]]" = np.dtype(int), + dtype: "np.dtype[I] | np.dtype[np.signedinteger[Any] | np.bool[bool]]" = np.dtype( + int + ), ) -> "st.SearchStrategy[tuple[NDArray[I], ...]]": """Return a search strategy for tuples of integer-arrays that, when used to index into an array of shape ``shape``, given an array whose shape diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/pandas/__init__.py b/contrib/python/hypothesis/py3/hypothesis/extra/pandas/__init__.py index 2fd9c627a1e..5d8a4361f52 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/pandas/__init__.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/pandas/__init__.py @@ -17,4 +17,4 @@ from hypothesis.extra.pandas.impl import ( 
series, ) -__all__ = ["indexes", "range_indexes", "series", "column", "columns", "data_frames"] +__all__ = ["column", "columns", "data_frames", "indexes", "range_indexes", "series"] diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/pandas/impl.py b/contrib/python/hypothesis/py3/hypothesis/extra/pandas/impl.py index 5beeede287d..9cbf580c734 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/pandas/impl.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/pandas/impl.py @@ -128,7 +128,7 @@ def elements_and_dtype(elements, dtype, source=None): name = f"draw({prefix}elements)" try: return np.array([value], dtype=dtype)[0] - except (TypeError, ValueError): + except (TypeError, ValueError, OverflowError): raise InvalidArgument( "Cannot convert %s=%r of type %s to dtype %s" % (name, value, type(value).__name__, dtype.str) diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/pytz.py b/contrib/python/hypothesis/py3/hypothesis/extra/pytz.py index aac9f0c8939..574d8e9b41f 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/pytz.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/pytz.py @@ -9,10 +9,6 @@ # obtain one at https://mozilla.org/MPL/2.0/. """ ----------------- -hypothesis[pytz] ----------------- - This module provides :pypi:`pytz` timezones. If you are unable to use the stdlib :mod:`zoneinfo` module, e.g. via the diff --git a/contrib/python/hypothesis/py3/hypothesis/extra/redis.py b/contrib/python/hypothesis/py3/hypothesis/extra/redis.py index 516c2d88e44..9ec1b67e180 100644 --- a/contrib/python/hypothesis/py3/hypothesis/extra/redis.py +++ b/contrib/python/hypothesis/py3/hypothesis/extra/redis.py @@ -8,9 +8,12 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
+import base64 +import json from collections.abc import Iterable from contextlib import contextmanager from datetime import timedelta +from typing import Any from redis import Redis @@ -36,13 +39,18 @@ class RedisExampleDatabase(ExampleDatabase): *, expire_after: timedelta = timedelta(days=8), key_prefix: bytes = b"hypothesis-example:", + listener_channel: str = "hypothesis-changes", ): + super().__init__() check_type(Redis, redis, "redis") check_type(timedelta, expire_after, "expire_after") check_type(bytes, key_prefix, "key_prefix") + check_type(str, listener_channel, "listener_channel") self.redis = redis self._expire_after = expire_after self._prefix = key_prefix + self.listener_channel = listener_channel + self._pubsub: Any = None def __repr__(self) -> str: return ( @@ -50,29 +58,82 @@ class RedisExampleDatabase(ExampleDatabase): ) @contextmanager - def _pipeline(self, *reset_expire_keys, transaction=False, auto_execute=True): + def _pipeline( + self, + *reset_expire_keys, + execute_and_publish=True, + event_type=None, + to_publish=None, + ): # Context manager to batch updates and expiry reset, reducing TCP roundtrips - pipe = self.redis.pipeline(transaction=transaction) + pipe = self.redis.pipeline() yield pipe for key in reset_expire_keys: pipe.expire(self._prefix + key, self._expire_after) - if auto_execute: - pipe.execute() + if execute_and_publish: + changed = pipe.execute() + # pipe.execute returns the rows modified for each operation, which includes + # the operations performed during the yield, followed by the n operations + # from pipe.exire. Look at just the operations from during the yield. 
+ changed = changed[: -len(reset_expire_keys)] + if any(count > 0 for count in changed): + assert to_publish is not None + assert event_type is not None + self._publish((event_type, to_publish)) + + def _publish(self, event): + event = (event[0], tuple(self._encode(v) for v in event[1])) + self.redis.publish(self.listener_channel, json.dumps(event)) + + def _encode(self, value: bytes) -> str: + return base64.b64encode(value).decode("ascii") + + def _decode(self, value: str) -> bytes: + return base64.b64decode(value) def fetch(self, key: bytes) -> Iterable[bytes]: - with self._pipeline(key, auto_execute=False) as pipe: + with self._pipeline(key, execute_and_publish=False) as pipe: pipe.smembers(self._prefix + key) yield from pipe.execute()[0] def save(self, key: bytes, value: bytes) -> None: - with self._pipeline(key) as pipe: + with self._pipeline(key, event_type="save", to_publish=(key, value)) as pipe: pipe.sadd(self._prefix + key, value) def delete(self, key: bytes, value: bytes) -> None: - with self._pipeline(key) as pipe: + with self._pipeline(key, event_type="delete", to_publish=(key, value)) as pipe: pipe.srem(self._prefix + key, value) def move(self, src: bytes, dest: bytes, value: bytes) -> None: - with self._pipeline(src, dest) as pipe: + if src == dest: + self.save(dest, value) + return + + with self._pipeline(src, dest, execute_and_publish=False) as pipe: pipe.srem(self._prefix + src, value) pipe.sadd(self._prefix + dest, value) + + changed = pipe.execute() + if changed[0] > 0: + self._publish(("delete", (src, value))) + if changed[1] > 0: + self._publish(("save", (dest, value))) + + def _handle_message(self, message: dict) -> None: + # other message types include "subscribe" and "unsubscribe". these are + # sent to the client, but not to the pubsub channel. 
+ assert message["type"] == "message" + data = json.loads(message["data"]) + event_type = data[0] + self._broadcast_change( + (event_type, tuple(self._decode(v) for v in data[1])) # type: ignore + ) + + def _start_listening(self) -> None: + self._pubsub = self.redis.pubsub() + self._pubsub.subscribe(**{self.listener_channel: self._handle_message}) + + def _stop_listening(self) -> None: + self._pubsub.unsubscribe() + self._pubsub.close() + self._pubsub = None diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/cache.py b/contrib/python/hypothesis/py3/hypothesis/internal/cache.py index 8eb1bdba7e5..57b3116d618 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/cache.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/cache.py @@ -10,21 +10,25 @@ import threading from collections import OrderedDict +from typing import Any, Generic, TypeVar import attr from hypothesis.errors import InvalidArgument +K = TypeVar("K") +V = TypeVar("V") + @attr.s(slots=True) -class Entry: - key = attr.ib() - value = attr.ib() - score = attr.ib() - pins = attr.ib(default=0) +class Entry(Generic[K, V]): + key: K = attr.ib() + value: V = attr.ib() + score: int = attr.ib() + pins: int = attr.ib(default=0) @property - def sort_key(self): + def sort_key(self) -> tuple[int, ...]: if self.pins == 0: # Unpinned entries are sorted by score. return (0, self.score) @@ -34,7 +38,7 @@ class Entry: return (1,) -class GenericCache: +class GenericCache(Generic[K, V]): """Generic supertype for cache implementations. Defines a dict-like mapping with a maximum size, where as well as mapping @@ -57,9 +61,9 @@ class GenericCache: on_access and on_evict to implement a specific scoring strategy. 
""" - __slots__ = ("max_size", "_threadlocal") + __slots__ = ("_threadlocal", "max_size") - def __init__(self, max_size): + def __init__(self, max_size: int): if max_size <= 0: raise InvalidArgument("Cache size must be at least one.") @@ -73,7 +77,7 @@ class GenericCache: self._threadlocal = threading.local() @property - def keys_to_indices(self): + def keys_to_indices(self) -> dict[K, int]: try: return self._threadlocal.keys_to_indices except AttributeError: @@ -81,27 +85,27 @@ class GenericCache: return self._threadlocal.keys_to_indices @property - def data(self): + def data(self) -> list[Entry[K, V]]: try: return self._threadlocal.data except AttributeError: self._threadlocal.data = [] return self._threadlocal.data - def __len__(self): + def __len__(self) -> int: assert len(self.keys_to_indices) == len(self.data) return len(self.data) - def __contains__(self, key): + def __contains__(self, key: K) -> bool: return key in self.keys_to_indices - def __getitem__(self, key): + def __getitem__(self, key: K) -> V: i = self.keys_to_indices[key] result = self.data[i] self.__entry_was_accessed(i) return result.value - def __setitem__(self, key, value): + def __setitem__(self, key: K, value: V) -> None: evicted = None try: i = self.keys_to_indices[key] @@ -135,7 +139,7 @@ class GenericCache: def __iter__(self): return iter(self.keys_to_indices) - def pin(self, key, value): + def pin(self, key: K, value: V) -> None: """Mark ``key`` as pinned (with the given value). That is, it may not be evicted until ``unpin(key)`` has been called. The same key may be pinned multiple times, possibly changing its value, and will not be @@ -149,7 +153,7 @@ class GenericCache: if entry.pins == 1: self.__balance(i) - def unpin(self, key): + def unpin(self, key: K) -> None: """Undo one previous call to ``pin(key)``. The value stays the same. 
Once all calls are undone this key may be evicted as normal.""" i = self.keys_to_indices[key] @@ -160,20 +164,20 @@ class GenericCache: if entry.pins == 0: self.__balance(i) - def is_pinned(self, key): + def is_pinned(self, key: K) -> bool: """Returns True if the key is currently pinned.""" i = self.keys_to_indices[key] return self.data[i].pins > 0 - def clear(self): + def clear(self) -> None: """Remove all keys, regardless of their pinned status.""" del self.data[:] self.keys_to_indices.clear() - def __repr__(self): + def __repr__(self) -> str: return "{" + ", ".join(f"{e.key!r}: {e.value!r}" for e in self.data) + "}" - def new_entry(self, key, value): + def new_entry(self, key: K, value: V) -> int: """Called when a key is written that does not currently appear in the map. @@ -181,7 +185,7 @@ class GenericCache: """ raise NotImplementedError - def on_access(self, key, value, score): + def on_access(self, key: K, value: V, score: Any) -> Any: """Called every time a key that is already in the map is read or written. @@ -189,11 +193,11 @@ class GenericCache: """ return score - def on_evict(self, key, value, score): + def on_evict(self, key: K, value: V, score: Any) -> Any: """Called after a key has been evicted, with the score it had had at the point of eviction.""" - def check_valid(self): + def check_valid(self) -> None: """Debugging method for use in tests. Asserts that all of the cache's invariants hold. 
When everything @@ -206,7 +210,7 @@ class GenericCache: if j < len(self.data): assert e.sort_key <= self.data[j].sort_key, self.data - def __entry_was_accessed(self, i): + def __entry_was_accessed(self, i: int) -> None: entry = self.data[i] new_score = self.on_access(entry.key, entry.value, entry.score) if new_score != entry.score: @@ -216,14 +220,14 @@ class GenericCache: if entry.pins == 0: self.__balance(i) - def __swap(self, i, j): + def __swap(self, i: int, j: int) -> None: assert i < j assert self.data[j].sort_key < self.data[i].sort_key self.data[i], self.data[j] = self.data[j], self.data[i] self.keys_to_indices[self.data[i].key] = i self.keys_to_indices[self.data[j].key] = j - def __balance(self, i): + def __balance(self, i: int) -> None: """When we have made a modification to the heap such that the heap property has been violated locally around i but previously held for all other indexes (and no other values have been modified), @@ -244,7 +248,7 @@ class GenericCache: else: break - def __out_of_order(self, i, j): + def __out_of_order(self, i: int, j: int) -> bool: """Returns True if the indices i, j are in the wrong order. i must be the parent of j. @@ -253,7 +257,7 @@ class GenericCache: return self.data[j].sort_key < self.data[i].sort_key -class LRUReusedCache(GenericCache): +class LRUReusedCache(GenericCache[K, V]): """The only concrete implementation of GenericCache we use outside of tests currently. 
@@ -270,18 +274,18 @@ class LRUReusedCache(GenericCache): __slots__ = ("__tick",) - def __init__(self, max_size): + def __init__(self, max_size: int): super().__init__(max_size) - self.__tick = 0 + self.__tick: int = 0 - def tick(self): + def tick(self) -> int: self.__tick += 1 return self.__tick - def new_entry(self, key, value): + def new_entry(self, key: K, value: V) -> Any: return (1, self.tick()) - def on_access(self, key, value, score): + def on_access(self, key: K, value: V, score: Any) -> Any: return (2, self.tick()) @@ -313,7 +317,7 @@ class LRUCache: self._threadlocal = threading.local() @property - def cache(self): + def cache(self) -> dict[Any, Any]: try: return self._threadlocal.cache except AttributeError: diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/choice.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/choice.py new file mode 100644 index 00000000000..9cdd278ec0d --- /dev/null +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/choice.py @@ -0,0 +1,625 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import math +from collections.abc import Iterable, Sequence +from typing import ( + TYPE_CHECKING, + Callable, + Literal, + Optional, + TypedDict, + TypeVar, + Union, + cast, +) + +import attr + +from hypothesis.errors import ChoiceTooLarge +from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float +from hypothesis.internal.conjecture.utils import identity +from hypothesis.internal.floats import float_to_int, make_float_clamper, sign_aware_lte +from hypothesis.internal.intervalsets import IntervalSet + +T = TypeVar("T") + +if TYPE_CHECKING: + from typing import TypeAlias + + +class IntegerConstraints(TypedDict): + min_value: Optional[int] + max_value: Optional[int] + weights: Optional[dict[int, float]] + shrink_towards: int + + +class FloatConstraints(TypedDict): + min_value: float + max_value: float + allow_nan: bool + smallest_nonzero_magnitude: float + + +class StringConstraints(TypedDict): + intervals: IntervalSet + min_size: int + max_size: int + + +class BytesConstraints(TypedDict): + min_size: int + max_size: int + + +class BooleanConstraints(TypedDict): + p: float + + +ChoiceT: "TypeAlias" = Union[int, str, bool, float, bytes] +ChoiceConstraintsT: "TypeAlias" = Union[ + IntegerConstraints, + FloatConstraints, + StringConstraints, + BytesConstraints, + BooleanConstraints, +] +ChoiceTypeT: "TypeAlias" = Literal["integer", "string", "boolean", "float", "bytes"] +ChoiceKeyT: "TypeAlias" = Union[ + int, str, bytes, tuple[Literal["bool"], bool], tuple[Literal["float"], int] +] + + [email protected](slots=True) +class ChoiceTemplate: + type: Literal["simplest"] = attr.ib() + count: Optional[int] = attr.ib() + + def __attrs_post_init__(self) -> None: + if self.count is not None: + assert self.count > 0 + + [email protected](slots=True, repr=False, eq=False) +class ChoiceNode: + type: ChoiceTypeT = attr.ib() + value: ChoiceT = attr.ib() + constraints: ChoiceConstraintsT = attr.ib() + was_forced: bool = attr.ib() + index: Optional[int] = 
attr.ib(default=None) + + def copy( + self, + *, + with_value: Optional[ChoiceT] = None, + with_constraints: Optional[ChoiceConstraintsT] = None, + ) -> "ChoiceNode": + # we may want to allow this combination in the future, but for now it's + # a footgun. + if self.was_forced: + assert with_value is None, "modifying a forced node doesn't make sense" + # explicitly not copying index. node indices are only assigned via + # ExampleRecord. This prevents footguns with relying on stale indices + # after copying. + return ChoiceNode( + type=self.type, + value=self.value if with_value is None else with_value, + constraints=( + self.constraints if with_constraints is None else with_constraints + ), + was_forced=self.was_forced, + ) + + @property + def trivial(self) -> bool: + """ + A node is trivial if it cannot be simplified any further. This does not + mean that modifying a trivial node can't produce simpler test cases when + viewing the tree as a whole. Just that when viewing this node in + isolation, this is the simplest the node can get. + """ + if self.was_forced: + return True + + if self.type != "float": + zero_value = choice_from_index(0, self.type, self.constraints) + return choice_equal(self.value, zero_value) + else: + constraints = cast(FloatConstraints, self.constraints) + min_value = constraints["min_value"] + max_value = constraints["max_value"] + shrink_towards = 0.0 + + if min_value == -math.inf and max_value == math.inf: + return choice_equal(self.value, shrink_towards) + + if ( + not math.isinf(min_value) + and not math.isinf(max_value) + and math.ceil(min_value) <= math.floor(max_value) + ): + # the interval contains an integer. 
the simplest integer is the + # one closest to shrink_towards + shrink_towards = max(math.ceil(min_value), shrink_towards) + shrink_towards = min(math.floor(max_value), shrink_towards) + return choice_equal(self.value, float(shrink_towards)) + + # the real answer here is "the value in [min_value, max_value] with + # the lowest denominator when represented as a fraction". + # It would be good to compute this correctly in the future, but it's + # also not incorrect to be conservative here. + return False + + def __eq__(self, other: object) -> bool: + if not isinstance(other, ChoiceNode): + return NotImplemented + + return ( + self.type == other.type + and choice_equal(self.value, other.value) + and choice_constraints_equal(self.type, self.constraints, other.constraints) + and self.was_forced == other.was_forced + ) + + def __hash__(self) -> int: + return hash( + ( + self.type, + choice_key(self.value), + choice_constraints_key(self.type, self.constraints), + self.was_forced, + ) + ) + + def __repr__(self) -> str: + forced_marker = " [forced]" if self.was_forced else "" + return f"{self.type} {self.value!r}{forced_marker} {self.constraints!r}" + + +def _size_to_index(size: int, *, alphabet_size: int) -> int: + # this is the closed form of this geometric series: + # for i in range(size): + # index += alphabet_size**i + if alphabet_size <= 0: + assert size == 0 + return 0 + if alphabet_size == 1: + return size + v = (alphabet_size**size - 1) // (alphabet_size - 1) + # mypy thinks (m: int) // (n: int) -> Any. assert it back to int. + return cast(int, v) + + +def _index_to_size(index: int, alphabet_size: int) -> int: + if alphabet_size == 0: + return 0 + elif alphabet_size == 1: + # there is only one string of each size, so the size is equal to its + # ordering. + return index + + # the closed-form inverse of _size_to_index is + # size = math.floor(math.log(index * (alphabet_size - 1) + 1, alphabet_size)) + # which is fast, but suffers from float precision errors. 
As performance is + # relatively critical here, we'll use this formula by default, but fall back to + # a much slower integer-only logarithm when the calculation is too close for + # comfort. + total = index * (alphabet_size - 1) + 1 + size = math.log(total, alphabet_size) + + # if this computation is close enough that it could have been affected by + # floating point errors, use a much slower integer-only logarithm instead, + # which is guaranteed to be precise. + if 0 < math.ceil(size) - size < 1e-7: + s = 0 + while total >= alphabet_size: + total //= alphabet_size + s += 1 + return s + return math.floor(size) + + +def collection_index( + choice: Sequence[T], + *, + min_size: int, + alphabet_size: int, + to_order: Callable[[T], int], +) -> int: + # Collections are ordered by counting the number of values of each size, + # starting with min_size. alphabet_size indicates how many options there + # are for a single element. to_order orders an element by returning an n ≥ 0. + + # we start by adding the size to the index, relative to min_size. + index = _size_to_index(len(choice), alphabet_size=alphabet_size) - _size_to_index( + min_size, alphabet_size=alphabet_size + ) + # We then add each element c to the index, starting from the end (so "ab" is + # simpler than "ba"). Each loop takes c at position i in the sequence and + # computes the number of sequences of size i which come before it in the ordering. + + # this running_exp computation is equivalent to doing + # index += (alphabet_size**i) * n + # but reuses intermediate exponentiation steps for efficiency. 
+ running_exp = 1 + for c in reversed(choice): + index += running_exp * to_order(c) + running_exp *= alphabet_size + return index + + +def collection_value( + index: int, + *, + min_size: int, + alphabet_size: int, + from_order: Callable[[int], T], +) -> list[T]: + from hypothesis.internal.conjecture.engine import BUFFER_SIZE + + # this function is probably easiest to make sense of as an inverse of + # collection_index, tracking ~corresponding lines of code between the two. + + index += _size_to_index(min_size, alphabet_size=alphabet_size) + size = _index_to_size(index, alphabet_size=alphabet_size) + # index -> value computation can be arbitrarily expensive for arbitrarily + # large min_size collections. short-circuit if the resulting size would be + # obviously-too-large. callers will generally turn this into a .mark_overrun(). + if size >= BUFFER_SIZE: + raise ChoiceTooLarge + + # subtract out the amount responsible for the size + index -= _size_to_index(size, alphabet_size=alphabet_size) + vals: list[T] = [] + for i in reversed(range(size)): + # optimization for common case when we hit index 0. Exponentiation + # on large integers is expensive! + if index == 0: + n = 0 + else: + n = index // (alphabet_size**i) + # subtract out the nearest multiple of alphabet_size**i + index -= n * (alphabet_size**i) + vals.append(from_order(n)) + return vals + + +def zigzag_index(value: int, *, shrink_towards: int) -> int: + # value | 0 1 -1 2 -2 3 -3 4 + # index | 0 1 2 3 4 5 6 7 + index = 2 * abs(shrink_towards - value) + if value > shrink_towards: + index -= 1 + return index + + +def zigzag_value(index: int, *, shrink_towards: int) -> int: + assert index >= 0 + # count how many "steps" away from shrink_towards we are. + n = (index + 1) // 2 + # now check if we're stepping up or down from shrink_towards. 
+ if (index % 2) == 0: + n *= -1 + return shrink_towards + n + + +def choice_to_index(choice: ChoiceT, constraints: ChoiceConstraintsT) -> int: + # This function takes a choice in the choice sequence and returns the + # complexity index of that choice from among its possible values, where 0 + # is the simplest. + # + # Note that the index of a choice depends on its constraints. The simplest value + # (at index 0) for {"min_value": None, "max_value": None} is 0, while for + # {"min_value": 1, "max_value": None} the simplest value is 1. + # + # choice_from_index inverts this function. An invariant on both functions is + # that they must be injective. Unfortunately, floats do not currently respect + # this. That's not *good*, but nothing has blown up - yet. And ordering + # floats in a sane manner is quite hard, so I've left it for another day. + + if isinstance(choice, int) and not isinstance(choice, bool): + # Let a = shrink_towards. + # * Unbounded: Ordered by (|a - x|, sgn(a - x)). Think of a zigzag. + # [a, a + 1, a - 1, a + 2, a - 2, ...] + # * Semi-bounded: Same as unbounded, except stop on one side when you hit + # {min, max}_value. so min_value=-1 a=0 has order + # [0, 1, -1, 2, 3, 4, ...] + # * Bounded: Same as unbounded and semibounded, except stop on each side + # when you hit {min, max}_value. + # + # To simplify and gain intuition about this ordering, you can think about + # the most common case where 0 is first (a = 0). We deviate from this only + # rarely, e.g. for datetimes, where we generally want year 2000 to be + # simpler than year 0. 
+ constraints = cast(IntegerConstraints, constraints) + shrink_towards = constraints["shrink_towards"] + min_value = constraints["min_value"] + max_value = constraints["max_value"] + + if min_value is not None: + shrink_towards = max(min_value, shrink_towards) + if max_value is not None: + shrink_towards = min(max_value, shrink_towards) + + if min_value is None and max_value is None: + # case: unbounded + return zigzag_index(choice, shrink_towards=shrink_towards) + elif min_value is not None and max_value is None: + # case: semibounded below + + # min_value = -2 + # index | 0 1 2 3 4 5 6 7 + # v | 0 1 -1 2 -2 3 4 5 + if abs(choice - shrink_towards) <= (shrink_towards - min_value): + return zigzag_index(choice, shrink_towards=shrink_towards) + return choice - min_value + elif max_value is not None and min_value is None: + # case: semibounded above + if abs(choice - shrink_towards) <= (max_value - shrink_towards): + return zigzag_index(choice, shrink_towards=shrink_towards) + return max_value - choice + else: + # case: bounded + + # range = [-2, 5] + # shrink_towards = 2 + # index | 0 1 2 3 4 5 6 7 + # v | 2 3 1 4 0 5 -1 -2 + # + # ^ with zero weights at index = [0, 2, 6] + # index | 0 1 2 3 4 + # v | 3 4 0 5 -2 + + assert min_value is not None + assert max_value is not None + assert constraints["weights"] is None or all( + w > 0 for w in constraints["weights"].values() + ), "technically possible but really annoying to support zero weights" + + # check which side gets exhausted first + if (shrink_towards - min_value) < (max_value - shrink_towards): + # Below shrink_towards gets exhausted first. Equivalent to + # semibounded below + if abs(choice - shrink_towards) <= (shrink_towards - min_value): + return zigzag_index(choice, shrink_towards=shrink_towards) + return choice - min_value + else: + # Above shrink_towards gets exhausted first. 
Equivalent to semibounded + # above + if abs(choice - shrink_towards) <= (max_value - shrink_towards): + return zigzag_index(choice, shrink_towards=shrink_towards) + return max_value - choice + elif isinstance(choice, bool): + constraints = cast(BooleanConstraints, constraints) + # Ordered by [False, True]. + p = constraints["p"] + if not (2 ** (-64) < p < (1 - 2 ** (-64))): + # only one option is possible, so whatever it is is first. + return 0 + return int(choice) + elif isinstance(choice, bytes): + constraints = cast(BytesConstraints, constraints) + return collection_index( + list(choice), + min_size=constraints["min_size"], + alphabet_size=2**8, + to_order=identity, + ) + elif isinstance(choice, str): + constraints = cast(StringConstraints, constraints) + intervals = constraints["intervals"] + return collection_index( + choice, + min_size=constraints["min_size"], + alphabet_size=len(intervals), + to_order=intervals.index_from_char_in_shrink_order, + ) + elif isinstance(choice, float): + sign = int(math.copysign(1.0, choice) < 0) + return (sign << 64) | float_to_lex(abs(choice)) + else: + raise NotImplementedError + + +def choice_from_index( + index: int, choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT +) -> ChoiceT: + assert index >= 0 + if choice_type == "integer": + constraints = cast(IntegerConstraints, constraints) + shrink_towards = constraints["shrink_towards"] + min_value = constraints["min_value"] + max_value = constraints["max_value"] + + if min_value is not None: + shrink_towards = max(min_value, shrink_towards) + if max_value is not None: + shrink_towards = min(max_value, shrink_towards) + + if min_value is None and max_value is None: + # case: unbounded + return zigzag_value(index, shrink_towards=shrink_towards) + elif min_value is not None and max_value is None: + # case: semibounded below + if index <= zigzag_index(min_value, shrink_towards=shrink_towards): + return zigzag_value(index, shrink_towards=shrink_towards) + return index + 
min_value + elif max_value is not None and min_value is None: + # case: semibounded above + if index <= zigzag_index(max_value, shrink_towards=shrink_towards): + return zigzag_value(index, shrink_towards=shrink_towards) + return max_value - index + else: + # case: bounded + assert min_value is not None + assert max_value is not None + assert constraints["weights"] is None or all( + w > 0 for w in constraints["weights"].values() + ), "possible but really annoying to support zero weights" + + if (shrink_towards - min_value) < (max_value - shrink_towards): + # equivalent to semibounded below case + if index <= zigzag_index(min_value, shrink_towards=shrink_towards): + return zigzag_value(index, shrink_towards=shrink_towards) + return index + min_value + else: + # equivalent to semibounded above case + if index <= zigzag_index(max_value, shrink_towards=shrink_towards): + return zigzag_value(index, shrink_towards=shrink_towards) + return max_value - index + elif choice_type == "boolean": + constraints = cast(BooleanConstraints, constraints) + # Ordered by [False, True]. + p = constraints["p"] + only = None + if p <= 2 ** (-64): + only = False + elif p >= (1 - 2 ** (-64)): + only = True + + assert index in {0, 1} + if only is not None: + # only one choice + assert index == 0 + return only + return bool(index) + elif choice_type == "bytes": + constraints = cast(BytesConstraints, constraints) + value_b = collection_value( + index, + min_size=constraints["min_size"], + alphabet_size=2**8, + from_order=identity, + ) + return bytes(value_b) + elif choice_type == "string": + constraints = cast(StringConstraints, constraints) + intervals = constraints["intervals"] + # _s because mypy is unhappy with reusing different-typed names in branches, + # even if the branches are disjoint. 
+ value_s = collection_value( + index, + min_size=constraints["min_size"], + alphabet_size=len(intervals), + from_order=intervals.char_in_shrink_order, + ) + return "".join(value_s) + elif choice_type == "float": + constraints = cast(FloatConstraints, constraints) + sign = -1 if index >> 64 else 1 + result = sign * lex_to_float(index & ((1 << 64) - 1)) + + clamper = make_float_clamper( + min_value=constraints["min_value"], + max_value=constraints["max_value"], + smallest_nonzero_magnitude=constraints["smallest_nonzero_magnitude"], + allow_nan=constraints["allow_nan"], + ) + return clamper(result) + else: + raise NotImplementedError + + +def choice_permitted(choice: ChoiceT, constraints: ChoiceConstraintsT) -> bool: + if isinstance(choice, int) and not isinstance(choice, bool): + constraints = cast(IntegerConstraints, constraints) + min_value = constraints["min_value"] + max_value = constraints["max_value"] + if min_value is not None and choice < min_value: + return False + return not (max_value is not None and choice > max_value) + elif isinstance(choice, float): + constraints = cast(FloatConstraints, constraints) + if math.isnan(choice): + return constraints["allow_nan"] + if 0 < abs(choice) < constraints["smallest_nonzero_magnitude"]: + return False + return sign_aware_lte(constraints["min_value"], choice) and sign_aware_lte( + choice, constraints["max_value"] + ) + elif isinstance(choice, str): + constraints = cast(StringConstraints, constraints) + if len(choice) < constraints["min_size"]: + return False + if ( + constraints["max_size"] is not None + and len(choice) > constraints["max_size"] + ): + return False + return all(ord(c) in constraints["intervals"] for c in choice) + elif isinstance(choice, bytes): + constraints = cast(BytesConstraints, constraints) + if len(choice) < constraints["min_size"]: + return False + return constraints["max_size"] is None or len(choice) <= constraints["max_size"] + elif isinstance(choice, bool): + constraints = 
cast(BooleanConstraints, constraints) + if constraints["p"] <= 0: + return choice is False + if constraints["p"] >= 1: + return choice is True + return True + else: + raise NotImplementedError(f"unhandled type {type(choice)} with value {choice}") + + +def choices_key(choices: Sequence[ChoiceT]) -> tuple[ChoiceKeyT, ...]: + return tuple(choice_key(choice) for choice in choices) + + +def choice_key(choice: ChoiceT) -> ChoiceKeyT: + if isinstance(choice, float): + # float_to_int to distinguish -0.0/0.0, signaling/nonsignaling nans, etc, + # and then add a "float" key to avoid colliding with actual integers. + return ("float", float_to_int(choice)) + if isinstance(choice, bool): + # avoid choice_key(0) == choice_key(False) + return ("bool", choice) + return choice + + +def choice_equal(choice1: ChoiceT, choice2: ChoiceT) -> bool: + assert type(choice1) is type(choice2), (choice1, choice2) + return choice_key(choice1) == choice_key(choice2) + + +def choice_constraints_equal( + choice_type: ChoiceTypeT, + constraints1: ChoiceConstraintsT, + constraints2: ChoiceConstraintsT, +) -> bool: + return choice_constraints_key(choice_type, constraints1) == choice_constraints_key( + choice_type, constraints2 + ) + + +def choice_constraints_key(choice_type, constraints): + if choice_type == "float": + return ( + float_to_int(constraints["min_value"]), + float_to_int(constraints["max_value"]), + constraints["allow_nan"], + constraints["smallest_nonzero_magnitude"], + ) + if choice_type == "integer": + return ( + constraints["min_value"], + constraints["max_value"], + None if constraints["weights"] is None else tuple(constraints["weights"]), + constraints["shrink_towards"], + ) + return tuple(constraints[key] for key in sorted(constraints)) + + +def choices_size(choices: Iterable[ChoiceT]) -> int: + from hypothesis.database import choices_to_bytes + + return len(choices_to_bytes(choices)) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py 
b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py index b818719b318..2f87e84f70b 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py @@ -8,53 +8,54 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -import abc -import contextlib import math import time from collections import defaultdict -from collections.abc import Iterable, Iterator, Sequence +from collections.abc import Hashable, Iterable, Iterator, Sequence from enum import IntEnum +from functools import cached_property from random import Random -from sys import float_info -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Literal, - NoReturn, - Optional, - TypedDict, - TypeVar, - Union, -) +from typing import TYPE_CHECKING, Any, NoReturn, Optional, TypeVar, Union import attr -from hypothesis.errors import Frozen, InvalidArgument, StopTest +from hypothesis.errors import ( + CannotProceedScopeT, + ChoiceTooLarge, + Frozen, + InvalidArgument, + StopTest, +) from hypothesis.internal.cache import LRUCache -from hypothesis.internal.compat import add_note, floor, int_from_bytes, int_to_bytes -from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float -from hypothesis.internal.conjecture.junkdrawer import ( - IntList, - gc_cumulative_time, - uniform, +from hypothesis.internal.compat import add_note +from hypothesis.internal.conjecture.choice import ( + BooleanConstraints, + BytesConstraints, + ChoiceConstraintsT, + ChoiceNode, + ChoiceT, + ChoiceTemplate, + ChoiceTypeT, + FloatConstraints, + IntegerConstraints, + StringConstraints, + choice_constraints_key, + choice_from_index, + choice_permitted, + choices_size, ) -from hypothesis.internal.conjecture.utils import ( - INT_SIZES, - INT_SIZES_SAMPLER, - Sampler, - calc_label_from_name, - many, +from hypothesis.internal.conjecture.junkdrawer import 
IntList, gc_cumulative_time +from hypothesis.internal.conjecture.providers import ( + COLLECTION_DEFAULT_MAX_SIZE, + HypothesisProvider, + PrimitiveProvider, ) +from hypothesis.internal.conjecture.utils import calc_label_from_name +from hypothesis.internal.escalation import InterestingOrigin from hypothesis.internal.floats import ( - SIGNALING_NAN, SMALLEST_SUBNORMAL, float_to_int, int_to_float, - make_float_clamper, - next_down, - next_up, sign_aware_lte, ) from hypothesis.internal.intervalsets import IntervalSet @@ -63,65 +64,37 @@ from hypothesis.reporting import debug_report if TYPE_CHECKING: from typing import TypeAlias - from typing_extensions import dataclass_transform - from hypothesis.strategies import SearchStrategy from hypothesis.strategies._internal.strategies import Ex -else: - TypeAlias = object - - def dataclass_transform(): - def wrapper(tp): - return tp - - return wrapper - - -TOP_LABEL = calc_label_from_name("top") -InterestingOrigin = tuple[ - type[BaseException], str, int, tuple[Any, ...], tuple[tuple[Any, ...], ...] 
-] -TargetObservations = dict[str, Union[int, float]] - -T = TypeVar("T") - - -class IntegerKWargs(TypedDict): - min_value: Optional[int] - max_value: Optional[int] - weights: Optional[dict[int, float]] - shrink_towards: int - - -class FloatKWargs(TypedDict): - min_value: float - max_value: float - allow_nan: bool - smallest_nonzero_magnitude: float - - -class StringKWargs(TypedDict): - intervals: IntervalSet - min_size: int - max_size: int -class BytesKWargs(TypedDict): - min_size: int - max_size: int +def __getattr__(name: str) -> Any: + if name == "AVAILABLE_PROVIDERS": + from hypothesis._settings import note_deprecation + from hypothesis.internal.conjecture.providers import AVAILABLE_PROVIDERS + note_deprecation( + "hypothesis.internal.conjecture.data.AVAILABLE_PROVIDERS has been moved to " + "hypothesis.internal.conjecture.providers.AVAILABLE_PROVIDERS.", + since="2025-01-25", + has_codemod=False, + stacklevel=1, + ) + return AVAILABLE_PROVIDERS -class BooleanKWargs(TypedDict): - p: float + raise AttributeError( + f"Module 'hypothesis.internal.conjecture.data' has no attribute {name}" + ) -IRType: TypeAlias = Union[int, str, bool, float, bytes] -IRKWargsType: TypeAlias = Union[ - IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs +T = TypeVar("T") +TargetObservations = dict[str, Union[int, float]] +# index, choice_type, constraints, forced value +MisalignedAt: "TypeAlias" = tuple[ + int, ChoiceTypeT, ChoiceConstraintsT, Optional[ChoiceT] ] -IRTypeName: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"] -# index, ir_type, kwargs, forced -MisalignedAt: TypeAlias = tuple[int, IRTypeName, IRKWargsType, Optional[IRType]] + +TOP_LABEL = calc_label_from_name("top") class ExtraInformation: @@ -147,7 +120,6 @@ class Status(IntEnum): return f"Status.{self.name}" -@dataclass_transform() @attr.s(slots=True, frozen=True) class StructuralCoverageTag: label: int = attr.ib() @@ -163,281 +135,180 @@ def structural_coverage(label: int) -> 
StructuralCoverageTag: return STRUCTURAL_COVERAGE_CACHE.setdefault(label, StructuralCoverageTag(label)) -NASTY_FLOATS = sorted( - [ - 0.0, - 0.5, - 1.1, - 1.5, - 1.9, - 1.0 / 3, - 10e6, - 10e-6, - 1.175494351e-38, - next_up(0.0), - float_info.min, - float_info.max, - 3.402823466e38, - 9007199254740992, - 1 - 10e-6, - 2 + 10e-6, - 1.192092896e-07, - 2.2204460492503131e-016, - ] - + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32 - + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64 - + [math.inf, math.nan] * 5 - + [SIGNALING_NAN], - key=float_to_lex, -) -NASTY_FLOATS = list(map(float, NASTY_FLOATS)) -NASTY_FLOATS.extend([-x for x in NASTY_FLOATS]) - -# These caches, especially the kwargs cache, can be quite hot and so we prefer -# LRUCache over LRUReusedCache for performance. We lose scan resistance, but -# that's probably fine here. -FLOAT_INIT_LOGIC_CACHE = LRUCache(4096) -POOLED_KWARGS_CACHE = LRUCache(4096) - -COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large" +# This cache can be quite hot and so we prefer LRUCache over LRUReusedCache for +# performance. We lose scan resistance, but that's probably fine here. +POOLED_CONSTRAINTS_CACHE = LRUCache(4096) -class Example: - """Examples track the hierarchical structure of draws from the byte stream, - within a single test run. +class Span: + """A span tracks the hierarchical structure of choices within a single test run. - Examples are created to mark regions of the byte stream that might be - useful to the shrinker, such as: - - The bytes used by a single draw from a strategy. - - Useful groupings within a strategy, such as individual list elements. - - Strategy-like helper functions that aren't first-class strategies. - - Each lowest-level draw of bits or bytes from the byte stream. - - A single top-level example that spans the entire input. + Spans are created to mark regions of the choice sequence that that are + logically related to each other. 
For instance, Hypothesis tracks: + - A single top-level span for the entire choice sequence + - A span for the choices made by each strategy + - Some strategies define additional spans within their choices. For instance, + st.lists() tracks the "should add another element" choice and the "add + another element" choices as separate spans. - Example-tracking allows the shrinker to try "high-level" transformations, - such as rearranging or deleting the elements of a list, without having - to understand their exact representation in the byte stream. + Spans provide useful information to the shrinker, mutator, targeted PBT, + and other subsystems of Hypothesis. - Rather than store each ``Example`` as a rich object, it is actually - just an index into the ``Examples`` class defined below. This has two - purposes: Firstly, for most properties of examples we will never need + Rather than store each ``Span`` as a rich object, it is actually + just an index into the ``Spans`` class defined below. This has two + purposes: Firstly, for most properties of spans we will never need to allocate storage at all, because most properties are not used on - most examples. Secondly, by storing the properties as compact lists + most spans. Secondly, by storing the spans as compact lists of integers, we save a considerable amount of space compared to Python's normal object size. This does have the downside that it increases the amount of allocation we do, and slows things down as a result, in some usage patterns because - we repeatedly allocate the same Example or int objects, but it will + we repeatedly allocate the same Span or int objects, but it will often dramatically reduce our memory usage, so is worth it. 
""" - __slots__ = ("owner", "index") + __slots__ = ("index", "owner") - def __init__(self, owner: "Examples", index: int) -> None: + def __init__(self, owner: "Spans", index: int) -> None: self.owner = owner self.index = index def __eq__(self, other: object) -> bool: if self is other: return True - if not isinstance(other, Example): + if not isinstance(other, Span): return NotImplemented return (self.owner is other.owner) and (self.index == other.index) def __ne__(self, other: object) -> bool: if self is other: return False - if not isinstance(other, Example): + if not isinstance(other, Span): return NotImplemented return (self.owner is not other.owner) or (self.index != other.index) def __repr__(self) -> str: - return f"examples[{self.index}]" + return f"spans[{self.index}]" @property def label(self) -> int: - """A label is an opaque value that associates each example with its + """A label is an opaque value that associates each span with its approximate origin, such as a particular strategy class or a particular kind of draw.""" return self.owner.labels[self.owner.label_indices[self.index]] @property def parent(self) -> Optional[int]: - """The index of the example that this one is nested directly within.""" + """The index of the span that this one is nested directly within.""" if self.index == 0: return None return self.owner.parentage[self.index] @property def start(self) -> int: - """The position of the start of this example in the byte stream.""" return self.owner.starts[self.index] @property def end(self) -> int: - """The position directly after the last byte in this byte stream. - i.e. the example corresponds to the half open region [start, end). - """ return self.owner.ends[self.index] @property - def ir_start(self) -> int: - return self.owner.ir_starts[self.index] - - @property - def ir_end(self) -> int: - return self.owner.ir_ends[self.index] - - @property def depth(self) -> int: - """Depth of this example in the example tree. 
The top-level example has a - depth of 0.""" + """ + Depth of this span in the span tree. The top-level span has a depth of 0. + """ return self.owner.depths[self.index] @property - def trivial(self) -> bool: - """An example is "trivial" if it only contains forced bytes and zero bytes. - All examples start out as trivial, and then get marked non-trivial when - we see a byte that is neither forced nor zero.""" - return self.index in self.owner.trivial - - @property def discarded(self) -> bool: - """True if this is example's ``stop_example`` call had ``discard`` set to + """True if this is span's ``stop_span`` call had ``discard`` set to ``True``. This means we believe that the shrinker should be able to delete - this example completely, without affecting the value produced by its enclosing + this span completely, without affecting the value produced by its enclosing strategy. Typically set when a rejection sampler decides to reject a generated value and try again.""" return self.index in self.owner.discarded @property - def length(self) -> int: - """The number of bytes in this example.""" + def choice_count(self) -> int: + """The number of choices in this span.""" return self.end - self.start @property - def ir_length(self) -> int: - """The number of ir nodes in this example.""" - return self.ir_end - self.ir_start - - @property - def children(self) -> "list[Example]": - """The list of all examples with this as a parent, in increasing index + def children(self) -> "list[Span]": + """The list of all spans with this as a parent, in increasing index order.""" return [self.owner[i] for i in self.owner.children[self.index]] -class ExampleProperty: - """There are many properties of examples that we calculate by +class SpanProperty: + """There are many properties of spans that we calculate by essentially rerunning the test case multiple times based on the - calls which we record in ExampleRecord. + calls which we record in SpanProperty. 
This class defines a visitor, subclasses of which can be used to calculate these properties. """ - def __init__(self, examples: "Examples"): - self.example_stack: "list[int]" = [] - self.examples = examples - self.bytes_read = 0 - self.example_count = 0 - self.block_count = 0 - self.ir_node_count = 0 - self.result: Any = None + def __init__(self, spans: "Spans"): + self.span_stack: list[int] = [] + self.spans = spans + self.span_count = 0 + self.choice_count = 0 def run(self) -> Any: """Rerun the test case with this visitor and return the results of ``self.finish()``.""" - self.begin() - blocks = self.examples.blocks - for record in self.examples.trail: - if record == DRAW_BITS_RECORD: - self.bytes_read = blocks.endpoints[self.block_count] - self.block(self.block_count) - self.block_count += 1 - elif record == IR_NODE_RECORD: - self.ir_node_count += 1 - elif record >= START_EXAMPLE_RECORD: - self.__push(record - START_EXAMPLE_RECORD) + for record in self.spans.trail: + if record == TrailType.CHOICE: + self.choice_count += 1 + elif record >= TrailType.START_SPAN: + self.__push(record - TrailType.START_SPAN) else: assert record in ( - STOP_EXAMPLE_DISCARD_RECORD, - STOP_EXAMPLE_NO_DISCARD_RECORD, + TrailType.STOP_SPAN_DISCARD, + TrailType.STOP_SPAN_NO_DISCARD, ) - self.__pop(discarded=record == STOP_EXAMPLE_DISCARD_RECORD) + self.__pop(discarded=record == TrailType.STOP_SPAN_DISCARD) return self.finish() def __push(self, label_index: int) -> None: - i = self.example_count - assert i < len(self.examples) - self.start_example(i, label_index=label_index) - self.example_count += 1 - self.example_stack.append(i) + i = self.span_count + assert i < len(self.spans) + self.start_span(i, label_index=label_index) + self.span_count += 1 + self.span_stack.append(i) def __pop(self, *, discarded: bool) -> None: - i = self.example_stack.pop() - self.stop_example(i, discarded=discarded) - - def begin(self) -> None: - """Called at the beginning of the run to initialise any - relevant 
state.""" - self.result = IntList.of_length(len(self.examples)) - - def start_example(self, i: int, label_index: int) -> None: - """Called at the start of each example, with ``i`` the - index of the example and ``label_index`` the index of - its label in ``self.examples.labels``.""" + i = self.span_stack.pop() + self.stop_span(i, discarded=discarded) - def block(self, i: int) -> None: - """Called with each ``draw_bits`` call, with ``i`` the index of the - corresponding block in ``self.examples.blocks``""" + def start_span(self, i: int, label_index: int) -> None: + """Called at the start of each span, with ``i`` the + index of the span and ``label_index`` the index of + its label in ``self.spans.labels``.""" - def stop_example(self, i: int, *, discarded: bool) -> None: - """Called at the end of each example, with ``i`` the - index of the example and ``discarded`` being ``True`` if ``stop_example`` + def stop_span(self, i: int, *, discarded: bool) -> None: + """Called at the end of each span, with ``i`` the + index of the span and ``discarded`` being ``True`` if ``stop_span`` was called with ``discard=True``.""" def finish(self) -> Any: - return self.result - - -def calculated_example_property(cls: type[ExampleProperty]) -> Any: - """Given an ``ExampleProperty`` as above we use this decorator - to transform it into a lazy property on the ``Examples`` class, - which has as its value the result of calling ``cls.run()``, - computed the first time the property is accessed. 
- - This has the slightly weird result that we are defining nested - classes which get turned into properties.""" - name = cls.__name__ - cache_name = "__" + name - - def lazy_calculate(self: "Examples") -> Any: - result = getattr(self, cache_name, None) - if result is None: - result = cls(self).run() - setattr(self, cache_name, result) - return result - - lazy_calculate.__name__ = cls.__name__ - lazy_calculate.__qualname__ = cls.__qualname__ - return property(lazy_calculate) - + raise NotImplementedError -DRAW_BITS_RECORD = 0 -STOP_EXAMPLE_DISCARD_RECORD = 1 -STOP_EXAMPLE_NO_DISCARD_RECORD = 2 -START_EXAMPLE_RECORD = 3 -IR_NODE_RECORD = calc_label_from_name("ir draw record") +class TrailType(IntEnum): + STOP_SPAN_DISCARD = 1 + STOP_SPAN_NO_DISCARD = 2 + START_SPAN = 3 + CHOICE = calc_label_from_name("ir draw record") -class ExampleRecord: - """Records the series of ``start_example``, ``stop_example``, and - ``draw_bits`` calls so that these may be stored in ``Examples`` and +class SpanRecord: + """Records the series of ``start_span``, ``stop_span``, and + ``draw_bits`` calls so that these may be stored in ``Spans`` and replayed when we need to know about the structure of individual - ``Example`` objects. + ``Span`` objects. 
Note that there is significant similarity between this class and ``DataObserver``, and the plan is to eventually unify them, but @@ -446,182 +317,166 @@ class ExampleRecord: def __init__(self) -> None: self.labels: list[int] = [] - self.__index_of_labels: "dict[int, int] | None" = {} + self.__index_of_labels: Optional[dict[int, int]] = {} self.trail = IntList() - self.ir_nodes: list[IRNode] = [] + self.nodes: list[ChoiceNode] = [] def freeze(self) -> None: self.__index_of_labels = None - def record_ir_draw(self) -> None: - self.trail.append(IR_NODE_RECORD) + def record_choice(self) -> None: + self.trail.append(TrailType.CHOICE) - def start_example(self, label: int) -> None: + def start_span(self, label: int) -> None: assert self.__index_of_labels is not None try: i = self.__index_of_labels[label] except KeyError: i = self.__index_of_labels.setdefault(label, len(self.labels)) self.labels.append(label) - self.trail.append(START_EXAMPLE_RECORD + i) + self.trail.append(TrailType.START_SPAN + i) - def stop_example(self, *, discard: bool) -> None: + def stop_span(self, *, discard: bool) -> None: if discard: - self.trail.append(STOP_EXAMPLE_DISCARD_RECORD) + self.trail.append(TrailType.STOP_SPAN_DISCARD) else: - self.trail.append(STOP_EXAMPLE_NO_DISCARD_RECORD) + self.trail.append(TrailType.STOP_SPAN_NO_DISCARD) - def draw_bits(self) -> None: - self.trail.append(DRAW_BITS_RECORD) +class _starts_and_ends(SpanProperty): + def __init__(self, spans: "Spans") -> None: + super().__init__(spans) + self.starts = IntList.of_length(len(self.spans)) + self.ends = IntList.of_length(len(self.spans)) -class Examples: - """A lazy collection of ``Example`` objects, derived from - the record of recorded behaviour in ``ExampleRecord``. - - Behaves logically as if it were a list of ``Example`` objects, - but actually mostly exists as a compact store of information - for them to reference into. 
All properties on here are best - understood as the backing storage for ``Example`` and are - described there. - """ - - def __init__(self, record: ExampleRecord, blocks: "Blocks") -> None: - self.trail = record.trail - self.labels = record.labels - self.__length = self.trail.count( - STOP_EXAMPLE_DISCARD_RECORD - ) + record.trail.count(STOP_EXAMPLE_NO_DISCARD_RECORD) - self.blocks = blocks - self.__children: "list[Sequence[int]] | None" = None + def start_span(self, i: int, label_index: int) -> None: + self.starts[i] = self.choice_count - class _starts_and_ends(ExampleProperty): - def begin(self) -> None: - self.starts = IntList.of_length(len(self.examples)) - self.ends = IntList.of_length(len(self.examples)) + def stop_span(self, i: int, *, discarded: bool) -> None: + self.ends[i] = self.choice_count - def start_example(self, i: int, label_index: int) -> None: - self.starts[i] = self.bytes_read + def finish(self) -> tuple[IntList, IntList]: + return (self.starts, self.ends) - def stop_example(self, i: int, *, discarded: bool) -> None: - self.ends[i] = self.bytes_read - def finish(self) -> tuple[IntList, IntList]: - return (self.starts, self.ends) +class _discarded(SpanProperty): + def __init__(self, spans: "Spans") -> None: + super().__init__(spans) + self.result: set[int] = set() - starts_and_ends: "tuple[IntList, IntList]" = calculated_example_property( - _starts_and_ends - ) + def finish(self) -> frozenset[int]: + return frozenset(self.result) - @property - def starts(self) -> IntList: - return self.starts_and_ends[0] + def stop_span(self, i: int, *, discarded: bool) -> None: + if discarded: + self.result.add(i) - @property - def ends(self) -> IntList: - return self.starts_and_ends[1] - class _ir_starts_and_ends(ExampleProperty): - def begin(self) -> None: - self.starts = IntList.of_length(len(self.examples)) - self.ends = IntList.of_length(len(self.examples)) +class _parentage(SpanProperty): + def __init__(self, spans: "Spans") -> None: + 
super().__init__(spans) + self.result = IntList.of_length(len(self.spans)) - def start_example(self, i: int, label_index: int) -> None: - self.starts[i] = self.ir_node_count + def stop_span(self, i: int, *, discarded: bool) -> None: + if i > 0: + self.result[i] = self.span_stack[-1] - def stop_example(self, i: int, *, discarded: bool) -> None: - self.ends[i] = self.ir_node_count + def finish(self) -> IntList: + return self.result - def finish(self) -> tuple[IntList, IntList]: - return (self.starts, self.ends) - ir_starts_and_ends: "tuple[IntList, IntList]" = calculated_example_property( - _ir_starts_and_ends - ) +class _depths(SpanProperty): + def __init__(self, spans: "Spans") -> None: + super().__init__(spans) + self.result = IntList.of_length(len(self.spans)) - @property - def ir_starts(self) -> IntList: - return self.ir_starts_and_ends[0] + def start_span(self, i: int, label_index: int) -> None: + self.result[i] = len(self.span_stack) - @property - def ir_ends(self) -> IntList: - return self.ir_starts_and_ends[1] + def finish(self) -> IntList: + return self.result - class _discarded(ExampleProperty): - def begin(self) -> None: - self.result: set[int] = set() - def finish(self) -> frozenset[int]: - return frozenset(self.result) +class _label_indices(SpanProperty): + def __init__(self, spans: "Spans") -> None: + super().__init__(spans) + self.result = IntList.of_length(len(self.spans)) - def stop_example(self, i: int, *, discarded: bool) -> None: - if discarded: - self.result.add(i) + def start_span(self, i: int, label_index: int) -> None: + self.result[i] = label_index - discarded: frozenset[int] = calculated_example_property(_discarded) + def finish(self) -> IntList: + return self.result - class _trivial(ExampleProperty): - def begin(self) -> None: - self.nontrivial = IntList.of_length(len(self.examples)) - self.result: set[int] = set() - def block(self, i: int) -> None: - if not self.examples.blocks.trivial(i): - self.nontrivial[self.example_stack[-1]] = 1 
+class _mutator_groups(SpanProperty): + def __init__(self, spans: "Spans") -> None: + super().__init__(spans) + self.groups: dict[int, set[tuple[int, int]]] = defaultdict(set) - def stop_example(self, i: int, *, discarded: bool) -> None: - if self.nontrivial[i]: - if self.example_stack: - self.nontrivial[self.example_stack[-1]] = 1 - else: - self.result.add(i) + def start_span(self, i: int, label_index: int) -> None: + # TODO should we discard start == end cases? occurs for eg st.data() + # which is conditionally or never drawn from. arguably swapping + # nodes with the empty list is a useful mutation enabled by start == end? + key = (self.spans[i].start, self.spans[i].end) + self.groups[label_index].add(key) - def finish(self) -> frozenset[int]: - return frozenset(self.result) + def finish(self) -> Iterable[set[tuple[int, int]]]: + # Discard groups with only one span, since the mutator can't + # do anything useful with them. + return [g for g in self.groups.values() if len(g) >= 2] - trivial: frozenset[int] = calculated_example_property(_trivial) - class _parentage(ExampleProperty): - def stop_example(self, i: int, *, discarded: bool) -> None: - if i > 0: - self.result[i] = self.example_stack[-1] +class Spans: + """A lazy collection of ``Span`` objects, derived from + the record of recorded behaviour in ``SpanRecord``. - parentage: IntList = calculated_example_property(_parentage) + Behaves logically as if it were a list of ``Span`` objects, + but actually mostly exists as a compact store of information + for them to reference into. All properties on here are best + understood as the backing storage for ``Span`` and are + described there. 
+ """ - class _depths(ExampleProperty): - def begin(self) -> None: - self.result = IntList.of_length(len(self.examples)) + def __init__(self, record: SpanRecord) -> None: + self.trail = record.trail + self.labels = record.labels + self.__length = self.trail.count( + TrailType.STOP_SPAN_DISCARD + ) + record.trail.count(TrailType.STOP_SPAN_NO_DISCARD) + self.__children: Optional[list[Sequence[int]]] = None - def start_example(self, i: int, label_index: int) -> None: - self.result[i] = len(self.example_stack) + @cached_property + def starts_and_ends(self) -> tuple[IntList, IntList]: + return _starts_and_ends(self).run() - depths: IntList = calculated_example_property(_depths) + @property + def starts(self) -> IntList: + return self.starts_and_ends[0] - class _label_indices(ExampleProperty): - def start_example(self, i: int, label_index: int) -> None: - self.result[i] = label_index + @property + def ends(self) -> IntList: + return self.starts_and_ends[1] - label_indices: IntList = calculated_example_property(_label_indices) + @cached_property + def discarded(self) -> frozenset[int]: + return _discarded(self).run() - class _mutator_groups(ExampleProperty): - def begin(self) -> None: - self.groups: "dict[int, set[tuple[int, int]]]" = defaultdict(set) + @cached_property + def parentage(self) -> IntList: + return _parentage(self).run() - def start_example(self, i: int, label_index: int) -> None: - # TODO should we discard start == end cases? occurs for eg st.data() - # which is conditionally or never drawn from. arguably swapping - # nodes with the empty list is a useful mutation enabled by start == end? - key = (self.examples[i].ir_start, self.examples[i].ir_end) - self.groups[label_index].add(key) + @cached_property + def depths(self) -> IntList: + return _depths(self).run() - def finish(self) -> Iterable[set[tuple[int, int]]]: - # Discard groups with only one example, since the mutator can't - # do anything useful with them. 
- return [g for g in self.groups.values() if len(g) >= 2] + @cached_property + def label_indices(self) -> IntList: + return _label_indices(self).run() - mutator_groups: list[set[tuple[int, int]]] = calculated_example_property( - _mutator_groups - ) + @cached_property + def mutator_groups(self) -> list[set[tuple[int, int]]]: + return _mutator_groups(self).run() @property def children(self) -> list[Sequence[int]]: @@ -641,233 +496,23 @@ class Examples: def __len__(self) -> int: return self.__length - def __getitem__(self, i: int) -> Example: - assert isinstance(i, int) - n = len(self) + def __getitem__(self, i: int) -> Span: + n = self.__length if i < -n or i >= n: raise IndexError(f"Index {i} out of range [-{n}, {n})") if i < 0: i += n - return Example(self, i) + return Span(self, i) # not strictly necessary as we have len/getitem, but required for mypy. # https://github.com/python/mypy/issues/9737 - def __iter__(self) -> Iterator[Example]: + def __iter__(self) -> Iterator[Span]: for i in range(len(self)): yield self[i] -@dataclass_transform() [email protected](slots=True, frozen=True) -class Block: - """Blocks track the flat list of lowest-level draws from the byte stream, - within a single test run. - - Block-tracking allows the shrinker to try "low-level" - transformations, such as minimizing the numeric value of an - individual call to ``draw_bits``. - """ - - start: int = attr.ib() - end: int = attr.ib() - - # Index of this block inside the overall list of blocks. - index: int = attr.ib() - - # True if this block's byte values were forced by a write operation. - # As long as the bytes before this block remain the same, modifying this - # block's bytes will have no effect. - forced: bool = attr.ib(repr=False) - - # True if this block's byte values are all 0. Reading this flag can be - # more convenient than explicitly checking a slice for non-zero bytes. 
- all_zero: bool = attr.ib(repr=False) - - @property - def bounds(self) -> tuple[int, int]: - return (self.start, self.end) - - @property - def length(self) -> int: - return self.end - self.start - - @property - def trivial(self) -> bool: - return self.forced or self.all_zero - - -class Blocks: - """A lazily calculated list of blocks for a particular ``ConjectureResult`` - or ``ConjectureData`` object. - - Pretends to be a list containing ``Block`` objects but actually only - contains their endpoints right up until the point where you want to - access the actual block, at which point it is constructed. - - This is designed to be as space efficient as possible, so will at - various points silently transform its representation into one - that is better suited for the current access pattern. - - In addition, it has a number of convenience methods for accessing - properties of the block object at index ``i`` that should generally - be preferred to using the Block objects directly, as it will not - have to allocate the actual object.""" - - __slots__ = ("endpoints", "owner", "__blocks", "__count", "__sparse") - owner: "Union[ConjectureData, ConjectureResult, None]" - __blocks: Union[dict[int, Block], list[Optional[Block]]] - - def __init__(self, owner: "ConjectureData") -> None: - self.owner = owner - self.endpoints = IntList() - self.__blocks = {} - self.__count = 0 - self.__sparse = True - - def add_endpoint(self, n: int) -> None: - """Add n to the list of endpoints.""" - assert isinstance(self.owner, ConjectureData) - self.endpoints.append(n) - - def transfer_ownership(self, new_owner: "ConjectureResult") -> None: - """Used to move ``Blocks`` over to a ``ConjectureResult`` object - when that is read to be used and we no longer want to keep the - whole ``ConjectureData`` around.""" - assert isinstance(new_owner, ConjectureResult) - self.owner = new_owner - self.__check_completion() - - def start(self, i: int) -> int: - """Equivalent to self[i].start.""" - i = 
self._check_index(i) - - if i == 0: - return 0 - else: - return self.end(i - 1) - - def end(self, i: int) -> int: - """Equivalent to self[i].end.""" - return self.endpoints[i] - - def all_bounds(self) -> Iterable[tuple[int, int]]: - """Equivalent to [(b.start, b.end) for b in self].""" - prev = 0 - for e in self.endpoints: - yield (prev, e) - prev = e - - @property - def last_block_length(self) -> int: - return self.end(-1) - self.start(-1) - - def __len__(self) -> int: - return len(self.endpoints) - - def __known_block(self, i: int) -> Optional[Block]: - try: - return self.__blocks[i] - except (KeyError, IndexError): - return None - - def trivial(self, i: int) -> Any: - """Equivalent to self.blocks[i].trivial.""" - if self.owner is not None: - return self.start(i) in self.owner.forced_indices or not any( - self.owner.buffer[self.start(i) : self.end(i)] - ) - else: - return self[i].trivial - - def _check_index(self, i: int) -> int: - n = len(self) - if i < -n or i >= n: - raise IndexError(f"Index {i} out of range [-{n}, {n})") - if i < 0: - i += n - return i - - def __getitem__(self, i: int) -> Block: - i = self._check_index(i) - assert i >= 0 - result = self.__known_block(i) - if result is not None: - return result - - # We store the blocks as a sparse dict mapping indices to the - # actual result, but this isn't the best representation once we - # stop being sparse and want to use most of the blocks. Switch - # over to a list at that point. 
- if self.__sparse and len(self.__blocks) * 2 >= len(self): - new_blocks: "list[Block | None]" = [None] * len(self) - assert isinstance(self.__blocks, dict) - for k, v in self.__blocks.items(): - new_blocks[k] = v - self.__sparse = False - self.__blocks = new_blocks - assert self.__blocks[i] is None - - start = self.start(i) - end = self.end(i) - - # We keep track of the number of blocks that have actually been - # instantiated so that when every block that could be instantiated - # has been we know that the list is complete and can throw away - # some data that we no longer need. - self.__count += 1 - - # Integrity check: We can't have allocated more blocks than we have - # positions for blocks. - assert self.__count <= len(self) - assert self.owner is not None - result = Block( - start=start, - end=end, - index=i, - forced=start in self.owner.forced_indices, - all_zero=not any(self.owner.buffer[start:end]), - ) - try: - self.__blocks[i] = result - except IndexError: - assert isinstance(self.__blocks, list) - assert len(self.__blocks) < len(self) - self.__blocks.extend([None] * (len(self) - len(self.__blocks))) - self.__blocks[i] = result - - self.__check_completion() - - return result - - def __check_completion(self) -> None: - """The list of blocks is complete if we have created every ``Block`` - object that we currently good and know that no more will be created. - - If this happens then we don't need to keep the reference to the - owner around, and delete it so that there is no circular reference. - The main benefit of this is that the gc doesn't need to run to collect - this because normal reference counting is enough. 
- """ - if self.__count == len(self) and isinstance(self.owner, ConjectureResult): - self.owner = None - - def __iter__(self) -> Iterator[Block]: - for i in range(len(self)): - yield self[i] - - def __repr__(self) -> str: - parts: "list[str]" = [] - for i in range(len(self)): - b = self.__known_block(i) - if b is None: - parts.append("...") - else: - parts.append(repr(b)) - return "Block([{}])".format(", ".join(parts)) - - class _Overrun: - status = Status.OVERRUN + status: Status = Status.OVERRUN def __repr__(self) -> str: return "Overrun" @@ -901,248 +546,31 @@ class DataObserver: """Mark this part of the tree as not worth re-exploring.""" def draw_integer( - self, value: int, *, kwargs: IntegerKWargs, was_forced: bool + self, value: int, *, constraints: IntegerConstraints, was_forced: bool ) -> None: pass def draw_float( - self, value: float, *, kwargs: FloatKWargs, was_forced: bool + self, value: float, *, constraints: FloatConstraints, was_forced: bool ) -> None: pass def draw_string( - self, value: str, *, kwargs: StringKWargs, was_forced: bool + self, value: str, *, constraints: StringConstraints, was_forced: bool ) -> None: pass def draw_bytes( - self, value: bytes, *, kwargs: BytesKWargs, was_forced: bool + self, value: bytes, *, constraints: BytesConstraints, was_forced: bool ) -> None: pass def draw_boolean( - self, value: bool, *, kwargs: BooleanKWargs, was_forced: bool + self, value: bool, *, constraints: BooleanConstraints, was_forced: bool ) -> None: pass [email protected](slots=True, repr=False, eq=False) -class IRNode: - ir_type: IRTypeName = attr.ib() - value: IRType = attr.ib() - kwargs: IRKWargsType = attr.ib() - was_forced: bool = attr.ib() - index: Optional[int] = attr.ib(default=None) - - def copy( - self, - *, - with_value: Optional[IRType] = None, - with_kwargs: Optional[IRKWargsType] = None, - ) -> "IRNode": - # we may want to allow this combination in the future, but for now it's - # a footgun. 
- if self.was_forced: - assert with_value is None, "modifying a forced node doesn't make sense" - # explicitly not copying index. node indices are only assigned via - # ExampleRecord. This prevents footguns with relying on stale indices - # after copying. - return IRNode( - ir_type=self.ir_type, - value=self.value if with_value is None else with_value, - kwargs=self.kwargs if with_kwargs is None else with_kwargs, - was_forced=self.was_forced, - ) - - @property - def trivial(self): - """ - A node is trivial if it cannot be simplified any further. This does not - mean that modifying a trivial node can't produce simpler test cases when - viewing the tree as a whole. Just that when viewing this node in - isolation, this is the simplest the node can get. - """ - if self.was_forced: - return True - - if self.ir_type == "integer": - shrink_towards = self.kwargs["shrink_towards"] - min_value = self.kwargs["min_value"] - max_value = self.kwargs["max_value"] - - # shrink_towards is not respected for unbounded integers. (though - # probably it should be?) - if min_value is None and max_value is None: - return self.value == 0 - - if min_value is not None: - shrink_towards = max(min_value, shrink_towards) - if max_value is not None: - shrink_towards = min(max_value, shrink_towards) - - return self.value == shrink_towards - if self.ir_type == "float": - min_value = self.kwargs["min_value"] - max_value = self.kwargs["max_value"] - shrink_towards = 0 - - if min_value == -math.inf and max_value == math.inf: - return ir_value_equal("float", self.value, shrink_towards) - - if ( - not math.isinf(min_value) - and not math.isinf(max_value) - and math.ceil(min_value) <= math.floor(max_value) - ): - # the interval contains an integer. 
the simplest integer is the - # one closest to shrink_towards - shrink_towards = max(math.ceil(min_value), shrink_towards) - shrink_towards = min(math.floor(max_value), shrink_towards) - return ir_value_equal("float", self.value, shrink_towards) - - # the real answer here is "the value in [min_value, max_value] with - # the lowest denominator when represented as a fraction". - # It would be good to compute this correctly in the future, but it's - # also not incorrect to be conservative here. - return False - if self.ir_type == "boolean": - p = self.kwargs["p"] - if p == 1.0: - return True - return self.value is False - if self.ir_type == "string": - # smallest size and contains only the smallest-in-shrink-order character. - minimal_char = self.kwargs["intervals"].char_in_shrink_order(0) - return self.value == (minimal_char * self.kwargs["min_size"]) - if self.ir_type == "bytes": - # smallest size and all-zero value. - return len(self.value) == self.kwargs["min_size"] and not any(self.value) - - raise NotImplementedError(f"unhandled ir_type {self.ir_type}") - - def __eq__(self, other): - if not isinstance(other, IRNode): - return NotImplemented - - return ( - self.ir_type == other.ir_type - and ir_value_equal(self.ir_type, self.value, other.value) - and ir_kwargs_equal(self.ir_type, self.kwargs, other.kwargs) - and self.was_forced == other.was_forced - ) - - def __hash__(self) -> int: - return hash( - ( - self.ir_type, - ir_value_key(self.ir_type, self.value), - ir_kwargs_key(self.ir_type, self.kwargs), - self.was_forced, - ) - ) - - def __repr__(self) -> str: - # repr to avoid "BytesWarning: str() on a bytes instance" for bytes nodes - forced_marker = " [forced]" if self.was_forced else "" - return f"{self.ir_type} {self.value!r}{forced_marker} {self.kwargs!r}" - - [email protected](slots=True) -class NodeTemplate: - type: Literal["simplest"] = attr.ib() - size: int = attr.ib() - - def __attrs_post_init__(self) -> None: - assert self.size > 0 - - -def 
ir_value_permitted(value, ir_type, kwargs): - if ir_type == "integer": - min_value = kwargs["min_value"] - max_value = kwargs["max_value"] - shrink_towards = kwargs["shrink_towards"] - if min_value is not None and value < min_value: - return False - if max_value is not None and value > max_value: - return False - - if max_value is None or min_value is None: - return (value - shrink_towards).bit_length() < 128 - - return True - elif ir_type == "float": - if math.isnan(value): - return kwargs["allow_nan"] - return ( - sign_aware_lte(kwargs["min_value"], value) - and sign_aware_lte(value, kwargs["max_value"]) - ) and not (0 < abs(value) < kwargs["smallest_nonzero_magnitude"]) - elif ir_type == "string": - if len(value) < kwargs["min_size"]: - return False - if kwargs["max_size"] is not None and len(value) > kwargs["max_size"]: - return False - return all(ord(c) in kwargs["intervals"] for c in value) - elif ir_type == "bytes": - if len(value) < kwargs["min_size"]: - return False - return kwargs["max_size"] is None or len(value) <= kwargs["max_size"] - elif ir_type == "boolean": - if kwargs["p"] <= 2 ** (-64): - return value is False - if kwargs["p"] >= (1 - 2 ** (-64)): - return value is True - return True - - raise NotImplementedError(f"unhandled type {type(value)} of ir value {value}") - - -def ir_size(ir: Iterable[IRType]) -> int: - from hypothesis.database import ir_to_bytes - - return len(ir_to_bytes(ir)) - - -def ir_size_nodes(nodes: Iterable[Union[IRNode, NodeTemplate]]) -> int: - size = 0 - for node in nodes: - size += node.size if isinstance(node, NodeTemplate) else ir_size([node.value]) - return size - - -def ir_value_key(ir_type, v): - if ir_type == "float": - return float_to_int(v) - return v - - -def ir_kwargs_key(ir_type, kwargs): - if ir_type == "float": - return ( - float_to_int(kwargs["min_value"]), - float_to_int(kwargs["max_value"]), - kwargs["allow_nan"], - kwargs["smallest_nonzero_magnitude"], - ) - if ir_type == "integer": - return ( - 
kwargs["min_value"], - kwargs["max_value"], - None if kwargs["weights"] is None else tuple(kwargs["weights"]), - kwargs["shrink_towards"], - ) - return tuple(kwargs[key] for key in sorted(kwargs)) - - -def ir_value_equal(ir_type, v1, v2): - return ir_value_key(ir_type, v1) == ir_value_key(ir_type, v2) - - -def ir_kwargs_equal(ir_type, kwargs1, kwargs2): - return ir_kwargs_key(ir_type, kwargs1) == ir_kwargs_key(ir_type, kwargs2) - - -@dataclass_transform() @attr.s(slots=True) class ConjectureResult: """Result class storing the parts of ConjectureData that we @@ -1151,901 +579,81 @@ class ConjectureResult: status: Status = attr.ib() interesting_origin: Optional[InterestingOrigin] = attr.ib() - buffer: bytes = attr.ib() - # some ConjectureDatas pass through the ir and some pass through buffers. - # the ir does not drive its result through the buffer, which means blocks/examples - # may differ (I think for forced values?) even when the buffer is the same. - # I don't *think* anything was relying on anything but .buffer for result equality, - # though that assumption may be leaning on flakiness detection invariants. - # - # If we consider blocks or examples in equality checks, multiple semantically equal - # results get stored in e.g. the pareto front. - blocks: Blocks = attr.ib(eq=False) - ir_nodes: tuple[IRNode, ...] = attr.ib(eq=False, repr=False) + nodes: tuple[ChoiceNode, ...] 
= attr.ib(eq=False, repr=False) + length: int = attr.ib() output: str = attr.ib() extra_information: Optional[ExtraInformation] = attr.ib() + expected_exception: Optional[BaseException] = attr.ib() + expected_traceback: Optional[str] = attr.ib() has_discards: bool = attr.ib() target_observations: TargetObservations = attr.ib() tags: frozenset[StructuralCoverageTag] = attr.ib() - forced_indices: frozenset[int] = attr.ib(repr=False) - examples: Examples = attr.ib(repr=False, eq=False) + spans: Spans = attr.ib(repr=False, eq=False) arg_slices: set[tuple[int, int]] = attr.ib(repr=False) slice_comments: dict[tuple[int, int], str] = attr.ib(repr=False) misaligned_at: Optional[MisalignedAt] = attr.ib(repr=False) - - index: int = attr.ib(init=False) - - def __attrs_post_init__(self) -> None: - self.index = len(self.buffer) - self.forced_indices = frozenset(self.forced_indices) + cannot_proceed_scope: Optional[CannotProceedScopeT] = attr.ib(repr=False) def as_result(self) -> "ConjectureResult": return self @property - def choices(self) -> tuple[IRType, ...]: - return tuple(node.value for node in self.ir_nodes) - - -# Masks for masking off the first byte of an n-bit buffer. -# The appropriate mask is stored at position n % 8. -BYTE_MASKS = [(1 << n) - 1 for n in range(8)] -BYTE_MASKS[0] = 255 - -_Lifetime: TypeAlias = Literal["test_case", "test_function"] - - -class _BackendInfoMsg(TypedDict): - type: str - title: str - content: Union[str, dict[str, Any]] - - -class PrimitiveProvider(abc.ABC): - # This is the low-level interface which would also be implemented - # by e.g. CrossHair, by an Atheris-hypothesis integration, etc. - # We'd then build the structured tree handling, database and replay - # support, etc. on top of this - so all backends get those for free. - # - # See https://github.com/HypothesisWorks/hypothesis/issues/3086 - - # How long a provider instance is used for. One of test_function or - # test_case. Defaults to test_function. 
- # - # If test_function, a single provider instance will be instantiated and used - # for the entirety of each test function. I.e., roughly one provider per - # @given annotation. This can be useful if you need to track state over many - # executions to a test function. - # - # This lifetime will cause None to be passed for the ConjectureData object - # in PrimitiveProvider.__init__, because that object is instantiated per - # test case. - # - # If test_case, a new provider instance will be instantiated and used each - # time hypothesis tries to generate a new input to the test function. This - # lifetime can access the passed ConjectureData object. - # - # Non-hypothesis providers probably want to set a lifetime of test_function. - lifetime: _Lifetime = "test_function" - - # Solver-based backends such as hypothesis-crosshair use symbolic values - # which record operations performed on them in order to discover new paths. - # If avoid_realization is set to True, hypothesis will avoid interacting with - # ir values (symbolics) returned by the provider in any way that would force the - # solver to narrow the range of possible values for that symbolic. - # - # Setting this to True disables some hypothesis features, such as - # DataTree-based deduplication, and some internal optimizations, such as - # caching kwargs. Only enable this if it is necessary for your backend. - avoid_realization = False - - def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None: - self._cd = conjecturedata - - def per_test_case_context_manager(self): - return contextlib.nullcontext() - - def realize(self, value: T) -> T: - """ - Called whenever hypothesis requires a concrete (non-symbolic) value from - a potentially symbolic value. Hypothesis will not check that `value` is - symbolic before calling `realize`, so you should handle the case where - `value` is non-symbolic. - - The returned value should be non-symbolic. 
If you cannot provide a value, - raise hypothesis.errors.BackendCannotProceed("discard_test_case") - """ - return value - - def observe_test_case(self) -> dict[str, Any]: - """Called at the end of the test case when observability mode is active. - - The return value should be a non-symbolic json-encodable dictionary, - and will be included as `observation["metadata"]["backend"]`. - """ - return {} - - def observe_information_messages( - self, *, lifetime: _Lifetime - ) -> Iterable[_BackendInfoMsg]: - """Called at the end of each test case and again at end of the test function. - - Return an iterable of `{type: info/alert/error, title: str, content: str|dict}` - dictionaries to be delivered as individual information messages. - (Hypothesis adds the `run_start` timestamp and `property` name for you.) - """ - assert lifetime in ("test_case", "test_function") - yield from [] - - @abc.abstractmethod - def draw_boolean( - self, - p: float = 0.5, - *, - forced: Optional[bool] = None, - fake_forced: bool = False, - ) -> bool: - raise NotImplementedError - - @abc.abstractmethod - def draw_integer( - self, - min_value: Optional[int] = None, - max_value: Optional[int] = None, - *, - # weights are for choosing an element index from a bounded range - weights: Optional[dict[int, float]] = None, - shrink_towards: int = 0, - forced: Optional[int] = None, - fake_forced: bool = False, - ) -> int: - raise NotImplementedError - - @abc.abstractmethod - def draw_float( - self, - *, - min_value: float = -math.inf, - max_value: float = math.inf, - allow_nan: bool = True, - smallest_nonzero_magnitude: float, - # TODO: consider supporting these float widths at the IR level in the - # future. 
- # width: Literal[16, 32, 64] = 64, - # exclude_min and exclude_max handled higher up, - forced: Optional[float] = None, - fake_forced: bool = False, - ) -> float: - raise NotImplementedError - - @abc.abstractmethod - def draw_string( - self, - intervals: IntervalSet, - *, - min_size: int = 0, - max_size: int = COLLECTION_DEFAULT_MAX_SIZE, - forced: Optional[str] = None, - fake_forced: bool = False, - ) -> str: - raise NotImplementedError - - @abc.abstractmethod - def draw_bytes( - self, - min_size: int = 0, - max_size: int = COLLECTION_DEFAULT_MAX_SIZE, - *, - forced: Optional[bytes] = None, - fake_forced: bool = False, - ) -> bytes: - raise NotImplementedError - - -class HypothesisProvider(PrimitiveProvider): - lifetime = "test_case" - - def __init__(self, conjecturedata: Optional["ConjectureData"], /): - super().__init__(conjecturedata) - - def draw_boolean( - self, - p: float = 0.5, - *, - forced: Optional[bool] = None, - fake_forced: bool = False, - ) -> bool: - """Return True with probability p (assuming a uniform generator), - shrinking towards False. If ``forced`` is set to a non-None value, this - will always return that value but will write choices appropriate to having - drawn that value randomly.""" - # Note that this could also be implemented in terms of draw_integer(). - - assert self._cd is not None - # NB this function is vastly more complicated than it may seem reasonable - # for it to be. This is because it is used in a lot of places and it's - # important for it to shrink well, so it's worth the engineering effort. - - if p <= 0 or p >= 1: - bits = 1 - else: - # When there is a meaningful draw, in order to shrink well we will - # set things up so that 0 and 1 always correspond to False and True - # respectively. This means we want enough bits available that in a - # draw we will always have at least one truthy value and one falsey - # value. 
- bits = math.ceil(-math.log(min(p, 1 - p), 2)) - # In order to avoid stupidly large draws where the probability is - # effectively zero or one, we treat probabilities of under 2^-64 to be - # effectively zero. - if bits > 64: - # There isn't enough precision near one for this to occur for values - # far from 0. - p = 0.0 - bits = 1 - - size = 2**bits - - while True: - # The logic here is a bit complicated and special cased to make it - # play better with the shrinker. - - # We imagine partitioning the real interval [0, 1] into 2**n equal parts - # and looking at each part and whether its interior is wholly <= p - # or wholly >= p. At most one part can be neither. - - # We then pick a random part. If it's wholly on one side or the other - # of p then we use that as the answer. If p is contained in the - # interval then we start again with a new probability that is given - # by the fraction of that interval that was <= our previous p. - - # We then take advantage of the fact that we have control of the - # labelling to make this shrink better, using the following tricks: - - # If p is <= 0 or >= 1 the result of this coin is certain. We make sure - # to write a byte to the data stream anyway so that these don't cause - # difficulties when shrinking. - if p <= 0: - self._cd.draw_bits(1, forced=0) - result = False - elif p >= 1: - self._cd.draw_bits(1, forced=1) - result = True - else: - falsey = floor(size * (1 - p)) - truthy = floor(size * p) - remainder = size * p - truthy - - if falsey + truthy == size: - partial = False - else: - partial = True - - i = self._cd.draw_bits( - bits, - forced=None if forced is None else int(forced), - fake_forced=fake_forced, - ) - - # We always choose the region that causes us to repeat the loop as - # the maximum value, so that shrinking the drawn bits never causes - # us to need to draw more self._cd. 
- if partial and i == size - 1: - p = remainder - continue - if falsey == 0: - # Every other partition is truthy, so the result is true - result = True - elif truthy == 0: - # Every other partition is falsey, so the result is false - result = False - elif i <= 1: - # We special case so that zero is always false and 1 is always - # true which makes shrinking easier because we can always - # replace a truthy block with 1. This has the slightly weird - # property that shrinking from 2 to 1 can cause the result to - # grow, but the shrinker always tries 0 and 1 first anyway, so - # this will usually be fine. - result = bool(i) - else: - # Originally everything in the region 0 <= i < falsey was false - # and everything above was true. We swapped one truthy element - # into this region, so the region becomes 0 <= i <= falsey - # except for i = 1. We know i > 1 here, so the test for truth - # becomes i > falsey. - result = i > falsey - - break - return result - - def draw_integer( - self, - min_value: Optional[int] = None, - max_value: Optional[int] = None, - *, - weights: Optional[dict[int, float]] = None, - shrink_towards: int = 0, - forced: Optional[int] = None, - fake_forced: bool = False, - ) -> int: - assert self._cd is not None - - if min_value is not None: - shrink_towards = max(min_value, shrink_towards) - if max_value is not None: - shrink_towards = min(max_value, shrink_towards) - - # This is easy to build on top of our existing conjecture utils, - # and it's easy to build sampled_from and weighted_coin on this. - if weights is not None: - assert min_value is not None - assert max_value is not None - - # format of weights is a mapping of ints to p, where sum(p) < 1. - # The remaining probability mass is uniformly distributed over - # *all* ints (not just the unmapped ones; this is somewhat undesirable, - # but simplifies things). 
- # - # We assert that sum(p) is strictly less than 1 because it simplifies - # handling forced values when we can force into the unmapped probability - # mass. We should eventually remove this restriction. - sampler = Sampler( - [1 - sum(weights.values()), *weights.values()], observe=False - ) - # if we're forcing, it's easiest to force into the unmapped probability - # mass and then force the drawn value after. - idx = sampler.sample( - self._cd, forced=None if forced is None else 0, fake_forced=fake_forced - ) - - return self._draw_bounded_integer( - min_value, - max_value, - # implicit reliance on dicts being sorted for determinism - forced=forced if idx == 0 else list(weights)[idx - 1], - center=shrink_towards, - fake_forced=fake_forced, - ) - - if min_value is None and max_value is None: - return self._draw_unbounded_integer(forced=forced, fake_forced=fake_forced) - - if min_value is None: - assert max_value is not None # make mypy happy - probe = max_value + 1 - while max_value < probe: - probe = shrink_towards + self._draw_unbounded_integer( - forced=None if forced is None else forced - shrink_towards, - fake_forced=fake_forced, - ) - return probe - - if max_value is None: - assert min_value is not None - probe = min_value - 1 - while probe < min_value: - probe = shrink_towards + self._draw_unbounded_integer( - forced=None if forced is None else forced - shrink_towards, - fake_forced=fake_forced, - ) - return probe - - return self._draw_bounded_integer( - min_value, - max_value, - center=shrink_towards, - forced=forced, - fake_forced=fake_forced, - ) - - def draw_float( - self, - *, - min_value: float = -math.inf, - max_value: float = math.inf, - allow_nan: bool = True, - smallest_nonzero_magnitude: float, - # TODO: consider supporting these float widths at the IR level in the - # future. 
- # width: Literal[16, 32, 64] = 64, - # exclude_min and exclude_max handled higher up, - forced: Optional[float] = None, - fake_forced: bool = False, - ) -> float: - ( - sampler, - forced_sign_bit, - neg_clamper, - pos_clamper, - nasty_floats, - ) = self._draw_float_init_logic( - min_value=min_value, - max_value=max_value, - allow_nan=allow_nan, - smallest_nonzero_magnitude=smallest_nonzero_magnitude, - ) - - assert self._cd is not None - - while True: - # If `forced in nasty_floats`, then `forced` was *probably* - # generated by drawing a nonzero index from the sampler. However, we - # have no obligation to generate it that way when forcing. In particular, - # i == 0 is able to produce all possible floats, and the forcing - # logic is simpler if we assume this choice. - forced_i = None if forced is None else 0 - i = ( - sampler.sample(self._cd, forced=forced_i, fake_forced=fake_forced) - if sampler - else 0 - ) - if i == 0: - result = self._draw_float( - forced_sign_bit=forced_sign_bit, - forced=forced, - fake_forced=fake_forced, - ) - if allow_nan and math.isnan(result): - clamped = result - elif math.copysign(1.0, result) == -1: - assert neg_clamper is not None - clamped = -neg_clamper(-result) - else: - assert pos_clamper is not None - clamped = pos_clamper(result) - if clamped != result and not (math.isnan(result) and allow_nan): - self._draw_float(forced=clamped, fake_forced=fake_forced) - result = clamped - else: - result = nasty_floats[i - 1] - # nan values generated via int_to_float break list membership: - # - # >>> n = 18444492273895866368 - # >>> assert math.isnan(int_to_float(n)) - # >>> assert int_to_float(n) not in [int_to_float(n)] - # - # because int_to_float nans are not equal in the sense of either - # `a == b` or `a is b`. - # - # This can lead to flaky errors when collections require unique - # floats. 
I think what is happening is that in some places we - # provide math.nan, and in others we provide - # int_to_float(float_to_int(math.nan)), and which one gets used - # is not deterministic across test iterations. - # - # As a (temporary?) fix, we'll *always* generate nan values which - # are not equal in the identity sense. - # - # see also https://github.com/HypothesisWorks/hypothesis/issues/3926. - if math.isnan(result): - result = int_to_float(float_to_int(result)) - - self._draw_float(forced=result, fake_forced=fake_forced) - - return result - - def draw_string( - self, - intervals: IntervalSet, - *, - min_size: int = 0, - max_size: int = COLLECTION_DEFAULT_MAX_SIZE, - forced: Optional[str] = None, - fake_forced: bool = False, - ) -> str: - assert self._cd is not None - - average_size = min( - max(min_size * 2, min_size + 5), - 0.5 * (min_size + max_size), - ) - - chars = [] - elements = many( - self._cd, - min_size=min_size, - max_size=max_size, - average_size=average_size, - forced=None if forced is None else len(forced), - fake_forced=fake_forced, - observe=False, - ) - while elements.more(): - forced_i: Optional[int] = None - if forced is not None: - c = forced[elements.count - 1] - forced_i = intervals.index_from_char_in_shrink_order(c) - - if len(intervals) > 256: - if self.draw_boolean( - 0.2, - forced=None if forced_i is None else forced_i > 255, - fake_forced=fake_forced, - ): - i = self._draw_bounded_integer( - 256, - len(intervals) - 1, - forced=forced_i, - fake_forced=fake_forced, - ) - else: - i = self._draw_bounded_integer( - 0, 255, forced=forced_i, fake_forced=fake_forced - ) - else: - i = self._draw_bounded_integer( - 0, len(intervals) - 1, forced=forced_i, fake_forced=fake_forced - ) - - chars.append(intervals.char_in_shrink_order(i)) - - return "".join(chars) - - def draw_bytes( - self, - min_size: int = 0, - max_size: int = COLLECTION_DEFAULT_MAX_SIZE, - *, - forced: Optional[bytes] = None, - fake_forced: bool = False, - ) -> bytes: - 
assert self._cd is not None - - buf = bytearray() - average_size = min( - max(min_size * 2, min_size + 5), - 0.5 * (min_size + max_size), - ) - elements = many( - self._cd, - min_size=min_size, - max_size=max_size, - average_size=average_size, - forced=None if forced is None else len(forced), - fake_forced=fake_forced, - observe=False, - ) - while elements.more(): - forced_i: Optional[int] = None - if forced is not None: - # implicit conversion from bytes to int by indexing here - forced_i = forced[elements.count - 1] - - buf += self._cd.draw_bits( - 8, forced=forced_i, fake_forced=fake_forced - ).to_bytes(1, "big") - - return bytes(buf) - - def _draw_float( - self, - forced_sign_bit: Optional[int] = None, - *, - forced: Optional[float] = None, - fake_forced: bool = False, - ) -> float: - """ - Helper for draw_float which draws a random 64-bit float. - """ - assert self._cd is not None - - if forced is not None: - # sign_aware_lte(forced, -0.0) does not correctly handle the - # math.nan case here. - forced_sign_bit = math.copysign(1, forced) == -1 - is_negative = self._cd.draw_bits( - 1, forced=forced_sign_bit, fake_forced=fake_forced - ) - f = lex_to_float( - self._cd.draw_bits( - 64, - forced=None if forced is None else float_to_lex(abs(forced)), - fake_forced=fake_forced, - ) - ) - return -f if is_negative else f - - def _draw_unbounded_integer( - self, *, forced: Optional[int] = None, fake_forced: bool = False - ) -> int: - assert self._cd is not None - forced_i = None - if forced is not None: - # Using any bucket large enough to contain this integer would be a - # valid way to force it. This is because an n bit integer could have - # been drawn from a bucket of size n, or from any bucket of size - # m > n. - # We'll always choose the smallest eligible bucket here. - - # We need an extra bit to handle forced signed integers. INT_SIZES - # is interpreted as unsigned sizes. 
- bit_size = forced.bit_length() + 1 - size = min(size for size in INT_SIZES if bit_size <= size) - forced_i = INT_SIZES.index(size) - - size = INT_SIZES[ - INT_SIZES_SAMPLER.sample(self._cd, forced=forced_i, fake_forced=fake_forced) - ] - - forced_r = None - if forced is not None: - forced_r = forced - forced_r <<= 1 - if forced < 0: - forced_r = -forced_r - forced_r |= 1 - - r = self._cd.draw_bits(size, forced=forced_r, fake_forced=fake_forced) - sign = r & 1 - r >>= 1 - if sign: - r = -r - return r - - def _draw_bounded_integer( - self, - lower: int, - upper: int, - *, - center: Optional[int] = None, - forced: Optional[int] = None, - fake_forced: bool = False, - _vary_effective_size: bool = True, - ) -> int: - assert lower <= upper - assert forced is None or lower <= forced <= upper - assert self._cd is not None - if lower == upper: - # Write a value even when this is trivial so that when a bound depends - # on other values we don't suddenly disappear when the gap shrinks to - # zero - if that happens then often the data stream becomes misaligned - # and we fail to shrink in cases where we really should be able to. - self._cd.draw_bits(1, forced=0) - return int(lower) - - if center is None: - center = lower - center = min(max(center, lower), upper) - - if center == upper: - above = False - elif center == lower: - above = True - else: - force_above = None if forced is None else forced < center - above = not self._cd.draw_bits( - 1, forced=force_above, fake_forced=fake_forced - ) - - if above: - gap = upper - center - else: - gap = center - lower - - assert gap > 0 - - bits = gap.bit_length() - probe = gap + 1 - - if ( - bits > 24 - and _vary_effective_size - and self.draw_boolean( - 7 / 8, forced=None if forced is None else False, fake_forced=fake_forced - ) - ): - # For large ranges, we combine the uniform random distribution from draw_bits - # with a weighting scheme with moderate chance. 
Cutoff at 2 ** 24 so that our - # choice of unicode characters is uniform but the 32bit distribution is not. - idx = INT_SIZES_SAMPLER.sample(self._cd) - force_bits = min(bits, INT_SIZES[idx]) - forced = self._draw_bounded_integer( - lower=center if above else max(lower, center - 2**force_bits - 1), - upper=center if not above else min(upper, center + 2**force_bits - 1), - _vary_effective_size=False, - ) - - assert lower <= forced <= upper - - while probe > gap: - probe = self._cd.draw_bits( - bits, - forced=None if forced is None else abs(forced - center), - fake_forced=fake_forced, - ) - - if above: - result = center + probe - else: - result = center - probe - - assert lower <= result <= upper - assert forced is None or result == forced, (result, forced, center, above) - return result - - @classmethod - def _draw_float_init_logic( - cls, - *, - min_value: float, - max_value: float, - allow_nan: bool, - smallest_nonzero_magnitude: float, - ) -> tuple[ - Optional[Sampler], - Optional[Literal[0, 1]], - Optional[Callable[[float], float]], - Optional[Callable[[float], float]], - list[float], - ]: - """ - Caches initialization logic for draw_float, as an alternative to - computing this for *every* float draw. - """ - # float_to_int allows us to distinguish between e.g. -0.0 and 0.0, - # even in light of hash(-0.0) == hash(0.0) and -0.0 == 0.0. 
- key = ( - float_to_int(min_value), - float_to_int(max_value), - allow_nan, - float_to_int(smallest_nonzero_magnitude), - ) - if key in FLOAT_INIT_LOGIC_CACHE: - return FLOAT_INIT_LOGIC_CACHE[key] - - result = cls._compute_draw_float_init_logic( - min_value=min_value, - max_value=max_value, - allow_nan=allow_nan, - smallest_nonzero_magnitude=smallest_nonzero_magnitude, - ) - FLOAT_INIT_LOGIC_CACHE[key] = result - return result - - @staticmethod - def _compute_draw_float_init_logic( - *, - min_value: float, - max_value: float, - allow_nan: bool, - smallest_nonzero_magnitude: float, - ) -> tuple[ - Optional[Sampler], - Optional[Literal[0, 1]], - Optional[Callable[[float], float]], - Optional[Callable[[float], float]], - list[float], - ]: - if smallest_nonzero_magnitude == 0.0: # pragma: no cover - raise FloatingPointError( - "Got allow_subnormal=True, but we can't represent subnormal floats " - "right now, in violation of the IEEE-754 floating-point " - "specification. This is usually because something was compiled with " - "-ffast-math or a similar option, which sets global processor state. " - "See https://simonbyrne.github.io/notes/fastmath/ for a more detailed " - "writeup - and good luck!" 
- ) - - def permitted(f: float) -> bool: - if math.isnan(f): - return allow_nan - if 0 < abs(f) < smallest_nonzero_magnitude: - return False - return sign_aware_lte(min_value, f) and sign_aware_lte(f, max_value) - - boundary_values = [ - min_value, - next_up(min_value), - min_value + 1, - max_value - 1, - next_down(max_value), - max_value, - ] - nasty_floats = [f for f in NASTY_FLOATS + boundary_values if permitted(f)] - weights = [0.2 * len(nasty_floats)] + [0.8] * len(nasty_floats) - sampler = Sampler(weights, observe=False) if nasty_floats else None - - pos_clamper = neg_clamper = None - if sign_aware_lte(0.0, max_value): - pos_min = max(min_value, smallest_nonzero_magnitude) - allow_zero = sign_aware_lte(min_value, 0.0) - pos_clamper = make_float_clamper(pos_min, max_value, allow_zero=allow_zero) - if sign_aware_lte(min_value, -0.0): - neg_max = min(max_value, -smallest_nonzero_magnitude) - allow_zero = sign_aware_lte(-0.0, max_value) - neg_clamper = make_float_clamper( - -neg_max, -min_value, allow_zero=allow_zero - ) - - forced_sign_bit: Optional[Literal[0, 1]] = None - if (pos_clamper is None) != (neg_clamper is None): - forced_sign_bit = 1 if neg_clamper else 0 - - return (sampler, forced_sign_bit, neg_clamper, pos_clamper, nasty_floats) - - -# The set of available `PrimitiveProvider`s, by name. Other libraries, such as -# crosshair, can implement this interface and add themselves; at which point users -# can configure which backend to use via settings. Keys are the name of the library, -# which doubles as the backend= setting, and values are importable class names. -# -# NOTE: this is a temporary interface. We DO NOT promise to continue supporting it! 
-# (but if you want to experiment and don't mind breakage, here you go) -AVAILABLE_PROVIDERS = { - "hypothesis": "hypothesis.internal.conjecture.data.HypothesisProvider", -} + def choices(self) -> tuple[ChoiceT, ...]: + return tuple(node.value for node in self.nodes) class ConjectureData: @classmethod - def for_buffer( - cls, - buffer: Union[list[int], bytes], - *, - observer: Optional[DataObserver] = None, - provider: Union[type, PrimitiveProvider] = HypothesisProvider, - ) -> "ConjectureData": - return cls( - len(buffer), buffer, random=None, observer=observer, provider=provider - ) - - @classmethod - def for_ir_tree( + def for_choices( cls, - ir_tree_prefix: Sequence[Union[IRNode, NodeTemplate]], + choices: Sequence[Union[ChoiceTemplate, ChoiceT]], *, observer: Optional[DataObserver] = None, provider: Union[type, PrimitiveProvider] = HypothesisProvider, - max_length: Optional[int] = None, random: Optional[Random] = None, ) -> "ConjectureData": - from hypothesis.internal.conjecture.engine import BUFFER_SIZE + from hypothesis.internal.conjecture.engine import choice_count return cls( - max_length=BUFFER_SIZE, - max_length_ir=( - ir_size_nodes(ir_tree_prefix) if max_length is None else max_length - ), - prefix=b"", + max_choices=choice_count(choices), random=random, - ir_tree_prefix=ir_tree_prefix, + prefix=choices, observer=observer, provider=provider, ) def __init__( self, - max_length: int, - prefix: Union[list[int], bytes, bytearray], *, random: Optional[Random], observer: Optional[DataObserver] = None, provider: Union[type, PrimitiveProvider] = HypothesisProvider, - ir_tree_prefix: Optional[Sequence[Union[IRNode, NodeTemplate]]] = None, - max_length_ir: Optional[int] = None, + prefix: Optional[Sequence[Union[ChoiceTemplate, ChoiceT]]] = None, + max_choices: Optional[int] = None, + provider_kw: Optional[dict[str, Any]] = None, ) -> None: - from hypothesis.internal.conjecture.engine import BUFFER_SIZE_IR + from hypothesis.internal.conjecture.engine import 
BUFFER_SIZE if observer is None: observer = DataObserver() + if provider_kw is None: + provider_kw = {} + elif not isinstance(provider, type): + raise InvalidArgument( + f"Expected {provider=} to be a class since {provider_kw=} was " + "passed, but got an instance instead." + ) + assert isinstance(observer, DataObserver) - self._bytes_drawn = 0 self.observer = observer - self.max_length = max_length - self.max_length_ir = BUFFER_SIZE_IR if max_length_ir is None else max_length_ir + self.max_choices = max_choices + self.max_length = BUFFER_SIZE self.is_find = False self.overdraw = 0 - self.__prefix = bytes(prefix) - self.__random = random - - if ir_tree_prefix is None: - assert random is not None or max_length <= len(prefix) + self._random = random - self.blocks = Blocks(self) - self.buffer: "Union[bytes, bytearray]" = bytearray() + self.length = 0 self.index = 0 - self.length_ir = 0 - self.index_ir = 0 self.output = "" self.status = Status.VALID self.frozen = False @@ -2055,19 +663,18 @@ class ConjectureData: self.start_time = time.perf_counter() self.gc_start_time = gc_cumulative_time() self.events: dict[str, Union[str, int, float]] = {} - self.forced_indices: "set[int]" = set() self.interesting_origin: Optional[InterestingOrigin] = None - self.draw_times: "dict[str, float]" = {} - self._stateful_run_times: "defaultdict[str, float]" = defaultdict(float) + self.draw_times: dict[str, float] = {} + self._stateful_run_times: dict[str, float] = defaultdict(float) self.max_depth = 0 self.has_discards = False self.provider: PrimitiveProvider = ( - provider(self) if isinstance(provider, type) else provider + provider(self, **provider_kw) if isinstance(provider, type) else provider ) assert isinstance(self.provider, PrimitiveProvider) - self.__result: "Optional[ConjectureResult]" = None + self.__result: Optional[ConjectureResult] = None # Observations used for targeted search. They'll be aggregated in # ConjectureRunner.generate_new_examples and fed to TargetSelector. 
@@ -2075,18 +682,18 @@ class ConjectureData: # Tags which indicate something about which part of the search space # this example is in. These are used to guide generation. - self.tags: "set[StructuralCoverageTag]" = set() - self.labels_for_structure_stack: "list[set[int]]" = [] + self.tags: set[StructuralCoverageTag] = set() + self.labels_for_structure_stack: list[set[int]] = [] # Normally unpopulated but we need this in the niche case # that self.as_result() is Overrun but we still want the # examples for reporting purposes. - self.__examples: "Optional[Examples]" = None + self.__spans: Optional[Spans] = None - # We want the top level example to have depth 0, so we start + # We want the top level span to have depth 0, so we start # at -1. self.depth = -1 - self.__example_record = ExampleRecord() + self.__span_record = SpanRecord() # Slice indices for discrete reportable parts that which-parts-matter can # try varying, to report if the minimal example always fails anyway. @@ -2096,100 +703,104 @@ class ConjectureData: self._observability_predicates: defaultdict = defaultdict( lambda: {"satisfied": 0, "unsatisfied": 0} ) + self._sampled_from_all_strategies_elements_message: Optional[ + tuple[str, object] + ] = None + self._shared_strategy_draws: dict[Hashable, Any] = {} + self.expected_exception: Optional[BaseException] = None + self.expected_traceback: Optional[str] = None self.extra_information = ExtraInformation() - self.ir_prefix = ir_tree_prefix - self.ir_nodes: tuple[IRNode, ...] = () + self.prefix = prefix + self.nodes: tuple[ChoiceNode, ...] 
= () self.misaligned_at: Optional[MisalignedAt] = None - self.start_example(TOP_LABEL) + self.cannot_proceed_scope: Optional[CannotProceedScopeT] = None + self.start_span(TOP_LABEL) def __repr__(self) -> str: - return "ConjectureData(%s, %d bytes%s)" % ( + return "ConjectureData(%s, %d choices%s)" % ( self.status.name, - len(self.buffer), + len(self.nodes), ", frozen" if self.frozen else "", ) @property - def choices(self) -> tuple[IRType, ...]: - return tuple(node.value for node in self.ir_nodes) + def choices(self) -> tuple[ChoiceT, ...]: + return tuple(node.value for node in self.nodes) - # A bit of explanation of the `observe` and `fake_forced` arguments in our - # draw_* functions. + # draw_* functions might be called in one of two contexts: either "above" or + # "below" the choice sequence. For instance, draw_string calls draw_boolean + # from ``many`` when calculating the number of characters to return. We do + # not want these choices to get written to the choice sequence, because they + # are not true choices themselves. # - # There are two types of draws: sub-ir and super-ir. For instance, some ir - # nodes use `many`, which in turn calls draw_boolean. But some strategies - # also use many, at the super-ir level. We don't want to write sub-ir draws - # to the DataTree (and consequently use them when computing novel prefixes), - # since they are fully recorded by writing the ir node itself. - # But super-ir draws are not included in the ir node, so we do want to write - # these to the tree. - # - # `observe` formalizes this distinction. The draw will only be written to - # the DataTree if observe is True. - # - # `fake_forced` deals with a different problem. We use `forced=` to convert - # ir prefixes, which are potentially from other backends, into our backing - # bits representation. This works fine, except using `forced=` in this way - # also sets `was_forced=True` for all blocks, even those that weren't forced - # in the traditional way. 
The shrinker chokes on this due to thinking that - # nothing can be modified. - # - # Setting `fake_forced` to true says that yes, we want to force a particular - # value to be returned, but we don't want to treat that block as fixed for - # e.g. the shrinker. - - def _draw(self, ir_type, kwargs, *, observe, forced, fake_forced): + # `observe` formalizes this. The choice will only be written to the choice + # sequence if observe is True. + def _draw(self, choice_type, constraints, *, observe, forced): # this is somewhat redundant with the length > max_length check at the # end of the function, but avoids trying to use a null self.random when - # drawing past the node of a ConjectureData.for_ir_tree data. - if self.length_ir == self.max_length_ir: - debug_report(f"overrun because hit {self.max_length_ir=}") + # drawing past the node of a ConjectureData.for_choices data. + if self.length == self.max_length: + debug_report(f"overrun because hit {self.max_length=}") + self.mark_overrun() + if len(self.nodes) == self.max_choices: + debug_report(f"overrun because hit {self.max_choices=}") self.mark_overrun() - if self.ir_prefix is not None and observe: - if self.index_ir < len(self.ir_prefix): - node_value = self._pop_ir_tree_node(ir_type, kwargs, forced=forced) - else: - try: - (node_value, _buf) = ir_to_buffer( - ir_type, kwargs, forced=forced, random=self.__random - ) - except StopTest: - debug_report("overrun because ir_to_buffer overran") - self.mark_overrun() + if observe and self.prefix is not None and self.index < len(self.prefix): + value = self._pop_choice(choice_type, constraints, forced=forced) + elif forced is None: + value = getattr(self.provider, f"draw_{choice_type}")(**constraints) - if forced is None: - forced = node_value - fake_forced = True + if forced is not None: + value = forced - value = getattr(self.provider, f"draw_{ir_type}")( - **kwargs, forced=forced, fake_forced=fake_forced - ) + # nan values generated via int_to_float break list 
membership: + # + # >>> n = 18444492273895866368 + # >>> assert math.isnan(int_to_float(n)) + # >>> assert int_to_float(n) not in [int_to_float(n)] + # + # because int_to_float nans are not equal in the sense of either + # `a == b` or `a is b`. + # + # This can lead to flaky errors when collections require unique + # floats. What was happening is that in some places we provided math.nan + # provide math.nan, and in others we provided + # int_to_float(float_to_int(math.nan)), and which one gets used + # was not deterministic across test iterations. + # + # To fix this, *never* provide a nan value which is equal (via `is`) to + # another provided nan value. This sacrifices some test power; we should + # bring that back (ABOVE the choice sequence layer) in the future. + # + # See https://github.com/HypothesisWorks/hypothesis/issues/3926. + if choice_type == "float" and math.isnan(value): + value = int_to_float(float_to_int(value)) if observe: - was_forced = forced is not None and not fake_forced - getattr(self.observer, f"draw_{ir_type}")( - value, kwargs=kwargs, was_forced=was_forced + was_forced = forced is not None + getattr(self.observer, f"draw_{choice_type}")( + value, constraints=constraints, was_forced=was_forced ) - size = ir_size([value]) - if self.length_ir + size > self.max_length_ir: + size = 0 if self.provider.avoid_realization else choices_size([value]) + if self.length + size > self.max_length: debug_report( - f"overrun because {self.length_ir=} + {size=} > {self.max_length_ir=}" + f"overrun because {self.length=} + {size=} > {self.max_length=}" ) self.mark_overrun() - node = IRNode( - ir_type=ir_type, + node = ChoiceNode( + type=choice_type, value=value, - kwargs=kwargs, + constraints=constraints, was_forced=was_forced, - index=len(self.ir_nodes), + index=len(self.nodes), ) - self.__example_record.record_ir_draw() - self.ir_nodes += (node,) - self.length_ir += size + self.__span_record.record_choice() + self.nodes += (node,) + self.length += size 
return value @@ -2201,7 +812,6 @@ class ConjectureData: weights: Optional[dict[int, float]] = None, shrink_towards: int = 0, forced: Optional[int] = None, - fake_forced: bool = False, observe: bool = True, ) -> int: # Validate arguments @@ -2217,24 +827,12 @@ class ConjectureData: # we'll want to drop this restriction eventually. assert all(w != 0 for w in weights.values()) - if forced is not None and (min_value is None or max_value is None): - # We draw `forced=forced - shrink_towards` here internally, after clamping. - # If that grows larger than a 128 bit signed integer, we can't represent it. - # Disallow this combination for now. - # Note that bit_length() = 128 -> signed bit size = 129. - _shrink_towards = shrink_towards - if min_value is not None: - _shrink_towards = max(min_value, _shrink_towards) - if max_value is not None: - _shrink_towards = min(max_value, _shrink_towards) - - assert (forced - _shrink_towards).bit_length() < 128 if forced is not None and min_value is not None: assert min_value <= forced if forced is not None and max_value is not None: assert forced <= max_value - kwargs: IntegerKWargs = self._pooled_kwargs( + constraints: IntegerConstraints = self._pooled_constraints( "integer", { "min_value": min_value, @@ -2243,9 +841,7 @@ class ConjectureData: "shrink_towards": shrink_towards, }, ) - return self._draw( - "integer", kwargs, observe=observe, forced=forced, fake_forced=fake_forced - ) + return self._draw("integer", constraints, observe=observe, forced=forced) def draw_float( self, @@ -2259,7 +855,6 @@ class ConjectureData: # width: Literal[16, 32, 64] = 64, # exclude_min and exclude_max handled higher up, forced: Optional[float] = None, - fake_forced: bool = False, observe: bool = True, ) -> float: assert smallest_nonzero_magnitude > 0 @@ -2272,7 +867,7 @@ class ConjectureData: sign_aware_lte(min_value, forced) and sign_aware_lte(forced, max_value) ) - kwargs: FloatKWargs = self._pooled_kwargs( + constraints: FloatConstraints = 
self._pooled_constraints( "float", { "min_value": min_value, @@ -2281,9 +876,7 @@ class ConjectureData: "smallest_nonzero_magnitude": smallest_nonzero_magnitude, }, ) - return self._draw( - "float", kwargs, observe=observe, forced=forced, fake_forced=fake_forced - ) + return self._draw("float", constraints, observe=observe, forced=forced) def draw_string( self, @@ -2292,13 +885,14 @@ class ConjectureData: min_size: int = 0, max_size: int = COLLECTION_DEFAULT_MAX_SIZE, forced: Optional[str] = None, - fake_forced: bool = False, observe: bool = True, ) -> str: assert forced is None or min_size <= len(forced) <= max_size assert min_size >= 0 + if len(intervals) == 0: + assert min_size == 0 - kwargs: StringKWargs = self._pooled_kwargs( + constraints: StringConstraints = self._pooled_constraints( "string", { "intervals": intervals, @@ -2306,9 +900,7 @@ class ConjectureData: "max_size": max_size, }, ) - return self._draw( - "string", kwargs, observe=observe, forced=forced, fake_forced=fake_forced - ) + return self._draw("string", constraints, observe=observe, forced=forced) def draw_bytes( self, @@ -2316,125 +908,137 @@ class ConjectureData: max_size: int = COLLECTION_DEFAULT_MAX_SIZE, *, forced: Optional[bytes] = None, - fake_forced: bool = False, observe: bool = True, ) -> bytes: assert forced is None or min_size <= len(forced) <= max_size assert min_size >= 0 - kwargs: BytesKWargs = self._pooled_kwargs( + constraints: BytesConstraints = self._pooled_constraints( "bytes", {"min_size": min_size, "max_size": max_size} ) - return self._draw( - "bytes", kwargs, observe=observe, forced=forced, fake_forced=fake_forced - ) + return self._draw("bytes", constraints, observe=observe, forced=forced) def draw_boolean( self, p: float = 0.5, *, forced: Optional[bool] = None, - fake_forced: bool = False, observe: bool = True, ) -> bool: - # Internally, we treat probabilities lower than 1 / 2**64 as - # unconditionally false. 
- # - # Note that even if we lift this 64 bit restriction in the future, p - # cannot be 0 (1) when forced is True (False). - eps = 2 ** (-64) if isinstance(self.provider, HypothesisProvider) else 0 - assert (forced is not True) or (0 + eps) < p - assert (forced is not False) or p < (1 - eps) + assert (forced is not True) or p > 0 + assert (forced is not False) or p < 1 - kwargs: BooleanKWargs = self._pooled_kwargs("boolean", {"p": p}) - return self._draw( - "boolean", kwargs, observe=observe, forced=forced, fake_forced=fake_forced - ) + constraints: BooleanConstraints = self._pooled_constraints("boolean", {"p": p}) + return self._draw("boolean", constraints, observe=observe, forced=forced) - def _pooled_kwargs(self, ir_type, kwargs): + def _pooled_constraints(self, choice_type, constraints): """Memoize common dictionary objects to reduce memory pressure.""" # caching runs afoul of nondeterminism checks if self.provider.avoid_realization: - return kwargs + return constraints - key = (ir_type, *ir_kwargs_key(ir_type, kwargs)) + key = (choice_type, *choice_constraints_key(choice_type, constraints)) try: - return POOLED_KWARGS_CACHE[key] + return POOLED_CONSTRAINTS_CACHE[key] except KeyError: - POOLED_KWARGS_CACHE[key] = kwargs - return kwargs - - def _pop_ir_tree_node( - self, ir_type: IRTypeName, kwargs: IRKWargsType, *, forced: Optional[IRType] - ) -> IRType: - from hypothesis.internal.conjecture.engine import BUFFER_SIZE + POOLED_CONSTRAINTS_CACHE[key] = constraints + return constraints - assert self.ir_prefix is not None + def _pop_choice( + self, + choice_type: ChoiceTypeT, + constraints: ChoiceConstraintsT, + *, + forced: Optional[ChoiceT], + ) -> ChoiceT: + assert self.prefix is not None # checked in _draw - assert self.index_ir < len(self.ir_prefix) + assert self.index < len(self.prefix) - node = self.ir_prefix[self.index_ir] - if isinstance(node, NodeTemplate): - assert node.size >= 0 + value = self.prefix[self.index] + if isinstance(value, ChoiceTemplate): 
+ node: ChoiceTemplate = value + if node.count is not None: + assert node.count >= 0 # node templates have to be at the end for now, since it's not immediately # apparent how to handle overruning a node template while generating a single # node if the alternative is not "the entire data is an overrun". - assert self.index_ir == len(self.ir_prefix) - 1 + assert self.index == len(self.prefix) - 1 if node.type == "simplest": - try: - value = buffer_to_ir(ir_type, kwargs, buffer=bytes(BUFFER_SIZE)) - except StopTest: - self.mark_overrun() + if forced is not None: + choice = forced + elif isinstance(self.provider, HypothesisProvider): + try: + choice = choice_from_index(0, choice_type, constraints) + except ChoiceTooLarge: + self.mark_overrun() + else: + # give alternative backends control over ChoiceTemplate draws + # as well + choice = getattr(self.provider, f"draw_{choice_type}")( + **constraints + ) else: raise NotImplementedError - node.size -= ir_size([value]) - if node.size < 0: - self.mark_overrun() - return value + if node.count is not None: + node.count -= 1 + if node.count < 0: + self.mark_overrun() + return choice - value = node.value + choice = value + node_choice_type = { + str: "string", + float: "float", + int: "integer", + bool: "boolean", + bytes: "bytes", + }[type(choice)] # If we're trying to: # * draw a different ir type at the same location - # * draw the same ir type with a different kwargs + # * draw the same ir type with a different constraints, which does not permit + # the current value # # then we call this a misalignment, because the choice sequence has # slipped from what we expected at some point. An easy misalignment is # - # st.one_of(st.integers(0, 100), st.integers(101, 200)) + # one_of(integers(0, 100), integers(101, 200)) # - # where the choice sequence [0, 100] has kwargs {min_value: 0, max_value: 100} - # at position 2, but [0, 101] has kwargs {min_value: 101, max_value: 200} at - # position 2. 
+ # where the choice sequence [0, 100] has constraints {min_value: 0, max_value: 100} + # at index 1, but [0, 101] has constraints {min_value: 101, max_value: 200} at + # index 1 (which does not permit any of the values 0-100). # - # When we see a misalignment, we can't offer up the stored node value as-is. - # We need to make it appropriate for the requested kwargs and ir type. - # Right now we do that by using bytes as the intermediary to convert between - # ir types/kwargs. In the future we'll probably use the index into a custom - # ordering for an (ir_type, kwargs) pair. - if node.ir_type != ir_type or not ir_value_permitted( - node.value, node.ir_type, kwargs - ): + # When the choice sequence becomes misaligned, we generate a new value of the + # type and constraints the strategy expects. + if node_choice_type != choice_type or not choice_permitted(choice, constraints): # only track first misalignment for now. if self.misaligned_at is None: - self.misaligned_at = (self.index_ir, ir_type, kwargs, forced) + self.misaligned_at = (self.index, choice_type, constraints, forced) try: - (_value, buffer) = ir_to_buffer( - node.ir_type, node.kwargs, forced=node.value - ) - value = buffer_to_ir( - ir_type, kwargs, buffer=buffer + bytes(BUFFER_SIZE - len(buffer)) - ) - except StopTest: - # must have been an overrun. + # Fill in any misalignments with index 0 choices. An alternative to + # this is using the index of the misaligned choice instead + # of index 0, which may be useful for maintaining + # "similarly-complex choices" in the shrinker. This requires + # attaching an index to every choice in ConjectureData.for_choices, + # which we don't always have (e.g. when reading from db). + # + # If we really wanted this in the future we could make this complexity + # optional, use it if present, and default to index 0 otherwise. + # This complicates our internal api and so I'd like to avoid it + # if possible. 
# - # maybe we should fall back to to an arbitrary small value here - # instead? eg - # buffer_to_ir(ir_type, kwargs, buffer=bytes(BUFFER_SIZE)) + # Additionally, I don't think slips which require + # slipping to high-complexity values are common. Though arguably + # we may want to expand a bit beyond *just* the simplest choice. + # (we could for example consider sampling choices from index 0-10). + choice = choice_from_index(0, choice_type, constraints) + except ChoiceTooLarge: + # should really never happen with a 0-index choice, but let's be safe. self.mark_overrun() - self.index_ir += 1 - return value + self.index += 1 + return choice def as_result(self) -> Union[ConjectureResult, _Overrun]: """Convert the result of running this test into @@ -2447,11 +1051,12 @@ class ConjectureData: self.__result = ConjectureResult( status=self.status, interesting_origin=self.interesting_origin, - buffer=self.buffer, - examples=self.examples, - ir_nodes=self.ir_nodes, - blocks=self.blocks, + spans=self.spans, + nodes=self.nodes, + length=self.length, output=self.output, + expected_traceback=self.expected_traceback, + expected_exception=self.expected_exception, extra_information=( self.extra_information if self.extra_information.has_information() @@ -2460,13 +1065,12 @@ class ConjectureData: has_discards=self.has_discards, target_observations=self.target_observations, tags=frozenset(self.tags), - forced_indices=frozenset(self.forced_indices), arg_slices=self.arg_slices, slice_comments=self.slice_comments, misaligned_at=self.misaligned_at, + cannot_proceed_scope=self.cannot_proceed_scope, ) assert self.__result is not None - self.blocks.transfer_ownership(self.__result) return self.__result def __assert_not_frozen(self, name: str) -> None: @@ -2515,7 +1119,7 @@ class ConjectureData: if label is None: assert isinstance(strategy.label, int) label = strategy.label - self.start_example(label=label) + self.start_span(label=label) try: if not at_top_level: return 
strategy.do_draw(self) @@ -2537,13 +1141,15 @@ class ConjectureData: ) raise if TESTCASE_CALLBACKS: - self._observability_args[key] = to_jsonable(v) + avoid = self.provider.avoid_realization + self._observability_args[key] = to_jsonable(v, avoid_realization=avoid) return v finally: - self.stop_example() + self.stop_span() - def start_example(self, label: int) -> None: - self.__assert_not_frozen("start_example") + def start_span(self, label: int) -> None: + self.provider.span_start(label) + self.__assert_not_frozen("start_span") self.depth += 1 # Logically it would make sense for this to just be # ``self.depth = max(self.depth, self.max_depth)``, which is what it used to @@ -2553,17 +1159,18 @@ class ConjectureData: # to fix with this check. if self.depth > self.max_depth: self.max_depth = self.depth - self.__example_record.start_example(label) + self.__span_record.start_span(label) self.labels_for_structure_stack.append({label}) - def stop_example(self, *, discard: bool = False) -> None: + def stop_span(self, *, discard: bool = False) -> None: + self.provider.span_end(discard) if self.frozen: return if discard: self.has_discards = True self.depth -= 1 assert self.depth >= -1 - self.__example_record.stop_example(discard=discard) + self.__span_record.stop_span(discard=discard) labels_for_structure = self.labels_for_structure_stack.pop() @@ -2574,10 +1181,10 @@ class ConjectureData: self.tags.update([structural_coverage(l) for l in labels_for_structure]) if discard: - # Once we've discarded an example, every test case starting with + # Once we've discarded a span, every test case starting with # this prefix contains discards. We prune the tree at that point so # as to avoid future test cases bothering with this region, on the - # assumption that some example that you could have used instead + # assumption that some span that you could have used instead # there would *not* trigger the discard. 
This greatly speeds up # test case generation in some cases, because it allows us to # ignore large swathes of the search space that are effectively @@ -2601,28 +1208,24 @@ class ConjectureData: self.observer.kill_branch() @property - def examples(self) -> Examples: + def spans(self) -> Spans: assert self.frozen - if self.__examples is None: - self.__examples = Examples(record=self.__example_record, blocks=self.blocks) - return self.__examples + if self.__spans is None: + self.__spans = Spans(record=self.__span_record) + return self.__spans def freeze(self) -> None: if self.frozen: - assert isinstance(self.buffer, bytes) return self.finish_time = time.perf_counter() self.gc_finish_time = gc_cumulative_time() - assert len(self.buffer) == self.index - # Always finish by closing all remaining examples so that we have a - # valid tree. + # Always finish by closing all remaining spans so that we have a valid tree. while self.depth >= 0: - self.stop_example() + self.stop_span() - self.__example_record.freeze() + self.__span_record.freeze() self.frozen = True - self.buffer = bytes(self.buffer) self.observer.conclude_test(self.status, self.interesting_origin) def choice( @@ -2630,7 +1233,6 @@ class ConjectureData: values: Sequence[T], *, forced: Optional[T] = None, - fake_forced: bool = False, observe: bool = True, ) -> T: forced_i = None if forced is None else values.index(forced) @@ -2638,67 +1240,10 @@ class ConjectureData: 0, len(values) - 1, forced=forced_i, - fake_forced=fake_forced, observe=observe, ) return values[i] - def draw_bits( - self, n: int, *, forced: Optional[int] = None, fake_forced: bool = False - ) -> int: - """Return an ``n``-bit integer from the underlying source of - bytes. 
If ``forced`` is set to an integer will instead - ignore the underlying source and simulate a draw as if it had - returned that integer.""" - self.__assert_not_frozen("draw_bits") - if n == 0: - return 0 - assert n > 0 - n_bytes = bits_to_bytes(n) - self.__check_capacity(n_bytes) - - if forced is not None: - buf = int_to_bytes(forced, n_bytes) - elif self._bytes_drawn < len(self.__prefix): - index = self._bytes_drawn - buf = self.__prefix[index : index + n_bytes] - if len(buf) < n_bytes: - assert self.__random is not None - buf += uniform(self.__random, n_bytes - len(buf)) - else: - assert self.__random is not None - buf = uniform(self.__random, n_bytes) - buf = bytearray(buf) - self._bytes_drawn += n_bytes - - assert len(buf) == n_bytes - - # If we have a number of bits that is not a multiple of 8 - # we have to mask off the high bits. - buf[0] &= BYTE_MASKS[n % 8] - buf = bytes(buf) - result = int_from_bytes(buf) - - self.__example_record.draw_bits() - - initial = self.index - - assert isinstance(self.buffer, bytearray) - self.buffer.extend(buf) - self.index = len(self.buffer) - - if forced is not None and not fake_forced: - self.forced_indices.update(range(initial, self.index)) - - self.blocks.add_endpoint(self.index) - - assert result.bit_length() <= n - return result - - def __check_capacity(self, n: int) -> None: - if self.index + n > self.max_length: - self.mark_overrun() - def conclude_test( self, status: Status, @@ -2725,29 +1270,6 @@ class ConjectureData: self.conclude_test(Status.OVERRUN) -def bits_to_bytes(n: int) -> int: - """The number of bytes required to represent an n-bit number. - Equivalent to (n + 7) // 8, but slightly faster. 
This really is - called enough times that that matters.""" - return (n + 7) >> 3 - - -def ir_to_buffer(ir_type, kwargs, *, forced=None, random=None): - from hypothesis.internal.conjecture.engine import BUFFER_SIZE - - if forced is None: - assert random is not None - - cd = ConjectureData( - max_length=BUFFER_SIZE, - # buffer doesn't matter if forced is passed since we're forcing the sole draw - prefix=b"" if forced is None else bytes(BUFFER_SIZE), - random=random, - ) - value = getattr(cd.provider, f"draw_{ir_type}")(**kwargs, forced=forced) - return (value, cd.buffer) - - -def buffer_to_ir(ir_type, kwargs, *, buffer): - cd = ConjectureData.for_buffer(buffer) - return getattr(cd.provider, f"draw_{ir_type}")(**kwargs) +def draw_choice(choice_type, constraints, *, random): + cd = ConjectureData(random=random) + return getattr(cd.provider, f"draw_{choice_type}")(**constraints) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py index 98eafedb255..1621d7c3ff2 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py @@ -8,9 +8,10 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
-import itertools import math -from typing import Optional, Union +from collections.abc import Generator +from random import Random +from typing import TYPE_CHECKING, AbstractSet, Final, Optional, Union, cast import attr @@ -21,20 +22,18 @@ from hypothesis.errors import ( StopTest, ) from hypothesis.internal import floats as flt -from hypothesis.internal.compat import int_to_bytes -from hypothesis.internal.conjecture.data import ( - BooleanKWargs, - BytesKWargs, - ConjectureData, - DataObserver, - FloatKWargs, - IntegerKWargs, - IRKWargsType, - IRType, - IRTypeName, - Status, - StringKWargs, +from hypothesis.internal.conjecture.choice import ( + BooleanConstraints, + BytesConstraints, + ChoiceConstraintsT, + ChoiceT, + ChoiceTypeT, + FloatConstraints, + IntegerConstraints, + StringConstraints, + choice_from_index, ) +from hypothesis.internal.conjecture.data import ConjectureData, DataObserver, Status from hypothesis.internal.escalation import InterestingOrigin from hypothesis.internal.floats import ( count_between_floats, @@ -43,20 +42,28 @@ from hypothesis.internal.floats import ( sign_aware_lte, ) +if TYPE_CHECKING: + from typing import TypeAlias + + from hypothesis.vendor.pretty import RepresentationPrinter + +ChildrenCacheValueT: "TypeAlias" = tuple[ + Generator[ChoiceT, None, None], list[ChoiceT], set[ChoiceT] +] + class PreviouslyUnseenBehaviour(HypothesisException): pass -def inconsistent_generation(): - raise FlakyStrategyDefinition( - "Inconsistent data generation! Data generation behaved differently " - "between different runs. Is your data generation depending on external " - "state?" - ) +_FLAKY_STRAT_MSG = ( + "Inconsistent data generation! Data generation behaved differently " + "between different runs. Is your data generation depending on external " + "state?" 
+) -EMPTY: frozenset = frozenset() +EMPTY: frozenset[int] = frozenset() @attr.s(slots=True) @@ -66,16 +73,22 @@ class Killed: be treated as if it were completely explored for the purposes of exhaustion.""" - next_node = attr.ib() + next_node: "TreeNode" = attr.ib() - def _repr_pretty_(self, p, cycle): + def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: assert cycle is False p.text("Killed") -def _node_pretty(ir_type, value, kwargs, *, forced): +def _node_pretty( + choice_type: ChoiceTypeT, + value: ChoiceT, + constraints: ChoiceConstraintsT, + *, + forced: bool, +) -> str: forced_marker = " [forced]" if forced else "" - return f"{ir_type} {value}{forced_marker} {kwargs}" + return f"{choice_type} {value!r}{forced_marker} {constraints}" @attr.s(slots=True) @@ -83,22 +96,24 @@ class Branch: """Represents a transition where multiple choices can be made as to what to drawn.""" - kwargs = attr.ib() - ir_type = attr.ib() - children = attr.ib(repr=False) + constraints: ChoiceConstraintsT = attr.ib() + choice_type: ChoiceTypeT = attr.ib() + children: dict[ChoiceT, "TreeNode"] = attr.ib(repr=False) @property - def max_children(self): - max_children = compute_max_children(self.ir_type, self.kwargs) + def max_children(self) -> int: + max_children = compute_max_children(self.choice_type, self.constraints) assert max_children > 0 return max_children - def _repr_pretty_(self, p, cycle): + def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: assert cycle is False for i, (value, child) in enumerate(self.children.items()): if i > 0: p.break_() - p.text(_node_pretty(self.ir_type, value, self.kwargs, forced=False)) + p.text( + _node_pretty(self.choice_type, value, self.constraints, forced=False) + ) with p.indent(2): p.break_() p.pretty(child) @@ -111,7 +126,7 @@ class Conclusion: status: Status = attr.ib() interesting_origin: Optional[InterestingOrigin] = attr.ib() - def _repr_pretty_(self, p, cycle): + def _repr_pretty_(self, p: 
"RepresentationPrinter", cycle: bool) -> None: assert cycle is False o = self.interesting_origin # avoid str(o), which can include multiple lines of context @@ -143,10 +158,10 @@ class Conclusion: # The one case where this may be detrimental is fuzzing, where the throughput of # examples is so high that it really may saturate important nodes. We'll cross # that bridge when we come to it. -MAX_CHILDREN_EFFECTIVELY_INFINITE = 100_000 +MAX_CHILDREN_EFFECTIVELY_INFINITE: Final[int] = 100_000 -def _count_distinct_strings(*, alphabet_size, min_size, max_size): +def _count_distinct_strings(*, alphabet_size: int, min_size: int, max_size: int) -> int: # We want to estimate if we're going to have more children than # MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially # extremely expensive pow. We'll check if the number of strings in @@ -170,10 +185,13 @@ def _count_distinct_strings(*, alphabet_size, min_size, max_size): return sum(alphabet_size**k for k in range(min_size, max_size + 1)) -def compute_max_children(ir_type, kwargs): - if ir_type == "integer": - min_value = kwargs["min_value"] - max_value = kwargs["max_value"] +def compute_max_children( + choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT +) -> int: + if choice_type == "integer": + constraints = cast(IntegerConstraints, constraints) + min_value = constraints["min_value"] + max_value = constraints["max_value"] if min_value is None and max_value is None: # full 128 bit range. @@ -188,20 +206,25 @@ def compute_max_children(ir_type, kwargs): # direction we want. ((2**128 - 1) // 2) + 1 == 2 ** 127 assert (min_value is None) ^ (max_value is None) return 2**127 - elif ir_type == "boolean": - p = kwargs["p"] + elif choice_type == "boolean": + constraints = cast(BooleanConstraints, constraints) + p = constraints["p"] # probabilities of 0 or 1 (or effectively 0 or 1) only have one choice. 
if p <= 2 ** (-64) or p >= (1 - 2 ** (-64)): return 1 return 2 - elif ir_type == "bytes": + elif choice_type == "bytes": + constraints = cast(BytesConstraints, constraints) return _count_distinct_strings( - alphabet_size=2**8, min_size=kwargs["min_size"], max_size=kwargs["max_size"] + alphabet_size=2**8, + min_size=constraints["min_size"], + max_size=constraints["max_size"], ) - elif ir_type == "string": - min_size = kwargs["min_size"] - max_size = kwargs["max_size"] - intervals = kwargs["intervals"] + elif choice_type == "string": + constraints = cast(StringConstraints, constraints) + min_size = constraints["min_size"] + max_size = constraints["max_size"] + intervals = constraints["intervals"] if len(intervals) == 0: # Special-case the empty alphabet to avoid an error in math.log(0). @@ -216,12 +239,13 @@ def compute_max_children(ir_type, kwargs): return _count_distinct_strings( alphabet_size=len(intervals), min_size=min_size, max_size=max_size ) - elif ir_type == "float": - min_value = kwargs["min_value"] - max_value = kwargs["max_value"] - smallest_nonzero_magnitude = kwargs["smallest_nonzero_magnitude"] + elif choice_type == "float": + constraints = cast(FloatConstraints, constraints) + min_value_f = constraints["min_value"] + max_value_f = constraints["max_value"] + smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"] - count = count_between_floats(min_value, max_value) + count = count_between_floats(min_value_f, max_value_f) # we have two intervals: # a. [min_value, max_value] @@ -231,89 +255,56 @@ def compute_max_children(ir_type, kwargs): # want the interval difference a - b. 
# next_down because endpoints are ok with smallest_nonzero_magnitude - min_point = max(min_value, -flt.next_down(smallest_nonzero_magnitude)) - max_point = min(max_value, flt.next_down(smallest_nonzero_magnitude)) + min_point = max(min_value_f, -flt.next_down(smallest_nonzero_magnitude)) + max_point = min(max_value_f, flt.next_down(smallest_nonzero_magnitude)) if min_point > max_point: # case: disjoint intervals. return count count -= count_between_floats(min_point, max_point) - if sign_aware_lte(min_value, -0.0) and sign_aware_lte(-0.0, max_value): + if sign_aware_lte(min_value_f, -0.0) and sign_aware_lte(-0.0, max_value_f): # account for -0.0 count += 1 - if sign_aware_lte(min_value, 0.0) and sign_aware_lte(0.0, max_value): + if sign_aware_lte(min_value_f, 0.0) and sign_aware_lte(0.0, max_value_f): # account for 0.0 count += 1 return count - raise NotImplementedError(f"unhandled ir_type {ir_type}") + raise NotImplementedError(f"unhandled choice_type {choice_type}") # In theory, this is a strict superset of the functionality of compute_max_children; # -# assert len(all_children(ir_type, kwargs)) == compute_max_children(ir_type, kwargs) +# assert len(all_children(choice_type, constraints)) == compute_max_children(choice_type, constraints) # # In practice, we maintain two distinct implementations for efficiency and space # reasons. If you just need the number of children, it is cheaper to use # compute_max_children than to reify the list of children (only to immediately # throw it away). -def all_children(ir_type, kwargs): - if ir_type == "integer": - min_value = kwargs["min_value"] - max_value = kwargs["max_value"] +def _floats_between(a: float, b: float) -> Generator[float, None, None]: + for n in range(float_to_int(a), float_to_int(b) + 1): + yield int_to_float(n) - if min_value is None and max_value is None: - # full 128 bit range. 
- yield from range(-(2**127) + 1, 2**127 - 1) - elif min_value is not None and max_value is not None: - yield from range(min_value, max_value + 1) - else: - assert (min_value is None) ^ (max_value is None) - # hard case: only one bound was specified. Here we probe in 128 bits - # around shrink_towards, and discard those above max_value or below - # min_value respectively. - shrink_towards = kwargs["shrink_towards"] - if min_value is None: - shrink_towards = min(max_value, shrink_towards) - yield from range(shrink_towards - (2**127) + 1, max_value) - else: - assert max_value is None - shrink_towards = max(min_value, shrink_towards) - yield from range(min_value, shrink_towards + (2**127) - 1) - - if ir_type == "boolean": - p = kwargs["p"] - if p <= 2 ** (-64): - yield False - elif p >= (1 - 2 ** (-64)): - yield True - else: - yield from [False, True] - if ir_type == "bytes": - for size in range(kwargs["min_size"], kwargs["max_size"] + 1): - yield from (int_to_bytes(i, size) for i in range(2 ** (8 * size))) - if ir_type == "string": - min_size = kwargs["min_size"] - max_size = kwargs["max_size"] - intervals = kwargs["intervals"] - - # written unidiomatically in order to handle the case of max_size=inf. 
- size = min_size - while size <= max_size: - for ords in itertools.product(intervals, repeat=size): - yield "".join(chr(n) for n in ords) - size += 1 - if ir_type == "float": - - def floats_between(a, b): - for n in range(float_to_int(a), float_to_int(b) + 1): - yield int_to_float(n) - - min_value = kwargs["min_value"] - max_value = kwargs["max_value"] - smallest_nonzero_magnitude = kwargs["smallest_nonzero_magnitude"] +def all_children( + choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT +) -> Generator[ChoiceT, None, None]: + if choice_type != "float": + for index in range(compute_max_children(choice_type, constraints)): + yield choice_from_index(index, choice_type, constraints) + else: + constraints = cast(FloatConstraints, constraints) + # the float ordering is not injective (because of resampling + # out-of-bounds values), so using choice_from_index would result in + # duplicates. This violates invariants in datatree about being able + # to draw unique new children using all_children. + # + # We instead maintain a separate implementation for floats. + # TODO_IR write a better (bijective) ordering for floats and remove this! + min_value = constraints["min_value"] + max_value = constraints["max_value"] + smallest_nonzero_magnitude = constraints["smallest_nonzero_magnitude"] # handle zeroes separately so smallest_nonzero_magnitude can think of # itself as a complete interval (instead of a hole at ±0). @@ -328,15 +319,15 @@ def all_children(ir_type, kwargs): max_point = min(max_value, -smallest_nonzero_magnitude) # float_to_int increases as negative magnitude increases, so # invert order. - yield from floats_between(max_point, min_value) + yield from _floats_between(max_point, min_value) else: # case: straddles midpoint (which is between -0.0 and 0.0). 
- yield from floats_between(-smallest_nonzero_magnitude, min_value) - yield from floats_between(smallest_nonzero_magnitude, max_value) + yield from _floats_between(-smallest_nonzero_magnitude, min_value) + yield from _floats_between(smallest_nonzero_magnitude, max_value) else: # case: both positive. min_point = max(min_value, smallest_nonzero_magnitude) - yield from floats_between(min_point, max_value) + yield from _floats_between(min_point, max_value) @attr.s(slots=True) @@ -350,7 +341,7 @@ class TreeNode: Conceptually, you can unfold a single TreeNode storing n values in its lists into a sequence of n nodes, each a child of the last. In other words, - (kwargs[i], values[i], ir_types[i]) corresponds to the single node at index + (constraints[i], values[i], choice_types[i]) corresponds to the single node at index i. Note that if a TreeNode represents a choice (i.e. the nodes cannot be compacted @@ -403,17 +394,17 @@ class TreeNode: └───┘ └───┘ """ - # The kwargs, value, and ir_types of the nodes stored here. These always + # The constraints, value, and choice_types of the nodes stored here. These always # have the same length. The values at index i belong to node i. - kwargs: list[IRKWargsType] = attr.ib(factory=list) - values: list[IRType] = attr.ib(factory=list) - ir_types: list[IRTypeName] = attr.ib(factory=list) + constraints: list[ChoiceConstraintsT] = attr.ib(factory=list) + values: list[ChoiceT] = attr.ib(factory=list) + choice_types: list[ChoiceTypeT] = attr.ib(factory=list) # The indices of nodes which had forced values. # # Stored as None if no indices have been forced, purely for space saving # reasons (we force quite rarely). - __forced: Optional[set] = attr.ib(default=None, init=False) + __forced: Optional[set[int]] = attr.ib(default=None, init=False) # What happens next after drawing these nodes. (conceptually, "what is the # child/children of the last node stored here"). 
@@ -434,12 +425,12 @@ class TreeNode: is_exhausted: bool = attr.ib(default=False, init=False) @property - def forced(self): + def forced(self) -> AbstractSet[int]: if not self.__forced: return EMPTY return self.__forced - def mark_forced(self, i): + def mark_forced(self, i: int) -> None: """ Note that the draw at node i was forced. """ @@ -448,7 +439,7 @@ class TreeNode: self.__forced = set() self.__forced.add(i) - def split_at(self, i): + def split_at(self, i: int) -> None: """ Splits the tree so that it can incorporate a decision at the draw call corresponding to the node at position i. @@ -457,31 +448,33 @@ class TreeNode: """ if i in self.forced: - inconsistent_generation() + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) assert not self.is_exhausted key = self.values[i] child = TreeNode( - ir_types=self.ir_types[i + 1 :], - kwargs=self.kwargs[i + 1 :], + choice_types=self.choice_types[i + 1 :], + constraints=self.constraints[i + 1 :], values=self.values[i + 1 :], transition=self.transition, ) self.transition = Branch( - kwargs=self.kwargs[i], ir_type=self.ir_types[i], children={key: child} + constraints=self.constraints[i], + choice_type=self.choice_types[i], + children={key: child}, ) if self.__forced is not None: child.__forced = {j - i - 1 for j in self.__forced if j > i} self.__forced = {j for j in self.__forced if j < i} child.check_exhausted() - del self.ir_types[i:] + del self.choice_types[i:] del self.values[i:] - del self.kwargs[i:] - assert len(self.values) == len(self.kwargs) == len(self.ir_types) == i + del self.constraints[i:] + assert len(self.values) == len(self.constraints) == len(self.choice_types) == i - def check_exhausted(self): + def check_exhausted(self) -> bool: """ Recalculates is_exhausted if necessary, and then returns it. 
@@ -525,16 +518,20 @@ class TreeNode: ) return self.is_exhausted - def _repr_pretty_(self, p, cycle): + def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: assert cycle is False indent = 0 - for i, (ir_type, kwargs, value) in enumerate( - zip(self.ir_types, self.kwargs, self.values) + for i, (choice_type, constraints, value) in enumerate( + zip(self.choice_types, self.constraints, self.values) ): with p.indent(indent): if i > 0: p.break_() - p.text(_node_pretty(ir_type, value, kwargs, forced=i in self.forced)) + p.text( + _node_pretty( + choice_type, value, constraints, forced=i in self.forced + ) + ) indent += 2 with p.indent(indent): @@ -558,7 +555,7 @@ class DataTree: DataTree tracks the following: - - Draws, at the ir level (with some ir_type, e.g. "integer") + - Drawn choices in the choice sequence - ConjectureData.draw_integer() - ConjectureData.draw_float() - ConjectureData.draw_string() @@ -567,8 +564,8 @@ class DataTree: - Test conclusions (with some Status, e.g. Status.VALID) - ConjectureData.conclude_test() - A DataTree is — surprise — a *tree*. A node in this tree is either a draw with - some value, a test conclusion with some Status, or a special `Killed` value, + A DataTree is — surprise — a *tree*. A node in this tree is either a choice draw + with some value, a test conclusion with some Status, or a special `Killed` value, which denotes that further draws may exist beyond this node but should not be considered worth exploring when generating novel prefixes. A node is a leaf iff it is a conclusion or Killed. @@ -671,15 +668,15 @@ class DataTree: intuition. In practice, there are some implementation details to be aware of. - - In draw nodes, we store the kwargs used in addition to the value drawn. + - In draw nodes, we store the constraints used in addition to the value drawn. E.g. 
the node corresponding to data.draw_float(min_value=1.0, max_value=1.5) would store {"min_value": 1.0, "max_value": 1.5, ...} (default values for - other kwargs omitted). + other constraints omitted). - The kwargs parameters have the potential to change both the range of + The constraints parameters have the potential to change both the range of possible outputs of a node, and the probability distribution within that range, so we need to use these when drawing in DataTree as well. We draw - values using these kwargs when (1) generating a novel value for a node + values using these constraints when (1) generating a novel value for a node and (2) choosing a random child when traversing the tree. - For space efficiency, rather than tracking the full tree structure, we @@ -690,19 +687,19 @@ class DataTree: See TreeNode for more information. """ - def __init__(self): - self.root = TreeNode() - self._children_cache = {} + def __init__(self) -> None: + self.root: TreeNode = TreeNode() + self._children_cache: dict[ChoiceT, ChildrenCacheValueT] = {} @property - def is_exhausted(self): + def is_exhausted(self) -> bool: """ Returns True if every node is exhausted, and therefore the tree has been fully explored. """ return self.root.is_exhausted - def generate_novel_prefix(self, random): + def generate_novel_prefix(self, random: Random) -> tuple[ChoiceT, ...]: """Generate a short random string that (after rewriting) is not a prefix of any buffer previously added to the tree. @@ -710,34 +707,35 @@ class DataTree: for it to be uniform at random, but previous attempts to do that have proven too expensive. 
""" - from hypothesis.internal.conjecture.data import IRNode - assert not self.is_exhausted - novel_prefix = [] + prefix = [] - def append_node(node): - if node.ir_type == "float": - node.value = int_to_float(node.value) - novel_prefix.append(node) + def append_choice(choice_type: ChoiceTypeT, choice: ChoiceT) -> None: + if choice_type == "float": + assert isinstance(choice, int) + choice = int_to_float(choice) + prefix.append(choice) current_node = self.root while True: assert not current_node.is_exhausted - for i, (ir_type, kwargs, value) in enumerate( - zip(current_node.ir_types, current_node.kwargs, current_node.values) + for i, (choice_type, constraints, value) in enumerate( + zip( + current_node.choice_types, + current_node.constraints, + current_node.values, + ) ): if i in current_node.forced: - append_node( - IRNode( - ir_type=ir_type, value=value, kwargs=kwargs, was_forced=True - ) - ) + append_choice(choice_type, value) else: attempts = 0 while True: if attempts <= 10: try: - node = self._draw(ir_type, kwargs, random=random) + node_value = self._draw( + choice_type, constraints, random=random + ) except StopTest: # pragma: no cover # it is possible that drawing from a fresh data can # overrun BUFFER_SIZE, due to eg unlucky rejection sampling @@ -745,24 +743,30 @@ class DataTree: attempts += 1 continue else: - node = self._draw_from_cache( - ir_type, kwargs, key=id(current_node), random=random + node_value = self._draw_from_cache( + choice_type, + constraints, + key=id(current_node), + random=random, ) - if node.value != value: - append_node(node) + if node_value != value: + append_choice(choice_type, node_value) break attempts += 1 self._reject_child( - ir_type, kwargs, child=node.value, key=id(current_node) + choice_type, + constraints, + child=node_value, + key=id(current_node), ) # We've now found a value that is allowed to # vary, so what follows is not fixed. 
- return tuple(novel_prefix) + return tuple(prefix) else: assert not isinstance(current_node.transition, (Conclusion, Killed)) if current_node.transition is None: - return tuple(novel_prefix) + return tuple(prefix) branch = current_node.transition assert isinstance(branch, Branch) @@ -770,28 +774,34 @@ class DataTree: while True: if attempts <= 10: try: - node = self._draw( - branch.ir_type, branch.kwargs, random=random + node_value = self._draw( + branch.choice_type, branch.constraints, random=random ) except StopTest: # pragma: no cover attempts += 1 continue else: - node = self._draw_from_cache( - branch.ir_type, branch.kwargs, key=id(branch), random=random + node_value = self._draw_from_cache( + branch.choice_type, + branch.constraints, + key=id(branch), + random=random, ) try: - child = branch.children[node.value] + child = branch.children[node_value] except KeyError: - append_node(node) - return tuple(novel_prefix) + append_choice(branch.choice_type, node_value) + return tuple(prefix) if not child.is_exhausted: - append_node(node) + append_choice(branch.choice_type, node_value) current_node = child break attempts += 1 self._reject_child( - branch.ir_type, branch.kwargs, child=node.value, key=id(branch) + branch.choice_type, + branch.constraints, + child=node_value, + key=id(branch), ) # We don't expect this assertion to ever fire, but coverage @@ -803,43 +813,45 @@ class DataTree: or any(not v.is_exhausted for v in branch.children.values()) ) - def rewrite(self, nodes): + def rewrite(self, choices): """Use previously seen ConjectureData objects to return a tuple of the rewritten choice sequence and the status we would get from running that with the test function. 
If the status cannot be predicted from the existing values it will be None.""" - data = ConjectureData.for_ir_tree(nodes) + data = ConjectureData.for_choices(choices) try: self.simulate_test_function(data) - return (data.ir_nodes, data.status) + return (data.choices, data.status) except PreviouslyUnseenBehaviour: - return (nodes, None) + return (choices, None) - def simulate_test_function(self, data): + def simulate_test_function(self, data: ConjectureData) -> None: """Run a simulated version of the test function recorded by - this tree. Note that this does not currently call ``stop_example`` - or ``start_example`` as these are not currently recorded in the + this tree. Note that this does not currently call ``stop_span`` + or ``start_span`` as these are not currently recorded in the tree. This will likely change in future.""" node = self.root - def draw(ir_type, kwargs, *, forced=None, convert_forced=True): - if ir_type == "float" and forced is not None and convert_forced: + def draw(choice_type, constraints, *, forced=None, convert_forced=True): + if choice_type == "float" and forced is not None and convert_forced: forced = int_to_float(forced) - draw_func = getattr(data, f"draw_{ir_type}") - value = draw_func(**kwargs, forced=forced) + draw_func = getattr(data, f"draw_{choice_type}") + value = draw_func(**constraints, forced=forced) - if ir_type == "float": + if choice_type == "float": value = float_to_int(value) return value try: while True: - for i, (ir_type, kwargs, previous) in enumerate( - zip(node.ir_types, node.kwargs, node.values) + for i, (choice_type, constraints, previous) in enumerate( + zip(node.choice_types, node.constraints, node.values) ): v = draw( - ir_type, kwargs, forced=previous if i in node.forced else None + choice_type, + constraints, + forced=previous if i in node.forced else None, ) if v != previous: raise PreviouslyUnseenBehaviour @@ -849,7 +861,7 @@ class DataTree: elif node.transition is None: raise PreviouslyUnseenBehaviour elif 
isinstance(node.transition, Branch): - v = draw(node.transition.ir_type, node.transition.kwargs) + v = draw(node.transition.choice_type, node.transition.constraints) try: node = node.transition.children[v] except KeyError as err: @@ -864,10 +876,16 @@ class DataTree: def new_observer(self): return TreeRecordingObserver(self) - def _draw(self, ir_type, kwargs, *, random): - from hypothesis.internal.conjecture.data import IRNode, ir_to_buffer + def _draw( + self, + choice_type: ChoiceTypeT, + constraints: ChoiceConstraintsT, + *, + random: Random, + ) -> ChoiceT: + from hypothesis.internal.conjecture.data import draw_choice - (value, buf) = ir_to_buffer(ir_type, kwargs, random=random) + value = draw_choice(choice_type, constraints, random=random) # using floats as keys into branch.children breaks things, because # e.g. hash(0.0) == hash(-0.0) would collide as keys when they are # in fact distinct child branches. @@ -876,11 +894,13 @@ class DataTree: # float key is in its bits form (as a key into branch.children) and # when it is in its float form (as a value we want to write to the # buffer), and converting between the two forms as appropriate. - if ir_type == "float": + if choice_type == "float": value = float_to_int(value) - return IRNode(ir_type=ir_type, value=value, kwargs=kwargs, was_forced=False) + return value - def _get_children_cache(self, ir_type, kwargs, *, key): + def _get_children_cache( + self, choice_type: ChoiceTypeT, constraints: ChoiceConstraintsT, *, key: ChoiceT + ) -> ChildrenCacheValueT: # cache the state of the children generator per node/branch (passed as # `key` here), such that we track which children we've already tried # for this branch across draws. @@ -891,18 +911,23 @@ class DataTree: # with. Whenever we need to top up this list, we will draw a new value # from the generator. 
if key not in self._children_cache: - generator = all_children(ir_type, kwargs) - children = [] - rejected = set() + generator = all_children(choice_type, constraints) + children: list[ChoiceT] = [] + rejected: set[ChoiceT] = set() self._children_cache[key] = (generator, children, rejected) return self._children_cache[key] - def _draw_from_cache(self, ir_type, kwargs, *, key, random): - from hypothesis.internal.conjecture.data import IRNode - + def _draw_from_cache( + self, + choice_type: ChoiceTypeT, + constraints: ChoiceConstraintsT, + *, + key: ChoiceT, + random: Random, + ) -> ChoiceT: (generator, children, rejected) = self._get_children_cache( - ir_type, kwargs, key=key + choice_type, constraints, key=key ) # Keep a stock of 100 potentially-valid children at all times. # This number is chosen to balance memory/speed vs randomness. Ideally @@ -912,7 +937,8 @@ class DataTree: # annoying. if len(children) < 100: # pragma: no branch for v in generator: - if ir_type == "float": + if choice_type == "float": + assert isinstance(v, float) v = float_to_int(v) if v in rejected: continue @@ -920,12 +946,18 @@ class DataTree: if len(children) >= 100: break - value = random.choice(children) - return IRNode(ir_type=ir_type, value=value, kwargs=kwargs, was_forced=True) + return random.choice(children) - def _reject_child(self, ir_type, kwargs, *, child, key): + def _reject_child( + self, + choice_type: ChoiceTypeT, + constraints: ChoiceConstraintsT, + *, + child: ChoiceT, + key: ChoiceT, + ) -> None: (_generator, children, rejected) = self._get_children_cache( - ir_type, kwargs, key=key + choice_type, constraints, key=key ) rejected.add(child) # we remove a child from the list of possible children *only* when it is @@ -947,50 +979,54 @@ class DataTree: if child in children: children.remove(child) - def _repr_pretty_(self, p, cycle): + def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None: assert cycle is False - return p.pretty(self.root) + 
p.pretty(self.root) class TreeRecordingObserver(DataObserver): - def __init__(self, tree): - self.__current_node = tree.root - self.__index_in_current_node = 0 - self.__trail = [self.__current_node] - self.killed = False + def __init__(self, tree: DataTree): + self.__current_node: TreeNode = tree.root + self.__index_in_current_node: int = 0 + self.__trail: list[TreeNode] = [self.__current_node] + self.killed: bool = False def draw_integer( - self, value: int, *, was_forced: bool, kwargs: IntegerKWargs + self, value: int, *, was_forced: bool, constraints: IntegerConstraints ) -> None: - self.draw_value("integer", value, was_forced=was_forced, kwargs=kwargs) + self.draw_value( + "integer", value, was_forced=was_forced, constraints=constraints + ) def draw_float( - self, value: float, *, was_forced: bool, kwargs: FloatKWargs + self, value: float, *, was_forced: bool, constraints: FloatConstraints ) -> None: - self.draw_value("float", value, was_forced=was_forced, kwargs=kwargs) + self.draw_value("float", value, was_forced=was_forced, constraints=constraints) def draw_string( - self, value: str, *, was_forced: bool, kwargs: StringKWargs + self, value: str, *, was_forced: bool, constraints: StringConstraints ) -> None: - self.draw_value("string", value, was_forced=was_forced, kwargs=kwargs) + self.draw_value("string", value, was_forced=was_forced, constraints=constraints) def draw_bytes( - self, value: bytes, *, was_forced: bool, kwargs: BytesKWargs + self, value: bytes, *, was_forced: bool, constraints: BytesConstraints ) -> None: - self.draw_value("bytes", value, was_forced=was_forced, kwargs=kwargs) + self.draw_value("bytes", value, was_forced=was_forced, constraints=constraints) def draw_boolean( - self, value: bool, *, was_forced: bool, kwargs: BooleanKWargs + self, value: bool, *, was_forced: bool, constraints: BooleanConstraints ) -> None: - self.draw_value("boolean", value, was_forced=was_forced, kwargs=kwargs) + self.draw_value( + "boolean", value, 
was_forced=was_forced, constraints=constraints + ) def draw_value( self, - ir_type: IRTypeName, - value: IRType, + choice_type: ChoiceTypeT, + value: ChoiceT, *, was_forced: bool, - kwargs: IRKWargsType, + constraints: ChoiceConstraintsT, ) -> None: i = self.__index_in_current_node self.__index_in_current_node += 1 @@ -999,10 +1035,13 @@ class TreeRecordingObserver(DataObserver): if isinstance(value, float): value = float_to_int(value) - assert len(node.kwargs) == len(node.values) == len(node.ir_types) + assert len(node.constraints) == len(node.values) == len(node.choice_types) if i < len(node.values): - if ir_type != node.ir_types[i] or kwargs != node.kwargs[i]: - inconsistent_generation() + if ( + choice_type != node.choice_types[i] + or constraints != node.constraints[i] + ): + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) # Note that we don't check whether a previously # forced value is now free. That will be caught # if we ever split the node there, but otherwise @@ -1010,19 +1049,20 @@ class TreeRecordingObserver(DataObserver): # means we skip a hash set lookup on every # draw and that's a pretty niche failure mode. if was_forced and i not in node.forced: - inconsistent_generation() + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) if value != node.values[i]: node.split_at(i) assert i == len(node.values) new_node = TreeNode() + assert isinstance(node.transition, Branch) node.transition.children[value] = new_node self.__current_node = new_node self.__index_in_current_node = 0 else: trans = node.transition if trans is None: - node.ir_types.append(ir_type) - node.kwargs.append(kwargs) + node.choice_types.append(choice_type) + node.constraints.append(constraints) node.values.append(value) if was_forced: node.mark_forced(i) @@ -1042,19 +1082,23 @@ class TreeRecordingObserver(DataObserver): # An alternative is not writing such choices to the tree at # all, and thus guaranteeing that each node has at least 2 max # children. 
- if compute_max_children(ir_type, kwargs) == 1 and not was_forced: + if ( + compute_max_children(choice_type, constraints) == 1 + and not was_forced + ): node.split_at(i) + assert isinstance(node.transition, Branch) self.__current_node = node.transition.children[value] self.__index_in_current_node = 0 elif isinstance(trans, Conclusion): assert trans.status != Status.OVERRUN # We tried to draw where history says we should have # stopped - inconsistent_generation() + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) else: assert isinstance(trans, Branch), trans - if ir_type != trans.ir_type or kwargs != trans.kwargs: - inconsistent_generation() + if choice_type != trans.choice_type or constraints != trans.constraints: + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) try: self.__current_node = trans.children[value] except KeyError: @@ -1063,7 +1107,7 @@ class TreeRecordingObserver(DataObserver): if self.__trail[-1] is not self.__current_node: self.__trail.append(self.__current_node) - def kill_branch(self): + def kill_branch(self) -> None: """Mark this part of the tree as not worth re-exploring.""" if self.killed: return @@ -1074,7 +1118,7 @@ class TreeRecordingObserver(DataObserver): self.__current_node.transition is not None and not isinstance(self.__current_node.transition, Killed) ): - inconsistent_generation() + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) if self.__current_node.transition is None: self.__current_node.transition = Killed(TreeNode()) @@ -1084,7 +1128,9 @@ class TreeRecordingObserver(DataObserver): self.__index_in_current_node = 0 self.__trail.append(self.__current_node) - def conclude_test(self, status, interesting_origin): + def conclude_test( + self, status: Status, interesting_origin: Optional[InterestingOrigin] + ) -> None: """Says that ``status`` occurred at node ``node``. 
This updates the node if necessary and checks for consistency.""" if status == Status.OVERRUN: @@ -1093,7 +1139,7 @@ class TreeRecordingObserver(DataObserver): node = self.__current_node if i < len(node.values) or isinstance(node.transition, Branch): - inconsistent_generation() + raise FlakyStrategyDefinition(_FLAKY_STRAT_MSG) new_transition = Conclusion(status, interesting_origin) @@ -1123,7 +1169,7 @@ class TreeRecordingObserver(DataObserver): if not self.killed: self.__update_exhausted() - def __update_exhausted(self): + def __update_exhausted(self) -> None: for t in reversed(self.__trail): # Any node we've traversed might have now become exhausted. # We check from the right. As soon as we hit a node that diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/__init__.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/__init__.py index 8beb4ad5883..1b9d033f132 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/__init__.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/__init__.py @@ -541,7 +541,7 @@ class DFA: alphabet = sorted(set(self.alphabet) | set(other.alphabet)) - queue = deque([((self.start, other.start))]) + queue = deque([(self.start, other.start)]) while queue: self_state, other_state = queue.popleft() diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py index 2b182ed6750..b0ea8ba62c6 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py @@ -18,24 +18,13 @@ from contextlib import contextmanager, suppress from datetime import timedelta from enum import Enum from random import Random, getrandbits -from typing import ( - Any, - Callable, - Final, - List, - Literal, - NoReturn, - Optional, - Union, - cast, - overload, -) +from typing import Callable, Final, 
List, Literal, NoReturn, Optional, Union, cast import attr from hypothesis import HealthCheck, Phase, Verbosity, settings as Settings from hypothesis._settings import local_settings -from hypothesis.database import ExampleDatabase +from hypothesis.database import ExampleDatabase, choices_from_bytes, choices_to_bytes from hypothesis.errors import ( BackendCannotProceed, FlakyReplay, @@ -45,24 +34,21 @@ from hypothesis.errors import ( ) from hypothesis.internal.cache import LRUReusedCache from hypothesis.internal.compat import NotRequired, TypeAlias, TypedDict, ceil, override +from hypothesis.internal.conjecture.choice import ( + ChoiceConstraintsT, + ChoiceKeyT, + ChoiceNode, + ChoiceT, + ChoiceTemplate, + choices_key, +) from hypothesis.internal.conjecture.data import ( - AVAILABLE_PROVIDERS, ConjectureData, ConjectureResult, DataObserver, - HypothesisProvider, - InterestingOrigin, - IRKWargsType, - IRNode, - NodeTemplate, Overrun, - PrimitiveProvider, Status, _Overrun, - ir_kwargs_key, - ir_size, - ir_size_nodes, - ir_value_key, ) from hypothesis.internal.conjecture.datatree import ( DataTree, @@ -74,7 +60,13 @@ from hypothesis.internal.conjecture.junkdrawer import ( startswith, ) from hypothesis.internal.conjecture.pareto import NO_SCORE, ParetoFront, ParetoOptimiser -from hypothesis.internal.conjecture.shrinker import Shrinker, sort_key +from hypothesis.internal.conjecture.providers import ( + AVAILABLE_PROVIDERS, + HypothesisProvider, + PrimitiveProvider, +) +from hypothesis.internal.conjecture.shrinker import Shrinker, ShrinkPredicateT, sort_key +from hypothesis.internal.escalation import InterestingOrigin from hypothesis.internal.healthcheck import fail_health_check from hypothesis.reporting import base_report, report @@ -83,7 +75,6 @@ CACHE_SIZE: Final[int] = 10000 MUTATION_POOL_SIZE: Final[int] = 100 MIN_TEST_CALLS: Final[int] = 10 BUFFER_SIZE: Final[int] = 8 * 1024 -BUFFER_SIZE_IR: Final[int] = 8 * 1024 # If the shrinking phase takes more than five 
minutes, abort it early and print # a warning. Many CI systems will kill a build after around ten minutes with @@ -95,6 +86,10 @@ MAX_SHRINKING_SECONDS: Final[int] = 300 Ls: TypeAlias = list["Ls | int"] +def shortlex(s): + return (len(s), s) + + @attr.s class HealthCheckState: valid_examples: int = attr.ib(default=0) @@ -200,15 +195,46 @@ StatisticsDict = TypedDict( ) -def truncate_nodes_to_size(nodes: Sequence[IRNode], size: int) -> tuple[IRNode, ...]: - s = 0 - i = 0 - for node in nodes: - s += ir_size([node.value]) - if s > size: - break - i += 1 - return tuple(nodes[:i]) +def choice_count(choices: Sequence[Union[ChoiceT, ChoiceTemplate]]) -> Optional[int]: + count = 0 + for choice in choices: + if isinstance(choice, ChoiceTemplate): + if choice.count is None: + return None + count += choice.count + else: + count += 1 + return count + + +class DiscardObserver(DataObserver): + @override + def kill_branch(self) -> NoReturn: + raise ContainsDiscard + + +def realize_choices(data: ConjectureData) -> None: + for node in data.nodes: + value = data.provider.realize(node.value) + expected_type = { + "string": str, + "float": float, + "integer": int, + "boolean": bool, + "bytes": bytes, + }[node.type] + if type(value) is not expected_type: + raise HypothesisException( + f"expected {expected_type} from " + f"{data.provider.realize.__qualname__}, got {type(value)}" + ) + + constraints = cast( + ChoiceConstraintsT, + {k: data.provider.realize(v) for k, v in node.constraints.items()}, + ) + node.value = value + node.constraints = constraints class ConjectureRunner: @@ -228,6 +254,8 @@ class ConjectureRunner: self.call_count: int = 0 self.misaligned_count: int = 0 self.valid_examples: int = 0 + self.invalid_examples: int = 0 + self.overrun_examples: int = 0 self.random: Random = random or Random(getrandbits(128)) self.database_key: Optional[bytes] = database_key self.ignore_limits: bool = ignore_limits @@ -239,7 +267,9 @@ class ConjectureRunner: self.stats_per_test_case: 
list[CallStats] = [] # At runtime, the keys are only ever type `InterestingOrigin`, but can be `None` during tests. - self.interesting_examples: dict[InterestingOrigin, ConjectureResult] = {} + self.interesting_examples: dict[ + Optional[InterestingOrigin], ConjectureResult + ] = {} # We use call_count because there may be few possible valid_examples. self.first_bug_found_at: Optional[int] = None self.last_bug_found_at: Optional[int] = None @@ -255,7 +285,7 @@ class ConjectureRunner: self.settings.backend ) - self.best_observed_targets: "defaultdict[str, float]" = defaultdict( + self.best_observed_targets: defaultdict[str, float] = defaultdict( lambda: NO_SCORE ) self.best_examples_of_observed_targets: dict[str, ConjectureResult] = {} @@ -273,16 +303,24 @@ class ConjectureRunner: # from running a buffer without recalculating, especially during # shrinking where we need to know about the structure of the # executed test case. - self.__data_cache = LRUReusedCache(CACHE_SIZE) - self.__data_cache_ir = LRUReusedCache(CACHE_SIZE) + self.__data_cache = LRUReusedCache[ + tuple[ChoiceKeyT, ...], Union[ConjectureResult, _Overrun] + ](CACHE_SIZE) - self.reused_previously_shrunk_test_case = False + self.reused_previously_shrunk_test_case: bool = False self.__pending_call_explanation: Optional[str] = None self._switch_to_hypothesis_provider: bool = False - self.__failed_realize_count = 0 - self._verified_by = None # note unsound verification by alt backends + self.__failed_realize_count: int = 0 + # note unsound verification by alt backends + self._verified_by: Optional[str] = None + + @property + def using_hypothesis_backend(self) -> bool: + return ( + self.settings.backend == "hypothesis" or self._switch_to_hypothesis_provider + ) def explain_next_call_as(self, explanation: str) -> None: self.__pending_call_explanation = explanation @@ -314,7 +352,7 @@ class ConjectureRunner: return Phase.target in self.settings.phases def __tree_is_exhausted(self) -> bool: - return 
self.tree.is_exhausted and self.settings.backend == "hypothesis" + return self.tree.is_exhausted and self.using_hypothesis_backend def __stoppable_test_function(self, data: ConjectureData) -> None: """Run ``self._test_function``, but convert a ``StopTest`` exception @@ -339,67 +377,38 @@ class ConjectureRunner: # correct engine. raise - def _cache_key_ir( - self, - *, - nodes: Optional[Sequence[IRNode]] = None, - data: Union[ConjectureData, ConjectureResult, None] = None, - ) -> tuple[tuple[Any, ...], ...]: - assert (nodes is not None) ^ (data is not None) - if data is not None: - nodes = data.ir_nodes - assert nodes is not None - - # intentionally drop was_forced from equality here, because the was_forced - # of node prefixes on ConjectureData has no impact on that data's result - return tuple( - ( - node.ir_type, - ir_value_key(node.ir_type, node.value), - ir_kwargs_key(node.ir_type, node.kwargs), - ) - for node in nodes - ) + def _cache_key(self, choices: Sequence[ChoiceT]) -> tuple[ChoiceKeyT, ...]: + return choices_key(choices) def _cache(self, data: ConjectureData) -> None: result = data.as_result() - self.__data_cache[data.buffer] = result - - # interesting buffer-based data can mislead the shrinker if we cache them. - # - # @given(st.integers()) - # def f(n): - # assert n < 100 - # - # may generate two counterexamples, n=101 and n=m > 101, in that order, - # where the buffer corresponding to n is large due to eg failed probes. - # We shrink m and eventually try n=101, but it is cached to a large buffer - # and so the best we can do is n=102, a non-ideal shrink. - # - # We can cache ir-based buffers fine, which always correspond to the - # smallest buffer via forced=. The overhead here is small because almost - # all interesting data are ir-based via the shrinker (and that overhead - # will tend towards zero as we move generation to the ir). 
- if data.ir_prefix is not None or data.status < Status.INTERESTING: - key = self._cache_key_ir(data=data) - self.__data_cache_ir[key] = result + key = self._cache_key(data.choices) + self.__data_cache[key] = result - def cached_test_function_ir( + def cached_test_function( self, - nodes: Sequence[Union[IRNode, NodeTemplate]], + choices: Sequence[Union[ChoiceT, ChoiceTemplate]], *, error_on_discard: bool = False, - extend: int = 0, + extend: Union[int, Literal["full"]] = 0, ) -> Union[ConjectureResult, _Overrun]: + """ + If ``error_on_discard`` is set to True this will raise ``ContainsDiscard`` + in preference to running the actual test function. This is to allow us + to skip test cases we expect to be redundant in some cases. Note that + it may be the case that we don't raise ``ContainsDiscard`` even if the + result has discards if we cannot determine from previous runs whether + it will have a discard. + """ # node templates represent a not-yet-filled hole and therefore cannot # be cached or retrieved from the cache. - if not any(isinstance(node, NodeTemplate) for node in nodes): + if not any(isinstance(choice, ChoiceTemplate) for choice in choices): # this type cast is validated by the isinstance check above (ie, there - # are no NodeTemplate elements). - nodes = cast(Sequence[IRNode], nodes) - key = self._cache_key_ir(nodes=nodes) + # are no ChoiceTemplate elements). + choices = cast(Sequence[ChoiceT], choices) + key = self._cache_key(choices) try: - cached = self.__data_cache_ir[key] + cached = self.__data_cache[key] # if we have a cached overrun for this key, but we're allowing extensions # of the nodes, it could in fact run to a valid data if we try. 
if extend == 0 or cached.status is not Status.OVERRUN: @@ -407,7 +416,12 @@ class ConjectureRunner: except KeyError: pass - max_length = min(BUFFER_SIZE_IR, ir_size_nodes(nodes) + extend) + if extend == "full": + max_length = None + elif (count := choice_count(choices)) is None: + max_length = None + else: + max_length = count + extend # explicitly use a no-op DataObserver here instead of a TreeRecordingObserver. # The reason is we don't expect simulate_test_function to explore new choices @@ -415,36 +429,35 @@ class ConjectureRunner: # TreeRecordingObserver tracking those calls. trial_observer: Optional[DataObserver] = DataObserver() if error_on_discard: - - class DiscardObserver(DataObserver): - @override - def kill_branch(self) -> NoReturn: - raise ContainsDiscard - trial_observer = DiscardObserver() try: - trial_data = self.new_conjecture_data_ir( - nodes, observer=trial_observer, max_length=max_length + trial_data = self.new_conjecture_data( + choices, observer=trial_observer, max_choices=max_length ) self.tree.simulate_test_function(trial_data) except PreviouslyUnseenBehaviour: pass else: trial_data.freeze() - key = self._cache_key_ir(data=trial_data) - if trial_data.status is Status.OVERRUN: + key = self._cache_key(trial_data.choices) + if trial_data.status > Status.OVERRUN: + try: + return self.__data_cache[key] + except KeyError: + pass + else: # if we simulated to an overrun, then we our result is certainly # an overrun; no need to consult the cache. (and we store this result # for simulation-less lookup later). - self.__data_cache_ir[key] = Overrun + self.__data_cache[key] = Overrun return Overrun try: - return self.__data_cache_ir[key] + return self.__data_cache[key] except KeyError: pass - data = self.new_conjecture_data_ir(nodes, max_length=max_length) + data = self.new_conjecture_data(choices, max_choices=max_length) # note that calling test_function caches `data` for us, for both an ir # tree key and a buffer key. 
self.test_function(data) @@ -475,13 +488,22 @@ class ConjectureRunner: and (self.__failed_realize_count / self.call_count) > 0.2 ): self._switch_to_hypothesis_provider = True + # skip the post-test-case tracking; we're pretending this never happened + interrupted = True + data.cannot_proceed_scope = exc.scope + data.freeze() + return except BaseException: - self.save_buffer(data.buffer) + data.freeze() + if self.settings.backend != "hypothesis": + realize_choices(data) + self.save_choices(data.choices) raise finally: # No branch, because if we're interrupted we always raise # the KeyboardInterrupt, never continue to the code below. if not interrupted: # pragma: no branch + assert data.cannot_proceed_scope is None data.freeze() call_stats: CallStats = { "status": data.status.name.lower(), @@ -494,31 +516,7 @@ class ConjectureRunner: } self.stats_per_test_case.append(call_stats) if self.settings.backend != "hypothesis": - for node in data.ir_nodes: - value = data.provider.realize(node.value) - expected_type = { - "string": str, - "float": float, - "integer": int, - "boolean": bool, - "bytes": bytes, - }[node.ir_type] - if type(value) is not expected_type: - raise HypothesisException( - f"expected {expected_type} from " - f"{data.provider.realize.__qualname__}, " - f"got {type(value)}" - ) - - kwargs = cast( - IRKWargsType, - { - k: data.provider.realize(v) - for k, v in node.kwargs.items() - }, - ) - node.value = value - node.kwargs = kwargs + realize_choices(data) self._cache(data) if data.misaligned_at is not None: # pragma: no branch # coverage bug? 
@@ -531,9 +529,7 @@ class ConjectureRunner: and self.pareto_front is not None and self.pareto_front.add(data.as_result()) ): - self.save_buffer(data.buffer, sub_key=b"pareto") - - assert len(data.buffer) <= BUFFER_SIZE + self.save_choices(data.choices, sub_key=b"pareto") if data.status >= Status.VALID: for k, v in data.target_observations.items(): @@ -551,25 +547,27 @@ class ConjectureRunner: if v < existing_score: continue - if v > existing_score or sort_key(data.buffer) < sort_key( - existing_example.buffer + if v > existing_score or sort_key(data.nodes) < sort_key( + existing_example.nodes ): data_as_result = data.as_result() assert not isinstance(data_as_result, _Overrun) self.best_examples_of_observed_targets[k] = data_as_result - if data.status == Status.VALID: + if data.status is Status.VALID: self.valid_examples += 1 + if data.status is Status.INVALID: + self.invalid_examples += 1 + if data.status is Status.OVERRUN: + self.overrun_examples += 1 if data.status == Status.INTERESTING: - if self.settings.backend != "hypothesis": - # drive the ir tree through the test function to convert it - # to a buffer + if not self.using_hypothesis_backend: + # replay this failure on the hypothesis backend to ensure it still + # finds a failure. otherwise, it is flaky. initial_origin = data.interesting_origin - initial_traceback = getattr( - data.extra_information, "_expected_traceback", None - ) - data = ConjectureData.for_ir_tree(data.ir_nodes) + initial_traceback = data.expected_traceback + data = ConjectureData.for_choices(data.choices) self.__stoppable_test_function(data) data.freeze() # TODO: Convert to FlakyFailure on the way out. 
Should same-origin @@ -597,23 +595,23 @@ class ConjectureRunner: key = data.interesting_origin changed = False try: - existing = self.interesting_examples[key] # type: ignore + existing = self.interesting_examples[key] except KeyError: changed = True self.last_bug_found_at = self.call_count if self.first_bug_found_at is None: self.first_bug_found_at = self.call_count else: - if sort_key(data.buffer) < sort_key(existing.buffer): + if sort_key(data.nodes) < sort_key(existing.nodes): self.shrinks += 1 - self.downgrade_buffer(existing.buffer) - self.__data_cache.unpin(existing.buffer) + self.downgrade_choices(existing.choices) + self.__data_cache.unpin(self._cache_key(existing.choices)) changed = True if changed: - self.save_buffer(data.buffer) + self.save_choices(data.choices) self.interesting_examples[key] = data.as_result() # type: ignore - self.__data_cache.pin(data.buffer, data.as_result()) + self.__data_cache.pin(self._cache_key(data.choices), data.as_result()) self.shrunk_examples.discard(key) if self.shrinks >= MAX_SHRINKS: @@ -655,10 +653,10 @@ class ConjectureRunner: self.record_for_health_check(data) - def on_pareto_evict(self, data: ConjectureData) -> None: - self.settings.database.delete(self.pareto_key, data.buffer) + def on_pareto_evict(self, data: ConjectureResult) -> None: + self.settings.database.delete(self.pareto_key, choices_to_bytes(data.choices)) - def generate_novel_prefix(self) -> tuple[IRNode, ...]: + def generate_novel_prefix(self) -> tuple[ChoiceT, ...]: """Uses the tree to proactively generate a starting sequence of bytes that we haven't explored yet for this test. 
@@ -740,16 +738,17 @@ class ConjectureRunner: HealthCheck.too_slow, ) - def save_buffer( - self, buffer: Union[bytes, bytearray], sub_key: Optional[bytes] = None + def save_choices( + self, choices: Sequence[ChoiceT], sub_key: Optional[bytes] = None ) -> None: if self.settings.database is not None: key = self.sub_key(sub_key) if key is None: return - self.settings.database.save(key, bytes(buffer)) + self.settings.database.save(key, choices_to_bytes(choices)) - def downgrade_buffer(self, buffer: Union[bytes, bytearray]) -> None: + def downgrade_choices(self, choices: Sequence[ChoiceT]) -> None: + buffer = choices_to_bytes(choices) if self.settings.database is not None and self.database_key is not None: self.settings.database.move(self.database_key, self.secondary_key, buffer) @@ -837,7 +836,7 @@ class ConjectureRunner: # sample the secondary corpus to a more manageable size. corpus = sorted( - self.settings.database.fetch(self.database_key), key=sort_key + self.settings.database.fetch(self.database_key), key=shortlex ) factor = 0.1 if (Phase.generate in self.settings.phases) else 1 desired_size = max(2, ceil(factor * self.settings.max_examples)) @@ -852,7 +851,7 @@ class ConjectureRunner: extra = extra_corpus else: extra = self.random.sample(extra_corpus, shortfall) - extra.sort(key=sort_key) + extra.sort(key=shortlex) corpus.extend(extra) # We want a fast path where every primary entry in the database was @@ -863,7 +862,12 @@ class ConjectureRunner: for i, existing in enumerate(corpus): if i >= primary_corpus_size and found_interesting_in_primary: break - data = self.cached_test_function(existing, extend=BUFFER_SIZE) + choices = choices_from_bytes(existing) + if choices is None: + # clear out any keys which fail deserialization + self.settings.database.delete(self.database_key, existing) + continue + data = self.cached_test_function(choices, extend="full") if data.status != Status.INTERESTING: self.settings.database.delete(self.database_key, existing) 
self.settings.database.delete(self.secondary_key, existing) @@ -871,7 +875,7 @@ class ConjectureRunner: if i < primary_corpus_size: found_interesting_in_primary = True assert not isinstance(data, _Overrun) - if existing != data.buffer: + if choices_key(choices) != choices_key(data.choices): all_interesting_in_primary_were_exact = False if not self.settings.report_multiple_bugs: break @@ -891,10 +895,14 @@ class ConjectureRunner: pareto_corpus = list(self.settings.database.fetch(self.pareto_key)) if len(pareto_corpus) > desired_extra: pareto_corpus = self.random.sample(pareto_corpus, desired_extra) - pareto_corpus.sort(key=sort_key) + pareto_corpus.sort(key=shortlex) for existing in pareto_corpus: - data = self.cached_test_function(existing, extend=BUFFER_SIZE) + choices = choices_from_bytes(existing) + if choices is None: + self.settings.database.delete(self.pareto_key, existing) + continue + data = self.cached_test_function(choices, extend="full") if data not in self.pareto_front: self.settings.database.delete(self.pareto_key, existing) if data.status == Status.INTERESTING: @@ -956,17 +964,26 @@ class ConjectureRunner: self.debug("Generating new examples") assert self.should_generate_more() - zero_data = self.cached_test_function(bytes(BUFFER_SIZE)) + zero_data = self.cached_test_function((ChoiceTemplate("simplest", count=None),)) if zero_data.status > Status.OVERRUN: assert isinstance(zero_data, ConjectureResult) - self.__data_cache.pin( - zero_data.buffer, zero_data.as_result() - ) # Pin forever + # if the crosshair backend cannot proceed, it does not (and cannot) + # realize the symbolic values, with the intent that Hypothesis will + # throw away this test case. We usually do, but if it's the zero data + # then we try to pin it here, which requires realizing the symbolics. + # + # We don't (yet) rely on the zero data being pinned, and so + # it's simply a very slight performance loss to simply not pin it + # if doing so would error. 
+ if zero_data.cannot_proceed_scope is None: # pragma: no branch + self.__data_cache.pin( + self._cache_key(zero_data.choices), zero_data.as_result() + ) # Pin forever if zero_data.status == Status.OVERRUN or ( zero_data.status == Status.VALID and isinstance(zero_data, ConjectureResult) - and len(zero_data.buffer) * 2 > BUFFER_SIZE + and zero_data.length * 2 > BUFFER_SIZE ): fail_health_check( self.settings, @@ -1034,28 +1051,22 @@ class ConjectureRunner: # a buffer and uses HypothesisProvider as its backing provider, # not whatever is specified by the backend. We can improve this # once more things are on the ir. - if self.settings.backend != "hypothesis": - data = self.new_conjecture_data(prefix=b"", max_length=BUFFER_SIZE) + if not self.using_hypothesis_backend: + data = self.new_conjecture_data([]) with suppress(BackendCannotProceed): self.test_function(data) continue self._current_phase = "generate" prefix = self.generate_novel_prefix() - # it is possible, if unlikely, to generate a > BUFFER_SIZE novel prefix, - # as nodes in the novel tree may be variable sized due to eg integer - # probe retries. - prefix = truncate_nodes_to_size(prefix, BUFFER_SIZE_IR) if ( self.valid_examples <= small_example_cap and self.call_count <= 5 * small_example_cap and not self.interesting_examples and consecutive_zero_extend_is_invalid < 5 ): - prefix_size = ir_size_nodes(prefix) - minimal_example = self.cached_test_function_ir( - prefix - + (NodeTemplate("simplest", size=BUFFER_SIZE_IR - prefix_size),) + minimal_example = self.cached_test_function( + prefix + (ChoiceTemplate("simplest", count=None),) ) if minimal_example.status < Status.VALID: @@ -1066,10 +1077,8 @@ class ConjectureRunner: # ConjectureResult object. 
assert isinstance(minimal_example, ConjectureResult) consecutive_zero_extend_is_invalid = 0 - minimal_extension = ( - ir_size_nodes(minimal_example.ir_nodes) - prefix_size - ) - max_length = min(prefix_size + minimal_extension * 10, BUFFER_SIZE_IR) + minimal_extension = len(minimal_example.choices) - len(prefix) + max_length = len(prefix) + minimal_extension * 5 # We could end up in a situation where even though the prefix was # novel when we generated it, because we've now tried zero extending @@ -1079,7 +1088,7 @@ class ConjectureRunner: # running the test function for real here. If however we encounter # some novel behaviour, we try again with the real test function, # starting from the new novel prefix that has discovered. - trial_data = self.new_conjecture_data_ir(prefix, max_length=max_length) + trial_data = self.new_conjecture_data(prefix, max_choices=max_length) try: self.tree.simulate_test_function(trial_data) continue @@ -1097,16 +1106,16 @@ class ConjectureRunner: if not self.should_generate_more(): break - prefix = trial_data.ir_nodes + prefix = trial_data.choices else: - max_length = BUFFER_SIZE_IR + max_length = None - data = self.new_conjecture_data_ir(prefix, max_length=max_length) + data = self.new_conjecture_data(prefix, max_choices=max_length) self.test_function(data) if ( data.status is Status.OVERRUN - and max_length < BUFFER_SIZE_IR + and max_length is not None and "invalid because" not in data.events ): data.events["invalid because"] = ( @@ -1162,32 +1171,87 @@ class ConjectureRunner: and self.call_count <= initial_calls + 5 and failed_mutations <= 5 ): - groups = data.examples.mutator_groups + groups = data.spans.mutator_groups if not groups: break group = self.random.choice(groups) - (start1, end1), (start2, end2) = self.random.sample(sorted(group), 2) - if (start1 <= start2 <= end2 <= end1) or ( - start2 <= start1 <= end1 <= end2 - ): # pragma: no cover # flaky on conjecture-cover tests - # one example entirely contains the other. give up. 
- # TODO use more intelligent mutation for containment, like - # replacing child with parent or vice versa. Would allow for - # recursive / subtree mutation - failed_mutations += 1 - continue - if start1 > start2: (start1, end1), (start2, end2) = (start2, end2), (start1, end1) - assert end1 <= start2 - nodes = data.ir_nodes - (start, end) = self.random.choice([(start1, end1), (start2, end2)]) - replacement = nodes[start:end] + if ( + start1 <= start2 <= end2 <= end1 + ): # pragma: no cover # flaky on conjecture-cover tests + # One span entirely contains the other. The strategy is very + # likely some kind of tree. e.g. we might have + # + # ┌─────┐ + # ┌─────┤ a ├──────┐ + # │ └─────┘ │ + # ┌──┴──┐ ┌──┴──┐ + # ┌──┤ b ├──┐ ┌──┤ c ├──┐ + # │ └──┬──┘ │ │ └──┬──┘ │ + # ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ + # │ d │ │ e │ │ f │ │ g │ │ h │ │ i │ + # └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ + # + # where each node is drawn from the same strategy and so + # has the same span label. We might have selected the spans + # corresponding to the a and c nodes, which is the entire + # tree and the subtree of (and including) c respectively. + # + # There are two possible mutations we could apply in this case: + # 1. replace a with c (replace child with parent) + # 2. replace c with a (replace parent with child) + # + # (1) results in multiple partial copies of the + # parent: + # ┌─────┐ + # ┌─────┤ a ├────────────┐ + # │ └─────┘ │ + # ┌──┴──┐ ┌─┴───┐ + # ┌──┤ b ├──┐ ┌─────┤ a ├──────┐ + # │ └──┬──┘ │ │ └─────┘ │ + # ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌──┴──┐ ┌──┴──┐ + # │ d │ │ e │ │ f │ ┌──┤ b ├──┐ ┌──┤ c ├──┐ + # └───┘ └───┘ └───┘ │ └──┬──┘ │ │ └──┬──┘ │ + # ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ + # │ d │ │ e │ │ f │ │ g │ │ h │ │ i │ + # └───┘ └───┘ └───┘ └───┘ └───┘ └───┘ + # + # While (2) results in truncating part of the parent: + # + # ┌─────┐ + # ┌──┤ c ├──┐ + # │ └──┬──┘ │ + # ┌─┴─┐ ┌─┴─┐ ┌─┴─┐ + # │ g │ │ h │ │ i │ + # └───┘ └───┘ └───┘ + # + # (1) is the same as Example IV.4. 
in Nautilus (NDSS '19) + # (https://wcventure.github.io/FuzzingPaper/Paper/NDSS19_Nautilus.pdf), + # except we do not repeat the replacement additional times + # (the paper repeats it once for a total of two copies). + # + # We currently only apply mutation (1), and ignore mutation + # (2). The reason is that the attempt generated from (2) is + # always something that Hypothesis could easily have generated + # itself, by simply not making various choices. Whereas + # duplicating the exact value + structure of particular choices + # in (1) would have been hard for Hypothesis to generate by + # chance. + # + # TODO: an extension of this mutation might repeat (1) on + # a geometric distribution between 0 and ~10 times. We would + # need to find the corresponding span to recurse on in the new + # choices, probably just by using the choices index. - try: + # case (1): duplicate the choices in start1:start2. + attempt = data.choices[:start2] + data.choices[start1:] + else: + (start, end) = self.random.choice([(start1, end1), (start2, end2)]) + replacement = data.choices[start:end] # We attempt to replace both the examples with # whichever choice we made. Note that this might end # up messing up and getting the example boundaries @@ -1196,12 +1260,17 @@ class ConjectureRunner: # really matter. It may not achieve the desired result, # but it's still a perfectly acceptable choice sequence # to try. - new_data = self.cached_test_function_ir( - nodes[:start1] + attempt = ( + data.choices[:start1] + replacement - + nodes[end1:start2] + + data.choices[end1:start2] + replacement - + nodes[end2:], + + data.choices[end2:] + ) + + try: + new_data = self.cached_test_function( + attempt, # We set error_on_discard so that we don't end up # entering parts of the tree we consider redundant # and not worth exploring. 
@@ -1217,7 +1286,7 @@ class ConjectureRunner: assert isinstance(new_data, ConjectureResult) if ( new_data.status >= data.status - and data.buffer != new_data.buffer + and choices_key(data.choices) != choices_key(new_data.choices) and all( k in new_data.target_observations and new_data.target_observations[k] >= v @@ -1299,54 +1368,28 @@ class ConjectureRunner: self.shrink_interesting_examples() self.exit_with(ExitReason.finished) - def new_conjecture_data_ir( - self, - ir_tree_prefix: Sequence[Union[IRNode, NodeTemplate]], - *, - observer: Optional[DataObserver] = None, - max_length: Optional[int] = None, - ) -> ConjectureData: - provider = ( - HypothesisProvider if self._switch_to_hypothesis_provider else self.provider - ) - observer = observer or self.tree.new_observer() - if self.settings.backend != "hypothesis": - observer = DataObserver() - - return ConjectureData.for_ir_tree( - ir_tree_prefix, - observer=observer, - provider=provider, - max_length=max_length, - random=self.random, - ) - def new_conjecture_data( self, - prefix: Union[bytes, bytearray], - max_length: int = BUFFER_SIZE, + prefix: Sequence[Union[ChoiceT, ChoiceTemplate]], + *, observer: Optional[DataObserver] = None, + max_choices: Optional[int] = None, ) -> ConjectureData: provider = ( HypothesisProvider if self._switch_to_hypothesis_provider else self.provider ) observer = observer or self.tree.new_observer() - if self.settings.backend != "hypothesis": + if not self.using_hypothesis_backend: observer = DataObserver() return ConjectureData( prefix=prefix, - max_length=max_length, - random=self.random, observer=observer, provider=provider, + max_choices=max_choices, + random=self.random, ) - def new_conjecture_data_for_buffer( - self, buffer: Union[bytes, bytearray] - ) -> ConjectureData: - return self.new_conjecture_data(buffer, max_length=len(buffer)) - def shrink_interesting_examples(self) -> None: """If we've found interesting examples, try to replace each of them with a minimal interesting 
example with the same interesting_origin. @@ -1361,10 +1404,10 @@ class ConjectureRunner: self.finish_shrinking_deadline = time.perf_counter() + MAX_SHRINKING_SECONDS for prev_data in sorted( - self.interesting_examples.values(), key=lambda d: sort_key(d.buffer) + self.interesting_examples.values(), key=lambda d: sort_key(d.nodes) ): assert prev_data.status == Status.INTERESTING - data = self.new_conjecture_data_ir(prev_data.ir_nodes) + data = self.new_conjecture_data(prev_data.choices) self.test_function(data) if data.status != Status.INTERESTING: self.exit_with(ExitReason.flaky) @@ -1378,9 +1421,9 @@ class ConjectureRunner: for k, v in self.interesting_examples.items() if k not in self.shrunk_examples ), - key=lambda kv: (sort_key(kv[1].buffer), sort_key(repr(kv[0]))), + key=lambda kv: (sort_key(kv[1].nodes), shortlex(repr(kv[0]))), ) - self.debug(f"Shrinking {target!r}: {data.choices}") + self.debug(f"Shrinking {target!r}: {example.choices}") if not self.settings.report_multiple_bugs: # If multi-bug reporting is disabled, we shrink our currently-minimal @@ -1388,9 +1431,10 @@ class ConjectureRunner: self.shrink(example, lambda d: d.status == Status.INTERESTING) return - def predicate(d: ConjectureData) -> bool: + def predicate(d: Union[ConjectureResult, _Overrun]) -> bool: if d.status < Status.INTERESTING: return False + d = cast(ConjectureResult, d) return d.interesting_origin == target self.shrink(example, predicate) @@ -1407,17 +1451,23 @@ class ConjectureRunner: # It's not worth trying the primary corpus because we already # tried all of those in the initial phase. 
corpus = sorted( - self.settings.database.fetch(self.secondary_key), key=sort_key + self.settings.database.fetch(self.secondary_key), key=shortlex ) for c in corpus: - primary = {v.buffer for v in self.interesting_examples.values()} - - cap = max(map(sort_key, primary)) + choices = choices_from_bytes(c) + if choices is None: + self.settings.database.delete(self.secondary_key, c) + continue + primary = { + choices_to_bytes(v.choices) + for v in self.interesting_examples.values() + } + cap = max(map(shortlex, primary)) - if sort_key(c) > cap: + if shortlex(c) > cap: break else: - self.cached_test_function(c) + self.cached_test_function(choices) # We unconditionally remove c from the secondary key as it # is either now primary or worse than our primary example # of this reason for interestingness. @@ -1426,7 +1476,7 @@ class ConjectureRunner: def shrink( self, example: Union[ConjectureData, ConjectureResult], - predicate: Optional[Callable[[ConjectureData], bool]] = None, + predicate: Optional[ShrinkPredicateT] = None, allow_transition: Optional[ Callable[[Union[ConjectureData, ConjectureResult], ConjectureData], bool] ] = None, @@ -1438,7 +1488,7 @@ class ConjectureRunner: def new_shrinker( self, example: Union[ConjectureData, ConjectureResult], - predicate: Optional[Callable[[ConjectureData], bool]] = None, + predicate: Optional[ShrinkPredicateT] = None, allow_transition: Optional[ Callable[[Union[ConjectureData, ConjectureResult], ConjectureData], bool] ] = None, @@ -1452,99 +1502,17 @@ class ConjectureRunner: in_target_phase=self._current_phase == "target", ) - def cached_test_function( - self, - buffer: Union[bytes, bytearray], - *, - extend: int = 0, - ) -> Union[ConjectureResult, _Overrun]: # pragma: no cover # removing function soon - """Checks the tree to see if we've tested this buffer, and returns the - previous result if we have. - - Otherwise we call through to ``test_function``, and return a - fresh result. 
- - If ``error_on_discard`` is set to True this will raise ``ContainsDiscard`` - in preference to running the actual test function. This is to allow us - to skip test cases we expect to be redundant in some cases. Note that - it may be the case that we don't raise ``ContainsDiscard`` even if the - result has discards if we cannot determine from previous runs whether - it will have a discard. - """ - buffer = bytes(buffer)[:BUFFER_SIZE] - - max_length = min(BUFFER_SIZE, len(buffer) + extend) - - @overload - def check_result(result: _Overrun) -> _Overrun: ... - @overload - def check_result(result: ConjectureResult) -> ConjectureResult: ... - def check_result( - result: Union[_Overrun, ConjectureResult], - ) -> Union[_Overrun, ConjectureResult]: - assert result is Overrun or ( - isinstance(result, ConjectureResult) and result.status != Status.OVERRUN - ) - return result - - try: - cached = check_result(self.__data_cache[buffer]) - if cached.status > Status.OVERRUN or extend == 0: - return cached - except KeyError: - pass - - observer = DataObserver() - dummy_data = self.new_conjecture_data( - prefix=buffer, max_length=max_length, observer=observer - ) - - if self.settings.backend == "hypothesis": - try: - self.tree.simulate_test_function(dummy_data) - except PreviouslyUnseenBehaviour: - pass - else: - if dummy_data.status > Status.OVERRUN: - dummy_data.freeze() - try: - return self.__data_cache[dummy_data.buffer] - except KeyError: - pass - else: - self.__data_cache[buffer] = Overrun - return Overrun - - # We didn't find a match in the tree, so we need to run the test - # function normally. Note that test_function will automatically - # add this to the tree so we don't need to update the cache. 
- - result = None - - data = self.new_conjecture_data( - prefix=max((buffer, dummy_data.buffer), key=len), max_length=max_length - ) - self.test_function(data) - result = check_result(data.as_result()) - if extend == 0 or ( - result is not Overrun - and not isinstance(result, _Overrun) - and len(result.buffer) <= len(buffer) - ): - self.__data_cache[buffer] = result - return result - def passing_choice_sequences( - self, prefix: Sequence[IRNode] = () - ) -> frozenset[bytes]: + self, prefix: Sequence[ChoiceNode] = () + ) -> frozenset[tuple[ChoiceNode, ...]]: """Return a collection of choice sequence nodes which cause the test to pass. Optionally restrict this by a certain prefix, which is useful for explain mode. """ return frozenset( - result.ir_nodes - for key in self.__data_cache_ir - if (result := self.__data_cache_ir[key]).status is Status.VALID - and startswith(result.ir_nodes, prefix) + cast(ConjectureResult, result).nodes + for key in self.__data_cache + if (result := self.__data_cache[key]).status is Status.VALID + and startswith(cast(ConjectureResult, result).nodes, prefix) ) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py index 51ced29bd94..800c2f22c92 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py @@ -17,19 +17,9 @@ import gc import sys import time import warnings +from array import ArrayType from collections.abc import Iterable, Iterator, Sequence -from random import Random -from typing import ( - Any, - Callable, - Generic, - List, - Literal, - Optional, - TypeVar, - Union, - overload, -) +from typing import Any, Callable, Generic, Literal, Optional, TypeVar, Union, overload from sortedcontainers import SortedList @@ -42,7 +32,7 @@ T = TypeVar("T") def array_or_list( code: str, contents: Iterable[int] -) -> 
"Union[List[int], array.ArrayType[int]]": +) -> Union[list[int], "ArrayType[int]"]: if code == "O": return list(contents) return array.array(code, contents) @@ -83,7 +73,7 @@ class IntList(Sequence[int]): __slots__ = ("__underlying",) - __underlying: "Union[List[int], array.ArrayType[int]]" + __underlying: Union[list[int], "ArrayType[int]"] def __init__(self, values: Sequence[int] = ()): for code in ARRAY_CODES: @@ -117,14 +107,16 @@ class IntList(Sequence[int]): def __getitem__(self, i: int) -> int: ... # pragma: no cover @overload - def __getitem__(self, i: slice) -> "IntList": ... # pragma: no cover + def __getitem__( + self, i: slice + ) -> Union[list[int], "ArrayType[int]"]: ... # pragma: no cover - def __getitem__(self, i: Union[int, slice]) -> "Union[int, IntList]": - if isinstance(i, slice): - return IntList(self.__underlying[i]) + def __getitem__( + self, i: Union[int, slice] + ) -> Union[int, list[int], "ArrayType[int]"]: return self.__underlying[i] - def __delitem__(self, i: int) -> None: + def __delitem__(self, i: Union[int, slice]) -> None: del self.__underlying[i] def insert(self, i: int, v: int) -> None: @@ -189,22 +181,17 @@ def binary_search(lo: int, hi: int, f: Callable[[int], bool]) -> int: return lo -def uniform(random: Random, n: int) -> bytes: - """Returns a bytestring of length n, distributed uniformly at random.""" - return random.getrandbits(n * 8).to_bytes(n, "big") - - -class LazySequenceCopy: +class LazySequenceCopy(Generic[T]): """A "copy" of a sequence that works by inserting a mask in front of the underlying sequence, so that you can mutate it without changing the underlying sequence. Effectively behaves as if you could do list(x) in O(1) time. 
The full list API is not supported yet but there's no reason in principle it couldn't be.""" - def __init__(self, values: Sequence[int]): + def __init__(self, values: Sequence[T]): self.__values = values self.__len = len(values) - self.__mask: Optional[dict[int, int]] = None + self.__mask: Optional[dict[int, T]] = None self.__popped_indices: Optional[SortedList] = None def __len__(self) -> int: @@ -212,7 +199,7 @@ class LazySequenceCopy: return self.__len return self.__len - len(self.__popped_indices) - def pop(self, i: int = -1) -> int: + def pop(self, i: int = -1) -> T: if len(self) == 0: raise IndexError("Cannot pop from empty list") i = self.__underlying_index(i) @@ -228,7 +215,13 @@ class LazySequenceCopy: self.__popped_indices.add(i) return v - def __getitem__(self, i: int) -> int: + def swap(self, i: int, j: int) -> None: + """Swap the elements ls[i], ls[j].""" + if i == j: + return + self[i], self[j] = self[j], self[i] + + def __getitem__(self, i: int) -> T: i = self.__underlying_index(i) default = self.__values[i] @@ -237,7 +230,7 @@ class LazySequenceCopy: else: return self.__mask.get(i, default) - def __setitem__(self, i: int, v: int) -> None: + def __setitem__(self, i: int, v: T) -> None: i = self.__underlying_index(i) if self.__mask is None: self.__mask = {} @@ -268,18 +261,10 @@ class LazySequenceCopy: i += 1 return i - -def clamp(lower: float, value: float, upper: float) -> float: - """Given a value and lower/upper bounds, 'clamp' the value so that - it satisfies lower <= value <= upper.""" - return max(lower, min(value, upper)) - - -def swap(ls: LazySequenceCopy, i: int, j: int) -> None: - """Swap the elements ls[i], ls[j].""" - if i == j: - return - ls[i], ls[j] = ls[j], ls[i] + # even though we have len + getitem, mypyc requires iter. 
+ def __iter__(self) -> Iterable[T]: + for i in range(len(self)): + yield self[i] def stack_depth_of_caller() -> int: @@ -480,3 +465,10 @@ def endswith(l1: Sequence[T], l2: Sequence[T]) -> bool: if len(l1) < len(l2): return False return all(v1 == v2 for v1, v2 in zip(l1[-len(l2) :], l2)) + + +def bits_to_bytes(n: int) -> int: + """The number of bytes required to represent an n-bit number. + Equivalent to (n + 7) // 8, but slightly faster. This really is + called enough times that that matters.""" + return (n + 7) >> 3 diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/optimiser.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/optimiser.py index 324d19c5715..03807cf4be2 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/optimiser.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/optimiser.py @@ -8,20 +8,13 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. -from typing import Union +from typing import Optional, Union from hypothesis.internal.compat import int_from_bytes, int_to_bytes -from hypothesis.internal.conjecture.data import ( - ConjectureResult, - IRType, - Status, - _Overrun, - bits_to_bytes, - ir_size_nodes, - ir_value_permitted, -) -from hypothesis.internal.conjecture.engine import BUFFER_SIZE_IR, ConjectureRunner -from hypothesis.internal.conjecture.junkdrawer import find_integer +from hypothesis.internal.conjecture.choice import ChoiceT, choice_permitted +from hypothesis.internal.conjecture.data import ConjectureResult, Status, _Overrun +from hypothesis.internal.conjecture.engine import ConjectureRunner +from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes, find_integer from hypothesis.internal.conjecture.pareto import NO_SCORE @@ -85,7 +78,7 @@ class Optimiser: # We allow transitions that leave the score unchanged as long as they # don't increase the number of nodes. 
This gives us a certain amount of # freedom for lateral moves that will take us out of local maxima. - if len(data.ir_nodes) <= len(self.current_data.ir_nodes): + if len(data.nodes) <= len(self.current_data.nodes): self.current_data = data return True return False @@ -98,11 +91,11 @@ class Optimiser: nodes_examined = set() - prev = None - i = len(self.current_data.ir_nodes) - 1 + prev: Optional[ConjectureResult] = None + i = len(self.current_data.nodes) - 1 while i >= 0 and self.improvements <= self.max_improvements: if prev is not self.current_data: - i = len(self.current_data.ir_nodes) - 1 + i = len(self.current_data.nodes) - 1 prev = self.current_data if i in nodes_examined: @@ -110,19 +103,19 @@ class Optimiser: continue nodes_examined.add(i) - node = self.current_data.ir_nodes[i] + node = self.current_data.nodes[i] assert node.index is not None # we can only (sensibly & easily) define hill climbing for # numeric-style nodes. It's not clear hill-climbing a string is # useful, for instance. - if node.ir_type not in {"integer", "float", "bytes", "boolean"}: + if node.type not in {"integer", "float", "bytes", "boolean"}: continue def attempt_replace(k: int) -> bool: """ Try replacing the current node in the current best test case with a value which is "k times larger", where the exact notion - of "larger" depends on the ir_type. + of "larger" depends on the choice_type. Note that we use the *current* best and not the one we started with. 
This helps ensure that if we luck into a good draw when making @@ -132,27 +125,27 @@ class Optimiser: if abs(k) > 2**20: return False - node = self.current_data.ir_nodes[i] + node = self.current_data.nodes[i] assert node.index is not None if node.was_forced: return False # pragma: no cover - new_value: IRType - if node.ir_type in {"integer", "float"}: + new_choice: ChoiceT + if node.type in {"integer", "float"}: assert isinstance(node.value, (int, float)) - new_value = node.value + k - elif node.ir_type == "boolean": + new_choice = node.value + k + elif node.type == "boolean": assert isinstance(node.value, bool) if abs(k) > 1: return False if k == -1: - new_value = False + new_choice = False if k == 1: - new_value = True + new_choice = True if k == 0: # pragma: no cover - new_value = node.value + new_choice = node.value else: - assert node.ir_type == "bytes" + assert node.type == "bytes" assert isinstance(node.value, bytes) v = int_from_bytes(node.value) # can't go below zero for bytes @@ -162,21 +155,20 @@ class Optimiser: # allow adding k to increase the number of bytes. we don't want # to decrease so that b"01" doesn't turn into b"1". 
size = max(len(node.value), bits_to_bytes(v.bit_length())) - new_value = int_to_bytes(v, size) + new_choice = int_to_bytes(v, size) - if not ir_value_permitted(new_value, node.ir_type, node.kwargs): + if not choice_permitted(new_choice, node.constraints): return False for _ in range(3): - nodes = self.current_data.ir_nodes - attempt_nodes = ( - nodes[: node.index] - + (node.copy(with_value=new_value),) - + nodes[node.index + 1 :] + choices = self.current_data.choices + attempt_choices = ( + choices[: node.index] + + (new_choice,) + + choices[node.index + 1 :] ) - attempt = self.engine.cached_test_function_ir( - attempt_nodes, - extend=BUFFER_SIZE_IR - ir_size_nodes(attempt_nodes), + attempt = self.engine.cached_test_function( + attempt_choices, extend="full" ) if self.consider_new_data(attempt): @@ -186,25 +178,23 @@ class Optimiser: return False assert isinstance(attempt, ConjectureResult) - if len(attempt.ir_nodes) == len(self.current_data.ir_nodes): + if len(attempt.nodes) == len(self.current_data.nodes): return False - for j, ex in enumerate(self.current_data.examples): - if ex.ir_start >= node.index + 1: + for j, ex in enumerate(self.current_data.spans): + if ex.start >= node.index + 1: break # pragma: no cover - if ex.ir_end <= node.index: + if ex.end <= node.index: continue - ex_attempt = attempt.examples[j] - if ex.ir_length == ex_attempt.ir_length: + ex_attempt = attempt.spans[j] + if ex.choice_count == ex_attempt.choice_count: continue # pragma: no cover - replacement = attempt.ir_nodes[ - ex_attempt.ir_start : ex_attempt.ir_end - ] + replacement = attempt.choices[ex_attempt.start : ex_attempt.end] if self.consider_new_data( - self.engine.cached_test_function_ir( - nodes[: node.index] + self.engine.cached_test_function( + choices[: node.index] + replacement - + self.current_data.ir_nodes[ex.ir_end :] + + self.current_data.choices[ex.end :] ) ): return True diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/pareto.py 
b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/pareto.py index a0451d7f864..73f3e6a533a 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/pareto.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/pareto.py @@ -8,16 +8,28 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +from collections.abc import Iterator from enum import Enum +from random import Random +from typing import TYPE_CHECKING, Callable, Optional, Union from sortedcontainers import SortedList -from hypothesis.internal.conjecture.data import ConjectureData, ConjectureResult, Status -from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy, swap +from hypothesis.internal.conjecture.choice import choices_key +from hypothesis.internal.conjecture.data import ( + ConjectureData, + ConjectureResult, + Status, + _Overrun, +) +from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy from hypothesis.internal.conjecture.shrinker import sort_key NO_SCORE = float("-inf") +if TYPE_CHECKING: + from hypothesis.internal.conjecture.engine import ConjectureRunner + class DominanceRelation(Enum): NO_DOMINANCE = 0 @@ -26,7 +38,7 @@ class DominanceRelation(Enum): RIGHT_DOMINATES = 3 -def dominance(left, right): +def dominance(left: ConjectureResult, right: ConjectureResult) -> DominanceRelation: """Returns the dominance relation between ``left`` and ``right``, according to the rules that one ConjectureResult dominates another if and only if it is better in every way. 
@@ -45,10 +57,12 @@ def dominance(left, right): more structured or failing tests it can be useful to track, and future work will depend on it more.""" - if left.buffer == right.buffer: + left_key = sort_key(left.nodes) + right_key = sort_key(right.nodes) + if left_key == right_key: return DominanceRelation.EQUAL - if sort_key(right.buffer) < sort_key(left.buffer): + if right_key < left_key: result = dominance(left=right, right=left) if result == DominanceRelation.LEFT_DOMINATES: return DominanceRelation.RIGHT_DOMINATES @@ -60,7 +74,7 @@ def dominance(left, right): return result # Either left is better or there is no dominance relationship. - assert sort_key(left.buffer) < sort_key(right.buffer) + assert left_key < right_key # The right is more interesting if left.status < right.status: @@ -122,21 +136,25 @@ class ParetoFront: see how much of a problem this is in practice before we try that. """ - def __init__(self, random): + def __init__(self, random: Random) -> None: self.__random = random - self.__eviction_listeners = [] + self.__eviction_listeners: list[Callable[[ConjectureResult], None]] = [] - self.front = SortedList(key=lambda d: sort_key(d.buffer)) - self.__pending = None + self.front: SortedList[ConjectureResult] = SortedList( + key=lambda d: sort_key(d.nodes) + ) + self.__pending: Optional[ConjectureResult] = None - def add(self, data): + def add(self, data: Union[ConjectureData, ConjectureResult, _Overrun]) -> bool: """Attempts to add ``data`` to the pareto front. Returns True if ``data`` is now in the front, including if data is already in the collection, and False otherwise""" if data.status < Status.VALID: return False + assert not isinstance(data, _Overrun) data = data.as_result() + assert not isinstance(data, _Overrun) if not self.front: self.front.add(data) @@ -163,7 +181,7 @@ class ParetoFront: # We track which values we are going to remove and remove them all # at the end so the shape of the front doesn't change while we're # using it. 
- to_remove = [] + to_remove: list[ConjectureResult] = [] # We now iteratively sample elements from the approximate pareto # front to check whether they should be retained. When the set of @@ -196,7 +214,7 @@ class ParetoFront: dominators = [data] while i >= 0 and len(dominators) < 10: - swap(front, i, self.__random.randint(0, i)) + front.swap(i, self.__random.randint(0, i)) candidate = front[i] @@ -235,26 +253,26 @@ class ParetoFront: finally: self.__pending = None - def on_evict(self, f): + def on_evict(self, f: Callable[[ConjectureResult], None]) -> None: """Register a listener function that will be called with data when it gets removed from the front because something else dominates it.""" self.__eviction_listeners.append(f) - def __contains__(self, data): + def __contains__(self, data: object) -> bool: return isinstance(data, (ConjectureData, ConjectureResult)) and ( data.as_result() in self.front ) - def __iter__(self): + def __iter__(self) -> Iterator[ConjectureResult]: return iter(self.front) - def __getitem__(self, i): + def __getitem__(self, i: int) -> ConjectureResult: return self.front[i] - def __len__(self): + def __len__(self) -> int: return len(self.front) - def __remove(self, data): + def __remove(self, data: ConjectureResult) -> None: try: self.front.remove(data) except ValueError: @@ -274,11 +292,12 @@ class ParetoOptimiser: grow more powerful over time. 
""" - def __init__(self, engine): + def __init__(self, engine: "ConjectureRunner") -> None: self.__engine = engine - self.front = self.__engine.pareto_front + assert self.__engine.pareto_front is not None + self.front: ParetoFront = self.__engine.pareto_front - def run(self): + def run(self) -> None: seen = set() # We iterate backwards through the pareto front, using the shrinker to @@ -298,7 +317,7 @@ class ParetoOptimiser: assert self.front i = min(i, len(self.front) - 1) target = self.front[i] - if target.buffer in seen: + if choices_key(target.choices) in seen: i -= 1 continue assert target is not prev @@ -326,7 +345,7 @@ class ParetoOptimiser: return False shrunk = self.__engine.shrink(target, allow_transition=allow_transition) - seen.add(shrunk.buffer) + seen.add(choices_key(shrunk.choices)) # Note that the front may have changed shape arbitrarily when # we ran the shrinker. If it didn't change shape then this is diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py new file mode 100644 index 00000000000..beccc75c361 --- /dev/null +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/providers.py @@ -0,0 +1,881 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +import abc +import contextlib +import math +import warnings +from collections.abc import Iterable +from random import Random +from sys import float_info +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Literal, + Optional, + TypedDict, + TypeVar, + Union, +) + +from hypothesis.errors import HypothesisWarning +from hypothesis.internal.cache import LRUCache +from hypothesis.internal.compat import WINDOWS, int_from_bytes +from hypothesis.internal.conjecture.choice import ( + StringConstraints, + choice_constraints_key, + choice_permitted, +) +from hypothesis.internal.conjecture.floats import float_to_lex, lex_to_float +from hypothesis.internal.conjecture.junkdrawer import bits_to_bytes +from hypothesis.internal.conjecture.utils import ( + INT_SIZES, + INT_SIZES_SAMPLER, + Sampler, + many, +) +from hypothesis.internal.floats import ( + SIGNALING_NAN, + float_to_int, + make_float_clamper, + next_down, + next_up, + sign_aware_lte, +) +from hypothesis.internal.intervalsets import IntervalSet + +if TYPE_CHECKING: + from typing import TypeAlias + + from hypothesis.internal.conjecture.data import ConjectureData + +T = TypeVar("T") +_Lifetime: "TypeAlias" = Literal["test_case", "test_function"] +COLLECTION_DEFAULT_MAX_SIZE = 10**10 # "arbitrarily large" + + +# The available `PrimitiveProvider`s, and therefore also the available backends +# for use by @settings(backend=...). The key is the name to be used in the backend= +# value, and the value is the importable path to a subclass of PrimitiveProvider. +# +# See also +# https://hypothesis.readthedocs.io/en/latest/strategies.html#alternative-backends-for-hypothesis. +# +# NOTE: the PrimitiveProvider interface is not yet stable. We may continue to +# make breaking changes to it. (but if you want to experiment and don't mind +# breakage, here you go!) 
+AVAILABLE_PROVIDERS = { + "hypothesis": "hypothesis.internal.conjecture.providers.HypothesisProvider", + "hypothesis-urandom": "hypothesis.internal.conjecture.providers.URandomProvider", +} +FLOAT_INIT_LOGIC_CACHE = LRUCache(4096) +STRING_SAMPLER_CACHE = LRUCache(64) + +NASTY_FLOATS = sorted( + [ + 0.0, + 0.5, + 1.1, + 1.5, + 1.9, + 1.0 / 3, + 10e6, + 10e-6, + 1.175494351e-38, + next_up(0.0), + float_info.min, + float_info.max, + 3.402823466e38, + 9007199254740992, + 1 - 10e-6, + 2 + 10e-6, + 1.192092896e-07, + 2.2204460492503131e-016, + ] + + [2.0**-n for n in (24, 14, 149, 126)] # minimum (sub)normals for float16,32 + + [float_info.min / n for n in (2, 10, 1000, 100_000)] # subnormal in float64 + + [math.inf, math.nan] * 5 + + [SIGNALING_NAN], + key=float_to_lex, +) +NASTY_FLOATS = list(map(float, NASTY_FLOATS)) +NASTY_FLOATS.extend([-x for x in NASTY_FLOATS]) + +NASTY_STRINGS = sorted( + [ + # strings which can be interpreted as code / logic + "undefined", + "null", + "NULL", + "nil", + "NIL", + "true", + "false", + "True", + "False", + "TRUE", + "FALSE", + "None", + "none", + "if", + "then", + "else", + # strings which can be interpreted as a number + "0", + "1e100", + "0..0", + "0/0", + "1/0", + "+0.0", + "Infinity", + "-Infinity", + "Inf", + "INF", + "NaN", + "9" * 30, + # common ascii characters + ",./;'[]\\-=<>?:\"{}|_+!@#$%^&*()`~", + # common unicode characters + "Ω≈ç√∫˜µ≤≥÷åß∂ƒ©˙∆˚¬…æœ∑´®†¥¨ˆøπ“‘¡™£¢∞§¶•ªº–≠¸˛Ç◊ı˜Â¯˘¿ÅÍÎÏ˝ÓÔÒÚÆ☃Œ„´‰ˇÁ¨ˆØ∏”’`⁄€‹›fifl‡°·‚—±", + # characters which increase in length when lowercased + "Ⱥ", + "Ⱦ", + # ligatures + "æœÆŒffʤʨß" + # emoticons + "(╯°□°)╯︵ ┻━┻)", + # emojis + "😍", + "🇺🇸", + # emoji modifiers + "🏻" # U+1F3FB Light Skin Tone, + "👍🏻", # 👍 followed by U+1F3FB + # RTL text + "الكل في المجمو عة", + # Ogham text, which contains the only character in the Space Separators + # unicode category (Zs) that isn't visually blank: . 
# noqa: RUF003 + "᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜", + # readable variations on text (bolt/italic/script) + "𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠", + "𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌", + "𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈", + "𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰", + "𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘", + # upsidown text + "ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥", + # reserved strings in windows + "NUL", + "COM1", + "LPT1", + # scunthorpe problem + "Scunthorpe", + # zalgo text + "Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣", + # + # examples from https://faultlore.com/blah/text-hates-you/ + "मनीष منش", + "पन्ह पन्ह त्र र्च कृकृ ड्ड न्हृे إلا بسم الله", + "lorem لا بسم الله ipsum 你好1234你好", + ], + key=len, +) + +# Masks for masking off the first byte of an n-bit buffer. +# The appropriate mask is stored at position n % 8. +BYTE_MASKS = [(1 << n) - 1 for n in range(8)] +BYTE_MASKS[0] = 255 + + +class _BackendInfoMsg(TypedDict): + type: str + title: str + content: Union[str, dict[str, Any]] + + +class PrimitiveProvider(abc.ABC): + # This is the low-level interface which would also be implemented + # by e.g. CrossHair, by an Atheris-hypothesis integration, etc. + # We'd then build the structured tree handling, database and replay + # support, etc. on top of this - so all backends get those for free. + # + # See https://github.com/HypothesisWorks/hypothesis/issues/3086 + + # How long a provider instance is used for. One of test_function or + # test_case. Defaults to test_function. + # + # If test_function, a single provider instance will be instantiated and used + # for the entirety of each test function. I.e., roughly one provider per + # @given annotation. This can be useful if you need to track state over many + # executions to a test function. 
+ # + # This lifetime will cause None to be passed for the ConjectureData object + # in PrimitiveProvider.__init__, because that object is instantiated per + # test case. + # + # If test_case, a new provider instance will be instantiated and used each + # time hypothesis tries to generate a new input to the test function. This + # lifetime can access the passed ConjectureData object. + # + # Non-hypothesis providers probably want to set a lifetime of test_function. + lifetime: _Lifetime = "test_function" + + # Solver-based backends such as hypothesis-crosshair use symbolic values + # which record operations performed on them in order to discover new paths. + # If avoid_realization is set to True, hypothesis will avoid interacting with + # symbolic choices returned by the provider in any way that would force the + # solver to narrow the range of possible values for that symbolic. + # + # Setting this to True disables some hypothesis features, such as + # DataTree-based deduplication, and some internal optimizations, such as + # caching constraints. Only enable this if it is necessary for your backend. + avoid_realization = False + + def __init__(self, conjecturedata: Optional["ConjectureData"], /) -> None: + self._cd = conjecturedata + + def per_test_case_context_manager(self): + return contextlib.nullcontext() + + def realize(self, value: T) -> T: + """ + Called whenever hypothesis requires a concrete (non-symbolic) value from + a potentially symbolic value. Hypothesis will not check that `value` is + symbolic before calling `realize`, so you should handle the case where + `value` is non-symbolic. + + The returned value should be non-symbolic. If you cannot provide a value, + raise hypothesis.errors.BackendCannotProceed("discard_test_case") + """ + return value + + def observe_test_case(self) -> dict[str, Any]: + """Called at the end of the test case when observability mode is active. 
+ + The return value should be a non-symbolic json-encodable dictionary, + and will be included as `observation["metadata"]["backend"]`. + """ + return {} + + def observe_information_messages( + self, *, lifetime: _Lifetime + ) -> Iterable[_BackendInfoMsg]: + """Called at the end of each test case and again at end of the test function. + + Return an iterable of `{type: info/alert/error, title: str, content: str|dict}` + dictionaries to be delivered as individual information messages. + (Hypothesis adds the `run_start` timestamp and `property` name for you.) + """ + assert lifetime in ("test_case", "test_function") + yield from [] + + @abc.abstractmethod + def draw_boolean( + self, + p: float = 0.5, + ) -> bool: + raise NotImplementedError + + @abc.abstractmethod + def draw_integer( + self, + min_value: Optional[int] = None, + max_value: Optional[int] = None, + *, + # weights are for choosing an element index from a bounded range + weights: Optional[dict[int, float]] = None, + shrink_towards: int = 0, + ) -> int: + raise NotImplementedError + + @abc.abstractmethod + def draw_float( + self, + *, + min_value: float = -math.inf, + max_value: float = math.inf, + allow_nan: bool = True, + smallest_nonzero_magnitude: float, + # TODO: consider supporting these float widths at the IR level in the + # future. + # width: Literal[16, 32, 64] = 64, + # exclude_min and exclude_max handled higher up, + ) -> float: + raise NotImplementedError + + @abc.abstractmethod + def draw_string( + self, + intervals: IntervalSet, + *, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> str: + raise NotImplementedError + + @abc.abstractmethod + def draw_bytes( + self, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> bytes: + raise NotImplementedError + + def span_start(self, label: int, /) -> None: # noqa: B027 # non-abstract noop + """Marks the beginning of a semantically meaningful span. 
+ + Providers can optionally track this data to learn which sub-sequences + of draws correspond to a higher-level object, recovering the parse tree. + `label` is an opaque integer, which will be shared by all spans drawn + from a particular strategy. + + This method is called from ConjectureData.start_span(). + """ + + def span_end(self, discard: bool, /) -> None: # noqa: B027, FBT001 + """Marks the end of a semantically meaningful span. + + `discard` is True when the draw was filtered out or otherwise marked as + unlikely to contribute to the input data as seen by the user's test. + Note however that side effects can make this determination unsound. + + This method is called from ConjectureData.stop_span(). + """ + + +class HypothesisProvider(PrimitiveProvider): + lifetime = "test_case" + + def __init__(self, conjecturedata: Optional["ConjectureData"], /): + super().__init__(conjecturedata) + self._random = None if self._cd is None else self._cd._random + + def draw_boolean( + self, + p: float = 0.5, + ) -> bool: + assert self._random is not None + + if p <= 0: + return False + if p >= 1: + return True + + return self._random.random() < p + + def draw_integer( + self, + min_value: Optional[int] = None, + max_value: Optional[int] = None, + *, + weights: Optional[dict[int, float]] = None, + shrink_towards: int = 0, + ) -> int: + assert self._cd is not None + + center = 0 + if min_value is not None: + center = max(min_value, center) + if max_value is not None: + center = min(max_value, center) + + if weights is not None: + assert min_value is not None + assert max_value is not None + + # format of weights is a mapping of ints to p, where sum(p) < 1. + # The remaining probability mass is uniformly distributed over + # *all* ints (not just the unmapped ones; this is somewhat undesirable, + # but simplifies things). 
+ # + # We assert that sum(p) is strictly less than 1 because it simplifies + # handling forced values when we can force into the unmapped probability + # mass. We should eventually remove this restriction. + sampler = Sampler( + [1 - sum(weights.values()), *weights.values()], observe=False + ) + # if we're forcing, it's easiest to force into the unmapped probability + # mass and then force the drawn value after. + idx = sampler.sample(self._cd) + + if idx == 0: + return self._draw_bounded_integer(min_value, max_value) + # implicit reliance on dicts being sorted for determinism + return list(weights)[idx - 1] + + if min_value is None and max_value is None: + return self._draw_unbounded_integer() + + if min_value is None: + assert max_value is not None + probe = max_value + 1 + while max_value < probe: + probe = center + self._draw_unbounded_integer() + return probe + + if max_value is None: + assert min_value is not None + probe = min_value - 1 + while probe < min_value: + probe = center + self._draw_unbounded_integer() + return probe + + return self._draw_bounded_integer(min_value, max_value) + + def draw_float( + self, + *, + min_value: float = -math.inf, + max_value: float = math.inf, + allow_nan: bool = True, + smallest_nonzero_magnitude: float, + # TODO: consider supporting these float widths at the IR level in the + # future. 
+ # width: Literal[16, 32, 64] = 64, + # exclude_min and exclude_max handled higher up, + ) -> float: + ( + sampler, + clamper, + nasty_floats, + ) = self._draw_float_init_logic( + min_value=min_value, + max_value=max_value, + allow_nan=allow_nan, + smallest_nonzero_magnitude=smallest_nonzero_magnitude, + ) + + assert self._cd is not None + + while True: + i = sampler.sample(self._cd) if sampler else 0 + if i == 0: + result = self._draw_float() + if allow_nan and math.isnan(result): + clamped = result # pragma: no cover + else: + clamped = clamper(result) + if float_to_int(clamped) != float_to_int(result) and not ( + math.isnan(result) and allow_nan + ): + result = clamped + else: + result = nasty_floats[i - 1] + return result + + def draw_string( + self, + intervals: IntervalSet, + *, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> str: + assert self._cd is not None + assert self._random is not None + + if len(intervals) == 0: + return "" + + sampler, nasty_strings = self._draw_string_sampler( + intervals=intervals, + min_size=min_size, + max_size=max_size, + ) + + if sampler is not None and self.draw_boolean(p=0.05): + return nasty_strings[sampler.sample(self._cd)] + + average_size = min( + max(min_size * 2, min_size + 5), + 0.5 * (min_size + max_size), + ) + + chars = [] + elements = many( + self._cd, + min_size=min_size, + max_size=max_size, + average_size=average_size, + observe=False, + ) + while elements.more(): + if len(intervals) > 256: + if self.draw_boolean(0.2): + i = self._random.randint(256, len(intervals) - 1) + else: + i = self._random.randint(0, 255) + else: + i = self._random.randint(0, len(intervals) - 1) + + chars.append(intervals.char_in_shrink_order(i)) + + return "".join(chars) + + def draw_bytes( + self, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> bytes: + assert self._cd is not None + assert self._random is not None + + buf = bytearray() + average_size = min( + max(min_size * 2, 
min_size + 5), + 0.5 * (min_size + max_size), + ) + elements = many( + self._cd, + min_size=min_size, + max_size=max_size, + average_size=average_size, + observe=False, + ) + while elements.more(): + buf += self._random.randbytes(1) + + return bytes(buf) + + def _draw_float(self) -> float: + assert self._random is not None + + f = lex_to_float(self._random.getrandbits(64)) + sign = 1 if self._random.getrandbits(1) else -1 + return sign * f + + def _draw_unbounded_integer(self) -> int: + assert self._cd is not None + assert self._random is not None + + size = INT_SIZES[INT_SIZES_SAMPLER.sample(self._cd)] + + r = self._random.getrandbits(size) + sign = r & 1 + r >>= 1 + if sign: + r = -r + return r + + def _draw_bounded_integer( + self, + lower: int, + upper: int, + *, + vary_size: bool = True, + ) -> int: + assert lower <= upper + assert self._cd is not None + assert self._random is not None + + if lower == upper: + return lower + + bits = (upper - lower).bit_length() + if bits > 24 and vary_size and self._random.random() < 7 / 8: + # For large ranges, we combine the uniform random distribution + # with a weighting scheme with moderate chance. Cutoff at 2 ** 24 so that our + # choice of unicode characters is uniform but the 32bit distribution is not. + idx = INT_SIZES_SAMPLER.sample(self._cd) + cap_bits = min(bits, INT_SIZES[idx]) + upper = min(upper, lower + 2**cap_bits - 1) + return self._random.randint(lower, upper) + + return self._random.randint(lower, upper) + + @classmethod + def _draw_float_init_logic( + cls, + *, + min_value: float, + max_value: float, + allow_nan: bool, + smallest_nonzero_magnitude: float, + ) -> tuple[ + Optional[Sampler], + Callable[[float], float], + list[float], + ]: + """ + Caches initialization logic for draw_float, as an alternative to + computing this for *every* float draw. + """ + # float_to_int allows us to distinguish between e.g. -0.0 and 0.0, + # even in light of hash(-0.0) == hash(0.0) and -0.0 == 0.0. 
+ key = ( + float_to_int(min_value), + float_to_int(max_value), + allow_nan, + float_to_int(smallest_nonzero_magnitude), + ) + if key in FLOAT_INIT_LOGIC_CACHE: + return FLOAT_INIT_LOGIC_CACHE[key] + + result = cls._compute_draw_float_init_logic( + min_value=min_value, + max_value=max_value, + allow_nan=allow_nan, + smallest_nonzero_magnitude=smallest_nonzero_magnitude, + ) + FLOAT_INIT_LOGIC_CACHE[key] = result + return result + + @staticmethod + def _compute_draw_float_init_logic( + *, + min_value: float, + max_value: float, + allow_nan: bool, + smallest_nonzero_magnitude: float, + ) -> tuple[ + Optional[Sampler], + Callable[[float], float], + list[float], + ]: + if smallest_nonzero_magnitude == 0.0: # pragma: no cover + raise FloatingPointError( + "Got allow_subnormal=True, but we can't represent subnormal floats " + "right now, in violation of the IEEE-754 floating-point " + "specification. This is usually because something was compiled with " + "-ffast-math or a similar option, which sets global processor state. " + "See https://simonbyrne.github.io/notes/fastmath/ for a more detailed " + "writeup - and good luck!" 
+ ) + + def permitted(f: float) -> bool: + if math.isnan(f): + return allow_nan + if 0 < abs(f) < smallest_nonzero_magnitude: + return False + return sign_aware_lte(min_value, f) and sign_aware_lte(f, max_value) + + boundary_values = [ + min_value, + next_up(min_value), + min_value + 1, + max_value - 1, + next_down(max_value), + max_value, + ] + nasty_floats = [f for f in NASTY_FLOATS + boundary_values if permitted(f)] + weights = [0.2 * len(nasty_floats)] + [0.8] * len(nasty_floats) + sampler = Sampler(weights, observe=False) if nasty_floats else None + + clamper = make_float_clamper( + min_value, + max_value, + smallest_nonzero_magnitude=smallest_nonzero_magnitude, + allow_nan=allow_nan, + ) + return (sampler, clamper, nasty_floats) + + @classmethod + def _draw_string_sampler( + cls, + *, + intervals: IntervalSet, + min_size: int, + max_size: int, + ) -> tuple[Optional[Sampler], list[str]]: + constraints: StringConstraints = { + "intervals": intervals, + "min_size": min_size, + "max_size": max_size, + } + key = choice_constraints_key("string", constraints) + if key in STRING_SAMPLER_CACHE: + return STRING_SAMPLER_CACHE[key] + + nasty_strings = [s for s in NASTY_STRINGS if choice_permitted(s, constraints)] + sampler = ( + Sampler([1 / len(nasty_strings)] * len(nasty_strings), observe=False) + if nasty_strings + else None + ) + result = (sampler, nasty_strings) + STRING_SAMPLER_CACHE[key] = result + return result + + +class BytestringProvider(PrimitiveProvider): + lifetime = "test_case" + + def __init__( + self, conjecturedata: Optional["ConjectureData"], /, *, bytestring: bytes + ): + super().__init__(conjecturedata) + self.bytestring = bytestring + self.index = 0 + self.drawn = bytearray() + + def _draw_bits(self, n): + if n == 0: # pragma: no cover + return 0 + n_bytes = bits_to_bytes(n) + if self.index + n_bytes > len(self.bytestring): + self._cd.mark_overrun() + buf = bytearray(self.bytestring[self.index : self.index + n_bytes]) + self.index += n_bytes + + 
buf[0] &= BYTE_MASKS[n % 8] + buf = bytes(buf) + self.drawn += buf + return int_from_bytes(buf) + + def draw_boolean( + self, + p: float = 0.5, + ) -> bool: + if p <= 0: + return False + if p >= 1: + return True + + # always use one byte for booleans to maintain constant draw size. + # If a probability requires more than 8 bits to represent precisely, + # the result will be slightly biased, but not badly. + bits = 8 + size = 2**bits + # always leave at least one value that can be true, even for very small + # p. + falsey = max(1, math.floor(size * (1 - p))) + n = self._draw_bits(bits) + return n >= falsey + + def draw_integer( + self, + min_value: Optional[int] = None, + max_value: Optional[int] = None, + *, + weights: Optional[dict[int, float]] = None, + shrink_towards: int = 0, + ) -> int: + assert self._cd is not None + + # we explicitly ignore integer weights for now, as they are likely net + # negative on fuzzer performance. + + if min_value is None and max_value is None: + min_value = -(2**127) + max_value = 2**127 - 1 + elif min_value is None: + assert max_value is not None + min_value = max_value - 2**64 + elif max_value is None: + assert min_value is not None + max_value = min_value + 2**64 + + if min_value == max_value: + return min_value + + bits = (max_value - min_value).bit_length() + value = self._draw_bits(bits) + while not (min_value <= value <= max_value): + value = self._draw_bits(bits) + return value + + def draw_float( + self, + *, + min_value: float = -math.inf, + max_value: float = math.inf, + allow_nan: bool = True, + smallest_nonzero_magnitude: float, + ) -> float: + n = self._draw_bits(64) + sign = -1 if n >> 64 else 1 + f = sign * lex_to_float(n & ((1 << 64) - 1)) + clamper = make_float_clamper( + min_value, + max_value, + smallest_nonzero_magnitude=smallest_nonzero_magnitude, + allow_nan=allow_nan, + ) + return clamper(f) + + def _draw_collection(self, min_size, max_size, *, alphabet_size): + average_size = min( + max(min_size * 2, 
min_size + 5), + 0.5 * (min_size + max_size), + ) + elements = many( + self._cd, + min_size=min_size, + max_size=max_size, + average_size=average_size, + observe=False, + ) + values = [] + while elements.more(): + values.append(self.draw_integer(0, alphabet_size - 1)) + return values + + def draw_string( + self, + intervals: IntervalSet, + *, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> str: + values = self._draw_collection(min_size, max_size, alphabet_size=len(intervals)) + return "".join(chr(intervals[v]) for v in values) + + def draw_bytes( + self, + min_size: int = 0, + max_size: int = COLLECTION_DEFAULT_MAX_SIZE, + ) -> bytes: + values = self._draw_collection(min_size, max_size, alphabet_size=2**8) + return bytes(values) + + +class URandom(Random): + # we reimplement a Random instance instead of using SystemRandom, because + # os.urandom is not guaranteed to read from /dev/urandom. + + @staticmethod + def _urandom(size: int) -> bytes: + with open("/dev/urandom", "rb") as f: + return f.read(size) + + def getrandbits(self, k: int) -> int: + assert k >= 0 + size = bits_to_bytes(k) + n = int_from_bytes(self._urandom(size)) + # trim excess bits + return n >> (size * 8 - k) + + def random(self) -> float: + # adapted from random.SystemRandom.random + return (int_from_bytes(self._urandom(7)) >> 3) * (2**-53) + + +class URandomProvider(HypothesisProvider): + # A provider which reads directly from /dev/urandom as its source of randomness. + # This provider exists to provide better Hypothesis integration with Antithesis + # (https://antithesis.com/), which interprets calls to /dev/urandom as the + # randomness to mutate. This effectively gives Antithesis control over + # the choices made by the URandomProvider. + # + # If you are not using Antithesis, you probably don't want to use this + # provider. 
+ + def __init__(self, conjecturedata: Optional["ConjectureData"], /): + super().__init__(conjecturedata) + if WINDOWS: # pragma: no cover + warnings.warn( + "/dev/urandom is not available on windows. Falling back to " + 'standard PRNG generation (equivalent to backend="hypothesis").', + HypothesisWarning, + stacklevel=1, + ) + # don't overwrite the HypothesisProvider self._random attribute in + # this case + else: + self._random = URandom() diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py index 0e940d47b42..0aac4360122 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py @@ -8,28 +8,29 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +import math from collections import defaultdict from collections.abc import Sequence -from typing import TYPE_CHECKING, Callable, Optional, TypeVar, Union +from typing import TYPE_CHECKING, Callable, Literal, Optional, Union, cast import attr -from hypothesis.internal.compat import int_from_bytes, int_to_bytes -from hypothesis.internal.conjecture.choicetree import ( - ChoiceTree, - prefix_selection_order, - random_selection_order, +from hypothesis.internal.conjecture.choice import ( + ChoiceNode, + ChoiceT, + choice_equal, + choice_from_index, + choice_key, + choice_permitted, + choice_to_index, ) from hypothesis.internal.conjecture.data import ( ConjectureData, ConjectureResult, - IRNode, + Spans, Status, - ir_size_nodes, - ir_to_buffer, - ir_value_equal, - ir_value_key, - ir_value_permitted, + _Overrun, + draw_choice, ) from hypothesis.internal.conjecture.junkdrawer import ( endswith, @@ -44,39 +45,44 @@ from hypothesis.internal.conjecture.shrinking import ( Ordering, String, ) +from hypothesis.internal.conjecture.shrinking.choicetree 
import ( + ChoiceTree, + prefix_selection_order, + random_selection_order, +) +from hypothesis.internal.floats import MAX_PRECISE_INTEGER if TYPE_CHECKING: from random import Random + from typing import TypeAlias from hypothesis.internal.conjecture.engine import ConjectureRunner -SortKeyT = TypeVar("SortKeyT", str, bytes) +ShrinkPredicateT: "TypeAlias" = Callable[[Union[ConjectureResult, _Overrun]], bool] -def sort_key(buffer: SortKeyT) -> tuple[int, SortKeyT]: - """Returns a sort key such that "simpler" buffers are smaller than +def sort_key(nodes: Sequence[ChoiceNode]) -> tuple[int, tuple[int, ...]]: + """Returns a sort key such that "simpler" choice sequences are smaller than "more complicated" ones. We define sort_key so that x is simpler than y if x is shorter than y or if - they have the same length and x < y lexicographically. This is called the - shortlex order. + they have the same length and map(choice_to_index, x) < map(choice_to_index, y). - The reason for using the shortlex order is: + The reason for using this ordering is: 1. If x is shorter than y then that means we had to make fewer decisions in constructing the test case when we ran x than we did when we ran y. - 2. If x is the same length as y then replacing a byte with a lower byte - corresponds to reducing the value of an integer we drew with draw_bits - towards zero. - 3. We want a total order, and given (2) the natural choices for things of - the same size are either the lexicographic or colexicographic orders - (the latter being the lexicographic order of the reverse of the string). - Because values drawn early in generation potentially get used in more + 2. If x is the same length as y then replacing a choice with a lower index + choice corresponds to replacing it with a simpler/smaller choice. + 3. 
Because choices drawn early in generation potentially get used in more places they potentially have a more significant impact on the final - result, so it makes sense to prioritise reducing earlier values over - later ones. This makes the lexicographic order the more natural choice. + result, so it makes sense to prioritise reducing earlier choices over + later ones. """ - return (len(buffer), buffer) + return ( + len(nodes), + tuple(choice_to_index(node.value, node.constraints) for node in nodes), + ) SHRINK_PASS_DEFINITIONS: dict[str, "ShrinkPassDefinition"] = {} @@ -101,10 +107,10 @@ class ShrinkPassDefinition: run_with_chooser = attr.ib() @property - def name(self): + def name(self) -> str: return self.run_with_chooser.__name__ - def __attrs_post_init__(self): + def __attrs_post_init__(self) -> None: assert self.name not in SHRINK_PASS_DEFINITIONS, self.name SHRINK_PASS_DEFINITIONS[self.name] = self @@ -130,7 +136,7 @@ class Shrinker: manage the associated state of a particular shrink problem. That is, we have some initial ConjectureData object and some property of interest that it satisfies, and we want to find a ConjectureData object with a - shortlex (see sort_key above) smaller buffer that exhibits the same + shortlex (see sort_key above) smaller choice sequence that exhibits the same property. Currently the only property of interest we use is that the status is @@ -154,7 +160,7 @@ class Shrinker: ======================= Generally a shrink pass is just any function that calls - cached_test_function and/or incorporate_new_buffer a number of times, + cached_test_function and/or consider_new_nodes a number of times, but there are a couple of useful things to bear in mind. A shrink pass *makes progress* if running it changes self.shrink_target @@ -181,7 +187,7 @@ class Shrinker: change in the underlying shrink target. It is generally safe to assume that the shrink target does not change prior to the point of first modification - e.g. 
if you change no bytes at - index ``i``, all examples whose start is ``<= i`` still exist, + index ``i``, all spans whose start is ``<= i`` still exist, as do all blocks, and the data object is still of length ``>= i + 1``. This can only be violated by bad user code which relies on an external source of non-determinism. @@ -196,22 +202,22 @@ class Shrinker: are carefully designed to do the right thing in the case that no shrinks occurred and try to adapt to any changes to do a reasonable job. e.g. say we wanted to write a shrink pass that tried deleting - each individual byte (this isn't an especially good choice, + each individual choice (this isn't an especially good pass, but it leads to a simple illustrative example), we might do it - by iterating over the buffer like so: + by iterating over the choice sequence like so: .. code-block:: python i = 0 - while i < len(self.shrink_target.buffer): - if not self.incorporate_new_buffer( - self.shrink_target.buffer[:i] + self.shrink_target.buffer[i + 1 :] + while i < len(self.shrink_target.nodes): + if not self.consider_new_nodes( + self.shrink_target.nodes[:i] + self.shrink_target.nodes[i + 1 :] ): i += 1 The reason for writing the loop this way is that i is always a - valid index into the current buffer, even if the current buffer - changes as a result of our actions. When the buffer changes, + valid index into the current choice sequence, even if the current sequence + changes as a result of our actions. When the choice sequence changes, we leave the index where it is rather than restarting from the beginning, and carry on. 
This means that the number of steps we run in this case is always bounded above by the number of steps @@ -281,7 +287,7 @@ class Shrinker: self, engine: "ConjectureRunner", initial: Union[ConjectureData, ConjectureResult], - predicate: Optional[Callable[[ConjectureData], bool]], + predicate: Optional[ShrinkPredicateT], *, allow_transition: Optional[ Callable[[Union[ConjectureData, ConjectureResult], ConjectureData], bool] @@ -302,10 +308,8 @@ class Shrinker: self.__predicate = predicate or (lambda data: True) self.__allow_transition = allow_transition or (lambda source, destination: True) self.__derived_values: dict = {} - self.__pending_shrink_explanation = None - - self.initial_size = len(initial.buffer) + self.initial_size = len(initial.choices) # We keep track of the current best example on the shrink_target # attribute. self.shrink_target = initial @@ -325,12 +329,7 @@ class Shrinker: # Because the shrinker is also used to `pareto_optimise` in the target phase, # we sometimes want to allow extending buffers instead of aborting at the end. - if in_target_phase: - from hypothesis.internal.conjecture.engine import BUFFER_SIZE - - self.__extend = BUFFER_SIZE - else: - self.__extend = 0 + self.__extend: Union[Literal["full"], int] = "full" if in_target_phase else 0 self.should_explain = explain @derived_value # type: ignore @@ -382,60 +381,32 @@ class Shrinker: if self.calls - self.calls_at_last_shrink >= self.max_stall: raise StopShrinking - def cached_test_function_ir(self, tree): - # sometimes our shrinking passes try obviously invalid things. We handle - # discarding them in one place here. 
- for node in tree: - if not ir_value_permitted(node.value, node.ir_type, node.kwargs): - return None + def cached_test_function( + self, nodes: Sequence[ChoiceNode] + ) -> tuple[bool, Optional[Union[ConjectureResult, _Overrun]]]: + nodes = nodes[: len(self.nodes)] - result = self.engine.cached_test_function_ir(tree) - self.incorporate_test_data(result) - self.check_calls() - return result - - def consider_new_tree(self, tree: Sequence[IRNode]) -> bool: - tree = tree[: len(self.nodes)] + if startswith(nodes, self.nodes): + return (True, None) - if startswith(tree, self.nodes): - return True + if sort_key(self.nodes) < sort_key(nodes): + return (False, None) - if startswith(self.nodes, tree): - return False + # sometimes our shrinking passes try obviously invalid things. We handle + # discarding them in one place here. + if any(not choice_permitted(node.value, node.constraints) for node in nodes): + return (False, None) + result = self.engine.cached_test_function( + [n.value for n in nodes], extend=self.__extend + ) previous = self.shrink_target - self.cached_test_function_ir(tree) - return previous is not self.shrink_target - - def consider_new_buffer(self, buffer): - """Returns True if after running this buffer the result would be - the current shrink_target.""" - buffer = bytes(buffer) - return buffer.startswith(self.buffer) or self.incorporate_new_buffer(buffer) - - def incorporate_new_buffer( - self, buffer - ): # pragma: no cover # removing function soon - """Either runs the test function on this buffer and returns True if - that changed the shrink_target, or determines that doing so would - be useless and returns False without running it.""" - - buffer = bytes(buffer[: self.shrink_target.index]) - # Sometimes an attempt at lexicographic minimization will do the wrong - # thing because the buffer has changed under it (e.g. something has - # turned into a write, the bit size has changed). 
The result would be - # an invalid string, but it's better for us to just ignore it here as - # it turns out to involve quite a lot of tricky book-keeping to get - # this right and it's better to just handle it in one place. - if sort_key(buffer) >= sort_key(self.shrink_target.buffer): - return False - - if self.shrink_target.buffer.startswith(buffer): - return False + self.incorporate_test_data(result) + self.check_calls() + return (previous is not self.shrink_target, result) - previous = self.shrink_target - self.cached_test_function(buffer) - return previous is not self.shrink_target + def consider_new_nodes(self, nodes: Sequence[ChoiceNode]) -> bool: + return self.cached_test_function(nodes)[0] def incorporate_test_data(self, data): """Takes a ConjectureData or Overrun object updates the current @@ -444,22 +415,11 @@ class Shrinker: return if ( self.__predicate(data) - and sort_key(data.buffer) < sort_key(self.shrink_target.buffer) + and sort_key(data.nodes) < sort_key(self.shrink_target.nodes) and self.__allow_transition(self.shrink_target, data) ): self.update_shrink_target(data) - def cached_test_function(self, buffer): - """Returns a cached version of the underlying test function, so - that the result is either an Overrun object (if the buffer is - too short to be a valid test case) or a ConjectureData object - with status >= INVALID that would result from running this buffer.""" - buffer = bytes(buffer) - result = self.engine.cached_test_function(buffer, extend=self.__extend) - self.incorporate_test_data(result) - self.check_calls() - return result - def debug(self, msg: str) -> None: self.engine.debug(msg) @@ -467,36 +427,15 @@ class Shrinker: def random(self) -> "Random": return self.engine.random - def shrink(self): + def shrink(self) -> None: """Run the full set of shrinks and update shrink_target. This method is "mostly idempotent" - calling it twice is unlikely to have any effect, though it has a non-zero probability of doing so. 
""" - # We assume that if an all-zero block of bytes is an interesting - # example then we're not going to do better than that. - # This might not technically be true: e.g. for integers() | booleans() - # the simplest example is actually [1, 0]. Missing this case is fairly - # harmless and this allows us to make various simplifying assumptions - # about the structure of the data (principally that we're never - # operating on a block of all zero bytes so can use non-zeroness as a - # signpost of complexity). - if not any(self.shrink_target.buffer) or self.incorporate_new_buffer( - bytes(len(self.shrink_target.buffer)) - ): - self.explain() - return - - # There are multiple buffers that represent the same counterexample, eg - # n=2 (from the 16 bit integer bucket) and n=2 (from the 32 bit integer - # bucket). Before we start shrinking, we need to normalize to the minimal - # such buffer, else a buffer-smaller but ir-larger value may be chosen - # as the minimal counterexample. - data = self.engine.new_conjecture_data_ir(self.nodes) - self.engine.test_function(data) - self.incorporate_test_data(data.as_result()) try: + self.initial_coarse_reduction() self.greedy_shrink() except StopShrinking: # If we stopped shrinking because we're making slow progress (instead of @@ -508,7 +447,7 @@ class Shrinker: def s(n): return "s" if n != 1 else "" - total_deleted = self.initial_size - len(self.shrink_target.buffer) + total_deleted = self.initial_size - len(self.shrink_target.choices) calls = self.engine.call_count - self.initial_calls misaligned = self.engine.misaligned_count - self.initial_misaligned @@ -517,8 +456,8 @@ class Shrinker: "Shrink pass profiling\n" "---------------------\n\n" f"Shrinking made a total of {calls} call{s(calls)} of which " - f"{self.shrinks} shrank and {misaligned} were misaligned. This deleted {total_deleted} bytes out " - f"of {self.initial_size}." + f"{self.shrinks} shrank and {misaligned} were misaligned. 
This " + f"deleted {total_deleted} choices out of {self.initial_size}." ) for useful in [True, False]: self.debug("") @@ -539,21 +478,21 @@ class Shrinker: self.debug( f" * {p.name} made {p.calls} call{s(p.calls)} of which " f"{p.shrinks} shrank and {p.misaligned} were misaligned, " - f"deleting {p.deletions} byte{s(p.deletions)}." + f"deleting {p.deletions} choice{s(p.deletions)}." ) self.debug("") self.explain() - def explain(self): - from hypothesis.internal.conjecture.engine import BUFFER_SIZE_IR + def explain(self) -> None: if not self.should_explain or not self.shrink_target.arg_slices: return - self.max_stall = 1e999 + self.max_stall = 2**100 shrink_target = self.shrink_target nodes = self.nodes - chunks = defaultdict(list) + choices = self.choices + chunks: dict[tuple[int, int], list[tuple[ChoiceT, ...]]] = defaultdict(list) # Before we start running experiments, let's check for known inputs which would # make them redundant. The shrinking process means that we've already tried many @@ -592,44 +531,46 @@ class Shrinker: for i in range(start, end): node = nodes[i] if not node.was_forced: - (value, _buf) = ir_to_buffer( - node.ir_type, node.kwargs, random=self.random + value = draw_choice( + node.type, node.constraints, random=self.random ) node = node.copy(with_value=value) - replacement.append(node) + replacement.append(node.value) - attempt = nodes[:start] + tuple(replacement) + nodes[end:] - result = self.engine.cached_test_function_ir( - attempt, extend=BUFFER_SIZE_IR - ir_size_nodes(attempt) - ) + attempt = choices[:start] + tuple(replacement) + choices[end:] + result = self.engine.cached_test_function(attempt, extend="full") - # Turns out this was a variable-length part, so grab the infix... 
if result.status is Status.OVERRUN: continue # pragma: no cover # flakily covered + result = cast(ConjectureResult, result) if not ( - len(attempt) == len(result.ir_nodes) - and endswith(result.ir_nodes, nodes[end:]) + len(attempt) == len(result.choices) + and endswith(result.nodes, nodes[end:]) ): - for ex, res in zip(shrink_target.examples, result.examples): - assert ex.ir_start == res.ir_start - assert ex.ir_start <= start - assert ex.label == res.label - if start == ex.ir_start and end == ex.ir_end: - res_end = res.ir_end + # Turns out this was a variable-length part, so grab the infix... + for span1, span2 in zip(shrink_target.spans, result.spans): + assert span1.start == span2.start + assert span1.start <= start + assert span1.label == span2.label + if span1.start == start and span1.end == end: + result_end = span2.end break else: raise NotImplementedError("Expected matching prefixes") attempt = ( - nodes[:start] + result.ir_nodes[start:res_end] + nodes[end:] + choices[:start] + + result.choices[start:result_end] + + choices[end:] ) - chunks[(start, end)].append(result.ir_nodes[start:res_end]) - result = self.engine.cached_test_function_ir(attempt) + chunks[(start, end)].append(result.choices[start:result_end]) + result = self.engine.cached_test_function(attempt) if result.status is Status.OVERRUN: continue # pragma: no cover # flakily covered + result = cast(ConjectureResult, result) else: - chunks[(start, end)].append(result.ir_nodes[start:end]) + chunks[(start, end)].append(result.choices[start:end]) if shrink_target is not self.shrink_target: # pragma: no cover # If we've shrunk further without meaning to, bail out. @@ -654,15 +595,15 @@ class Shrinker: chunks_by_start_index = sorted(chunks.items()) for _ in range(500): # pragma: no branch # no-branch here because we don't coverage-test the abort-at-500 logic. 
- new_nodes = [] + new_choices: list[ChoiceT] = [] prev_end = 0 for (start, end), ls in chunks_by_start_index: assert prev_end <= start < end, "these chunks must be nonoverlapping" - new_nodes.extend(nodes[prev_end:start]) - new_nodes.extend(self.random.choice(ls)) + new_choices.extend(choices[prev_end:start]) + new_choices.extend(self.random.choice(ls)) prev_end = end - result = self.engine.cached_test_function_ir(new_nodes) + result = self.engine.cached_test_function(new_choices) # This *can't* be a shrink because none of the components were. assert shrink_target is self.shrink_target @@ -679,7 +620,7 @@ class Shrinker: ) break - def greedy_shrink(self): + def greedy_shrink(self) -> None: """Run a full set of greedy shrinks (that is, ones that will only ever move to a better target) and update shrink_target appropriately. @@ -688,20 +629,134 @@ class Shrinker: """ self.fixate_shrink_passes( [ + "try_trivial_spans", node_program("X" * 5), node_program("X" * 4), node_program("X" * 3), node_program("X" * 2), node_program("X" * 1), "pass_to_descendant", - "reorder_examples", - "minimize_duplicated_nodes", - "minimize_individual_nodes", - "redistribute_integer_pairs", - "lower_blocks_together", + "reorder_spans", + "minimize_duplicated_choices", + "minimize_individual_choices", + "redistribute_numeric_pairs", + "lower_integers_together", + "lower_duplicated_characters", ] ) + def initial_coarse_reduction(self): + """Performs some preliminary reductions that should not be + repeated as part of the main shrink passes. + + The main reason why these can't be included as part of shrink + passes is that they have much more ability to make the test + case "worse". e.g. they might rerandomise part of it, significantly + increasing the value of individual nodes, which works in direct + opposition to the lexical shrinking and will frequently undo + its work. 
+ """ + self.reduce_each_alternative() + + @derived_value # type: ignore + def spans_starting_at(self): + result = [[] for _ in self.shrink_target.nodes] + for i, ex in enumerate(self.spans): + # We can have zero-length spans that start at the end + if ex.start < len(result): + result[ex.start].append(i) + return tuple(map(tuple, result)) + + def reduce_each_alternative(self): + """This is a pass that is designed to rerandomise use of the + one_of strategy or things that look like it, in order to try + to move from later strategies to earlier ones in the branch + order. + + It does this by trying to systematically lower each value it + finds that looks like it might be the branch decision for + one_of, and then attempts to repair any changes in shape that + this causes. + """ + i = 0 + while i < len(self.shrink_target.nodes): + nodes = self.shrink_target.nodes + node = nodes[i] + if ( + node.type == "integer" + and not node.was_forced + and node.value <= 10 + and node.constraints["min_value"] == 0 + ): + assert isinstance(node.value, int) + + # We've found a plausible candidate for a ``one_of`` choice. + # We now want to see if the shape of the test case actually depends + # on it. If it doesn't, then we don't need to do this (comparatively + # costly) pass, and can let much simpler lexicographic reduction + # handle it later. + # + # We test this by trying to set the value to zero and seeing if the + # shape changes, as measured by either changing the number of subsequent + # nodes, or changing the nodes in such a way as to cause one of the + # previous values to no longer be valid in its position. 
+ zero_attempt = self.cached_test_function( + nodes[:i] + (nodes[i].copy(with_value=0),) + nodes[i + 1 :] + )[1] + if ( + zero_attempt is not self.shrink_target + and zero_attempt is not None + and zero_attempt.status >= Status.VALID + ): + changed_shape = len(zero_attempt.nodes) != len(nodes) + + if not changed_shape: + for j in range(i + 1, len(nodes)): + zero_node = zero_attempt.nodes[j] + orig_node = nodes[j] + if ( + zero_node.type != orig_node.type + or not choice_permitted( + orig_node.value, zero_node.constraints + ) + ): + changed_shape = True + break + if changed_shape: + for v in range(node.value): + if self.try_lower_node_as_alternative(i, v): + break + i += 1 + + def try_lower_node_as_alternative(self, i, v): + """Attempt to lower `self.shrink_target.nodes[i]` to `v`, + while rerandomising and attempting to repair any subsequent + changes to the shape of the test case that this causes.""" + nodes = self.shrink_target.nodes + if self.consider_new_nodes( + nodes[:i] + (nodes[i].copy(with_value=v),) + nodes[i + 1 :] + ): + return True + + prefix = nodes[:i] + (nodes[i].copy(with_value=v),) + initial = self.shrink_target + spans = self.spans_starting_at[i] + for _ in range(3): + random_attempt = self.engine.cached_test_function( + [n.value for n in prefix], extend=len(nodes) + ) + if random_attempt.status < Status.VALID: + continue + self.incorporate_test_data(random_attempt) + for j in spans: + initial_ex = initial.spans[j] + attempt_ex = random_attempt.spans[j] + contents = random_attempt.nodes[attempt_ex.start : attempt_ex.end] + self.consider_new_nodes(nodes[:i] + contents + nodes[initial_ex.end :]) + if initial is not self.shrink_target: + return True + return False + @derived_value # type: ignore def shrink_pass_choice_trees(self): return defaultdict(ChoiceTree) @@ -795,7 +850,7 @@ class Shrinker: # the length are the best. 
if self.shrink_target is before_sp: reordering[sp] = 1 - elif len(self.buffer) < len(before_sp.buffer): + elif len(self.choices) < len(before_sp.choices): reordering[sp] = -1 else: reordering[sp] = 0 @@ -803,42 +858,36 @@ class Shrinker: passes.sort(key=reordering.__getitem__) @property - def buffer(self): - return self.shrink_target.buffer - - @property - def blocks(self): - return self.shrink_target.blocks - - @property - def nodes(self): - return self.shrink_target.ir_nodes + def nodes(self) -> tuple[ChoiceNode, ...]: + return self.shrink_target.nodes @property - def choices(self): + def choices(self) -> tuple[ChoiceT, ...]: return self.shrink_target.choices @property - def examples(self): - return self.shrink_target.examples + def spans(self) -> Spans: + return self.shrink_target.spans @derived_value # type: ignore - def examples_by_label(self): - """An index of all examples grouped by their label, with - the examples stored in their normal index order.""" + def spans_by_label(self): + """ + A mapping of labels to a list of spans with that label. Spans in the list + are ordered by their normal index order. + """ - examples_by_label = defaultdict(list) - for ex in self.examples: - examples_by_label[ex.label].append(ex) - return dict(examples_by_label) + spans_by_label = defaultdict(list) + for ex in self.spans: + spans_by_label[ex.label].append(ex) + return dict(spans_by_label) @derived_value # type: ignore def distinct_labels(self): - return sorted(self.examples_by_label, key=str) + return sorted(self.spans_by_label, key=str) @defines_shrink_pass() def pass_to_descendant(self, chooser): - """Attempt to replace each example with a descendant example. + """Attempt to replace each span with a descendant span. This is designed to deal with strategies that call themselves recursively. 
For example, suppose we had: @@ -857,14 +906,14 @@ class Shrinker: """ label = chooser.choose( - self.distinct_labels, lambda l: len(self.examples_by_label[l]) >= 2 + self.distinct_labels, lambda l: len(self.spans_by_label[l]) >= 2 ) - ls = self.examples_by_label[label] + ls = self.spans_by_label[label] i = chooser.choose(range(len(ls) - 1)) ancestor = ls[i] - if i + 1 == len(ls) or ls[i + 1].ir_start >= ancestor.ir_end: + if i + 1 == len(ls) or ls[i + 1].start >= ancestor.end: return @self.cached(label, i) @@ -873,22 +922,22 @@ class Shrinker: hi = len(ls) while lo + 1 < hi: mid = (lo + hi) // 2 - if ls[mid].ir_start >= ancestor.ir_end: + if ls[mid].start >= ancestor.end: hi = mid else: lo = mid - return [t for t in ls[i + 1 : hi] if t.ir_length < ancestor.ir_length] + return [t for t in ls[i + 1 : hi] if t.choice_count < ancestor.choice_count] - descendant = chooser.choose(descendants, lambda ex: ex.ir_length > 0) + descendant = chooser.choose(descendants, lambda ex: ex.choice_count > 0) - assert ancestor.ir_start <= descendant.ir_start - assert ancestor.ir_end >= descendant.ir_end - assert descendant.ir_length < ancestor.ir_length + assert ancestor.start <= descendant.start + assert ancestor.end >= descendant.end + assert descendant.choice_count < ancestor.choice_count - self.consider_new_tree( - self.nodes[: ancestor.ir_start] - + self.nodes[descendant.ir_start : descendant.ir_end] - + self.nodes[ancestor.ir_end :] + self.consider_new_nodes( + self.nodes[: ancestor.start] + + self.nodes[descendant.start : descendant.end] + + self.nodes[ancestor.end :] ) def lower_common_node_offset(self): @@ -930,14 +979,16 @@ class Shrinker: changed = [] for i in sorted(self.__changed_nodes): node = self.nodes[i] - if node.trivial or node.ir_type != "integer": + if node.trivial or node.type != "integer": continue changed.append(node) if not changed: return - ints = [abs(node.value - node.kwargs["shrink_towards"]) for node in changed] + ints = [ + abs(node.value - 
node.constraints["shrink_towards"]) for node in changed + ] offset = min(ints) assert offset > 0 @@ -950,13 +1001,13 @@ class Shrinker: return ( node.index, node.index + 1, - [node.copy(with_value=node.kwargs["shrink_towards"] + n)], + [node.copy(with_value=node.constraints["shrink_towards"] + n)], ) def consider(n, sign): - return self.consider_new_tree( + return self.consider_new_nodes( replace_all( - st.ir_nodes, + st.nodes, [ offset_node(node, sign * (n + v)) for node, v in zip(changed, ints) @@ -977,27 +1028,27 @@ class Shrinker: self.__changed_nodes.add(i) @property - def __changed_nodes(self): + def __changed_nodes(self) -> set[int]: if self.__last_checked_changed_at is self.shrink_target: return self.__all_changed_nodes prev_target = self.__last_checked_changed_at new_target = self.shrink_target assert prev_target is not new_target - prev_nodes = prev_target.ir_nodes - new_nodes = new_target.ir_nodes - assert sort_key(new_target.buffer) < sort_key(prev_target.buffer) + prev_nodes = prev_target.nodes + new_nodes = new_target.nodes + assert sort_key(new_target.nodes) < sort_key(prev_target.nodes) if len(prev_nodes) != len(new_nodes) or any( - n1.ir_type != n2.ir_type for n1, n2 in zip(prev_nodes, new_nodes) + n1.type != n2.type for n1, n2 in zip(prev_nodes, new_nodes) ): - # should we check kwargs are equal as well? + # should we check constraints are equal as well? 
self.__all_changed_nodes = set() else: assert len(prev_nodes) == len(new_nodes) for i, (n1, n2) in enumerate(zip(prev_nodes, new_nodes)): - assert n1.ir_type == n2.ir_type - if not ir_value_equal(n1.ir_type, n1.value, n2.value): + assert n1.type == n2.type + if not choice_equal(n1.value, n2.value): self.__all_changed_nodes.add(i) return self.__all_changed_nodes @@ -1042,7 +1093,7 @@ class Shrinker: [(node.index, node.index + 1, [node.copy(with_value=n)]) for node in nodes], ) - attempt = self.cached_test_function_ir(initial_attempt) + attempt = self.cached_test_function(initial_attempt)[1] if attempt is None: return False @@ -1073,7 +1124,7 @@ class Shrinker: # min_size than our attempt had for the draw_string node. # # We'll now try realigning this tree by: - # * replacing the kwargs in our attempt with what test_function tried + # * replacing the constraints in our attempt with what test_function tried # to draw in practice # * truncating the value of that node to match min_size # @@ -1085,39 +1136,38 @@ class Shrinker: # case of this function of preserving from the right instead of # preserving from the left. see test_can_shrink_variable_string_draws. - (index, attempt_ir_type, attempt_kwargs, _attempt_forced) = ( + (index, attempt_choice_type, attempt_constraints, _attempt_forced) = ( attempt.misaligned_at ) node = self.nodes[index] - if node.ir_type != attempt_ir_type: + if node.type != attempt_choice_type: return False # pragma: no cover if node.was_forced: return False # pragma: no cover - if node.ir_type in {"string", "bytes"}: + if node.type in {"string", "bytes"}: # if the size *increased*, we would have to guess what to pad with # in order to try fixing up this attempt. Just give up. - if node.kwargs["min_size"] <= attempt_kwargs["min_size"]: + if node.constraints["min_size"] <= attempt_constraints["min_size"]: # attempts which increase min_size tend to overrun rather than # be misaligned, making a covering case difficult. 
return False # pragma: no cover - # the size decreased in our attempt. Try again, but replace with - # the min_size that we would have gotten, and truncate the value + # the size decreased in our attempt. Try again, but truncate the value # to that size by removing any elements past min_size. - return self.consider_new_tree( + return self.consider_new_nodes( initial_attempt[: node.index] + [ initial_attempt[node.index].copy( - with_kwargs=attempt_kwargs, + with_constraints=attempt_constraints, with_value=initial_attempt[node.index].value[ - : attempt_kwargs["min_size"] + : attempt_constraints["min_size"] ], ) ] + initial_attempt[node.index :] ) - lost_nodes = len(self.nodes) - len(attempt.ir_nodes) + lost_nodes = len(self.nodes) - len(attempt.nodes) if lost_nodes <= 0: return False @@ -1130,35 +1180,35 @@ class Shrinker: # try to be more aggressive. regions_to_delete = {(end, end + lost_nodes)} - for ex in self.examples: - if ex.ir_start > start: + for ex in self.spans: + if ex.start > start: continue - if ex.ir_end <= end: + if ex.end <= end: continue - if ex.index >= len(attempt.examples): + if ex.index >= len(attempt.spans): continue # pragma: no cover - replacement = attempt.examples[ex.index] - in_original = [c for c in ex.children if c.ir_start >= end] - in_replaced = [c for c in replacement.children if c.ir_start >= end] + replacement = attempt.spans[ex.index] + in_original = [c for c in ex.children if c.start >= end] + in_replaced = [c for c in replacement.children if c.start >= end] if len(in_replaced) >= len(in_original) or not in_replaced: continue - # We've found an example where some of the children went missing + # We've found a span where some of the children went missing # as a result of this change, and just replacing it with the data # it would have had and removing the spillover didn't work. 
This # means that some of its children towards the right must be # important, so we try to arrange it so that it retains its # rightmost children instead of its leftmost. regions_to_delete.add( - (in_original[0].ir_start, in_original[-len(in_replaced)].ir_start) + (in_original[0].start, in_original[-len(in_replaced)].start) ) for u, v in sorted(regions_to_delete, key=lambda x: x[1] - x[0], reverse=True): try_with_deleted = initial_attempt[:u] + initial_attempt[v:] - if self.consider_new_tree(try_with_deleted): + if self.consider_new_nodes(try_with_deleted): return True return False @@ -1182,9 +1232,9 @@ class Shrinker: while self.shrink_target.has_discards: discarded = [] - for ex in self.shrink_target.examples: + for ex in self.shrink_target.spans: if ( - ex.length > 0 + ex.choice_count > 0 and ex.discarded and (not discarded or ex.start >= discarded[-1][-1]) ): @@ -1197,27 +1247,25 @@ class Shrinker: if not discarded: break - attempt = bytearray(self.shrink_target.buffer) + attempt = list(self.nodes) for u, v in reversed(discarded): del attempt[u:v] - if not self.incorporate_new_buffer(attempt): + if not self.consider_new_nodes(tuple(attempt)): return False return True @derived_value # type: ignore def duplicated_nodes(self): - """Returns a list of nodes grouped (ir_type, value).""" + """Returns a list of nodes grouped (choice_type, value).""" duplicates = defaultdict(list) for node in self.nodes: - duplicates[(node.ir_type, ir_value_key(node.ir_type, node.value))].append( - node - ) + duplicates[(node.type, choice_key(node.value))].append(node) return list(duplicates.values()) @defines_shrink_pass() - def minimize_duplicated_nodes(self, chooser): - """Find blocks that have been duplicated in multiple places and attempt + def minimize_duplicated_choices(self, chooser): + """Find choices that have been duplicated in multiple places and attempt to minimize all of the duplicates simultaneously. 
This lets us handle cases where two values can't be shrunk @@ -1237,34 +1285,42 @@ class Shrinker: more values at once. """ nodes = chooser.choose(self.duplicated_nodes) + # we can't lower any nodes which are trivial. try proceeding with the + # remaining nodes. + nodes = [node for node in nodes if not node.trivial] if len(nodes) <= 1: return - # no point in lowering nodes together if one is already trivial. - # TODO_BETTER_SHRINK: we could potentially just drop the trivial nodes - # here and carry on with nontrivial ones? - if any(node.trivial for node in nodes): - return - self.minimize_nodes(nodes) @defines_shrink_pass() - def redistribute_integer_pairs(self, chooser): - """If there is a sum of generated integers that we need their sum + def redistribute_numeric_pairs(self, chooser): + """If there is a sum of generated numbers that we need their sum to exceed some bound, lowering one of them requires raising the other. This pass enables that.""" - # TODO_SHRINK let's extend this to floats as well. - # look for a pair of nodes (node1, node2) which are both integers and - # aren't separated by too many other nodes. We'll decrease node1 and + # look for a pair of nodes (node1, node2) which are both numeric + # and aren't separated by too many other nodes. We'll decrease node1 and # increase node2 (note that the other way around doesn't make sense as # it's strictly worse in the ordering). + def can_choose_node(node): + # don't choose nan, inf, or floats above the threshold where f + 1 > f + # (which is not necessarily true for floats above MAX_PRECISE_INTEGER). + # The motivation for the last condition is to avoid trying weird + # non-shrinks where we raise one node and think we lowered another + # (but didn't). 
+ return node.type in {"integer", "float"} and not ( + node.type == "float" + and (math.isnan(node.value) or abs(node.value) >= MAX_PRECISE_INTEGER) + ) + node1 = chooser.choose( - self.nodes, lambda node: node.ir_type == "integer" and not node.trivial + self.nodes, + lambda node: can_choose_node(node) and not node.trivial, ) node2 = chooser.choose( self.nodes, - lambda node: node.ir_type == "integer" + lambda node: can_choose_node(node) # Note that it's fine for node2 to be trivial, because we're going to # explicitly make it *not* trivial by adding to its value. and not node.was_forced @@ -1273,76 +1329,148 @@ class Shrinker: and node1.index < node.index <= node1.index + 4, ) - m = node1.value - n = node2.value + m: Union[int, float] = node1.value + n: Union[int, float] = node2.value - def boost(k): + def boost(k: int) -> bool: if k > m: return False - node_value = m - k - next_node_value = n + k + try: + v1 = m - k + v2 = n + k + except OverflowError: # pragma: no cover + # if n or m is a float and k is over sys.float_info.max, coercing + # k to a float will overflow. + return False + + # if we've increased node2 to the point that we're past max precision, + # give up - things have become too unstable. + if node1.type == "float" and v2 >= MAX_PRECISE_INTEGER: + return False - return self.consider_new_tree( + return self.consider_new_nodes( self.nodes[: node1.index] - + (node1.copy(with_value=node_value),) + + (node1.copy(with_value=v1),) + self.nodes[node1.index + 1 : node2.index] - + (node2.copy(with_value=next_node_value),) + + (node2.copy(with_value=v2),) + self.nodes[node2.index + 1 :] ) find_integer(boost) @defines_shrink_pass() - def lower_blocks_together(self, chooser): - block = chooser.choose(self.blocks, lambda b: not b.trivial) - - # Choose the next block to be up to eight blocks onwards. 
We don't - # want to go too far (to avoid quadratic time) but it's worth a - # reasonable amount of lookahead, especially as we expect most - # blocks are zero by this point anyway. - next_block = self.blocks[ + def lower_integers_together(self, chooser): + node1 = chooser.choose( + self.nodes, lambda n: n.type == "integer" and not n.trivial + ) + # Search up to 3 nodes ahead, to avoid quadratic time. + node2 = self.nodes[ chooser.choose( - range(block.index + 1, min(len(self.blocks), block.index + 9)), - lambda j: not self.blocks[j].trivial, + range(node1.index + 1, min(len(self.nodes), node1.index + 3 + 1)), + lambda i: self.nodes[i].type == "integer" + and not self.nodes[i].was_forced, ) ] - buffer = self.buffer + # one might expect us to require node2 to be nontrivial, and to minimize + # the node which is closer to its shrink_towards, rather than node1 + # unconditionally. In reality, it's acceptable for us to transition node2 + # from trivial to nontrivial, because the shrink ordering is dominated by + # the complexity of the earlier node1. What matters is minimizing node1. 
+ shrink_towards = node1.constraints["shrink_towards"] + + def consider(n): + return self.consider_new_nodes( + self.nodes[: node1.index] + + (node1.copy(with_value=node1.value - n),) + + self.nodes[node1.index + 1 : node2.index] + + (node2.copy(with_value=node2.value - n),) + + self.nodes[node2.index + 1 :] + ) - m = int_from_bytes(buffer[block.start : block.end]) - n = int_from_bytes(buffer[next_block.start : next_block.end]) + find_integer(lambda n: consider(shrink_towards - n)) + find_integer(lambda n: consider(n - shrink_towards)) - def lower(k): - if k > min(m, n): - return False - attempt = bytearray(buffer) - attempt[block.start : block.end] = int_to_bytes(m - k, block.length) - attempt[next_block.start : next_block.end] = int_to_bytes( - n - k, next_block.length + @defines_shrink_pass() + def lower_duplicated_characters(self, chooser): + """ + Select two string choices no more than 4 choices apart and simultaneously + lower characters which appear in both strings. This helps cases where the + same character must appear in two strings, but the actual value of the + character is not relevant. + + This shrinking pass currently only tries lowering *all* instances of the + duplicated character in both strings. So for instance, given two choices: + + "bbac" + "abbb" + + we would try lowering all five of the b characters simultaneously. This + may fail to shrink some cases where only certain character indices are + correlated, for instance if only the b at index 1 could be lowered + simultaneously and the rest did in fact actually have to be a `b`. + + It would be nice to try shrinking that case as well, but we would need good + safeguards because it could get very expensive to try all combinations. + I expect lowering all duplicates to handle most cases in the meantime. 
+ """ + node1 = chooser.choose( + self.nodes, lambda n: n.type == "string" and not n.trivial + ) + + # limit search to up to 4 choices ahead, to avoid quadratic behavior + node2 = self.nodes[ + chooser.choose( + range(node1.index + 1, min(len(self.nodes), node1.index + 1 + 4)), + lambda i: self.nodes[i].type == "string" and not self.nodes[i].trivial + # select nodes which have at least one of the same character present + and set(node1.value) & set(self.nodes[i].value), ) - assert len(attempt) == len(buffer) - return self.consider_new_buffer(attempt) + ] - find_integer(lower) + duplicated_characters = set(node1.value) & set(node2.value) + # deterministic ordering + char = chooser.choose(sorted(duplicated_characters)) + intervals = node1.constraints["intervals"] + + def copy_node(node, n): + # replace all duplicate characters in each string. This might miss + # some shrinks compared to only replacing some, but trying all possible + # combinations of indices could get expensive if done without some + # thought. + return node.copy( + with_value=node.value.replace(char, intervals.char_in_shrink_order(n)) + ) + + Integer.shrink( + intervals.index_from_char_in_shrink_order(char), + lambda n: self.consider_new_nodes( + self.nodes[: node1.index] + + (copy_node(node1, n),) + + self.nodes[node1.index + 1 : node2.index] + + (copy_node(node2, n),) + + self.nodes[node2.index + 1 :] + ), + ) def minimize_nodes(self, nodes): - ir_type = nodes[0].ir_type + choice_type = nodes[0].type value = nodes[0].value - # unlike ir_type and value, kwargs are *not* guaranteed to be equal among all - # passed nodes. We arbitrarily use the kwargs of the first node. I think + # unlike choice_type and value, constraints are *not* guaranteed to be equal among all + # passed nodes. We arbitrarily use the constraints of the first node. 
I think # this is unsound (= leads to us trying shrinks that could not have been # generated), but those get discarded at test-time, and this enables useful - # slips where kwargs are not equal but are close enough that doing the + # slips where constraints are not equal but are close enough that doing the # same operation on both basically just works. - kwargs = nodes[0].kwargs + constraints = nodes[0].constraints assert all( - node.ir_type == ir_type and ir_value_equal(ir_type, node.value, value) + node.type == choice_type and choice_equal(node.value, value) for node in nodes ) - if ir_type == "integer": - shrink_towards = kwargs["shrink_towards"] + if choice_type == "integer": + shrink_towards = constraints["shrink_towards"] # try shrinking from both sides towards shrink_towards. # we're starting from n = abs(shrink_towards - value). Because the # shrinker will not check its starting value, we need to try @@ -1356,7 +1484,7 @@ class Shrinker: abs(shrink_towards - value), lambda n: self.try_shrinking_nodes(nodes, shrink_towards - n), ) - elif ir_type == "float": + elif choice_type == "float": self.try_shrinking_nodes(nodes, abs(value)) Float.shrink( abs(value), @@ -1366,28 +1494,61 @@ class Shrinker: abs(value), lambda val: self.try_shrinking_nodes(nodes, -val), ) - elif ir_type == "boolean": + elif choice_type == "boolean": # must be True, otherwise would be trivial and not selected. assert value is True # only one thing to try: false! 
self.try_shrinking_nodes(nodes, False) - elif ir_type == "bytes": + elif choice_type == "bytes": Bytes.shrink( value, lambda val: self.try_shrinking_nodes(nodes, val), + min_size=constraints["min_size"], ) - elif ir_type == "string": + elif choice_type == "string": String.shrink( value, lambda val: self.try_shrinking_nodes(nodes, val), - intervals=kwargs["intervals"], + intervals=constraints["intervals"], + min_size=constraints["min_size"], ) else: raise NotImplementedError @defines_shrink_pass() - def minimize_individual_nodes(self, chooser): - """Attempt to minimize each node in sequence. + def try_trivial_spans(self, chooser): + i = chooser.choose(range(len(self.spans))) + + prev = self.shrink_target + nodes = self.shrink_target.nodes + ex = self.spans[i] + prefix = nodes[: ex.start] + replacement = tuple( + [ + ( + node + if node.was_forced + else node.copy( + with_value=choice_from_index(0, node.type, node.constraints) + ) + ) + for node in nodes[ex.start : ex.end] + ] + ) + suffix = nodes[ex.end :] + attempt = self.cached_test_function(prefix + replacement + suffix)[1] + + if self.shrink_target is not prev: + return + + if isinstance(attempt, ConjectureResult): + new_ex = attempt.spans[i] + new_replacement = attempt.nodes[new_ex.start : new_ex.end] + self.consider_new_nodes(prefix + new_replacement + suffix) + + @defines_shrink_pass() + def minimize_individual_choices(self, chooser): + """Attempt to minimize each choice in sequence. This is the pass that ensures that e.g. each integer we draw is a minimum value. So it's the part that guarantees that if we e.g. do @@ -1397,7 +1558,7 @@ class Shrinker: then in our shrunk example, x = 10 rather than say 97. - If we are unsuccessful at minimizing a node of interest we then + If we are unsuccessful at minimizing a choice of interest we then check if that's because it's changing the size of the test case and, if so, we also make an attempt to delete parts of the test case to see if that fixes it. 
@@ -1424,7 +1585,7 @@ class Shrinker: # the size of the generated input, we'll try deleting things after that # node and see if the resulting attempt works. - if node.ir_type != "integer": + if node.type != "integer": # Only try this fixup logic on integer draws. Almost all size # dependencies are on integer draws, and if it's not, it's doing # something convoluted enough that it is unlikely to shrink well anyway. @@ -1439,12 +1600,12 @@ class Shrinker: + (node.copy(with_value=node.value - 1),) + self.nodes[node.index + 1 :] ) - attempt = self.cached_test_function_ir(lowered) + attempt = self.cached_test_function(lowered)[1] if ( attempt is None or attempt.status < Status.VALID - or len(attempt.ir_nodes) == len(self.nodes) - or len(attempt.ir_nodes) == node.index + 1 + or len(attempt.nodes) == len(self.nodes) + or len(attempt.nodes) == node.index + 1 ): # no point in trying our size-dependency-logic if our attempt at # lowering the node resulted in: @@ -1458,37 +1619,37 @@ class Shrinker: assert attempt is not self.shrink_target @self.cached(node.index) - def first_example_after_node(): + def first_span_after_node(): lo = 0 - hi = len(self.examples) + hi = len(self.spans) while lo + 1 < hi: mid = (lo + hi) // 2 - ex = self.examples[mid] - if ex.ir_start >= node.index: + ex = self.spans[mid] + if ex.start >= node.index: hi = mid else: lo = mid return hi - # we try deleting both entire examples, and single nodes. + # we try deleting both entire spans, and single nodes. # If we wanted to get more aggressive, we could try deleting n - # consecutive nodes (that don't cross an example boundary) for say + # consecutive nodes (that don't cross a span boundary) for say # n <= 2 or n <= 3. 
if chooser.choose([True, False]): - ex = self.examples[ + ex = self.spans[ chooser.choose( - range(first_example_after_node, len(self.examples)), - lambda i: self.examples[i].ir_length > 0, + range(first_span_after_node, len(self.spans)), + lambda i: self.spans[i].choice_count > 0, ) ] - self.consider_new_tree(lowered[: ex.ir_start] + lowered[ex.ir_end :]) + self.consider_new_nodes(lowered[: ex.start] + lowered[ex.end :]) else: node = self.nodes[chooser.choose(range(node.index + 1, len(self.nodes)))] - self.consider_new_tree(lowered[: node.index] + lowered[node.index + 1 :]) + self.consider_new_nodes(lowered[: node.index] + lowered[node.index + 1 :]) @defines_shrink_pass() - def reorder_examples(self, chooser): - """This pass allows us to reorder the children of each example. + def reorder_spans(self, chooser): + """This pass allows us to reorder the children of each span. For example, consider the following: @@ -1506,31 +1667,31 @@ class Shrinker: ``x=""``, ``y="0"``, or the other way around. With reordering it will reliably fail with ``x=""``, ``y="0"``. 
""" - ex = chooser.choose(self.examples) + ex = chooser.choose(self.spans) label = chooser.choose(ex.children).label - examples = [c for c in ex.children if c.label == label] - if len(examples) <= 1: + spans = [c for c in ex.children if c.label == label] + if len(spans) <= 1: return st = self.shrink_target - endpoints = [(ex.ir_start, ex.ir_end) for ex in examples] + endpoints = [(ex.start, ex.end) for ex in spans] Ordering.shrink( - range(len(examples)), - lambda indices: self.consider_new_tree( + range(len(spans)), + lambda indices: self.consider_new_nodes( replace_all( - st.ir_nodes, + st.nodes, [ ( u, v, - st.ir_nodes[examples[i].ir_start : examples[i].ir_end], + st.nodes[spans[i].start : spans[i].end], ) for (u, v), i in zip(endpoints, indices) ], ) ), - key=lambda i: st.buffer[examples[i].start : examples[i].end], + key=lambda i: sort_key(st.nodes[spans[i].start : spans[i].end]), ) def run_node_program(self, i, description, original, repeats=1): @@ -1548,9 +1709,9 @@ class Shrinker: Returns True if this successfully changes the underlying shrink target, else False. 
""" - if i + len(description) > len(original.ir_nodes) or i < 0: + if i + len(description) > len(original.nodes) or i < 0: return False - attempt = list(original.ir_nodes) + attempt = list(original.nodes) for _ in range(repeats): for k, command in reversed(list(enumerate(description))): j = i + k @@ -1562,7 +1723,7 @@ class Shrinker: else: raise NotImplementedError(f"Unrecognised command {command!r}") - return self.consider_new_tree(attempt) + return self.consider_new_nodes(attempt) def shrink_pass_family(f): @@ -1637,7 +1798,7 @@ class ShrinkPass: initial_shrinks = self.shrinker.shrinks initial_calls = self.shrinker.calls initial_misaligned = self.shrinker.misaligned - size = len(self.shrinker.shrink_target.buffer) + size = len(self.shrinker.shrink_target.choices) self.shrinker.engine.explain_next_call_as(self.name) if random_order: @@ -1654,7 +1815,7 @@ class ShrinkPass: self.calls += self.shrinker.calls - initial_calls self.misaligned += self.shrinker.misaligned - initial_misaligned self.shrinks += self.shrinker.shrinks - initial_shrinks - self.deletions += size - len(self.shrinker.shrink_target.buffer) + self.deletions += size - len(self.shrinker.shrink_target.choices) self.shrinker.engine.clear_call_explanation() return True diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/__init__.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/__init__.py index 46cc1660000..0e12c675919 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/__init__.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/__init__.py @@ -15,4 +15,4 @@ from hypothesis.internal.conjecture.shrinking.integer import Integer from hypothesis.internal.conjecture.shrinking.ordering import Ordering from hypothesis.internal.conjecture.shrinking.string import String -__all__ = ["Integer", "Ordering", "Float", "Collection", "String", "Bytes"] +__all__ = ["Bytes", "Collection", "Float", 
"Integer", "Ordering", "String"] diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/choicetree.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/choicetree.py index 0ba8ab819b7..0ba8ab819b7 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/choicetree.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/choicetree.py diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/collection.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/collection.py index a87a27b359d..4247221f7da 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/collection.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/collection.py @@ -8,20 +8,29 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +from collections import Counter + from hypothesis.internal.conjecture.shrinking.common import Shrinker from hypothesis.internal.conjecture.shrinking.ordering import Ordering from hypothesis.internal.conjecture.utils import identity class Collection(Shrinker): - def setup(self, *, ElementShrinker, to_order=identity, from_order=identity): + def setup( + self, *, ElementShrinker, min_size, to_order=identity, from_order=identity + ): self.ElementShrinker = ElementShrinker self.to_order = to_order self.from_order = from_order + self.min_size = min_size def make_immutable(self, value): return tuple(value) + def short_circuit(self): + zero = self.from_order(0) + return self.consider([zero] * self.min_size) + def left_is_better(self, left, right): if len(left) < len(right): return True @@ -38,6 +47,11 @@ class Collection(Shrinker): return False def run_step(self): + # try all-zero first; we already considered all-zero-and-smallest in + # short_circuit. 
+ zero = self.from_order(0) + self.consider([zero] * len(self.current)) + # try deleting each element in turn, starting from the back # TODO_BETTER_SHRINK: adaptively delete here by deleting larger chunks at once # if early deletes succeed. use find_integer. turns O(n) into O(log(n)) @@ -47,6 +61,17 @@ class Collection(Shrinker): # then try reordering Ordering.shrink(self.current, self.consider, key=self.to_order) + # then try minimizing all duplicated elements together simultaneously. This + # helps in cases like https://github.com/HypothesisWorks/hypothesis/issues/4286 + duplicated = {val for val, count in Counter(self.current).items() if count > 1} + for val in duplicated: + self.ElementShrinker.shrink( + self.to_order(val), + lambda v: self.consider( + tuple(self.from_order(v) if x == val else x for x in self.current) + ), + ) + # then try minimizing each element in turn for i, val in enumerate(self.current): self.ElementShrinker.shrink( diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/common.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/common.py index b0c5ec8694e..8290ec6737d 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/common.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/common.py @@ -34,7 +34,7 @@ class Shrinker: self.name = name self.__predicate = predicate - self.__seen = set() + self.__seen = {self.make_canonical(self.current)} self.debugging_enabled = debug @property @@ -107,39 +107,46 @@ class Shrinker: self.run_step() self.debug("COMPLETE") - def incorporate(self, value): + def consider(self, value): """Try using ``value`` as a possible candidate improvement. - Return True if it works. + Return True if self.current is canonically equal to value after the call, either because + the value was incorporated as an improvement or because it had that value already. 
""" value = self.make_immutable(value) + self.debug(f"considering {value!r}") + canonical = self.make_canonical(value) + if canonical == self.make_canonical(self.current): + return True + if canonical in self.__seen: + return False + self.__seen.add(canonical) self.check_invariants(value) if not self.left_is_better(value, self.current): - if value != self.current and (value == value): - self.debug(f"Rejected {value!r} as worse than {self.current=}") - return False - if value in self.__seen: + self.debug(f"Rejected {value!r} as no better than {self.current=}") return False - self.__seen.add(value) if self.__predicate(value): self.debug(f"shrinking to {value!r}") self.changes += 1 self.current = value return True - return False + else: + self.debug(f"Rejected {value!r} not satisfying predicate") + return False - def consider(self, value): - """Returns True if make_immutable(value) == self.current after calling - self.incorporate(value).""" - self.debug(f"considering {value}") - value = self.make_immutable(value) - if value == self.current: - return True - return self.incorporate(value) + def make_canonical(self, value): + """Convert immutable value into a canonical and hashable, but not necessarily equal, + representation of itself. + + This representation is used only for tracking already-seen values, not passed to the + shrinker. + + Defaults to just returning the (immutable) input value. + """ + return value def make_immutable(self, value): - """Convert value into an immutable (and hashable) representation of - itself. + """Convert value into an immutable representation of itself. It is these immutable versions that the shrinker will work on. 
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/floats.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/floats.py index 48021535028..f55d3ddc8af 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/floats.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/floats.py @@ -14,25 +14,23 @@ import sys from hypothesis.internal.conjecture.floats import float_to_lex from hypothesis.internal.conjecture.shrinking.common import Shrinker from hypothesis.internal.conjecture.shrinking.integer import Integer - -MAX_PRECISE_INTEGER = 2**53 +from hypothesis.internal.floats import MAX_PRECISE_INTEGER, float_to_int class Float(Shrinker): def setup(self): - self.NAN = math.nan self.debugging_enabled = True - def make_immutable(self, f): - f = float(f) + def make_canonical(self, f): if math.isnan(f): - # Always use the same NAN so it works properly in self.seen - f = self.NAN + # Distinguish different NaN bit patterns, while making each equal to itself. + # Wrap in tuple to avoid potential collision with (huge) finite floats. + return ("nan", float_to_int(f)) return f def check_invariants(self, value): - # We only handle positive floats because we encode the sign separately - # anyway. + # We only handle positive floats (including NaN) because we encode the sign + # separately anyway. 
assert not (value < 0) def left_is_better(self, left, right): diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py index 06f49ce3fea..a6c71cfbff1 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/utils.py @@ -11,11 +11,12 @@ import enum import hashlib import heapq +import math import sys from collections import OrderedDict, abc from collections.abc import Sequence from functools import lru_cache -from typing import TYPE_CHECKING, List, Optional, TypeVar, Union +from typing import TYPE_CHECKING, Optional, TypeVar, Union from hypothesis.errors import InvalidArgument from hypothesis.internal.compat import int_from_bytes @@ -37,6 +38,10 @@ def calc_label_from_cls(cls: type) -> int: return calc_label_from_name(cls.__qualname__) +def calc_label_from_hash(obj: object) -> int: + return calc_label_from_name(str(hash(obj))) + + def combine_labels(*labels: int) -> int: label = 0 for l in labels: @@ -71,7 +76,7 @@ def check_sample( ) elif not isinstance(values, (OrderedDict, abc.Sequence, enum.EnumMeta)): raise InvalidArgument( - f"Cannot sample from {values!r}, not an ordered collection. " + f"Cannot sample from {values!r} because it is not an ordered collection. " f"Hypothesis goes to some length to ensure that the {strategy_name} " "strategy has stable results between runs. 
To replay a saved " "example, the sampled values must have the same iteration order " @@ -86,6 +91,73 @@ def check_sample( return tuple(values) +@lru_cache(64) +def compute_sampler_table(weights: tuple[float, ...]) -> list[tuple[int, int, float]]: + n = len(weights) + table: list[list[int | float | None]] = [[i, None, None] for i in range(n)] + total = sum(weights) + num_type = type(total) + + zero = num_type(0) # type: ignore + one = num_type(1) # type: ignore + + small: list[int] = [] + large: list[int] = [] + + probabilities = [w / total for w in weights] + scaled_probabilities: list[float] = [] + + for i, alternate_chance in enumerate(probabilities): + scaled = alternate_chance * n + scaled_probabilities.append(scaled) + if scaled == 1: + table[i][2] = zero + elif scaled < 1: + small.append(i) + else: + large.append(i) + heapq.heapify(small) + heapq.heapify(large) + + while small and large: + lo = heapq.heappop(small) + hi = heapq.heappop(large) + + assert lo != hi + assert scaled_probabilities[hi] > one + assert table[lo][1] is None + table[lo][1] = hi + table[lo][2] = one - scaled_probabilities[lo] + scaled_probabilities[hi] = ( + scaled_probabilities[hi] + scaled_probabilities[lo] + ) - one + + if scaled_probabilities[hi] < 1: + heapq.heappush(small, hi) + elif scaled_probabilities[hi] == 1: + table[hi][2] = zero + else: + heapq.heappush(large, hi) + while large: + table[large.pop()][2] = zero + while small: + table[small.pop()][2] = zero + + new_table: list[tuple[int, int, float]] = [] + for base, alternate, alternate_chance in table: + assert isinstance(base, int) + assert isinstance(alternate, int) or alternate is None + assert alternate_chance is not None + if alternate is None: + new_table.append((base, base, alternate_chance)) + elif alternate < base: + new_table.append((alternate, base, one - alternate_chance)) + else: + new_table.append((base, alternate, alternate_chance)) + new_table.sort() + return new_table + + class Sampler: """Sampler based on 
Vose's algorithm for the alias method. See http://www.keithschwarz.com/darts-dice-coins/ for a good explanation. @@ -108,79 +180,16 @@ class Sampler: def __init__(self, weights: Sequence[float], *, observe: bool = True): self.observe = observe - - n = len(weights) - table: "list[list[int | float | None]]" = [[i, None, None] for i in range(n)] - total = sum(weights) - num_type = type(total) - - zero = num_type(0) # type: ignore - one = num_type(1) # type: ignore - - small: "List[int]" = [] - large: "List[int]" = [] - - probabilities = [w / total for w in weights] - scaled_probabilities: "List[float]" = [] - - for i, alternate_chance in enumerate(probabilities): - scaled = alternate_chance * n - scaled_probabilities.append(scaled) - if scaled == 1: - table[i][2] = zero - elif scaled < 1: - small.append(i) - else: - large.append(i) - heapq.heapify(small) - heapq.heapify(large) - - while small and large: - lo = heapq.heappop(small) - hi = heapq.heappop(large) - - assert lo != hi - assert scaled_probabilities[hi] > one - assert table[lo][1] is None - table[lo][1] = hi - table[lo][2] = one - scaled_probabilities[lo] - scaled_probabilities[hi] = ( - scaled_probabilities[hi] + scaled_probabilities[lo] - ) - one - - if scaled_probabilities[hi] < 1: - heapq.heappush(small, hi) - elif scaled_probabilities[hi] == 1: - table[hi][2] = zero - else: - heapq.heappush(large, hi) - while large: - table[large.pop()][2] = zero - while small: - table[small.pop()][2] = zero - - self.table: "list[tuple[int, int, float]]" = [] - for base, alternate, alternate_chance in table: - assert isinstance(base, int) - assert isinstance(alternate, int) or alternate is None - assert alternate_chance is not None - if alternate is None: - self.table.append((base, base, alternate_chance)) - elif alternate < base: - self.table.append((alternate, base, one - alternate_chance)) - else: - self.table.append((base, alternate, alternate_chance)) - self.table.sort() + self.table = 
compute_sampler_table(tuple(weights)) def sample( self, data: "ConjectureData", *, forced: Optional[int] = None, - fake_forced: bool = False, ) -> int: if self.observe: - data.start_example(SAMPLE_IN_SAMPLER_LABEL) + data.start_span(SAMPLE_IN_SAMPLER_LABEL) forced_choice = ( # pragma: no branch # https://github.com/nedbat/coveragepy/issues/1617 None if forced is None @@ -193,7 +202,6 @@ class Sampler: base, alternate, alternate_chance = data.choice( self.table, forced=forced_choice, - fake_forced=fake_forced, observe=self.observe, ) forced_use_alternate = None @@ -207,11 +215,10 @@ class Sampler: use_alternate = data.draw_boolean( alternate_chance, forced=forced_use_alternate, - fake_forced=fake_forced, observe=self.observe, ) if self.observe: - data.stop_example() + data.stop_span() if use_alternate: assert forced is None or alternate == forced, (forced, alternate) return alternate @@ -244,7 +251,6 @@ class many: average_size: Union[int, float], *, forced: Optional[int] = None, - fake_forced: bool = False, observe: bool = True, ) -> None: assert 0 <= min_size <= average_size <= max_size @@ -253,7 +259,6 @@ class many: self.max_size = max_size self.data = data self.forced_size = forced - self.fake_forced = fake_forced self.p_continue = _calc_p_continue(average_size - min_size, max_size - min_size) self.count = 0 self.rejections = 0 @@ -262,23 +267,23 @@ class many: self.rejected = False self.observe = observe - def stop_example(self): + def stop_span(self): if self.observe: - self.data.stop_example() + self.data.stop_span() - def start_example(self, label): + def start_span(self, label): if self.observe: - self.data.start_example(label) + self.data.start_span(label) def more(self) -> bool: """Should I draw another element to add to the collection?""" if self.drawn: - self.stop_example() + self.stop_span() self.drawn = True self.rejected = False - self.start_example(ONE_FROM_MANY_LABEL) + self.start_span(ONE_FROM_MANY_LABEL) if self.min_size == self.max_size: # if 
we have to hit an exact size, draw unconditionally until that # point, and no further. @@ -299,7 +304,6 @@ class many: should_continue = self.data.draw_boolean( self.p_continue, forced=forced_result, - fake_forced=self.fake_forced, observe=self.observe, ) @@ -307,7 +311,7 @@ class many: self.count += 1 return True else: - self.stop_example() + self.stop_span() return False def reject(self, why: Optional[str] = None) -> None: @@ -330,13 +334,13 @@ SMALLEST_POSITIVE_FLOAT: float = next_up(0.0) or sys.float_info.min @lru_cache -def _calc_p_continue(desired_avg: float, max_size: int) -> float: +def _calc_p_continue(desired_avg: float, max_size: Union[int, float]) -> float: """Return the p_continue which will generate the desired average size.""" assert desired_avg <= max_size, (desired_avg, max_size) if desired_avg == max_size: return 1.0 p_continue = 1 - 1.0 / (1 + desired_avg) - if p_continue == 0 or max_size == float("inf"): + if p_continue == 0 or max_size == math.inf: assert 0 <= p_continue < 1, p_continue return p_continue assert 0 < p_continue < 1, p_continue @@ -368,7 +372,7 @@ def _calc_p_continue(desired_avg: float, max_size: int) -> float: return p_continue -def _p_continue_to_avg(p_continue: float, max_size: int) -> float: +def _p_continue_to_avg(p_continue: float, max_size: Union[int, float]) -> float: """Return the average_size generated by this p_continue and max_size.""" if p_continue >= 1: return max_size diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/constants_ast.py b/contrib/python/hypothesis/py3/hypothesis/internal/constants_ast.py new file mode 100644 index 00000000000..c69e5ba7072 --- /dev/null +++ b/contrib/python/hypothesis/py3/hypothesis/internal/constants_ast.py @@ -0,0 +1,144 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. 
+# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +import ast +import inspect +import math +import sys +from ast import AST, Constant, Expr, NodeVisitor, UnaryOp, USub +from functools import lru_cache +from types import ModuleType +from typing import TYPE_CHECKING, Optional, Union + +from hypothesis.internal.escalation import is_hypothesis_file +from hypothesis.internal.scrutineer import ModuleLocation + +if TYPE_CHECKING: + from typing import TypeAlias + +ConstantT: "TypeAlias" = Union[int, float, bytes, str] + + +class ConstantVisitor(NodeVisitor): + def __init__(self): + super().__init__() + self.constants: set[ConstantT] = set() + + def _add_constant(self, constant: object) -> None: + self.constants |= self._unfold_constant(constant) + + def visit_UnaryOp(self, node: UnaryOp) -> None: + # `a = -1` is actually a combination of a USub and the constant 1. + if ( + isinstance(node.op, USub) + and isinstance(node.operand, Constant) + and isinstance(node.operand.value, (int, float)) + and not isinstance(node.operand.value, bool) + ): + self._add_constant(-node.operand.value) + # don't recurse on this node to avoid adding the positive variant + return + + self.generic_visit(node) + + def visit_Expr(self, node: Expr) -> None: + if isinstance(node.value, Constant) and isinstance(node.value.value, str): + return + + self.generic_visit(node) + + def visit_JoinedStr(self, node): + # dont recurse on JoinedStr, i.e. f strings. Constants that appear *only* + # in f strings are unlikely to be helpful. + return + + @classmethod + def _unfold_constant(cls, value: object) -> set[ConstantT]: + if isinstance(value, str) and ( + len(value) > 20 or value.isspace() or value == "" + ): + # discard long strings, which are unlikely to be useful. 
+ return set() + if isinstance(value, bool): + return set() + if isinstance(value, float) and math.isinf(value): + # we already upweight inf. + return set() + if isinstance(value, (int, float, bytes, str)): + return {value} + # I don't kow what case could go here, but am also not confident there + # isn't one. + return set() # pragma: no cover + + def visit_Constant(self, node): + self._add_constant(node.value) + self.generic_visit(node) + + +@lru_cache(1024) +def constants_from_ast(tree: AST) -> set[ConstantT]: + visitor = ConstantVisitor() + visitor.visit(tree) + return visitor.constants + + +@lru_cache(1024) +def _module_ast(module: ModuleType) -> Optional[AST]: + try: + source = inspect.getsource(module) + tree = ast.parse(source) + except Exception: + return None + + return tree + + +def local_modules() -> tuple[ModuleType, ...]: + modules = [] + for module in sys.modules.values(): + if ( + not hasattr(module, "__file__") + or module.__file__ is None + # Skip expensive path lookup for stdlib modules. + # This will cause false negatives if a user names their module the + # same as a stdlib module. + # + # sys.stdlib_module_names is new in 3.10 + or ( + sys.version_info >= (3, 10) + and module.__name__ in sys.stdlib_module_names + ) + or ModuleLocation.from_path(module.__file__) is not ModuleLocation.LOCAL + ): + continue + + modules.append(module) + return tuple(modules) + + +def local_constants(): + constants = set() + for module in local_modules(): + # normally, hypothesis is a third-party library and is not returned + # by local_modules. However, if it is installed as an editable package + # with pip install -e, then we will pick up on it. Just hardcode an + # ignore here. + + # this is actually covered by test_constants_from_running_file, but + # not in the same process. 
+ if is_hypothesis_file(module.__file__): # pragma: no cover + continue + + tree = _module_ast(module) + if tree is None: # pragma: no cover + continue + constants |= constants_from_ast(tree) + + return constants diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/escalation.py b/contrib/python/hypothesis/py3/hypothesis/internal/escalation.py index 26907c4d061..9a7996ce21f 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/escalation.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/escalation.py @@ -16,25 +16,29 @@ import traceback from functools import partial from inspect import getframeinfo from pathlib import Path -from typing import NamedTuple, Optional +from types import ModuleType, TracebackType +from typing import Callable, NamedTuple, Optional import hypothesis from hypothesis.errors import _Trimmable from hypothesis.internal.compat import BaseExceptionGroup from hypothesis.utils.dynamicvariables import DynamicVariable +FILE_CACHE: dict[ModuleType, dict[str, bool]] = {} -def belongs_to(package): - if not hasattr(package, "__file__"): # pragma: no cover + +def belongs_to(package: ModuleType) -> Callable[[str], bool]: + if getattr(package, "__file__", None) is None: # pragma: no cover return lambda filepath: False + assert package.__file__ is not None + FILE_CACHE.setdefault(package, {}) + cache = FILE_CACHE[package] root = Path(package.__file__).resolve().parent - cache = {str: {}, bytes: {}} - def accept(filepath): - ftype = type(filepath) + def accept(filepath: str) -> bool: try: - return cache[ftype][filepath] + return cache[filepath] except KeyError: pass try: @@ -45,20 +49,19 @@ def belongs_to(package): result = True except Exception: result = False - cache[ftype][filepath] = result + cache[filepath] = result return result accept.__name__ = f"is_{package.__name__}_file" return accept -FILE_CACHE: dict[bytes, bool] = {} - - is_hypothesis_file = belongs_to(hypothesis) -def get_trimmed_traceback(exception=None): +def 
get_trimmed_traceback( + exception: Optional[BaseException] = None, +) -> Optional[TracebackType]: """Return the current traceback, minus any frames added by Hypothesis.""" if exception is None: _, exception, tb = sys.exc_info() @@ -68,12 +71,15 @@ def get_trimmed_traceback(exception=None): # was raised inside Hypothesis. Additionally, the environment variable # HYPOTHESIS_NO_TRACEBACK_TRIM is respected if nonempty, because verbose # mode is prohibitively slow when debugging strategy recursion errors. + assert hypothesis.settings.default is not None if ( tb is None - or os.environ.get("HYPOTHESIS_NO_TRACEBACK_TRIM", None) + or os.environ.get("HYPOTHESIS_NO_TRACEBACK_TRIM") or hypothesis.settings.default.verbosity >= hypothesis.Verbosity.debug - or is_hypothesis_file(traceback.extract_tb(tb)[-1][0]) - and not isinstance(exception, _Trimmable) + or ( + is_hypothesis_file(traceback.extract_tb(tb)[-1][0]) + and not isinstance(exception, _Trimmable) + ) ): return tb while tb.tb_next is not None and ( diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/filtering.py b/contrib/python/hypothesis/py3/hypothesis/internal/filtering.py index f06002dcedb..f56e9b84b62 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/filtering.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/filtering.py @@ -44,7 +44,7 @@ Predicate = Callable[[Ex], bool] class ConstructivePredicate(NamedTuple): - """Return kwargs to the appropriate strategy, and the predicate if needed. + """Return constraints to the appropriate strategy, and the predicate if needed. For example:: @@ -61,7 +61,7 @@ class ConstructivePredicate(NamedTuple): for each numeric type, for strings, for bytes, for collection sizes, etc. 
""" - kwargs: dict[str, Any] + constraints: dict[str, Any] predicate: Optional[Predicate] @classmethod @@ -70,7 +70,7 @@ class ConstructivePredicate(NamedTuple): def __repr__(self) -> str: fn = get_pretty_function_description(self.predicate) - return f"{self.__class__.__name__}(kwargs={self.kwargs!r}, predicate={fn})" + return f"{self.__class__.__name__}(constraints={self.constraints!r}, predicate={fn})" ARG = object() @@ -92,7 +92,7 @@ def convert(node: ast.AST, argname: str) -> object: return ast.literal_eval(node) -def comp_to_kwargs(x: ast.AST, op: ast.AST, y: ast.AST, *, argname: str) -> dict: +def comp_to_constraints(x: ast.AST, op: ast.AST, y: ast.AST, *, argname: str) -> dict: a = convert(x, argname) b = convert(y, argname) num = (int, float) @@ -196,11 +196,11 @@ def numeric_bounds_from_ast( bounds = [] for comp in comparisons: try: - kwargs = comp_to_kwargs(*comp, argname=argname) + constraints = comp_to_constraints(*comp, argname=argname) # Because `len` could be redefined in the enclosing scope, we *always* # have to apply the condition as a filter, in addition to rewriting. 
- pred = fallback.predicate if "len" in kwargs else None - bounds.append(ConstructivePredicate(kwargs, pred)) + pred = fallback.predicate if "len" in constraints else None + bounds.append(ConstructivePredicate(constraints, pred)) except ValueError: bounds.append(fallback) return merge_preds(*bounds) @@ -282,54 +282,58 @@ def get_numeric_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: def get_integer_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: - kwargs, predicate = get_numeric_predicate_bounds(predicate) + constraints, predicate = get_numeric_predicate_bounds(predicate) - if "min_value" in kwargs: - if kwargs["min_value"] == -math.inf: - del kwargs["min_value"] - elif math.isinf(kwargs["min_value"]): + if "min_value" in constraints: + if constraints["min_value"] == -math.inf: + del constraints["min_value"] + elif math.isinf(constraints["min_value"]): return ConstructivePredicate({"min_value": 1, "max_value": -1}, None) - elif kwargs["min_value"] != int(kwargs["min_value"]): - kwargs["min_value"] = ceil(kwargs["min_value"]) - elif kwargs.get("exclude_min", False): - kwargs["min_value"] = int(kwargs["min_value"]) + 1 + elif constraints["min_value"] != int(constraints["min_value"]): + constraints["min_value"] = ceil(constraints["min_value"]) + elif constraints.get("exclude_min", False): + constraints["min_value"] = int(constraints["min_value"]) + 1 - if "max_value" in kwargs: - if kwargs["max_value"] == math.inf: - del kwargs["max_value"] - elif math.isinf(kwargs["max_value"]): + if "max_value" in constraints: + if constraints["max_value"] == math.inf: + del constraints["max_value"] + elif math.isinf(constraints["max_value"]): return ConstructivePredicate({"min_value": 1, "max_value": -1}, None) - elif kwargs["max_value"] != int(kwargs["max_value"]): - kwargs["max_value"] = floor(kwargs["max_value"]) - elif kwargs.get("exclude_max", False): - kwargs["max_value"] = int(kwargs["max_value"]) - 1 + elif constraints["max_value"] != 
int(constraints["max_value"]): + constraints["max_value"] = floor(constraints["max_value"]) + elif constraints.get("exclude_max", False): + constraints["max_value"] = int(constraints["max_value"]) - 1 kw_categories = {"min_value", "max_value", "len"} - kwargs = {k: v for k, v in kwargs.items() if k in kw_categories} - return ConstructivePredicate(kwargs, predicate) + constraints = {k: v for k, v in constraints.items() if k in kw_categories} + return ConstructivePredicate(constraints, predicate) def get_float_predicate_bounds(predicate: Predicate) -> ConstructivePredicate: - kwargs, predicate = get_numeric_predicate_bounds(predicate) + constraints, predicate = get_numeric_predicate_bounds(predicate) - if "min_value" in kwargs: - min_value = kwargs["min_value"] - kwargs["min_value"] = float(kwargs["min_value"]) - if min_value < kwargs["min_value"] or ( - min_value == kwargs["min_value"] and kwargs.get("exclude_min", False) + if "min_value" in constraints: + min_value = constraints["min_value"] + constraints["min_value"] = float(constraints["min_value"]) + if min_value < constraints["min_value"] or ( + min_value == constraints["min_value"] + and constraints.get("exclude_min", False) ): - kwargs["min_value"] = next_up(kwargs["min_value"]) + constraints["min_value"] = next_up(constraints["min_value"]) - if "max_value" in kwargs: - max_value = kwargs["max_value"] - kwargs["max_value"] = float(kwargs["max_value"]) - if max_value > kwargs["max_value"] or ( - max_value == kwargs["max_value"] and kwargs.get("exclude_max", False) + if "max_value" in constraints: + max_value = constraints["max_value"] + constraints["max_value"] = float(constraints["max_value"]) + if max_value > constraints["max_value"] or ( + max_value == constraints["max_value"] + and constraints.get("exclude_max", False) ): - kwargs["max_value"] = next_down(kwargs["max_value"]) + constraints["max_value"] = next_down(constraints["max_value"]) - kwargs = {k: v for k, v in kwargs.items() if k in {"min_value", 
"max_value"}} - return ConstructivePredicate(kwargs, predicate) + constraints = { + k: v for k, v in constraints.items() if k in {"min_value", "max_value"} + } + return ConstructivePredicate(constraints, predicate) def max_len(size: int, element: Collection[object]) -> bool: diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/floats.py b/contrib/python/hypothesis/py3/hypothesis/internal/floats.py index 3ef5d6c030e..70835193dcd 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/floats.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/floats.py @@ -11,7 +11,7 @@ import math import struct from sys import float_info -from typing import TYPE_CHECKING, Callable, Literal, Optional, SupportsFloat, Union +from typing import TYPE_CHECKING, Callable, Literal, SupportsFloat, Union if TYPE_CHECKING: from typing import TypeAlias @@ -136,38 +136,53 @@ width_smallest_normals: dict[int, float] = { } assert width_smallest_normals[64] == float_info.min +mantissa_mask = (1 << 52) - 1 + def make_float_clamper( - min_float: float = 0.0, - max_float: float = math.inf, + min_value: float, + max_value: float, *, - allow_zero: bool = False, # Allows +0.0 (even if minfloat > 0) -) -> Optional[Callable[[float], float]]: + allow_nan: bool, + smallest_nonzero_magnitude: float, +) -> Callable[[float], float]: """ Return a function that clamps positive floats into the given bounds. - - Returns None when no values are allowed (min > max and zero is not allowed). 
""" - if max_float < min_float: - if allow_zero: - min_float = max_float = 0.0 - else: - return None + from hypothesis.internal.conjecture.choice import choice_permitted - range_size = min(max_float - min_float, float_info.max) - mantissa_mask = (1 << 52) - 1 + assert sign_aware_lte(min_value, max_value) + range_size = min(max_value - min_value, float_info.max) - def float_clamper(float_val: float) -> float: - if min_float <= float_val <= max_float: - return float_val - if float_val == 0.0 and allow_zero: - return float_val + def float_clamper(f: float) -> float: + if choice_permitted( + f, + { + "min_value": min_value, + "max_value": max_value, + "allow_nan": allow_nan, + "smallest_nonzero_magnitude": smallest_nonzero_magnitude, + }, + ): + return f # Outside bounds; pick a new value, sampled from the allowed range, # using the mantissa bits. - mant = float_to_int(float_val) & mantissa_mask - float_val = min_float + range_size * (mant / mantissa_mask) + mant = float_to_int(abs(f)) & mantissa_mask + f = min_value + range_size * (mant / mantissa_mask) + + # if we resampled into the space disallowed by smallest_nonzero_magnitude, + # default to smallest_nonzero_magnitude. + if 0 < abs(f) < smallest_nonzero_magnitude: + f = smallest_nonzero_magnitude + # we must have either -smallest_nonzero_magnitude <= min_value or + # smallest_nonzero_magnitude >= max_value, or no values would be + # possible. If smallest_nonzero_magnitude is not valid (because it's + # larger than max_value), then -smallest_nonzero_magnitude must be valid. 
+ if smallest_nonzero_magnitude > max_value: + f *= -1 + # Re-enforce the bounds (just in case of floating point arithmetic error) - return max(min_float, min(max_float, float_val)) + return clamp(min_value, f, max_value) return float_clamper @@ -180,7 +195,19 @@ def sign_aware_lte(x: float, y: float) -> bool: return x <= y +def clamp(lower: float, value: float, upper: float) -> float: + """Given a value and lower/upper bounds, 'clamp' the value so that + it satisfies lower <= value <= upper. NaN is mapped to lower.""" + # this seems pointless (and is for integers), but handles the -0.0/0.0 case. + if not sign_aware_lte(lower, value): + return lower + if not sign_aware_lte(value, upper): + return upper + return value + + SMALLEST_SUBNORMAL = next_up(0.0) SIGNALING_NAN = int_to_float(0x7FF8_0000_0000_0001) # nonzero mantissa +MAX_PRECISE_INTEGER = 2**53 assert math.isnan(SIGNALING_NAN) assert math.copysign(1, SIGNALING_NAN) == 1 diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/healthcheck.py b/contrib/python/hypothesis/py3/hypothesis/internal/healthcheck.py index d43742e00b3..49673ca07a5 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/healthcheck.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/healthcheck.py @@ -19,8 +19,9 @@ def fail_health_check(settings, message, label): if label in settings.suppress_health_check: return message += ( - "\nSee https://hypothesis.readthedocs.io/en/latest/health" - "checks.html for more information about this. " + "\nSee " + "https://hypothesis.readthedocs.io/en/latest/reference/api.html#health-checks " + "for more information about this. " f"If you want to disable just this health check, add {label} " "to the suppress_health_check settings for this test." 
) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py b/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py index b42360acda7..1bf96cd2a5f 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/intervalsets.py @@ -35,7 +35,7 @@ class IntervalSet: >>> IntervalSet.from_string('abcdef0123456789') ((48, 57), (97, 102)) """ - x = cls((ord(c), ord(c)) for c in sorted(s)) + x = cls([(ord(c), ord(c)) for c in sorted(s)]) return x.union(x) def __init__(self, intervals: Iterable[Sequence[int]] = ()) -> None: @@ -139,9 +139,9 @@ class IntervalSet: x = self.intervals y = other.intervals if not x: - return IntervalSet((u, v) for u, v in y) + return IntervalSet(y) if not y: - return IntervalSet((u, v) for u, v in x) + return IntervalSet(x) intervals = sorted(x + y, reverse=True) result = [intervals.pop()] while intervals: diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/observability.py b/contrib/python/hypothesis/py3/hypothesis/internal/observability.py index 40fd2cf7e91..9449e894ce2 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/observability.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/observability.py @@ -75,7 +75,7 @@ def make_testcase( }, "timing": timing, "metadata": { - "traceback": getattr(data.extra_information, "_expected_traceback", None), + "traceback": data.expected_traceback, "predicates": dict(data._observability_predicates), "backend": backend_metadata or {}, **_system_metadata(), diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/reflection.py b/contrib/python/hypothesis/py3/hypothesis/internal/reflection.py index dbc987e9c70..6e3de425aaf 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/reflection.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/reflection.py @@ -21,14 +21,15 @@ import sys import textwrap import types import warnings -from collections.abc import 
MutableMapping +from collections.abc import MutableMapping, Sequence from functools import partial, wraps +from inspect import Parameter, Signature from io import StringIO from keyword import iskeyword from random import _inst as global_random_instance from tokenize import COMMENT, detect_encoding, generate_tokens, untokenize from types import ModuleType -from typing import TYPE_CHECKING, Any, Callable +from typing import Any, Callable, Optional, TypeVar from unittest.mock import _patch as PatchType from weakref import WeakKeyDictionary @@ -37,14 +38,13 @@ from hypothesis.internal.compat import is_typed_named_tuple from hypothesis.utils.conventions import not_set from hypothesis.vendor.pretty import pretty -if TYPE_CHECKING: - from hypothesis.strategies._internal.strategies import T +T = TypeVar("T") READTHEDOCS = os.environ.get("READTHEDOCS", None) == "True" LAMBDA_SOURCE_CACHE: MutableMapping[Callable, str] = WeakKeyDictionary() -def is_mock(obj): +def is_mock(obj: object) -> bool: """Determine if the given argument is a mock type.""" # We want to be able to detect these when dealing with various test @@ -83,7 +83,7 @@ def _clean_source(src: str) -> bytes: return "\n".join(x.rstrip() for x in src.splitlines() if x.rstrip()).encode() -def function_digest(function): +def function_digest(function: Any) -> bytes: """Returns a string that is stable across multiple invocations across multiple processes and is prone to changing significantly in response to minor changes to the function. 
@@ -119,7 +119,7 @@ def function_digest(function): return hasher.digest() -def check_signature(sig: inspect.Signature) -> None: +def check_signature(sig: Signature) -> None: # Backport from Python 3.11; see https://github.com/python/cpython/pull/92065 for p in sig.parameters.values(): if iskeyword(p.name) and p.kind is not p.POSITIONAL_ONLY: @@ -133,17 +133,19 @@ def check_signature(sig: inspect.Signature) -> None: def get_signature( target: Any, *, follow_wrapped: bool = True, eval_str: bool = False -) -> inspect.Signature: +) -> Signature: # Special case for use of `@unittest.mock.patch` decorator, mimicking the # behaviour of getfullargspec instead of reporting unusable arguments. patches = getattr(target, "patchings", None) if isinstance(patches, list) and all(isinstance(p, PatchType) for p in patches): - P = inspect.Parameter - return inspect.Signature( - [P("args", P.VAR_POSITIONAL), P("keywargs", P.VAR_KEYWORD)] + return Signature( + [ + Parameter("args", Parameter.VAR_POSITIONAL), + Parameter("keywargs", Parameter.VAR_KEYWORD), + ] ) - if isinstance(getattr(target, "__signature__", None), inspect.Signature): + if isinstance(getattr(target, "__signature__", None), Signature): # This special case covers unusual codegen like Pydantic models sig = target.__signature__ check_signature(sig) @@ -153,7 +155,7 @@ def get_signature( selfy = next(iter(sig.parameters.values())) if ( selfy.name == "self" - and selfy.default is inspect.Parameter.empty + and selfy.default is Parameter.empty and selfy.kind.name.startswith("POSITIONAL_") ): return sig.replace( @@ -173,10 +175,10 @@ def get_signature( return sig -def arg_is_required(param): - return param.default is inspect.Parameter.empty and param.kind in ( - inspect.Parameter.POSITIONAL_OR_KEYWORD, - inspect.Parameter.KEYWORD_ONLY, +def arg_is_required(param: Parameter) -> bool: + return param.default is Parameter.empty and param.kind in ( + Parameter.POSITIONAL_OR_KEYWORD, + Parameter.KEYWORD_ONLY, ) @@ -205,7 +207,9 @@ 
def required_args(target, args=(), kwargs=()): } -def convert_keyword_arguments(function, args, kwargs): +def convert_keyword_arguments( + function: Any, args: Sequence[object], kwargs: dict[str, object] +) -> tuple[tuple[object, ...], dict[str, object]]: """Returns a pair of a tuple and a dictionary which would be equivalent passed as positional and keyword args to the function. Unless function has kwonlyargs or **kwargs the dictionary will always be empty. @@ -215,7 +219,9 @@ def convert_keyword_arguments(function, args, kwargs): return bound.args, bound.kwargs -def convert_positional_arguments(function, args, kwargs): +def convert_positional_arguments( + function: Any, args: Sequence[object], kwargs: dict[str, object] +) -> tuple[tuple[object, ...], dict[str, object]]: """Return a tuple (new_args, new_kwargs) where all possible arguments have been moved to kwargs. @@ -237,24 +243,22 @@ def convert_positional_arguments(function, args, kwargs): return tuple(new_args), new_kwargs -def ast_arguments_matches_signature(args, sig): - assert isinstance(args, ast.arguments) - assert isinstance(sig, inspect.Signature) - expected = [] +def ast_arguments_matches_signature(args: ast.arguments, sig: Signature) -> bool: + expected: list[tuple[str, int]] = [] for node in args.posonlyargs: - expected.append((node.arg, inspect.Parameter.POSITIONAL_ONLY)) + expected.append((node.arg, Parameter.POSITIONAL_ONLY)) for node in args.args: - expected.append((node.arg, inspect.Parameter.POSITIONAL_OR_KEYWORD)) + expected.append((node.arg, Parameter.POSITIONAL_OR_KEYWORD)) if args.vararg is not None: - expected.append((args.vararg.arg, inspect.Parameter.VAR_POSITIONAL)) + expected.append((args.vararg.arg, Parameter.VAR_POSITIONAL)) for node in args.kwonlyargs: - expected.append((node.arg, inspect.Parameter.KEYWORD_ONLY)) + expected.append((node.arg, Parameter.KEYWORD_ONLY)) if args.kwarg is not None: - expected.append((args.kwarg.arg, inspect.Parameter.VAR_KEYWORD)) + 
expected.append((args.kwarg.arg, Parameter.VAR_KEYWORD)) return expected == [(p.name, p.kind) for p in sig.parameters.values()] -def is_first_param_referenced_in_function(f): +def is_first_param_referenced_in_function(f: Any) -> bool: """Is the given name referenced within f?""" try: tree = ast.parse(textwrap.dedent(inspect.getsource(f))) @@ -300,7 +304,7 @@ def _extract_lambda_source(f): # The answer is that we add this at runtime, in new_given_signature(), # and we do support strange choices as applying @given() to a lambda. sig = inspect.signature(f) - assert sig.return_annotation in (inspect.Parameter.empty, None), sig + assert sig.return_annotation in (Parameter.empty, None), sig # Using pytest-xdist on Python 3.13, there's an entry in the linecache for # file "<string>", which then returns nonsense to getsource. Discard it. @@ -436,12 +440,12 @@ def extract_lambda_source(f): return source -def get_pretty_function_description(f): +def get_pretty_function_description(f: object) -> str: if isinstance(f, partial): return pretty(f) if not hasattr(f, "__name__"): return repr(f) - name = f.__name__ + name = f.__name__ # type: ignore # validated by hasattr above if name == "<lambda>": return extract_lambda_source(f) elif isinstance(f, (types.MethodType, types.BuiltinMethodType)): @@ -458,7 +462,7 @@ def get_pretty_function_description(f): return name -def nicerepr(v): +def nicerepr(v: Any) -> str: if inspect.isfunction(v): return get_pretty_function_description(v) elif isinstance(v, type): @@ -468,7 +472,9 @@ def nicerepr(v): return re.sub(r"(\[)~([A-Z][a-z]*\])", r"\g<1>\g<2>", pretty(v)) -def repr_call(f, args, kwargs, *, reorder=True): +def repr_call( + f: Any, args: Sequence[object], kwargs: dict[str, object], *, reorder: bool = True +) -> str: # Note: for multi-line pretty-printing, see RepresentationPrinter.repr_call() if reorder: args, kwargs = convert_positional_arguments(f, args, kwargs) @@ -497,15 +503,15 @@ def repr_call(f, args, kwargs, *, reorder=True): 
return rep + "(" + ", ".join(bits) + ")" -def check_valid_identifier(identifier): +def check_valid_identifier(identifier: str) -> None: if not identifier.isidentifier(): raise ValueError(f"{identifier!r} is not a valid python identifier") -eval_cache: dict = {} +eval_cache: dict[str, ModuleType] = {} -def source_exec_as_module(source): +def source_exec_as_module(source: str) -> ModuleType: try: return eval_cache[source] except KeyError: @@ -529,7 +535,9 @@ def accept({funcname}): """.lstrip() -def get_varargs(sig, kind=inspect.Parameter.VAR_POSITIONAL): +def get_varargs( + sig: Signature, kind: int = Parameter.VAR_POSITIONAL +) -> Optional[Parameter]: for p in sig.parameters.values(): if p.kind is kind: return p @@ -580,7 +588,7 @@ def define_function_signature(name, docstring, signature): for p in signature.parameters.values(): if p.kind is p.KEYWORD_ONLY: invocation_parts.append(f"{p.name}={p.name}") - varkw = get_varargs(signature, kind=inspect.Parameter.VAR_KEYWORD) + varkw = get_varargs(signature, kind=Parameter.VAR_KEYWORD) if varkw: invocation_parts.append("**" + varkw.name) @@ -648,7 +656,7 @@ def impersonate(target): return accept -def proxies(target: "T") -> Callable[[Callable], "T"]: +def proxies(target: T) -> Callable[[Callable], T]: replace_sig = define_function_signature( target.__name__.replace("<lambda>", "_lambda_"), # type: ignore target.__doc__, @@ -661,6 +669,6 @@ def proxies(target: "T") -> Callable[[Callable], "T"]: return accept -def is_identity_function(f): +def is_identity_function(f: object) -> bool: # TODO: pattern-match the AST to handle `def ...` identity functions too return bool(re.fullmatch(r"lambda (\w+): \1", get_pretty_function_description(f))) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py b/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py index e00cc6edcff..1df6a043adb 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py +++ 
b/contrib/python/hypothesis/py3/hypothesis/internal/scrutineer.py @@ -13,9 +13,11 @@ import os import re import subprocess import sys +import sysconfig import types from collections import defaultdict from collections.abc import Iterable +from enum import IntEnum from functools import lru_cache, reduce from os import sep from pathlib import Path @@ -27,12 +29,10 @@ from hypothesis.internal.escalation import is_hypothesis_file if TYPE_CHECKING: from typing import TypeAlias -else: - TypeAlias = object -Location: TypeAlias = tuple[str, int] -Branch: TypeAlias = tuple[Optional[Location], Location] -Trace: TypeAlias = set[Branch] +Location: "TypeAlias" = tuple[str, int] +Branch: "TypeAlias" = tuple[Optional[Location], Location] +Trace: "TypeAlias" = set[Branch] @lru_cache(maxsize=None) @@ -54,7 +54,7 @@ if sys.version_info[:2] >= (3, 12): class Tracer: """A super-simple branch coverage tracer.""" - __slots__ = ("branches", "_previous_location", "_should_trace") + __slots__ = ("_previous_location", "_should_trace", "branches") def __init__(self, *, should_trace: bool) -> None: self.branches: Trace = set() @@ -137,10 +137,15 @@ UNHELPFUL_LOCATIONS = ( "/warnings.py", # Quite rarely, the first AFNP line is in Pytest's internals. "/_pytest/_io/saferepr.py", + "/_pytest/_io/terminalwriter.py", "/_pytest/assertion/*.py", "/_pytest/config/__init__.py", "/_pytest/pytester.py", "/pluggy/_*.py", + # used by pytest for failure formatting in the terminal + "/pygments/lexer.py", + # used by pytest for failure formatting + "/difflib.py", "/reprlib.py", "/typing.py", "/conftest.py", @@ -210,18 +215,52 @@ def get_explaining_locations(traces): } -LIB_DIR = str(Path(sys.executable).parent / "lib") +# see e.g. 
https://docs.python.org/3/library/sysconfig.html#posix-user +# for examples of these path schemes +STDLIB_DIRS = { + Path(sysconfig.get_path("platstdlib")).resolve(), + Path(sysconfig.get_path("stdlib")).resolve(), +} +SITE_PACKAGES_DIRS = { + Path(sysconfig.get_path("purelib")).resolve(), + Path(sysconfig.get_path("platlib")).resolve(), +} + EXPLANATION_STUB = ( "Explanation:", " These lines were always and only run by failing examples:", ) -def make_report(explanations, cap_lines_at=5): +class ModuleLocation(IntEnum): + LOCAL = 0 + SITE_PACKAGES = 1 + STDLIB = 2 + + @classmethod + @lru_cache(1024) + def from_path(cls, path: str) -> "ModuleLocation": + path = Path(path).resolve() + # site-packages may be a subdir of stdlib or platlib, so it's important to + # check is_relative_to for this before the stdlib. + if any(path.is_relative_to(p) for p in SITE_PACKAGES_DIRS): + return cls.SITE_PACKAGES + if any(path.is_relative_to(p) for p in STDLIB_DIRS): + return cls.STDLIB + return cls.LOCAL + + +# show local files first, then site-packages, then stdlib +def _sort_key(path: str, lineno: int) -> tuple[int, str, int]: + return (ModuleLocation.from_path(path), path, lineno) + + +def make_report(explanations, *, cap_lines_at=5): report = defaultdict(list) for origin, locations in explanations.items(): + locations = list(locations) + locations.sort(key=lambda v: _sort_key(v[0], v[1])) report_lines = [f" {fname}:{lineno}" for fname, lineno in locations] - report_lines.sort(key=lambda line: (line.startswith(LIB_DIR), line)) if len(report_lines) > cap_lines_at + 1: msg = " (and {} more with settings.verbosity >= verbose)" report_lines[cap_lines_at:] = [msg.format(len(report_lines[cap_lines_at:]))] diff --git a/contrib/python/hypothesis/py3/hypothesis/provisional.py b/contrib/python/hypothesis/py3/hypothesis/provisional.py index 7f43762a30e..3f6f27ef2db 100644 --- a/contrib/python/hypothesis/py3/hypothesis/provisional.py +++ 
b/contrib/python/hypothesis/py3/hypothesis/provisional.py @@ -26,6 +26,8 @@ from typing import Optional from hypothesis import strategies as st from hypothesis.errors import InvalidArgument from hypothesis.internal.conjecture import utils as cu +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.strategies import DrawFn from hypothesis.strategies._internal.utils import defines_strategy URL_SAFE_CHARACTERS = frozenset(string.ascii_letters + string.digits + "$-_.+!*'(),~") @@ -45,7 +47,17 @@ def get_top_level_domains() -> tuple[str, ...]: return ("COM", *sorted((d for d in _tlds if d != "ARPA"), key=len)) -class DomainNameStrategy(st.SearchStrategy): +def _recase_randomly(draw: DrawFn, tld: str) -> str: + tld = list(tld) + changes = draw(st.tuples(*(st.booleans() for _ in range(len(tld))))) + for i, change_case in enumerate(changes): + if change_case: + tld[i] = tld[i].lower() if tld[i].isupper() else tld[i].upper() + return "".join(tld) + + +class DomainNameStrategy(st.SearchStrategy[str]): @staticmethod def clean_inputs( minimum: int, maximum: int, value: Optional[int], variable_name: str @@ -89,34 +101,37 @@ class DomainNameStrategy(st.SearchStrategy): # information in https://tools.ietf.org/html/rfc1035#section-2.3.1 # which defines the allowed syntax of a subdomain string. if self.max_element_length == 1: - self.label_regex = r"[a-zA-Z]" + label_regex = r"[a-zA-Z]" elif self.max_element_length == 2: - self.label_regex = r"[a-zA-Z][a-zA-Z0-9]?" + label_regex = r"[a-zA-Z][a-zA-Z0-9]?" else: maximum_center_character_pattern_repetitions = self.max_element_length - 2 - self.label_regex = r"[a-zA-Z]([a-zA-Z0-9\-]{0,%d}[a-zA-Z0-9])?" % ( + label_regex = r"[a-zA-Z]([a-zA-Z0-9\-]{0,%d}[a-zA-Z0-9])?" % ( maximum_center_character_pattern_repetitions, ) - def do_draw(self, data): + # Construct reusable strategies here to avoid a performance hit by doing + # so repeatedly in do_draw. 
+ # 1 - Select a valid top-level domain (TLD) name # 2 - Check that the number of characters in our selected TLD won't # prevent us from generating at least a 1 character subdomain. # 3 - Randomize the TLD between upper and lower case characters. - domain = data.draw( + + self.domain_strategy = ( st.sampled_from(get_top_level_domains()) .filter(lambda tld: len(tld) + 2 <= self.max_length) - .flatmap( - lambda tld: st.tuples( - *(st.sampled_from([c.lower(), c.upper()]) for c in tld) - ).map("".join) - ) + .flatmap(_recase_randomly) ) + # RFC-5890 s2.3.1 says such labels are reserved, and since we don't # want to bother with xn-- punycode labels we'll exclude them all. - elem_st = st.from_regex(self.label_regex, fullmatch=True).filter( + self.elem_strategy = st.from_regex(label_regex, fullmatch=True).filter( lambda label: len(label) < 4 or label[2:4] != "--" ) + + def do_draw(self, data: ConjectureData) -> str: + domain = data.draw(self.domain_strategy) # The maximum possible number of subdomains is 126, # 1 character subdomain + 1 '.' character, * 126 = 252, # with a max of 255, that leaves 3 characters for a TLD. @@ -125,9 +140,9 @@ class DomainNameStrategy(st.SearchStrategy): elements = cu.many(data, min_size=1, average_size=3, max_size=126) while elements.more(): # Generate a new valid subdomain using the regex strategy. - sub_domain = data.draw(elem_st) + sub_domain = data.draw(self.elem_strategy) if len(domain) + len(sub_domain) >= self.max_length: - data.stop_example(discard=True) + data.stop_span(discard=True) break domain = sub_domain + "." + domain return domain diff --git a/contrib/python/hypothesis/py3/hypothesis/reporting.py b/contrib/python/hypothesis/py3/hypothesis/reporting.py index 19073c5aff2..b1693e45b97 100644 --- a/contrib/python/hypothesis/py3/hypothesis/reporting.py +++ b/contrib/python/hypothesis/py3/hypothesis/reporting.py @@ -8,48 +8,56 @@ # v. 2.0. 
If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +from contextlib import AbstractContextManager +from typing import TYPE_CHECKING, Callable + from hypothesis._settings import Verbosity, settings from hypothesis.internal.compat import escape_unicode_characters from hypothesis.utils.dynamicvariables import DynamicVariable +if TYPE_CHECKING: + from typing import TypeAlias + -def default(value): +def default(value: object) -> None: try: print(value) except UnicodeEncodeError: - print(escape_unicode_characters(value)) + print(escape_unicode_characters(str(value))) -reporter = DynamicVariable(default) +ReporterT: "TypeAlias" = Callable[[object], None] +reporter = DynamicVariable[ReporterT](default) -def current_reporter(): +def current_reporter() -> ReporterT: return reporter.value -def with_reporter(new_reporter): +def with_reporter(new_reporter: ReporterT) -> AbstractContextManager[None]: return reporter.with_value(new_reporter) -def current_verbosity(): +def current_verbosity() -> Verbosity: + assert settings.default is not None return settings.default.verbosity -def verbose_report(text): +def verbose_report(text: str) -> None: if current_verbosity() >= Verbosity.verbose: base_report(text) -def debug_report(text): +def debug_report(text: str) -> None: if current_verbosity() >= Verbosity.debug: base_report(text) -def report(text): +def report(text: str) -> None: if current_verbosity() >= Verbosity.normal: base_report(text) -def base_report(text): +def base_report(text: str) -> None: assert isinstance(text, str), f"unexpected non-str {text=}" current_reporter()(text) diff --git a/contrib/python/hypothesis/py3/hypothesis/stateful.py b/contrib/python/hypothesis/py3/hypothesis/stateful.py index 7c60d2752f0..2ade095dbe8 100644 --- a/contrib/python/hypothesis/py3/hypothesis/stateful.py +++ b/contrib/python/hypothesis/py3/hypothesis/stateful.py @@ -22,7 +22,7 @@ from copy import copy from functools import lru_cache 
from io import StringIO from time import perf_counter -from typing import Any, Callable, ClassVar, Optional, Union, overload +from typing import Any, Callable, ClassVar, Optional, TypeVar, Union, overload from unittest import TestCase import attr @@ -54,17 +54,31 @@ from hypothesis.reporting import current_verbosity, report from hypothesis.strategies._internal.featureflags import FeatureStrategy from hypothesis.strategies._internal.strategies import ( Ex, - Ex_Inv, OneOfStrategy, SearchStrategy, check_strategy, ) from hypothesis.vendor.pretty import RepresentationPrinter +T = TypeVar("T") STATE_MACHINE_RUN_LABEL = cu.calc_label_from_name("another state machine step") SHOULD_CONTINUE_LABEL = cu.calc_label_from_name("should we continue drawing") +def _is_singleton(obj: object) -> bool: + """ + Returns True if two separately created instances of v will have the same id + (due to interning). + """ + # The range [-5, 256] is a cpython implementation detail. This may not work + # well on other platforms. + if isinstance(obj, int) and -5 <= obj <= 256: + return True + # cpython also interns compile-time strings, but let's just ignore those for + # now. + return isinstance(obj, bool) or obj is None + + class _OmittedArgument: """Sentinel class to prevent overlapping overloads in type hints. See comments above the overloads of @rule.""" @@ -83,14 +97,7 @@ class TestCaseProperty: # pragma: no cover raise AttributeError("Cannot delete TestCase") -def run_state_machine_as_test(state_machine_factory, *, settings=None, _min_steps=0): - """Run a state machine definition as a test, either silently doing nothing - or printing a minimal breaking program and raising an exception. - - state_machine_factory is anything which returns an instance of - RuleBasedStateMachine when called with no arguments - it can be a class or a - function. settings will be used to control the execution of the test. 
- """ +def get_state_machine_test(state_machine_factory, *, settings=None, _min_steps=0): if settings is None: try: settings = state_machine_factory.TestCase.settings @@ -137,13 +144,13 @@ def run_state_machine_as_test(state_machine_factory, *, settings=None, _min_step # find a failing test case, so we stop with probability of # 2 ** -16 during normal operation but force a stop when we've # generated enough steps. - cd.start_example(STATE_MACHINE_RUN_LABEL) + cd.start_span(STATE_MACHINE_RUN_LABEL) must_stop = None if steps_run >= max_steps: must_stop = True elif steps_run <= _min_steps: must_stop = False - elif cd._bytes_drawn > (0.8 * BUFFER_SIZE): + elif cd.length > (0.8 * BUFFER_SIZE): # Better to stop after fewer steps, than always overrun and retry. # See https://github.com/HypothesisWorks/hypothesis/issues/3618 must_stop = True @@ -220,7 +227,7 @@ def run_state_machine_as_test(state_machine_factory, *, settings=None, _min_step # then 'print_step' prints a multi-variable assignment. output(machine._repr_step(rule, data_to_print, result)) machine.check_invariants(settings, output, cd._stateful_run_times) - cd.stop_example() + cd.stop_span() finally: output("state.teardown()") machine.teardown() @@ -237,8 +244,21 @@ def run_state_machine_as_test(state_machine_factory, *, settings=None, _min_step state_machine_factory, "_hypothesis_internal_use_reproduce_failure", None ) run_state_machine._hypothesis_internal_print_given_args = False + return run_state_machine + - run_state_machine(state_machine_factory) +def run_state_machine_as_test(state_machine_factory, *, settings=None, _min_steps=0): + """Run a state machine definition as a test, either silently doing nothing + or printing a minimal breaking program and raising an exception. + + state_machine_factory is anything which returns an instance of + RuleBasedStateMachine when called with no arguments - it can be a class or a + function. settings will be used to control the execution of the test. 
+ """ + state_machine_test = get_state_machine_test( + state_machine_factory, settings=settings, _min_steps=_min_steps + ) + state_machine_test(state_machine_factory) class StateMachineMeta(type): @@ -389,7 +409,10 @@ class RuleBasedStateMachine(metaclass=StateMachineMeta): def printer(obj, p, cycle, name=name): return p.text(name) - self.__printer.singleton_pprinters.setdefault(id(result), printer) + # see + # https://github.com/HypothesisWorks/hypothesis/pull/4266#discussion_r1949619102 + if not _is_singleton(result): + self.__printer.singleton_pprinters.setdefault(id(result), printer) self.names_to_values[name] = result self.bundles.setdefault(target, []).append(VarReference(name)) @@ -437,6 +460,7 @@ class RuleBasedStateMachine(metaclass=StateMachineMeta): run_state_machine_as_test(cls, settings=self.settings) runTest.is_hypothesis_test = True + runTest._hypothesis_state_machine_class = cls StateMachineTestCase.__name__ = cls.__name__ + ".TestCase" StateMachineTestCase.__qualname__ = cls.__qualname__ + ".TestCase" @@ -455,8 +479,11 @@ class Rule: self.arguments_strategies = {} bundles = [] for k, v in sorted(self.arguments.items()): + assert not isinstance(v, BundleReferenceStrategy) if isinstance(v, Bundle): bundles.append(v) + consume = isinstance(v, BundleConsumer) + v = BundleReferenceStrategy(v.name, consume=consume) self.arguments_strategies[k] = v self.bundles = tuple(bundles) @@ -469,6 +496,26 @@ class Rule: self_strategy = st.runner() +class BundleReferenceStrategy(SearchStrategy): + def __init__(self, name: str, *, consume: bool = False): + self.name = name + self.consume = consume + + def do_draw(self, data): + machine = data.draw(self_strategy) + bundle = machine.bundle(self.name) + if not bundle: + data.mark_invalid(f"Cannot draw from empty bundle {self.name!r}") + # Shrink towards the right rather than the left. 
This makes it easier + # to delete data generated earlier, as when the error is towards the + # end there can be a lot of hard to remove padding. + position = data.draw_integer(0, len(bundle) - 1, shrink_towards=len(bundle)) + if self.consume: + return bundle.pop(position) # pragma: no cover # coverage is flaky here + else: + return bundle[position] + + class Bundle(SearchStrategy[Ex]): """A collection of values for use in stateful testing. @@ -495,32 +542,16 @@ class Bundle(SearchStrategy[Ex]): self, name: str, *, consume: bool = False, draw_references: bool = True ) -> None: self.name = name - self.consume = consume + self.__reference_strategy = BundleReferenceStrategy(name, consume=consume) self.draw_references = draw_references def do_draw(self, data): machine = data.draw(self_strategy) - - bundle = machine.bundle(self.name) - if not bundle: - data.mark_invalid(f"Cannot draw from empty bundle {self.name!r}") - # Shrink towards the right rather than the left. This makes it easier - # to delete data generated earlier, as when the error is towards the - # end there can be a lot of hard to remove padding. 
- position = data.draw_integer(0, len(bundle) - 1, shrink_towards=len(bundle)) - if self.consume: - reference = bundle.pop( - position - ) # pragma: no cover # coverage is flaky here - else: - reference = bundle[position] - - if self.draw_references: - return reference + reference = data.draw(self.__reference_strategy) return machine.names_to_values[reference.name] def __repr__(self): - consume = self.consume + consume = self.__reference_strategy.consume if consume is False: return f"Bundle(name={self.name!r})" return f"Bundle(name={self.name!r}, {consume=})" @@ -539,11 +570,18 @@ class Bundle(SearchStrategy[Ex]): def flatmap(self, expand): if self.draw_references: return type(self)( - self.name, consume=self.consume, draw_references=False + self.name, + consume=self.__reference_strategy.consume, + draw_references=False, ).flatmap(expand) return super().flatmap(expand) +class BundleConsumer(Bundle[Ex]): + def __init__(self, bundle: Bundle[Ex]) -> None: + super().__init__(bundle.name, consume=True) + + def consumes(bundle: Bundle[Ex]) -> SearchStrategy[Ex]: """When introducing a rule in a RuleBasedStateMachine, this function can be used to mark bundles from which each value used in a step with the @@ -559,10 +597,7 @@ def consumes(bundle: Bundle[Ex]) -> SearchStrategy[Ex]: """ if not isinstance(bundle, Bundle): raise TypeError("Argument to be consumed must be a bundle.") - return type(bundle)( - name=bundle.name, - consume=True, - ) + return BundleConsumer(bundle) @attr.s() @@ -573,9 +608,7 @@ class MultipleResults(Iterable[Ex]): return iter(self.values) -# We need to use an invariant typevar here to avoid a mypy error, as covariant -# typevars cannot be used as parameters. -def multiple(*args: Ex_Inv) -> MultipleResults[Ex_Inv]: +def multiple(*args: T) -> MultipleResults[T]: """This function can be used to pass multiple results to the target(s) of a rule. Just use ``return multiple(result1, result2, ...)`` in your rule. 
@@ -609,7 +642,7 @@ def _convert_targets(targets, target): ) raise InvalidArgument(msg % (t, type(t))) while isinstance(t, Bundle): - if t.consume: + if isinstance(t, BundleConsumer): note_deprecation( f"Using consumes({t.name}) doesn't makes sense in this context. " "This will be an error in a future version of Hypothesis.", diff --git a/contrib/python/hypothesis/py3/hypothesis/statistics.py b/contrib/python/hypothesis/py3/hypothesis/statistics.py index cee3c2b9ef5..bdb29670c95 100644 --- a/contrib/python/hypothesis/py3/hypothesis/statistics.py +++ b/contrib/python/hypothesis/py3/hypothesis/statistics.py @@ -55,8 +55,8 @@ def format_ms(times: Iterable[float]) -> str: n = len(ordered) - 1 if n < 0 or any(math.isnan(t) for t in ordered): # pragma: no cover return "NaN ms" - lower = int(ordered[int(math.floor(n * 0.05))] * 1000) - upper = int(ordered[int(math.ceil(n * 0.95))] * 1000) + lower = int(ordered[math.floor(n * 0.05)] * 1000) + upper = int(ordered[math.ceil(n * 0.95)] * 1000) if upper == 0: return "< 1ms" elif lower == upper: diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/__init__.py b/contrib/python/hypothesis/py3/hypothesis/strategies/__init__.py index 1d8b1ea76f9..dfe89502909 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/__init__.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/__init__.py @@ -65,6 +65,9 @@ from hypothesis.strategies._internal.utils import _strategies __all__ = [ + "DataObject", + "DrawFn", + "SearchStrategy", "binary", "booleans", "builds", @@ -72,13 +75,11 @@ __all__ = [ "complex_numbers", "composite", "data", - "DataObject", "dates", "datetimes", "decimals", "deferred", "dictionaries", - "DrawFn", "emails", "fixed_dictionaries", "floats", @@ -112,7 +113,6 @@ __all__ = [ "timezones", "tuples", "uuids", - "SearchStrategy", ] diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/collections.py 
b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/collections.py index e647ec07fbe..bcdbe7f333b 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/collections.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/collections.py @@ -9,11 +9,14 @@ # obtain one at https://mozilla.org/MPL/2.0/. import copy +import math from collections.abc import Iterable -from typing import Any, overload +from typing import Any, Callable, Optional, Union, overload +from hypothesis import strategies as st from hypothesis.errors import InvalidArgument from hypothesis.internal.conjecture import utils as cu +from hypothesis.internal.conjecture.data import ConjectureData from hypothesis.internal.conjecture.engine import BUFFER_SIZE from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy from hypothesis.internal.conjecture.utils import combine_labels @@ -24,16 +27,18 @@ from hypothesis.strategies._internal.strategies import ( T4, T5, Ex, - MappedStrategy, + RecurT, + SampledFromStrategy, SearchStrategy, T, check_strategy, filter_not_satisfied, ) from hypothesis.strategies._internal.utils import cacheable, defines_strategy +from hypothesis.utils.conventions import UniqueIdentifier -class TupleStrategy(SearchStrategy): +class TupleStrategy(SearchStrategy[tuple[Ex, ...]]): """A strategy responsible for fixed length tuples based on heterogeneous strategies for each of their elements.""" @@ -41,26 +46,26 @@ class TupleStrategy(SearchStrategy): super().__init__() self.element_strategies = tuple(strategies) - def do_validate(self): + def do_validate(self) -> None: for s in self.element_strategies: s.validate() - def calc_label(self): + def calc_label(self) -> int: return combine_labels( self.class_label, *(s.label for s in self.element_strategies) ) - def __repr__(self): + def __repr__(self) -> str: tuple_string = ", ".join(map(repr, self.element_strategies)) return f"TupleStrategy(({tuple_string}))" - def 
calc_has_reusable_values(self, recur): + def calc_has_reusable_values(self, recur: RecurT) -> bool: return all(recur(e) for e in self.element_strategies) - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> tuple[Ex, ...]: return tuple(data.draw(e) for e in self.element_strategies) - def calc_is_empty(self, recur): + def calc_is_empty(self, recur: RecurT) -> bool: return any(recur(e) for e in self.element_strategies) @@ -133,16 +138,21 @@ def tuples(*args: SearchStrategy[Any]) -> SearchStrategy[tuple[Any, ...]]: return TupleStrategy(args) -class ListStrategy(SearchStrategy): +class ListStrategy(SearchStrategy[list[Ex]]): """A strategy for lists which takes a strategy for its elements and the allowed lengths, and generates lists with the correct size and contents.""" - _nonempty_filters: tuple = (bool, len, tuple, list) + _nonempty_filters: tuple[Callable[[Any], Any], ...] = (bool, len, tuple, list) - def __init__(self, elements, min_size=0, max_size=float("inf")): + def __init__( + self, + elements: SearchStrategy[Ex], + min_size: int = 0, + max_size: Optional[Union[float, int]] = math.inf, + ): super().__init__() self.min_size = min_size or 0 - self.max_size = max_size if max_size is not None else float("inf") + self.max_size = max_size if max_size is not None else math.inf assert 0 <= self.min_size <= self.max_size self.average_size = min( max(self.min_size * 2, self.min_size + 5), @@ -157,10 +167,10 @@ class ListStrategy(SearchStrategy): "a HealthCheck error." 
) - def calc_label(self): + def calc_label(self) -> int: return combine_labels(self.class_label, self.element_strategy.label) - def do_validate(self): + def do_validate(self) -> None: self.element_strategy.validate() if self.is_empty: raise InvalidArgument( @@ -174,13 +184,13 @@ class ListStrategy(SearchStrategy): f"{self.element_strategy!r}" ) - def calc_is_empty(self, recur): + def calc_is_empty(self, recur: RecurT) -> bool: if self.min_size == 0: return False else: return recur(self.element_strategy) - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> list[Ex]: if self.element_strategy.is_empty: assert self.min_size == 0 return [] @@ -196,13 +206,13 @@ class ListStrategy(SearchStrategy): result.append(data.draw(self.element_strategy)) return result - def __repr__(self): + def __repr__(self) -> str: return ( f"{self.__class__.__name__}({self.element_strategy!r}, " f"min_size={self.min_size:_}, max_size={self.max_size:_})" ) - def filter(self, condition): + def filter(self, condition: Callable[[list[Ex]], Any]) -> SearchStrategy[list[Ex]]: if condition in self._nonempty_filters or is_identity_function(condition): assert self.max_size >= 1, "Always-empty is special cased in st.lists()" if self.min_size >= 1: @@ -211,11 +221,17 @@ class ListStrategy(SearchStrategy): new.min_size = 1 return new - kwargs, pred = get_integer_predicate_bounds(condition) - if kwargs.get("len") and ("min_value" in kwargs or "max_value" in kwargs): + constraints, pred = get_integer_predicate_bounds(condition) + if constraints.get("len") and ( + "min_value" in constraints or "max_value" in constraints + ): new = copy.copy(self) - new.min_size = max(self.min_size, kwargs.get("min_value", self.min_size)) - new.max_size = min(self.max_size, kwargs.get("max_value", self.max_size)) + new.min_size = max( + self.min_size, constraints.get("min_value", self.min_size) + ) + new.max_size = min( + self.max_size, constraints.get("max_value", self.max_size) + ) # Unsatisfiable filters 
are easiest to understand without rewriting. if new.min_size > new.max_size: return SearchStrategy.filter(self, condition) @@ -231,13 +247,22 @@ class ListStrategy(SearchStrategy): return SearchStrategy.filter(self, condition) -class UniqueListStrategy(ListStrategy): - def __init__(self, elements, min_size, max_size, keys, tuple_suffixes): +class UniqueListStrategy(ListStrategy[Ex]): + def __init__( + self, + elements: SearchStrategy[Ex], + min_size: int, + max_size: Optional[Union[float, int]], + # TODO: keys are guaranteed to be Hashable, not just Any, but this makes + # other things harder to type + keys: tuple[Callable[[Ex], Any], ...], + tuple_suffixes: Optional[SearchStrategy[tuple[Ex, ...]]], + ): super().__init__(elements, min_size, max_size) self.keys = keys self.tuple_suffixes = tuple_suffixes - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> list[Ex]: if self.element_strategy.is_empty: assert self.min_size == 0 return [] @@ -248,13 +273,17 @@ class UniqueListStrategy(ListStrategy): max_size=self.max_size, average_size=self.average_size, ) - seen_sets = tuple(set() for _ in self.keys) - result = [] + seen_sets: tuple[set[Ex], ...] = tuple(set() for _ in self.keys) + # actually list[Ex], but if self.tuple_suffixes is present then Ex is a + # tuple[T, ...] because self.element_strategy is a TuplesStrategy, and + # appending a concrete tuple to `result: list[Ex]` makes mypy unhappy + # without knowing that Ex = tuple. + result: list[Any] = [] # We construct a filtered strategy here rather than using a check-and-reject # approach because some strategies have special logic for generation under a # filter, and FilteredStrategy can consolidate multiple filters. 
- def not_yet_in_unique_list(val): + def not_yet_in_unique_list(val: Ex) -> bool: # type: ignore # covariant type param return all(key(val) not in seen for key, seen in zip(self.keys, seen_sets)) filtered = self.element_strategy._filter_for_filtered_draw( @@ -265,25 +294,28 @@ class UniqueListStrategy(ListStrategy): if value is filter_not_satisfied: elements.reject(f"Aborted test because unable to satisfy {filtered!r}") else: + assert not isinstance(value, UniqueIdentifier) for key, seen in zip(self.keys, seen_sets): seen.add(key(value)) if self.tuple_suffixes is not None: - value = (value, *data.draw(self.tuple_suffixes)) + value = (value, *data.draw(self.tuple_suffixes)) # type: ignore result.append(value) assert self.max_size >= len(result) >= self.min_size return result class UniqueSampledListStrategy(UniqueListStrategy): - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> list[Ex]: + assert isinstance(self.element_strategy, SampledFromStrategy) + should_draw = cu.many( data, min_size=self.min_size, max_size=self.max_size, average_size=self.average_size, ) - seen_sets = tuple(set() for _ in self.keys) - result = [] + seen_sets: tuple[set[Ex], ...] = tuple(set() for _ in self.keys) + result: list[Any] = [] remaining = LazySequenceCopy(self.element_strategy.elements) @@ -306,7 +338,7 @@ class UniqueSampledListStrategy(UniqueListStrategy): return result -class FixedKeysDictStrategy(MappedStrategy): +class FixedDictStrategy(SearchStrategy[dict[Any, Any]]): """A strategy which produces dicts with a fixed set of keys, given a strategy for each of their equivalent values. @@ -314,42 +346,25 @@ class FixedKeysDictStrategy(MappedStrategy): key 'foo' mapping to some integer. 
""" - def __init__(self, strategy_dict): - dict_type = type(strategy_dict) - self.keys = tuple(strategy_dict.keys()) - super().__init__( - strategy=TupleStrategy(strategy_dict[k] for k in self.keys), - pack=lambda value: dict_type(zip(self.keys, value)), + def __init__( + self, + mapping: dict[Any, SearchStrategy[Any]], + *, + optional: Optional[dict[Any, SearchStrategy[Any]]], + ): + dict_type = type(mapping) + self.mapping = mapping + keys = tuple(mapping.keys()) + self.fixed = st.tuples(*[mapping[k] for k in keys]).map( + lambda value: dict_type(zip(keys, value)) ) - - def calc_is_empty(self, recur): - return recur(self.mapped_strategy) - - def __repr__(self): - return f"FixedKeysDictStrategy({self.keys!r}, {self.mapped_strategy!r})" - - -class FixedAndOptionalKeysDictStrategy(SearchStrategy): - """A strategy which produces dicts with a fixed set of keys, given a - strategy for each of their equivalent values. - - e.g. {'foo' : some_int_strategy} would generate dicts with the single - key 'foo' mapping to some integer. 
- """ - - def __init__(self, strategy_dict, optional): - self.required = strategy_dict - self.fixed = FixedKeysDictStrategy(strategy_dict) self.optional = optional - def calc_is_empty(self, recur): - return recur(self.fixed) + def do_draw(self, data: ConjectureData) -> dict[Any, Any]: + value = data.draw(self.fixed) + if self.optional is None: + return value - def __repr__(self): - return f"FixedAndOptionalKeysDictStrategy({self.required!r}, {self.optional!r})" - - def do_draw(self, data): - result = data.draw(self.fixed) remaining = [k for k, v in self.optional.items() if not v.is_empty] should_draw = cu.many( data, min_size=0, max_size=len(remaining), average_size=len(remaining) / 2 @@ -358,5 +373,13 @@ class FixedAndOptionalKeysDictStrategy(SearchStrategy): j = data.draw_integer(0, len(remaining) - 1) remaining[-1], remaining[j] = remaining[j], remaining[-1] key = remaining.pop() - result[key] = data.draw(self.optional[key]) - return result + value[key] = data.draw(self.optional[key]) + return value + + def calc_is_empty(self, recur: RecurT) -> bool: + return recur(self.fixed) + + def __repr__(self) -> str: + if self.optional is not None: + return f"fixed_dictionaries({self.mapping!r}, optional={self.optional!r})" + return f"fixed_dictionaries({self.mapping!r})" diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py index 9991bc6b08f..8e35ebab0b2 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/core.py @@ -32,6 +32,7 @@ from typing import ( AnyStr, Callable, Literal, + NoReturn, Optional, Protocol, TypeVar, @@ -78,6 +79,7 @@ from hypothesis.internal.compat import ( get_type_hints, is_typed_named_tuple, ) +from hypothesis.internal.conjecture.data import ConjectureData from hypothesis.internal.conjecture.utils import ( calc_label_from_cls, check_sample, @@ -105,8 
+107,7 @@ from hypothesis.internal.validation import ( ) from hypothesis.strategies._internal import SearchStrategy, check_strategy from hypothesis.strategies._internal.collections import ( - FixedAndOptionalKeysDictStrategy, - FixedKeysDictStrategy, + FixedDictStrategy, ListStrategy, TupleStrategy, UniqueListStrategy, @@ -127,7 +128,6 @@ from hypothesis.strategies._internal.recursive import RecursiveStrategy from hypothesis.strategies._internal.shared import SharedStrategy from hypothesis.strategies._internal.strategies import ( Ex, - Ex_Inv, SampledFromStrategy, T, one_of, @@ -175,14 +175,14 @@ def sampled_from(elements: type[enum.Enum]) -> SearchStrategy[Any]: # pragma: n @overload def sampled_from( - elements: Union[type[enum.Enum], Sequence[Any]] + elements: Union[type[enum.Enum], Sequence[Any]], ) -> SearchStrategy[Any]: # pragma: no cover ... @defines_strategy(try_non_lazy=True) def sampled_from( - elements: Union[type[enum.Enum], Sequence[Any]] + elements: Union[type[enum.Enum], Sequence[Any]], ) -> SearchStrategy[Any]: """Returns a strategy which generates any value present in ``elements``. @@ -332,40 +332,48 @@ def lists( # Note that lazy strategies automatically unwrap when passed to a defines_strategy # function. tuple_suffixes = None + # the type: ignores in the TupleStrategy and IntegersStrategy cases are + # for a mypy bug, which incorrectly narrows `elements` to Never. + # https://github.com/python/mypy/issues/16494 if ( # We're generating a list of tuples unique by the first element, perhaps # via st.dictionaries(), and this will be more efficient if we rearrange # our strategy somewhat to draw the first element then draw add the rest. 
isinstance(elements, TupleStrategy) - and len(elements.element_strategies) >= 1 + and len(elements.element_strategies) >= 1 # type: ignore and len(unique_by) == 1 and ( # Introspection for either `itemgetter(0)`, or `lambda x: x[0]` - isinstance(unique_by[0], operator.itemgetter) - and repr(unique_by[0]) == "operator.itemgetter(0)" - or isinstance(unique_by[0], FunctionType) - and re.fullmatch( - get_pretty_function_description(unique_by[0]), - r"lambda ([a-z]+): \1\[0\]", + ( + isinstance(unique_by[0], operator.itemgetter) + and repr(unique_by[0]) == "operator.itemgetter(0)" + ) + or ( + isinstance(unique_by[0], FunctionType) + and re.fullmatch( + get_pretty_function_description(unique_by[0]), + r"lambda ([a-z]+): \1\[0\]", + ) ) ) ): unique_by = (identity,) - tuple_suffixes = TupleStrategy(elements.element_strategies[1:]) - elements = elements.element_strategies[0] + tuple_suffixes = TupleStrategy(elements.element_strategies[1:]) # type: ignore + elements = elements.element_strategies[0] # type: ignore # UniqueSampledListStrategy offers a substantial performance improvement for # unique arrays with few possible elements, e.g. of eight-bit integer types. 
if ( isinstance(elements, IntegersStrategy) - and None not in (elements.start, elements.end) - and (elements.end - elements.start) <= 255 + and elements.start is not None # type: ignore + and elements.end is not None # type: ignore + and (elements.end - elements.start) <= 255 # type: ignore ): elements = SampledFromStrategy( - sorted(range(elements.start, elements.end + 1), key=abs) - if elements.end < 0 or elements.start > 0 - else list(range(elements.end + 1)) - + list(range(-1, elements.start - 1, -1)) + sorted(range(elements.start, elements.end + 1), key=abs) # type: ignore + if elements.end < 0 or elements.start > 0 # type: ignore + else list(range(elements.end + 1)) # type: ignore + + list(range(-1, elements.start - 1, -1)) # type: ignore ) if isinstance(elements, SampledFromStrategy): @@ -448,7 +456,7 @@ class PrettyIter: def __next__(self): return next(self._iter) - def __repr__(self): + def __repr__(self) -> str: return f"iter({self._values!r})" @@ -481,6 +489,37 @@ def iterables( ).map(PrettyIter) +# this type definition is imprecise, in multiple ways: +# * mapping and optional can be of different types: +# s: dict[str | int, int] = st.fixed_dictionaries( +# {"a": st.integers()}, optional={1: st.integers()} +# ) +# * the values in either mapping or optional need not all be of the same type: +# s: dict[str, int | bool] = st.fixed_dictionaries( +# {"a": st.integers(), "b": st.booleans()} +# ) +# * the arguments may be of any dict-compatible type, in which case the return +# value will be of that type instead of dict +# +# Overloads may help here, but I doubt we'll be able to satisfy all these +# constraints. 
+# +# Here's some platonic ideal test cases for revealed_types.py, with the understanding +# that some may not be achievable: +# +# ("fixed_dictionaries({'a': booleans()})", "dict[str, bool]"), +# ("fixed_dictionaries({'a': booleans(), 'b': integers()})", "dict[str, bool | int]"), +# ("fixed_dictionaries({}, optional={'a': booleans()})", "dict[str, bool]"), +# ( +# "fixed_dictionaries({'a': booleans()}, optional={1: booleans()})", +# "dict[str | int, bool]", +# ), +# ( +# "fixed_dictionaries({'a': booleans()}, optional={1: integers()})", +# "dict[str | int, bool | int]", +# ), + + @defines_strategy() def fixed_dictionaries( mapping: dict[T, SearchStrategy[Ex]], @@ -503,6 +542,7 @@ def fixed_dictionaries( check_type(dict, mapping, "mapping") for k, v in mapping.items(): check_strategy(v, f"mapping[{k!r}]") + if optional is not None: check_type(dict, optional, "optional") for k, v in optional.items(): @@ -517,8 +557,8 @@ def fixed_dictionaries( "The following keys were in both mapping and optional, " f"which is invalid: {set(mapping) & set(optional)!r}" ) - return FixedAndOptionalKeysDictStrategy(mapping, optional) - return FixedKeysDictStrategy(mapping) + + return FixedDictStrategy(mapping, optional=optional) @cacheable @@ -614,7 +654,7 @@ def characters( explicitly allowed, the ``codec`` argument will exclude them without raising an exception. - .. _general category: https://wikipedia.org/wiki/Unicode_character_property + .. _general category: https://en.wikipedia.org/wiki/Unicode_character_property .. _codec encodings: https://docs.python.org/3/library/codecs.html#encodings-and-unicode .. 
_python-specific text encodings: https://docs.python.org/3/library/codecs.html#python-specific-encodings @@ -789,6 +829,19 @@ def text( "The following elements in alphabet are not of length one, " f"which leads to violation of size constraints: {not_one_char!r}" ) + if alphabet in ["ascii", "utf-8"]: + warnings.warn( + f"st.text({alphabet!r}): it seems like you are trying to use the " + f"codec {alphabet!r}. st.text({alphabet!r}) instead generates " + f"strings using the literal characters {list(alphabet)!r}. To specify " + f"the {alphabet} codec, use st.text(st.characters(codec={alphabet!r})). " + "If you intended to use character literals, you can silence this " + "warning by reordering the characters.", + HypothesisWarning, + # this stacklevel is of course incorrect, but breaking out of the + # levels of LazyStrategy and validation isn't worthwhile. + stacklevel=1, + ) char_strategy = ( characters(categories=(), include_characters=alphabet) if alphabet @@ -796,7 +849,10 @@ def text( ) if (max_size == 0 or char_strategy.is_empty) and not min_size: return just("") - return TextStrategy(char_strategy, min_size=min_size, max_size=max_size) + # mypy is unhappy with ListStrategy(SearchStrategy[list[Ex]]) and then TextStrategy + # setting Ex = str. Mypy is correct to complain because we have an LSP violation + # here in the TextStrategy.do_draw override. Would need refactoring to resolve. 
+ return TextStrategy(char_strategy, min_size=min_size, max_size=max_size) # type: ignore @overload @@ -1006,7 +1062,7 @@ class BuildsStrategy(SearchStrategy): tuples(*self.args).validate() fixed_dictionaries(self.kwargs).validate() - def __repr__(self): + def __repr__(self) -> str: bits = [get_pretty_function_description(self.target)] bits.extend(map(repr, self.args)) bits.extend(f"{k}={v!r}" for k, v in self.kwargs.items()) @@ -1089,7 +1145,7 @@ def builds( @cacheable @defines_strategy(never_lazy=True) -def from_type(thing: type[Ex_Inv]) -> SearchStrategy[Ex_Inv]: +def from_type(thing: type[T]) -> SearchStrategy[T]: """Looks up the appropriate search strategy for the given type. ``from_type`` is used internally to fill in missing arguments to @@ -1241,6 +1297,14 @@ def _from_type(thing: type[Ex]) -> SearchStrategy[Ex]: if strategy is not NotImplemented: return strategy return _from_type(thing.__supertype__) + if types.is_a_type_alias_type( + thing + ): # pragma: no cover # covered by 3.12+ tests + if thing in types._global_type_lookup: + strategy = as_strategy(types._global_type_lookup[thing], thing) + if strategy is not NotImplemented: + return strategy + return _from_type(thing.__value__) # Unions are not instances of `type` - but we still want to resolve them! if types.is_a_union(thing): args = sorted(thing.__args__, key=types.type_sorting_key) @@ -1299,10 +1363,8 @@ def _from_type(thing: type[Ex]) -> SearchStrategy[Ex]: # We've kept it because we turn out to have more type errors from... somewhere. # FIXME: investigate that, maybe it should be fixed more precisely? 
pass - if ( - hasattr(typing, "_TypedDictMeta") - and type(thing) is typing._TypedDictMeta - or hasattr(types.typing_extensions, "_TypedDictMeta") # type: ignore + if (hasattr(typing, "_TypedDictMeta") and type(thing) is typing._TypedDictMeta) or ( + hasattr(types.typing_extensions, "_TypedDictMeta") # type: ignore and type(thing) is types.typing_extensions._TypedDictMeta # type: ignore ): # pragma: no cover @@ -1432,17 +1494,35 @@ def _from_type(thing: type[Ex]) -> SearchStrategy[Ex]: params = get_signature(thing).parameters except Exception: params = {} # type: ignore + + posonly_args = [] kwargs = {} for k, p in params.items(): if ( - k in hints + p.kind in (p.POSITIONAL_ONLY, p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY) + and k in hints and k != "return" - and p.kind in (Parameter.POSITIONAL_OR_KEYWORD, Parameter.KEYWORD_ONLY) ): - kwargs[k] = from_type_guarded(hints[k]) - if p.default is not Parameter.empty and kwargs[k] is not ...: - kwargs[k] = just(p.default) | kwargs[k] - if params and not kwargs and not issubclass(thing, BaseException): + ps = from_type_guarded(hints[k]) + if p.default is not Parameter.empty and ps is not ...: + ps = just(p.default) | ps + if p.kind is Parameter.POSITIONAL_ONLY: + # builds() doesn't infer strategies for positional args, so: + if ps is ...: # pragma: no cover # rather fiddly to test + if p.default is Parameter.empty: + raise ResolutionFailed( + f"Could not resolve {thing!r} to a strategy; " + "consider using register_type_strategy" + ) + ps = just(p.default) + posonly_args.append(ps) + else: + kwargs[k] = ps + if ( + params + and not (posonly_args or kwargs) + and not issubclass(thing, BaseException) + ): from_type_repr = repr_call(from_type, (thing,), {}) builds_repr = repr_call(builds, (thing,), {}) warnings.warn( @@ -1453,7 +1533,7 @@ def _from_type(thing: type[Ex]) -> SearchStrategy[Ex]: SmallSearchSpaceWarning, stacklevel=2, ) - return builds(thing, **kwargs) + return builds(thing, *posonly_args, **kwargs) # And if it's 
an abstract type, we'll resolve to a union of subclasses instead. subclasses = thing.__subclasses__() if not subclasses: @@ -1461,7 +1541,7 @@ def _from_type(thing: type[Ex]) -> SearchStrategy[Ex]: f"Could not resolve {thing!r} to a strategy, because it is an abstract " "type without any subclasses. Consider using register_type_strategy" ) - subclass_strategies = nothing() + subclass_strategies: SearchStrategy = nothing() for sc in subclasses: try: subclass_strategies |= _from_type(sc) @@ -1741,7 +1821,7 @@ class CompositeStrategy(SearchStrategy): def do_draw(self, data): return self.definition(data.draw, *self.args, **self.kwargs) - def calc_label(self): + def calc_label(self) -> int: return calc_label_from_cls(self.definition) @@ -1829,39 +1909,58 @@ def _composite(f): return accept +composite_doc = """ +Defines a strategy that is built out of potentially arbitrarily many other +strategies. + +@composite provides a callable ``draw`` as the first parameter to the decorated +function, which can be used to dynamically draw a value from any strategy. For +example: + +.. code-block:: python + + from hypothesis import strategies as st, given + + @st.composite + def values(draw): + n1 = draw(st.integers()) + n2 = draw(st.integers(min_value=n1)) + return (n1, n2) + + @given(values()) + def f(value): + (n1, n2) = value + assert n1 <= n2 + +@composite cannot mix test code and generation code. If you need that, use +|st.data|. + +If :func:`@composite <hypothesis.strategies.composite>` is used to decorate a +method or classmethod, the ``draw`` argument must come before ``self`` or +``cls``. While we therefore recommend writing strategies as standalone functions +and using |st.register_type_strategy| to associate them with a class, methods +are supported and the ``@composite`` decorator may be applied either before or +after ``@classmethod`` or ``@staticmethod``. See :issue:`2578` and :pull:`2634` +for more details. 
+ +Examples from this strategy shrink by shrinking the output of each draw call. +""" if typing.TYPE_CHECKING or ParamSpec is not None: P = ParamSpec("P") def composite( - f: Callable[Concatenate[DrawFn, P], Ex] + f: Callable[Concatenate[DrawFn, P], Ex], ) -> Callable[P, SearchStrategy[Ex]]: - """Defines a strategy that is built out of potentially arbitrarily many - other strategies. - - This is intended to be used as a decorator. See - :ref:`the full documentation for more details <composite-strategies>` - about how to use this function. - - Examples from this strategy shrink by shrinking the output of each draw - call. - """ return _composite(f) else: # pragma: no cover @cacheable def composite(f: Callable[..., Ex]) -> Callable[..., SearchStrategy[Ex]]: - """Defines a strategy that is built out of potentially arbitrarily many - other strategies. + return _composite(f) - This is intended to be used as a decorator. See - :ref:`the full documentation for more details <composite-strategies>` - about how to use this function. - Examples from this strategy shrink by shrinking the output of each draw - call. - """ - return _composite(f) +composite.__doc__ = composite_doc @defines_strategy(force_reusable_values=True) @@ -2101,13 +2200,13 @@ class DataObject: # Note that "only exists" here really means "is only exported to users", # but we want to treat it as "semi-stable", not document it as "public API". 
- def __init__(self, data): + def __init__(self, data: ConjectureData) -> None: self.count = 0 self.conjecture_data = data __signature__ = Signature() # hide internals from Sphinx introspection - def __repr__(self): + def __repr__(self) -> str: return "data(...)" def draw(self, strategy: SearchStrategy[Ex], label: Any = None) -> Ex: @@ -2121,7 +2220,10 @@ class DataObject: if should_note(): printer = RepresentationPrinter(context=current_build_context()) printer.text(f"{desc}: ") - printer.pretty(result) + if self.conjecture_data.provider.avoid_realization: + printer.text("<symbolic>") + else: + printer.pretty(result) note(printer.getvalue()) return result @@ -2134,22 +2236,22 @@ class DataStrategy(SearchStrategy): data.hypothesis_shared_data_strategy = DataObject(data) return data.hypothesis_shared_data_strategy - def __repr__(self): + def __repr__(self) -> str: return "data()" def map(self, f): self.__not_a_first_class_strategy("map") - def filter(self, f): + def filter(self, condition: Callable[[Ex], Any]) -> NoReturn: self.__not_a_first_class_strategy("filter") def flatmap(self, f): self.__not_a_first_class_strategy("flatmap") - def example(self): + def example(self) -> NoReturn: self.__not_a_first_class_strategy("example") - def __not_a_first_class_strategy(self, name): + def __not_a_first_class_strategy(self, name: str) -> NoReturn: raise InvalidArgument( f"Cannot call {name} on a DataStrategy. You should probably " "be using @composite for whatever it is you're trying to do." @@ -2159,14 +2261,56 @@ class DataStrategy(SearchStrategy): @cacheable @defines_strategy(never_lazy=True) def data() -> SearchStrategy[DataObject]: - """This isn't really a normal strategy, but instead gives you an object - which can be used to draw data interactively from other strategies. 
+ """ + Provides an object ``data`` with a ``data.draw`` function which acts like + the ``draw`` callable provided by |st.composite|, in that it can be used + to dynamically draw values from strategies. |st.data| is more powerful + than |st.composite|, because it allows you to mix generation and test code. + + Here's an example of dynamically generating values using |st.data|: + + .. code-block:: python + + from hypothesis import strategies as st, given + + @given(st.data()) + def test_values(data): + n1 = data.draw(st.integers()) + n2 = data.draw(st.integers(min_value=n1)) + assert n1 + 1 <= n2 + + If the test fails, each draw will be printed with the falsifying example. + e.g. the above is wrong (it has a boundary condition error), so will print: + + .. code-block:: pycon + + Falsifying example: test_values(data=data(...)) + Draw 1: 0 + Draw 2: 0 + + Optionally, you can provide a label to identify values generated by each call + to ``data.draw()``. These labels can be used to identify values in the + output of a falsifying example. + + For instance: + + .. code-block:: python + + @given(st.data()) + def test_draw_sequentially(data): + x = data.draw(st.integers(), label="First number") + y = data.draw(st.integers(min_value=x), label="Second number") + assert x < y + + will produce: + + .. code-block:: pycon - See :ref:`the rest of the documentation <interactive-draw>` for more - complete information. + Falsifying example: test_draw_sequentially(data=data(...)) + Draw 1 (First number): 0 + Draw 2 (Second number): 0 - Examples from this strategy do not shrink (because there is only one), - but the result of calls to each ``data.draw()`` call shrink as they normally would. + Examples from this strategy shrink by shrinking the output of each draw call. 
""" return DataStrategy() @@ -2287,7 +2431,7 @@ def deferred(definition: Callable[[], SearchStrategy[Ex]]) -> SearchStrategy[Ex] return DeferredStrategy(definition) -def domains(): +def domains() -> SearchStrategy[str]: import hypothesis.provisional return hypothesis.provisional.domains() diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/deferred.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/deferred.py index f7dae9a1e59..c17cad50e19 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/deferred.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/deferred.py @@ -9,24 +9,32 @@ # obtain one at https://mozilla.org/MPL/2.0/. import inspect +from collections.abc import Sequence +from typing import Callable, Optional from hypothesis.configuration import check_sideeffect_during_initialization from hypothesis.errors import InvalidArgument +from hypothesis.internal.conjecture.data import ConjectureData from hypothesis.internal.reflection import get_pretty_function_description -from hypothesis.strategies._internal.strategies import SearchStrategy, check_strategy +from hypothesis.strategies._internal.strategies import ( + Ex, + RecurT, + SearchStrategy, + check_strategy, +) -class DeferredStrategy(SearchStrategy): +class DeferredStrategy(SearchStrategy[Ex]): """A strategy which may be used before it is fully defined.""" - def __init__(self, definition): + def __init__(self, definition: Callable[[], SearchStrategy[Ex]]): super().__init__() - self.__wrapped_strategy = None - self.__in_repr = False - self.__definition = definition + self.__wrapped_strategy: Optional[SearchStrategy[Ex]] = None + self.__in_repr: bool = False + self.__definition: Optional[Callable[[], SearchStrategy[Ex]]] = definition @property - def wrapped_strategy(self): + def wrapped_strategy(self) -> SearchStrategy[Ex]: if self.__wrapped_strategy is None: check_sideeffect_during_initialization("deferred evaluation of {!r}", 
self) @@ -44,14 +52,14 @@ class DeferredStrategy(SearchStrategy): return self.__wrapped_strategy @property - def branches(self): + def branches(self) -> Sequence[SearchStrategy[Ex]]: return self.wrapped_strategy.branches @property - def supports_find(self): + def supports_find(self) -> bool: return self.wrapped_strategy.supports_find - def calc_label(self): + def calc_label(self) -> int: """Deferred strategies don't have a calculated label, because we would end up having to calculate the fixed point of some hash function in order to calculate it when they recursively refer to themself! @@ -64,13 +72,13 @@ class DeferredStrategy(SearchStrategy): # deliberate decision. return self.class_label - def calc_is_empty(self, recur): + def calc_is_empty(self, recur: RecurT) -> bool: return recur(self.wrapped_strategy) - def calc_has_reusable_values(self, recur): + def calc_has_reusable_values(self, recur: RecurT) -> bool: return recur(self.wrapped_strategy) - def __repr__(self): + def __repr__(self) -> str: if self.__wrapped_strategy is not None: if self.__in_repr: return f"(deferred@{id(self)!r})" @@ -83,5 +91,5 @@ class DeferredStrategy(SearchStrategy): description = get_pretty_function_description(self.__definition) return f"deferred({description})" - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> Ex: return data.draw(self.wrapped_strategy) diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/featureflags.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/featureflags.py index 98af8f087a3..f37ff421e92 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/featureflags.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/featureflags.py @@ -8,7 +8,11 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
+from collections.abc import Hashable, Iterable, Sequence +from typing import Any, Optional + from hypothesis.internal.conjecture import utils as cu +from hypothesis.internal.conjecture.data import ConjectureData from hypothesis.strategies._internal.strategies import SearchStrategy FEATURE_LABEL = cu.calc_label_from_name("feature flag") @@ -31,7 +35,13 @@ class FeatureFlags: required disabled features. """ - def __init__(self, data=None, enabled=(), disabled=(), at_least_one_of=()): + def __init__( + self, + data: Optional[ConjectureData] = None, + enabled: Sequence[Any] = (), + disabled: Sequence[Any] = (), + at_least_one_of: Iterable[Hashable] = (), + ): self.__data = data self.__is_disabled = {} @@ -52,7 +62,7 @@ class FeatureFlags: # features will be enabled. This is so that we shrink in the direction # of more features being enabled. if self.__data is not None: - self.__p_disabled = data.draw_integer(0, 254) / 255 + self.__p_disabled = self.__data.draw_integer(0, 254) / 255 else: # If data is None we're in example mode so all that matters is the # enabled/disabled lists above. We set this up so that everything @@ -64,7 +74,7 @@ class FeatureFlags: # Track the set of possible names, and ensure that at least one is enabled. 
self.__at_least_one_of = set(at_least_one_of) - def is_enabled(self, name): + def is_enabled(self, name: Any) -> bool: """Tests whether the feature named ``name`` should be enabled on this test run.""" if self.__data is None or self.__data.frozen: @@ -77,7 +87,7 @@ class FeatureFlags: data = self.__data - data.start_example(label=FEATURE_LABEL) + data.start_span(label=FEATURE_LABEL) # If we've already decided on this feature then we don't actually # need to draw anything, but we do write the same decision to the @@ -99,10 +109,10 @@ class FeatureFlags: if name in oneof and not is_disabled: oneof.clear() oneof.discard(name) - data.stop_example() + data.stop_span() return not is_disabled - def __repr__(self): + def __repr__(self) -> str: enabled = [] disabled = [] for name, is_disabled in self.__is_disabled.items(): @@ -113,10 +123,10 @@ class FeatureFlags: return f"FeatureFlags({enabled=}, {disabled=})" -class FeatureStrategy(SearchStrategy): - def __init__(self, at_least_one_of=()): +class FeatureStrategy(SearchStrategy[FeatureFlags]): + def __init__(self, at_least_one_of: Sequence[Hashable] = ()): super().__init__() self._at_least_one_of = frozenset(at_least_one_of) - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> FeatureFlags: return FeatureFlags(data, at_least_one_of=self._at_least_one_of) diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/flatmapped.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/flatmapped.py index 49cb4e0cddd..4e4f4ddafb5 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/flatmapped.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/flatmapped.py @@ -8,33 +8,49 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
+from typing import Callable, Generic, TypeVar + +from hypothesis.internal.conjecture.data import ConjectureData from hypothesis.internal.reflection import get_pretty_function_description -from hypothesis.strategies._internal.strategies import SearchStrategy, check_strategy +from hypothesis.strategies._internal.strategies import ( + RecurT, + SearchStrategy, + check_strategy, +) + +MappedFrom = TypeVar("MappedFrom") +MappedTo = TypeVar("MappedTo") -class FlatMapStrategy(SearchStrategy): - def __init__(self, strategy, expand): +class FlatMapStrategy(SearchStrategy[MappedTo], Generic[MappedFrom, MappedTo]): + def __init__( + self, + base: SearchStrategy[MappedFrom], + expand: Callable[[MappedFrom], SearchStrategy[MappedTo]], + ): super().__init__() - self.flatmapped_strategy = strategy + self.base = base self.expand = expand - def calc_is_empty(self, recur): - return recur(self.flatmapped_strategy) + def calc_is_empty(self, recur: RecurT) -> bool: + return recur(self.base) - def __repr__(self): + def __repr__(self) -> str: if not hasattr(self, "_cached_repr"): - self._cached_repr = f"{self.flatmapped_strategy!r}.flatmap({get_pretty_function_description(self.expand)})" + self._cached_repr = ( + f"{self.base!r}.flatmap({get_pretty_function_description(self.expand)})" + ) return self._cached_repr - def do_draw(self, data): - source = data.draw(self.flatmapped_strategy) - expanded_source = self.expand(source) - check_strategy(expanded_source) - return data.draw(expanded_source) + def do_draw(self, data: ConjectureData) -> MappedTo: + base = data.draw(self.base) + expanded = self.expand(base) + check_strategy(expanded) + return data.draw(expanded) @property - def branches(self): + def branches(self) -> list[SearchStrategy[MappedTo]]: return [ - FlatMapStrategy(strategy=strategy, expand=self.expand) - for strategy in self.flatmapped_strategy.branches + FlatMapStrategy(strategy, expand=self.expand) + for strategy in self.base.branches ] diff --git 
a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/functions.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/functions.py index cf4d6275f8d..a69adf51fc6 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/functions.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/functions.py @@ -18,7 +18,7 @@ from hypothesis.internal.reflection import ( proxies, repr_call, ) -from hypothesis.strategies._internal.strategies import SearchStrategy +from hypothesis.strategies._internal.strategies import RecurT, SearchStrategy class FunctionStrategy(SearchStrategy): @@ -33,7 +33,7 @@ class FunctionStrategy(SearchStrategy): # garbage-collected at the end of each example, reducing memory use. self._cache = WeakKeyDictionary() - def calc_is_empty(self, recur): + def calc_is_empty(self, recur: RecurT) -> bool: return recur(self.returns) def do_draw(self, data): diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/lazy.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/lazy.py index 6cb582c5e03..b398c165e21 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/lazy.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/lazy.py @@ -8,18 +8,20 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
-from collections.abc import MutableMapping +from collections.abc import MutableMapping, Sequence from inspect import signature +from typing import Any, Callable, Optional from weakref import WeakKeyDictionary from hypothesis.configuration import check_sideeffect_during_initialization +from hypothesis.internal.conjecture.data import ConjectureData from hypothesis.internal.reflection import ( convert_keyword_arguments, convert_positional_arguments, get_pretty_function_description, repr_call, ) -from hypothesis.strategies._internal.strategies import SearchStrategy +from hypothesis.strategies._internal.strategies import Ex, RecurT, SearchStrategy unwrap_cache: MutableMapping[SearchStrategy, SearchStrategy] = WeakKeyDictionary() unwrap_depth = 0 @@ -61,41 +63,52 @@ def unwrap_strategies(s): assert unwrap_depth >= 0 -class LazyStrategy(SearchStrategy): +class LazyStrategy(SearchStrategy[Ex]): """A strategy which is defined purely by conversion to and from another strategy. Its parameter and distribution come from that other strategy. """ - def __init__(self, function, args, kwargs, *, transforms=(), force_repr=None): + def __init__( + self, + function: Callable[..., SearchStrategy[Ex]], + args: Sequence[object], + kwargs: dict[str, object], + *, + transforms: tuple[tuple[str, Callable[..., Any]], ...] 
= (), + force_repr: Optional[str] = None, + ): super().__init__() - self.__wrapped_strategy = None - self.__representation = force_repr + self.__wrapped_strategy: Optional[SearchStrategy[Ex]] = None + self.__representation: Optional[str] = force_repr self.function = function self.__args = args self.__kwargs = kwargs self._transformations = transforms @property - def supports_find(self): + def supports_find(self) -> bool: return self.wrapped_strategy.supports_find - def calc_is_empty(self, recur): + def calc_is_empty(self, recur: RecurT) -> bool: return recur(self.wrapped_strategy) - def calc_has_reusable_values(self, recur): + def calc_has_reusable_values(self, recur: RecurT) -> bool: return recur(self.wrapped_strategy) - def calc_is_cacheable(self, recur): + def calc_is_cacheable(self, recur: RecurT) -> bool: for source in (self.__args, self.__kwargs.values()): for v in source: if isinstance(v, SearchStrategy) and not v.is_cacheable: return False return True + def calc_label(self) -> int: + return self.wrapped_strategy.label + @property - def wrapped_strategy(self): + def wrapped_strategy(self) -> SearchStrategy[Ex]: if self.__wrapped_strategy is None: check_sideeffect_during_initialization("lazy evaluation of {!r}", self) @@ -113,13 +126,14 @@ class LazyStrategy(SearchStrategy): ) for method, fn in self._transformations: self.__wrapped_strategy = getattr(self.__wrapped_strategy, method)(fn) + assert self.__wrapped_strategy is not None return self.__wrapped_strategy def __with_transform(self, method, fn): repr_ = self.__representation if repr_: repr_ = f"{repr_}.{method}({get_pretty_function_description(fn)})" - return type(self)( + return LazyStrategy( self.function, self.__args, self.__kwargs, @@ -133,12 +147,12 @@ class LazyStrategy(SearchStrategy): def filter(self, condition): return self.__with_transform("filter", condition) - def do_validate(self): + def do_validate(self) -> None: w = self.wrapped_strategy assert isinstance(w, SearchStrategy), f"{self!r} 
returned non-strategy {w!r}" w.validate() - def __repr__(self): + def __repr__(self) -> str: if self.__representation is None: sig = signature(self.function) pos = [p for p in sig.parameters.values() if "POSITIONAL" in p.kind.name] @@ -163,9 +177,5 @@ class LazyStrategy(SearchStrategy): ) return self.__representation - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> Ex: return data.draw(self.wrapped_strategy) - - @property - def label(self): - return self.wrapped_strategy.label diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py index 3d0b0c97e08..7318048ccc3 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/misc.py @@ -8,17 +8,26 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +from typing import TYPE_CHECKING, Any, Callable, NoReturn, Union + +from hypothesis.internal.conjecture.data import ConjectureData from hypothesis.internal.reflection import get_pretty_function_description from hypothesis.strategies._internal.strategies import ( + Ex, + RecurT, SampledFromStrategy, SearchStrategy, T, - is_simple_data, + is_hashable, ) from hypothesis.strategies._internal.utils import cacheable, defines_strategy +from hypothesis.utils.conventions import UniqueIdentifier + +if TYPE_CHECKING: + from typing_extensions import Never -class JustStrategy(SampledFromStrategy): +class JustStrategy(SampledFromStrategy[Ex]): """A strategy which always returns a single fixed value. 
It's implemented as a length-one SampledFromStrategy so that all our @@ -32,10 +41,10 @@ class JustStrategy(SampledFromStrategy): """ @property - def value(self): + def value(self) -> Ex: return self.elements[0] - def __repr__(self): + def __repr__(self) -> str: suffix = "".join( f".{name}({get_pretty_function_description(f)})" for name, f in self._transformations @@ -44,10 +53,10 @@ class JustStrategy(SampledFromStrategy): return "none()" + suffix return f"just({get_pretty_function_description(self.value)}){suffix}" - def calc_is_cacheable(self, recur): - return is_simple_data(self.value) + def calc_is_cacheable(self, recur: RecurT) -> bool: + return is_hashable(self.value) - def do_filtered_draw(self, data): + def do_filtered_draw(self, data: ConjectureData) -> Union[Ex, UniqueIdentifier]: # The parent class's `do_draw` implementation delegates directly to # `do_filtered_draw`, which we can greatly simplify in this case since # we have exactly one value. (This also avoids drawing any data.) @@ -79,28 +88,30 @@ def none() -> SearchStrategy[None]: return just(None) -class Nothing(SearchStrategy): - def calc_is_empty(self, recur): +class Nothing(SearchStrategy["Never"]): + def calc_is_empty(self, recur: RecurT) -> bool: return True - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> NoReturn: # This method should never be called because draw() will mark the # data as invalid immediately because is_empty is True. 
raise NotImplementedError("This should never happen") - def calc_has_reusable_values(self, recur): + def calc_has_reusable_values(self, recur: RecurT) -> bool: return True - def __repr__(self): + def __repr__(self) -> str: return "nothing()" - def map(self, f): + def map(self, pack: Callable[[Any], Any]) -> SearchStrategy["Never"]: return self - def filter(self, f): + def filter(self, condition: Callable[[Any], Any]) -> "SearchStrategy[Never]": return self - def flatmap(self, f): + def flatmap( + self, expand: Callable[[Any], "SearchStrategy[Any]"] + ) -> "SearchStrategy[Never]": return self @@ -109,7 +120,7 @@ NOTHING = Nothing() @cacheable @defines_strategy(never_lazy=True) -def nothing() -> SearchStrategy: +def nothing() -> SearchStrategy["Never"]: """This strategy never successfully draws a value and will always reject on an attempt to draw. @@ -118,9 +129,9 @@ def nothing() -> SearchStrategy: return NOTHING -class BooleansStrategy(SearchStrategy): - def do_draw(self, data): +class BooleansStrategy(SearchStrategy[bool]): + def do_draw(self, data: ConjectureData) -> bool: return data.draw_boolean() - def __repr__(self): + def __repr__(self) -> str: return "booleans()" diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/numbers.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/numbers.py index 2e4bf01732f..59bf66ecc9b 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/numbers.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/numbers.py @@ -15,6 +15,7 @@ from typing import Literal, Optional, Union from hypothesis.control import reject from hypothesis.errors import InvalidArgument +from hypothesis.internal.conjecture.data import ConjectureData from hypothesis.internal.filtering import ( get_float_predicate_bounds, get_integer_predicate_bounds, @@ -47,15 +48,15 @@ from hypothesis.strategies._internal.utils import cacheable, defines_strategy Real = Union[int, float, Fraction, 
Decimal] -class IntegersStrategy(SearchStrategy): - def __init__(self, start, end): +class IntegersStrategy(SearchStrategy[int]): + def __init__(self, start: Optional[int], end: Optional[int]) -> None: assert isinstance(start, int) or start is None assert isinstance(end, int) or end is None assert start is None or end is None or start <= end self.start = start self.end = end - def __repr__(self): + def __repr__(self) -> str: if self.start is None and self.end is None: return "integers()" if self.end is None: @@ -64,7 +65,7 @@ class IntegersStrategy(SearchStrategy): return f"integers(max_value={self.end})" return f"integers({self.start}, {self.end})" - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> int: # For bounded integers, make the bounds and near-bounds more likely. weights = None if ( @@ -88,13 +89,13 @@ class IntegersStrategy(SearchStrategy): return self if condition in [math.isinf, math.isnan]: return nothing() - kwargs, pred = get_integer_predicate_bounds(condition) + constraints, pred = get_integer_predicate_bounds(condition) start, end = self.start, self.end - if "min_value" in kwargs: - start = max(kwargs["min_value"], -math.inf if start is None else start) - if "max_value" in kwargs: - end = min(kwargs["max_value"], math.inf if end is None else end) + if "min_value" in constraints: + start = max(constraints["min_value"], -math.inf if start is None else start) + if "max_value" in constraints: + end = min(constraints["max_value"], math.inf if end is None else end) if start != self.start or end != self.end: if start is not None and end is not None and start > end: @@ -141,7 +142,7 @@ def integers( return IntegersStrategy(min_value, max_value) -class FloatStrategy(SearchStrategy): +class FloatStrategy(SearchStrategy[float]): """A strategy for floating point numbers.""" def __init__( @@ -173,13 +174,13 @@ class FloatStrategy(SearchStrategy): self.allow_nan = allow_nan self.smallest_nonzero_magnitude = smallest_nonzero_magnitude - def 
__repr__(self): + def __repr__(self) -> str: return ( f"{self.__class__.__name__}({self.min_value=}, {self.max_value=}, " f"{self.allow_nan=}, {self.smallest_nonzero_magnitude=})" ).replace("self.", "") - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> float: return data.draw_float( min_value=self.min_value, max_value=self.max_value, @@ -209,11 +210,11 @@ class FloatStrategy(SearchStrategy): return nothing() return NanStrategy() - kwargs, pred = get_float_predicate_bounds(condition) - if not kwargs: + constraints, pred = get_float_predicate_bounds(condition) + if not constraints: return super().filter(pred) - min_bound = max(kwargs.get("min_value", -math.inf), self.min_value) - max_bound = min(kwargs.get("max_value", math.inf), self.max_value) + min_bound = max(constraints.get("min_value", -math.inf), self.min_value) + max_bound = min(constraints.get("max_value", math.inf), self.max_value) # Adjustments for allow_subnormal=False, if any need to be made if -self.smallest_nonzero_magnitude < min_bound < 0: @@ -500,7 +501,7 @@ def floats( if width < 64: - def downcast(x): + def downcast(x: float) -> float: try: return float_of(x, width) except OverflowError: # pragma: no cover @@ -510,10 +511,10 @@ def floats( return result -class NanStrategy(SearchStrategy): +class NanStrategy(SearchStrategy[float]): """Strategy for sampling the space of nan float values.""" - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> float: # Nans must have all exponent bits and the first mantissa bit set, so # we generate by taking 64 random bits and setting the required ones. 
sign_bit = int(data.draw_boolean()) << 63 diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/random.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/random.py index 0e874594430..e28b2596f6e 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/random.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/random.py @@ -8,6 +8,7 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +import abc import inspect import math from random import Random @@ -16,42 +17,42 @@ from typing import Any import attr from hypothesis.control import should_note +from hypothesis.internal.conjecture.data import ConjectureData from hypothesis.internal.reflection import define_function_signature from hypothesis.reporting import report -from hypothesis.strategies._internal.core import ( - binary, - lists, - permutations, - sampled_from, -) +from hypothesis.strategies._internal.core import lists, permutations, sampled_from from hypothesis.strategies._internal.numbers import floats, integers from hypothesis.strategies._internal.strategies import SearchStrategy -class HypothesisRandom(Random): +class HypothesisRandom(Random, abc.ABC): """A subclass of Random designed to expose the seed it was initially provided with.""" - def __init__(self, note_method_calls): - self.__note_method_calls = note_method_calls + def __init__(self, *, note_method_calls: bool) -> None: + self._note_method_calls = note_method_calls def __deepcopy__(self, table): return self.__copy__() - def __repr__(self): - raise NotImplementedError - + @abc.abstractmethod def seed(self, seed): raise NotImplementedError + @abc.abstractmethod def getstate(self): raise NotImplementedError + @abc.abstractmethod def setstate(self, state): raise NotImplementedError + @abc.abstractmethod + def _hypothesis_do_random(self, method, kwargs): + raise NotImplementedError + def 
_hypothesis_log_random(self, method, kwargs, result): - if not (self.__note_method_calls and should_note()): + if not (self._note_method_calls and should_note()): return args, kwargs = convert_kwargs(method, kwargs) @@ -60,9 +61,6 @@ class HypothesisRandom(Random): ) report(f"{self!r}.{method}({argstr}) -> {result!r}") - def _hypothesis_do_random(self, method, kwargs): - raise NotImplementedError - RANDOM_METHODS = [ name @@ -171,9 +169,6 @@ def state_for_seed(data, seed): return state -UNIFORM = floats(0, 1) - - def normalize_zero(f: float) -> float: if f == 0.0: return 0.0 @@ -184,17 +179,17 @@ def normalize_zero(f: float) -> float: class ArtificialRandom(HypothesisRandom): VERSION = 10**6 - def __init__(self, note_method_calls, data): + def __init__(self, *, note_method_calls: bool, data: ConjectureData) -> None: super().__init__(note_method_calls=note_method_calls) self.__data = data self.__state = RandomState() - def __repr__(self): + def __repr__(self) -> str: return "HypothesisRandom(generated data)" - def __copy__(self): + def __copy__(self) -> "ArtificialRandom": result = ArtificialRandom( - note_method_calls=self._HypothesisRandom__note_method_calls, + note_method_calls=self._note_method_calls, data=self.__data, ) result.setstate(self.getstate()) @@ -233,8 +228,12 @@ class ArtificialRandom(HypothesisRandom): if method == "_randbelow": result = self.__data.draw_integer(0, kwargs["n"] - 1) - elif method in ("betavariate", "random"): - result = self.__data.draw(UNIFORM) + elif method == "random": + # See https://github.com/HypothesisWorks/hypothesis/issues/4297 + # for numerics/bounds of "random" and "betavariate" + result = self.__data.draw(floats(0, 1, exclude_max=True)) + elif method == "betavariate": + result = self.__data.draw(floats(0, 1)) elif method == "uniform": a = normalize_zero(kwargs["a"]) b = normalize_zero(kwargs["b"]) @@ -324,8 +323,8 @@ class ArtificialRandom(HypothesisRandom): elif method == "shuffle": result = 
self.__data.draw(permutations(range(len(kwargs["x"])))) elif method == "randbytes": - n = kwargs["n"] - result = self.__data.draw(binary(min_size=n, max_size=n)) + n = int(kwargs["n"]) + result = self.__data.draw_bytes(min_size=n, max_size=n) else: raise NotImplementedError(method) @@ -398,7 +397,7 @@ def convert_kwargs(name, kwargs): class TrueRandom(HypothesisRandom): def __init__(self, seed, note_method_calls): - super().__init__(note_method_calls) + super().__init__(note_method_calls=note_method_calls) self.__seed = seed self.__random = Random(seed) @@ -411,15 +410,15 @@ class TrueRandom(HypothesisRandom): args, kwargs = convert_kwargs(method, kwargs) return fn(*args, **kwargs) - def __copy__(self): + def __copy__(self) -> "TrueRandom": result = TrueRandom( seed=self.__seed, - note_method_calls=self._HypothesisRandom__note_method_calls, + note_method_calls=self._note_method_calls, ) result.setstate(self.getstate()) return result - def __repr__(self): + def __repr__(self) -> str: return f"Random({self.__seed!r})" def seed(self, seed): @@ -433,12 +432,12 @@ class TrueRandom(HypothesisRandom): self.__random.setstate(state) -class RandomStrategy(SearchStrategy): - def __init__(self, note_method_calls, use_true_random): +class RandomStrategy(SearchStrategy[HypothesisRandom]): + def __init__(self, *, note_method_calls: bool, use_true_random: bool) -> None: self.__note_method_calls = note_method_calls self.__use_true_random = use_true_random - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> HypothesisRandom: if self.__use_true_random: seed = data.draw_integer(0, 2**64 - 1) return TrueRandom(seed=seed, note_method_calls=self.__note_method_calls) diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/recursive.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/recursive.py index cf7add95381..5f632111448 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/recursive.py +++ 
b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/recursive.py @@ -47,10 +47,10 @@ class LimitedStrategy(SearchStrategy): def currently_capped(self, value): self._threadlocal.currently_capped = value - def __repr__(self): + def __repr__(self) -> str: return f"LimitedStrategy({self.base_strategy!r})" - def do_validate(self): + def do_validate(self) -> None: self.base_strategy.validate() def do_draw(self, data): @@ -83,7 +83,7 @@ class RecursiveStrategy(SearchStrategy): strategies.append(extend(OneOfStrategy(tuple(strategies)))) self.strategy = OneOfStrategy(strategies) - def __repr__(self): + def __repr__(self) -> str: if not hasattr(self, "_cached_repr"): self._cached_repr = "recursive(%r, %s, max_leaves=%d)" % ( self.base, @@ -92,7 +92,7 @@ class RecursiveStrategy(SearchStrategy): ) return self._cached_repr - def do_validate(self): + def do_validate(self) -> None: check_strategy(self.base, "base") extended = self.extend(self.limited_base) check_strategy(extended, f"extend({self.limited_base!r})") diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/shared.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/shared.py index fe495db9d68..285dbb68f8f 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/shared.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/shared.py @@ -8,31 +8,33 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
-from hypothesis.strategies._internal import SearchStrategy +from collections.abc import Hashable +from typing import Any, Optional -SHARED_STRATEGY_ATTRIBUTE = "_hypothesis_shared_strategies" +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.strategies._internal import SearchStrategy +from hypothesis.strategies._internal.strategies import Ex -class SharedStrategy(SearchStrategy): - def __init__(self, base, key=None): +class SharedStrategy(SearchStrategy[Ex]): + def __init__(self, base: SearchStrategy[Ex], key: Optional[Hashable] = None): self.key = key self.base = base @property - def supports_find(self): + def supports_find(self) -> bool: return self.base.supports_find - def __repr__(self): + def __repr__(self) -> str: if self.key is not None: return f"shared({self.base!r}, key={self.key!r})" else: return f"shared({self.base!r})" - def do_draw(self, data): - if not hasattr(data, SHARED_STRATEGY_ATTRIBUTE): - setattr(data, SHARED_STRATEGY_ATTRIBUTE, {}) - sharing = getattr(data, SHARED_STRATEGY_ATTRIBUTE) + # Ideally would be -> Ex, but key collisions with different-typed values are + # possible. See https://github.com/HypothesisWorks/hypothesis/issues/4301. 
+ def do_draw(self, data: ConjectureData) -> Any: key = self.key or self - if key not in sharing: - sharing[key] = data.draw(self.base) - return sharing[key] + if key not in data._shared_strategy_draws: + data._shared_strategy_draws[key] = data.draw(self.base) + return data._shared_strategy_draws[key] diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py index 0d8c8ca10ee..58cb66b2f1c 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strategies.py @@ -20,6 +20,8 @@ from typing import ( Callable, ClassVar, Generic, + Literal, + Optional, TypeVar, Union, cast, @@ -39,6 +41,7 @@ from hypothesis.internal.conjecture import utils as cu from hypothesis.internal.conjecture.data import ConjectureData from hypothesis.internal.conjecture.utils import ( calc_label_from_cls, + calc_label_from_hash, calc_label_from_name, combine_labels, ) @@ -50,22 +53,20 @@ from hypothesis.internal.reflection import ( from hypothesis.strategies._internal.utils import defines_strategy from hypothesis.utils.conventions import UniqueIdentifier -# TODO: Use `(3, 13)` once Python 3.13 is released. 
-if sys.version_info >= (3, 13, 0, "final"): - Ex = TypeVar("Ex", covariant=True, default=Any) -elif TYPE_CHECKING: - from typing_extensions import TypeVar # type: ignore[assignment] +if TYPE_CHECKING: + from typing import TypeAlias - Ex = TypeVar("Ex", covariant=True, default=Any) # type: ignore[call-arg,misc] + Ex = TypeVar("Ex", covariant=True, default=Any) else: Ex = TypeVar("Ex", covariant=True) -Ex_Inv = TypeVar("Ex_Inv") T = TypeVar("T") T3 = TypeVar("T3") T4 = TypeVar("T4") T5 = TypeVar("T5") - +MappedFrom = TypeVar("MappedFrom") +MappedTo = TypeVar("MappedTo") +RecurT: "TypeAlias" = Callable[["SearchStrategy"], bool] calculating = UniqueIdentifier("calculating") MAPPED_SEARCH_STRATEGY_DO_DRAW_LABEL = calc_label_from_name( @@ -77,7 +78,7 @@ FILTERED_SEARCH_STRATEGY_DO_DRAW_LABEL = calc_label_from_name( ) -def recursive_property(name, default): +def recursive_property(strategy: "SearchStrategy", name: str, default: object) -> Any: """Handle properties which may be mutually recursive among a set of strategies. @@ -107,115 +108,112 @@ def recursive_property(name, default): calculation = "calc_" + name force_key = "force_" + name - def forced_value(target): + def forced_value(target: SearchStrategy) -> Any: try: return getattr(target, force_key) except AttributeError: return getattr(target, cache_key) - def accept(self): + try: + return forced_value(strategy) + except AttributeError: + pass + + mapping: dict[SearchStrategy, Any] = {} + sentinel = object() + hit_recursion = False + + # For a first pass we do a direct recursive calculation of the + # property, but we block recursively visiting a value in the + # computation of its property: When that happens, we simply + # note that it happened and return the default value. 
+ def recur(strat: SearchStrategy) -> Any: + nonlocal hit_recursion try: - return forced_value(self) + return forced_value(strat) except AttributeError: pass + result = mapping.get(strat, sentinel) + if result is calculating: + hit_recursion = True + return default + elif result is sentinel: + mapping[strat] = calculating + mapping[strat] = getattr(strat, calculation)(recur) + return mapping[strat] + return result + + recur(strategy) + + # If we hit self-recursion in the computation of any strategy + # value, our mapping at the end is imprecise - it may or may + # not have the right values in it. We now need to proceed with + # a more careful fixed point calculation to get the exact + # values. Hopefully our mapping is still pretty good and it + # won't take a large number of updates to reach a fixed point. + if hit_recursion: + needs_update = set(mapping) - mapping = {} - sentinel = object() - hit_recursion = False + # We track which strategies use which in the course of + # calculating their property value. If A ever uses B in + # the course of calculating its value, then whenever the + # value of B changes we might need to update the value of + # A. + listeners: dict[SearchStrategy, set[SearchStrategy]] = defaultdict(set) + else: + needs_update = None - # For a first pass we do a direct recursive calculation of the - # property, but we block recursively visiting a value in the - # computation of its property: When that happens, we simply - # note that it happened and return the default value. 
- def recur(strat): - nonlocal hit_recursion + def recur2(strat: SearchStrategy) -> Any: + def recur_inner(other: SearchStrategy) -> Any: try: - return forced_value(strat) + return forced_value(other) except AttributeError: pass - result = mapping.get(strat, sentinel) - if result is calculating: - hit_recursion = True + listeners[other].add(strat) + result = mapping.get(other, sentinel) + if result is sentinel: + assert needs_update is not None + needs_update.add(other) + mapping[other] = default return default - elif result is sentinel: - mapping[strat] = calculating - mapping[strat] = getattr(strat, calculation)(recur) - return mapping[strat] return result - recur(self) + return recur_inner - # If we hit self-recursion in the computation of any strategy - # value, our mapping at the end is imprecise - it may or may - # not have the right values in it. We now need to proceed with - # a more careful fixed point calculation to get the exact - # values. Hopefully our mapping is still pretty good and it - # won't take a large number of updates to reach a fixed point. - if hit_recursion: - needs_update = set(mapping) + count = 0 + seen = set() + while needs_update: + count += 1 + # If we seem to be taking a really long time to stabilize we + # start tracking seen values to attempt to detect an infinite + # loop. This should be impossible, and most code will never + # hit the count, but having an assertion for it means that + # testing is easier to debug and we don't just have a hung + # test. + # Note: This is actually covered, by test_very_deep_deferral + # in tests/cover/test_deferred_strategies.py. Unfortunately it + # runs into a coverage bug. See + # https://github.com/nedbat/coveragepy/issues/605 + # for details. 
+ if count > 50: # pragma: no cover + key = frozenset(mapping.items()) + assert key not in seen, (key, name) + seen.add(key) + to_update = needs_update + needs_update = set() + for strat in to_update: + new_value = getattr(strat, calculation)(recur2(strat)) + if new_value != mapping[strat]: + needs_update.update(listeners[strat]) + mapping[strat] = new_value - # We track which strategies use which in the course of - # calculating their property value. If A ever uses B in - # the course of calculating its value, then whenever the - # value of B changes we might need to update the value of - # A. - listeners = defaultdict(set) - else: - needs_update = None - - def recur2(strat): - def recur_inner(other): - try: - return forced_value(other) - except AttributeError: - pass - listeners[other].add(strat) - result = mapping.get(other, sentinel) - if result is sentinel: - needs_update.add(other) - mapping[other] = default - return default - return result - - return recur_inner - - count = 0 - seen = set() - while needs_update: - count += 1 - # If we seem to be taking a really long time to stabilize we - # start tracking seen values to attempt to detect an infinite - # loop. This should be impossible, and most code will never - # hit the count, but having an assertion for it means that - # testing is easier to debug and we don't just have a hung - # test. - # Note: This is actually covered, by test_very_deep_deferral - # in tests/cover/test_deferred_strategies.py. Unfortunately it - # runs into a coverage bug. See - # https://github.com/nedbat/coveragepy/issues/605 - # for details. 
- if count > 50: # pragma: no cover - key = frozenset(mapping.items()) - assert key not in seen, (key, name) - seen.add(key) - to_update = needs_update - needs_update = set() - for strat in to_update: - new_value = getattr(strat, calculation)(recur2(strat)) - if new_value != mapping[strat]: - needs_update.update(listeners[strat]) - mapping[strat] = new_value - - # We now have a complete and accurate calculation of the - # property values for everything we have seen in the course of - # running this calculation. We simultaneously update all of - # them (not just the strategy we started out with). - for k, v in mapping.items(): - setattr(k, cache_key, v) - return getattr(self, cache_key) - - accept.__name__ = name - return property(accept) + # We now have a complete and accurate calculation of the + # property values for everything we have seen in the course of + # running this calculation. We simultaneously update all of + # them (not just the strategy we started out with). + for k, v in mapping.items(): + setattr(k, cache_key, v) + return getattr(strategy, cache_key) class SearchStrategy(Generic[Ex]): @@ -228,12 +226,11 @@ class SearchStrategy(Generic[Ex]): releases. """ - supports_find = True - validate_called = False - __label = None - __module__ = "hypothesis.strategies" + validate_called: bool = False + __label: Union[int, UniqueIdentifier, None] = None + __module__: str = "hypothesis.strategies" - def available(self, data): + def available(self, data: ConjectureData) -> bool: """Returns whether this strategy can *currently* draw any values. This typically useful for stateful testing where ``Bundle`` grows over time a list of value to choose from. @@ -245,12 +242,18 @@ class SearchStrategy(Generic[Ex]): """ return not self.is_empty - # Returns True if this strategy can never draw a value and will always - # result in the data being marked invalid. 
- # The fact that this returns False does not guarantee that a valid value - # can be drawn - this is not intended to be perfect, and is primarily - # intended to be an optimisation for some cases. - is_empty = recursive_property("is_empty", True) + @property + def is_empty(self) -> Any: + # Returns True if this strategy can never draw a value and will always + # result in the data being marked invalid. + # The fact that this returns False does not guarantee that a valid value + # can be drawn - this is not intended to be perfect, and is primarily + # intended to be an optimisation for some cases. + return recursive_property(self, "is_empty", True) + + @property + def supports_find(self) -> bool: + return True # Returns True if values from this strategy can safely be reused without # this causing unexpected behaviour. @@ -260,15 +263,19 @@ class SearchStrategy(Generic[Ex]): # user-visible behaviour. Should be false for built-in strategies that # produce mutable values, and for strategies that have been mapped/filtered # by arbitrary user-provided functions. - has_reusable_values = recursive_property("has_reusable_values", True) + @property + def has_reusable_values(self) -> Any: + return recursive_property(self, "has_reusable_values", True) # Whether this strategy is suitable for holding onto in a cache. - is_cacheable = recursive_property("is_cacheable", True) + @property + def is_cacheable(self) -> Any: + return recursive_property(self, "is_cacheable", True) - def calc_is_cacheable(self, recur): + def calc_is_cacheable(self, recur: RecurT) -> bool: return True - def calc_is_empty(self, recur): + def calc_is_empty(self, recur: RecurT) -> bool: # Note: It is correct and significant that the default return value # from calc_is_empty is False despite the default value for is_empty # being true. The reason for this is that strategies should be treated @@ -277,7 +284,7 @@ class SearchStrategy(Generic[Ex]): # this method to show that. 
return False - def calc_has_reusable_values(self, recur): + def calc_has_reusable_values(self, recur: RecurT) -> bool: return False def example(self) -> Ex: @@ -344,7 +351,9 @@ class SearchStrategy(Generic[Ex]): phases=(Phase.generate,), suppress_health_check=list(HealthCheck), ) - def example_generating_inner_function(ex): + def example_generating_inner_function( + ex: Ex, # type: ignore # mypy is overzealous in preventing covariant params + ) -> None: self.__examples.append(ex) example_generating_inner_function() @@ -372,8 +381,16 @@ class SearchStrategy(Generic[Ex]): """ from hypothesis.strategies._internal.flatmapped import FlatMapStrategy - return FlatMapStrategy(expand=expand, strategy=self) + return FlatMapStrategy(self, expand=expand) + # Note that we previously had condition extracted to a type alias as + # PredicateT. However, that was only useful when not specifying a relationship + # between the generic Ts and some other function param / return value. + # If we do want to - like here, where we want to say that the Ex arg to condition + # is of the same type as the strategy's Ex - then you need to write out the + # entire Callable[[Ex], Any] expression rather than use a type alias. + # TypeAlias is *not* simply a macro that inserts the text. TypeAlias will not + # reference the local TypeVar context. def filter(self, condition: Callable[[Ex], Any]) -> "SearchStrategy[Ex]": """Returns a new strategy that generates values from this strategy which satisfy the provided condition. Note that if the condition is too @@ -384,7 +401,9 @@ class SearchStrategy(Generic[Ex]): """ return FilteredStrategy(conditions=(condition,), strategy=self) - def _filter_for_filtered_draw(self, condition): + def _filter_for_filtered_draw( + self, condition: Callable[[Ex], Any] + ) -> "FilteredStrategy[Ex]": # Hook for parent strategies that want to perform fallible filtering # on one of their internal strategies (e.g. UniqueListStrategy). 
# The returned object must have a `.do_filtered_draw(data)` method @@ -397,7 +416,7 @@ class SearchStrategy(Generic[Ex]): return FilteredStrategy(conditions=(condition,), strategy=self) @property - def branches(self) -> list["SearchStrategy[Ex]"]: + def branches(self) -> Sequence["SearchStrategy[Ex]"]: return [self] def __or__(self, other: "SearchStrategy[T]") -> "SearchStrategy[Union[Ex, T]]": @@ -408,7 +427,27 @@ class SearchStrategy(Generic[Ex]): """ if not isinstance(other, SearchStrategy): raise ValueError(f"Cannot | a SearchStrategy with {other!r}") - return OneOfStrategy((self, other)) + + # Unwrap explicitly or'd strategies. This turns the + # common case of e.g. st.integers() | st.integers() | st.integers() from + # + # one_of(one_of(integers(), integers()), integers()) + # + # into + # + # one_of(integers(), integers(), integers()) + # + # This is purely an aesthetic unwrapping, for e.g. reprs. In practice + # we use .branches / .element_strategies to get the list of possible + # strategies, so this unwrapping is *not* necessary for correctness. 
+ strategies: list[SearchStrategy] = [] + strategies.extend( + self.original_strategies if isinstance(self, OneOfStrategy) else [self] + ) + strategies.extend( + other.original_strategies if isinstance(other, OneOfStrategy) else [other] + ) + return OneOfStrategy(strategies) def __bool__(self) -> bool: warnings.warn( @@ -437,7 +476,7 @@ class SearchStrategy(Generic[Ex]): LABELS: ClassVar[dict[type, int]] = {} @property - def class_label(self): + def class_label(self) -> int: cls = self.__class__ try: return cls.LABELS[cls] @@ -456,20 +495,17 @@ class SearchStrategy(Generic[Ex]): self.__label = self.calc_label() return cast(int, self.__label) - def calc_label(self): + def calc_label(self) -> int: return self.class_label - def do_validate(self): + def do_validate(self) -> None: pass def do_draw(self, data: ConjectureData) -> Ex: raise NotImplementedError(f"{type(self).__name__}.do_draw") - def __init__(self): - pass - -def is_simple_data(value): +def is_hashable(value: object) -> bool: try: hash(value) return True @@ -477,36 +513,46 @@ def is_simple_data(value): return False -class SampledFromStrategy(SearchStrategy): +class SampledFromStrategy(SearchStrategy[Ex]): """A strategy which samples from a set of elements. This is essentially equivalent to using a OneOfStrategy over Just strategies but may be more efficient and convenient. 
""" - _MAX_FILTER_CALLS = 10_000 + _MAX_FILTER_CALLS: ClassVar[int] = 10_000 - def __init__(self, elements, repr_=None, transformations=()): + def __init__( + self, + elements: Sequence[Ex], + repr_: Optional[str] = None, + transformations: tuple[ + tuple[Literal["filter", "map"], Callable[[Ex], Any]], + ..., + ] = (), + ): super().__init__() self.elements = cu.check_sample(elements, "sampled_from") assert self.elements self.repr_ = repr_ self._transformations = transformations - def map(self, pack): - return type(self)( + def map(self, pack: Callable[[Ex], T]) -> SearchStrategy[T]: + s = type(self)( self.elements, repr_=self.repr_, transformations=(*self._transformations, ("map", pack)), ) + # guaranteed by the ("map", pack) transformation + return cast(SearchStrategy[T], s) - def filter(self, condition): + def filter(self, condition: Callable[[Ex], Any]) -> SearchStrategy[Ex]: return type(self)( self.elements, repr_=self.repr_, transformations=(*self._transformations, ("filter", condition)), ) - def __repr__(self): + def __repr__(self) -> str: return ( self.repr_ or "sampled_from([" @@ -517,17 +563,72 @@ class SampledFromStrategy(SearchStrategy): for name, f in self._transformations ) - def calc_has_reusable_values(self, recur): + def calc_label(self) -> int: + # strategy.label is effectively an under-approximation of structural + # equality (i.e., some strategies may have the same label when they are not + # structurally identical). More importantly for calculating the + # SampledFromStrategy label, we might have hash(s1) != hash(s2) even + # when s1 and s2 are structurally identical. For instance: + # + # s1 = st.sampled_from([st.none()]) + # s2 = st.sampled_from([st.none()]) + # assert hash(s1) != hash(s2) + # + # (see also test cases in test_labels.py). + # + # We therefore use the labels of any component strategies when calculating + # our label, and only use the hash if it is not a strategy. + # + # That's the ideal, anyway. 
In reality the logic is more complicated than + # necessary in order to be efficient in the presence of (very) large sequences: + # * add an unabashed special case for range, to avoid iteration over an + # enormous range when we know it is entirely integers. + # * if there is at least one strategy in self.elements, use strategy label, + # and the element hash otherwise. + # * if there are no strategies in self.elements, take the hash of the + # entire sequence. This prevents worst-case performance of hashing each + # element when a hash of the entire sequence would have sufficed. + # + # The worst case performance of this scheme is + # itertools.chain(range(2**100), [st.none()]), where it degrades to + # hashing every int in the range. + + if isinstance(self.elements, range) or ( + is_hashable(self.elements) + and not any(isinstance(e, SearchStrategy) for e in self.elements) + ): + return combine_labels(self.class_label, calc_label_from_hash(self.elements)) + + labels = [self.class_label] + for element in self.elements: + if not is_hashable(element): + continue + + labels.append( + element.label + if isinstance(element, SearchStrategy) + else calc_label_from_hash(element) + ) + + return combine_labels(*labels) + + def calc_has_reusable_values(self, recur: RecurT) -> bool: # Because our custom .map/.filter implementations skip the normal # wrapper strategies (which would automatically return False for us), # we need to manually return False here if any transformations have # been applied. return not self._transformations - def calc_is_cacheable(self, recur): - return is_simple_data(self.elements) + def calc_is_cacheable(self, recur: RecurT) -> bool: + return is_hashable(self.elements) - def _transform(self, element): + def _transform( + self, + # https://github.com/python/mypy/issues/7049, we're not writing `element` + # anywhere in the class so this is still type-safe. 
mypy is being more + # conservative than necessary + element: Ex, # type: ignore + ) -> Union[Ex, UniqueIdentifier]: # Used in UniqueSampledListStrategy for name, f in self._transformations: if name == "map": @@ -541,7 +642,7 @@ class SampledFromStrategy(SearchStrategy): return filter_not_satisfied return element - def do_draw(self, data): + def do_draw(self, data: ConjectureData) -> Ex: result = self.do_filtered_draw(data) if isinstance(result, SearchStrategy) and all( isinstance(x, SearchStrategy) for x in self.elements @@ -553,15 +654,16 @@ class SampledFromStrategy(SearchStrategy): ) if result is filter_not_satisfied: data.mark_invalid(f"Aborted test because unable to satisfy {self!r}") + assert not isinstance(result, UniqueIdentifier) return result - def get_element(self, i): + def get_element(self, i: int) -> Union[Ex, UniqueIdentifier]: return self._transform(self.elements[i]) - def do_filtered_draw(self, data): + def do_filtered_draw(self, data: ConjectureData) -> Union[Ex, UniqueIdentifier]: # Set of indices that have been tried so far, so that we never test # the same element twice during a draw. - known_bad_indices = set() + known_bad_indices: set[int] = set() # Start with ordinary rejection sampling. It's fast if it works, and # if it doesn't work then it was only a small amount of overhead. @@ -593,11 +695,12 @@ class SampledFromStrategy(SearchStrategy): # of them at random. But if we encounter the speculatively-chosen one, # just use that and return immediately. 
Note that we also track the # allowed elements, in case of .map(some_stateful_function) - allowed = [] + allowed: list[tuple[int, Ex]] = [] for i in range(min(len(self.elements), self._MAX_FILTER_CALLS - 3)): if i not in known_bad_indices: element = self.get_element(i) if element is not filter_not_satisfied: + assert not isinstance(element, UniqueIdentifier) allowed.append((i, element)) if len(allowed) > speculative_index: # Early-exit case: We reached the speculative index, so @@ -624,24 +727,23 @@ class OneOfStrategy(SearchStrategy[Ex]): conditional distribution of that strategy. """ - def __init__(self, strategies): + def __init__(self, strategies: Sequence[SearchStrategy[Ex]]): super().__init__() - strategies = tuple(strategies) - self.original_strategies = list(strategies) - self.__element_strategies = None + self.original_strategies = tuple(strategies) + self.__element_strategies: Optional[Sequence[SearchStrategy[Ex]]] = None self.__in_branches = False - def calc_is_empty(self, recur): + def calc_is_empty(self, recur: RecurT) -> bool: return all(recur(e) for e in self.original_strategies) - def calc_has_reusable_values(self, recur): + def calc_has_reusable_values(self, recur: RecurT) -> bool: return all(recur(e) for e in self.original_strategies) - def calc_is_cacheable(self, recur): + def calc_is_cacheable(self, recur: RecurT) -> bool: return all(recur(e) for e in self.original_strategies) @property - def element_strategies(self): + def element_strategies(self) -> Sequence[SearchStrategy[Ex]]: if self.__element_strategies is None: # While strategies are hashable, they use object.__hash__ and are # therefore distinguished only by identity. @@ -658,8 +760,8 @@ class OneOfStrategy(SearchStrategy[Ex]): # Having made several attempts, the minor benefits of making strategies # hashable are simply not worth the engineering effort it would take. # See also issues #2291 and #2327. 
- seen = {self} - strategies = [] + seen: set[SearchStrategy] = {self} + strategies: list[SearchStrategy] = [] for arg in self.original_strategies: check_strategy(arg) if not arg.is_empty: @@ -670,7 +772,7 @@ class OneOfStrategy(SearchStrategy[Ex]): self.__element_strategies = strategies return self.__element_strategies - def calc_label(self): + def calc_label(self) -> int: return combine_labels( self.class_label, *(p.label for p in self.original_strategies) ) @@ -683,15 +785,15 @@ class OneOfStrategy(SearchStrategy[Ex]): ) return data.draw(strategy) - def __repr__(self): + def __repr__(self) -> str: return "one_of(%s)" % ", ".join(map(repr, self.original_strategies)) - def do_validate(self): + def do_validate(self) -> None: for e in self.element_strategies: e.validate() @property - def branches(self): + def branches(self) -> Sequence[SearchStrategy[Ex]]: if not self.__in_branches: try: self.__in_branches = True @@ -701,7 +803,7 @@ class OneOfStrategy(SearchStrategy[Ex]): else: return [self] - def filter(self, condition): + def filter(self, condition: Callable[[Ex], Any]) -> SearchStrategy[Ex]: return FilteredStrategy( OneOfStrategy([s.filter(condition) for s in self.original_strategies]), conditions=(), @@ -804,36 +906,43 @@ def one_of( f"Did you mean st.sampled_from({list(args)!r})? st.one_of() is used " "to combine strategies, but all of the arguments were of other types." ) + # we've handled the case where args is a one-element sequence [(s1, s2, ...)] + # above, so we can assume it's an actual sequence of strategies. + args = cast(Sequence[SearchStrategy], args) return OneOfStrategy(args) -class MappedStrategy(SearchStrategy[Ex]): +class MappedStrategy(SearchStrategy[MappedTo], Generic[MappedFrom, MappedTo]): """A strategy which is defined purely by conversion to and from another strategy. Its parameter and distribution come from that other strategy. 
""" - def __init__(self, strategy, pack): + def __init__( + self, + strategy: SearchStrategy[MappedFrom], + pack: Callable[[MappedFrom], MappedTo], + ) -> None: super().__init__() self.mapped_strategy = strategy self.pack = pack - def calc_is_empty(self, recur): + def calc_is_empty(self, recur: RecurT) -> bool: return recur(self.mapped_strategy) - def calc_is_cacheable(self, recur): + def calc_is_cacheable(self, recur: RecurT) -> bool: return recur(self.mapped_strategy) - def __repr__(self): + def __repr__(self) -> str: if not hasattr(self, "_cached_repr"): self._cached_repr = f"{self.mapped_strategy!r}.map({get_pretty_function_description(self.pack)})" return self._cached_repr - def do_validate(self): + def do_validate(self) -> None: self.mapped_strategy.validate() - def do_draw(self, data: ConjectureData) -> Any: + def do_draw(self, data: ConjectureData) -> MappedTo: with warnings.catch_warnings(): if isinstance(self.pack, type) and issubclass( self.pack, (abc.Mapping, abc.Set) @@ -841,24 +950,26 @@ class MappedStrategy(SearchStrategy[Ex]): warnings.simplefilter("ignore", BytesWarning) for _ in range(3): try: - data.start_example(MAPPED_SEARCH_STRATEGY_DO_DRAW_LABEL) + data.start_span(MAPPED_SEARCH_STRATEGY_DO_DRAW_LABEL) x = data.draw(self.mapped_strategy) - result = self.pack(x) # type: ignore - data.stop_example() + result = self.pack(x) + data.stop_span() current_build_context().record_call(result, self.pack, [x], {}) return result except UnsatisfiedAssumption: - data.stop_example(discard=True) + data.stop_span(discard=True) raise UnsatisfiedAssumption @property - def branches(self) -> list[SearchStrategy[Ex]]: + def branches(self) -> Sequence[SearchStrategy[MappedTo]]: return [ MappedStrategy(strategy, pack=self.pack) for strategy in self.mapped_strategy.branches ] - def filter(self, condition: Callable[[Ex], Any]) -> "SearchStrategy[Ex]": + def filter( + self, condition: Callable[[MappedTo], Any] + ) -> "SearchStrategy[MappedTo]": # Includes a special case 
so that we can rewrite filters on collection # lengths, when most collections are `st.lists(...).map(the_type)`. ListStrategy = _list_strategy_type() @@ -880,13 +991,13 @@ class MappedStrategy(SearchStrategy[Ex]): @lru_cache -def _list_strategy_type(): +def _list_strategy_type() -> Any: from hypothesis.strategies._internal.collections import ListStrategy return ListStrategy -def _collection_ish_functions(): +def _collection_ish_functions() -> Sequence[Any]: funcs = [sorted] if np := sys.modules.get("numpy"): # c.f. https://numpy.org/doc/stable/reference/routines.array-creation.html @@ -920,12 +1031,16 @@ filter_not_satisfied = UniqueIdentifier("filter not satisfied") class FilteredStrategy(SearchStrategy[Ex]): - def __init__(self, strategy, conditions): + def __init__( + self, strategy: SearchStrategy[Ex], conditions: tuple[Callable[[Ex], Any], ...] + ): super().__init__() if isinstance(strategy, FilteredStrategy): # Flatten chained filters into a single filter with multiple conditions. - self.flat_conditions = strategy.flat_conditions + conditions - self.filtered_strategy = strategy.filtered_strategy + self.flat_conditions: tuple[Callable[[Ex], Any], ...] 
= ( + strategy.flat_conditions + conditions + ) + self.filtered_strategy: SearchStrategy[Ex] = strategy.filtered_strategy else: self.flat_conditions = conditions self.filtered_strategy = strategy @@ -933,15 +1048,15 @@ class FilteredStrategy(SearchStrategy[Ex]): assert isinstance(self.flat_conditions, tuple) assert not isinstance(self.filtered_strategy, FilteredStrategy) - self.__condition = None + self.__condition: Optional[Callable[[Ex], Any]] = None - def calc_is_empty(self, recur): + def calc_is_empty(self, recur: RecurT) -> bool: return recur(self.filtered_strategy) - def calc_is_cacheable(self, recur): + def calc_is_cacheable(self, recur: RecurT) -> bool: return recur(self.filtered_strategy) - def __repr__(self): + def __repr__(self) -> str: if not hasattr(self, "_cached_repr"): self._cached_repr = "{!r}{}".format( self.filtered_strategy, @@ -952,7 +1067,7 @@ class FilteredStrategy(SearchStrategy[Ex]): ) return self._cached_repr - def do_validate(self): + def do_validate(self) -> None: # Start by validating our inner filtered_strategy. If this was a LazyStrategy, # validation also reifies it so that subsequent calls to e.g. `.filter()` will # be passed through. @@ -974,7 +1089,7 @@ class FilteredStrategy(SearchStrategy[Ex]): # an in-place method so we still just re-initialize the strategy! FilteredStrategy.__init__(self, fresh, ()) - def filter(self, condition): + def filter(self, condition: Callable[[Ex], Any]) -> "FilteredStrategy[Ex]": # If we can, it's more efficient to rewrite our strategy to satisfy the # condition. 
We therefore exploit the fact that the order of predicates # doesn't matter (`f(x) and g(x) == g(x) and f(x)`) by attempting to apply @@ -990,16 +1105,16 @@ class FilteredStrategy(SearchStrategy[Ex]): return FilteredStrategy(out, self.flat_conditions) @property - def condition(self): + def condition(self) -> Callable[[Ex], Any]: if self.__condition is None: if len(self.flat_conditions) == 1: # Avoid an extra indirection in the common case of only one condition. self.__condition = self.flat_conditions[0] elif len(self.flat_conditions) == 0: # Possible, if unlikely, due to filter predicate rewriting - self.__condition = lambda _: True + self.__condition = lambda _: True # type: ignore # covariant type param else: - self.__condition = lambda x: all( + self.__condition = lambda x: all( # type: ignore # covariant type param cond(x) for cond in self.flat_conditions ) return self.__condition @@ -1007,27 +1122,26 @@ class FilteredStrategy(SearchStrategy[Ex]): def do_draw(self, data: ConjectureData) -> Ex: result = self.do_filtered_draw(data) if result is not filter_not_satisfied: - return result + return cast(Ex, result) data.mark_invalid(f"Aborted test because unable to satisfy {self!r}") - raise NotImplementedError("Unreachable, for Mypy") - def do_filtered_draw(self, data): + def do_filtered_draw(self, data: ConjectureData) -> Union[Ex, UniqueIdentifier]: for i in range(3): - data.start_example(FILTERED_SEARCH_STRATEGY_DO_DRAW_LABEL) + data.start_span(FILTERED_SEARCH_STRATEGY_DO_DRAW_LABEL) value = data.draw(self.filtered_strategy) if self.condition(value): - data.stop_example() + data.stop_span() return value else: - data.stop_example(discard=True) + data.stop_span(discard=True) if i == 0: data.events[f"Retried draw from {self!r} to satisfy filter"] = "" return filter_not_satisfied @property - def branches(self) -> list[SearchStrategy[Ex]]: + def branches(self) -> Sequence[SearchStrategy[Ex]]: return [ FilteredStrategy(strategy=strategy, 
conditions=self.flat_conditions) for strategy in self.filtered_strategy.branches @@ -1035,7 +1149,7 @@ class FilteredStrategy(SearchStrategy[Ex]): @check_function -def check_strategy(arg, name=""): +def check_strategy(arg: object, name: str = "") -> None: assert isinstance(name, str) if not isinstance(arg, SearchStrategy): hint = "" diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strings.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strings.py index 53d81b73949..4f38627180a 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strings.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/strings.py @@ -16,7 +16,8 @@ from typing import Optional from hypothesis.errors import HypothesisWarning, InvalidArgument from hypothesis.internal import charmap -from hypothesis.internal.conjecture.data import COLLECTION_DEFAULT_MAX_SIZE +from hypothesis.internal.conjecture.data import ConjectureData +from hypothesis.internal.conjecture.providers import COLLECTION_DEFAULT_MAX_SIZE from hypothesis.internal.filtering import max_len, min_len from hypothesis.internal.intervalsets import IntervalSet from hypothesis.internal.reflection import get_pretty_function_description @@ -43,10 +44,12 @@ def _check_is_single_character(c): return c -class OneCharStringStrategy(SearchStrategy): +class OneCharStringStrategy(SearchStrategy[str]): """A strategy which generates single character strings of text type.""" - def __init__(self, intervals, force_repr=None): + def __init__( + self, intervals: IntervalSet, force_repr: Optional[str] = None + ) -> None: assert isinstance(intervals, IntervalSet) self.intervals = intervals self._force_repr = force_repr @@ -116,10 +119,10 @@ class OneCharStringStrategy(SearchStrategy): f"{alphabet=} must be a sampled_from() or characters() strategy" ) - def __repr__(self): + def __repr__(self) -> str: return self._force_repr or f"OneCharStringStrategy({self.intervals!r})" - def 
do_draw(self, data): + def do_draw(self, data: ConjectureData) -> str: return data.draw_string(self.intervals, min_size=1, max_size=1) @@ -150,7 +153,7 @@ _nonempty_and_content_names = ( ) -class TextStrategy(ListStrategy): +class TextStrategy(ListStrategy[str]): def do_draw(self, data): # if our element strategy is OneCharStringStrategy, we can skip the # ListStrategy draw and jump right to our nice IR string draw. @@ -169,7 +172,7 @@ class TextStrategy(ListStrategy): ) return "".join(super().do_draw(data)) - def __repr__(self): + def __repr__(self) -> str: args = [] if repr(self.element_strategy) != "characters()": args.append(repr(self.element_strategy)) diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py index 4fd48291367..5e396964af2 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/types.py @@ -272,6 +272,17 @@ def is_a_new_type(thing): return isinstance(thing, typing.NewType) +def is_a_type_alias_type(thing): # pragma: no cover # covered by 3.12+ tests + # TypeAliasType is new in python 3.12, through the type statement. If we're + # before python 3.12 then this can't possibly by a TypeAliasType. 
+ # + # https://docs.python.org/3/reference/simple_stmts.html#type + # https://docs.python.org/3/library/typing.html#typing.TypeAliasType + if sys.version_info < (3, 12): + return False + return isinstance(thing, typing.TypeAliasType) + + def is_a_union(thing: object) -> bool: """Return True if thing is a typing.Union or types.UnionType (in py310).""" return isinstance(thing, UnionType) or get_origin(thing) is typing.Union @@ -279,7 +290,12 @@ def is_a_union(thing: object) -> bool: def is_a_type(thing: object) -> bool: """Return True if thing is a type or a generic type like thing.""" - return isinstance(thing, type) or is_generic_type(thing) or is_a_new_type(thing) + return ( + isinstance(thing, type) + or is_generic_type(thing) + or is_a_new_type(thing) + or is_a_type_alias_type(thing) + ) def is_typing_literal(thing: object) -> bool: @@ -525,7 +541,9 @@ def from_typing_type(thing): else: union_elems = () if not any( - isinstance(T, type) and issubclass(int, get_origin(T) or T) + # see https://github.com/HypothesisWorks/hypothesis/issues/4194 for + # try_issubclass. + isinstance(T, type) and try_issubclass(int, get_origin(T) or T) for T in [*union_elems, elem_type] ): mapping.pop(bytes, None) @@ -984,7 +1002,7 @@ class GeneratorStrategy(st.SearchStrategy): self.yields = yields self.returns = returns - def __repr__(self): + def __repr__(self) -> str: return f"<generators yields={self.yields!r} returns={self.returns!r}>" def do_draw(self, data): @@ -1031,6 +1049,9 @@ def resolve_Callable(thing): "Consider using an explicit strategy, or opening an issue." 
) + if get_origin(thing) is collections.abc.Callable and return_type is None: + return_type = type(None) + return st.functions( like=(lambda *a, **k: None) if args_types else (lambda: None), returns=st.from_type(return_type), diff --git a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/utils.py b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/utils.py index 8fb6ff7e72a..ec7e5833cf1 100644 --- a/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/utils.py +++ b/contrib/python/hypothesis/py3/hypothesis/strategies/_internal/utils.py @@ -10,6 +10,7 @@ import sys import threading +from functools import partial from inspect import signature from typing import TYPE_CHECKING, Callable @@ -17,8 +18,7 @@ import attr from hypothesis.internal.cache import LRUReusedCache from hypothesis.internal.compat import dataclass_asdict -from hypothesis.internal.conjecture.junkdrawer import clamp -from hypothesis.internal.floats import float_to_int +from hypothesis.internal.floats import clamp, float_to_int from hypothesis.internal.reflection import proxies from hypothesis.vendor.pretty import pretty @@ -157,7 +157,7 @@ def defines_strategy( return decorator -def to_jsonable(obj: object) -> object: +def to_jsonable(obj: object, *, avoid_realization: bool) -> object: """Recursively convert an object to json-encodable form. This is not intended to round-trip, but rather provide an analysis-ready @@ -165,27 +165,31 @@ def to_jsonable(obj: object) -> object: known types. """ if isinstance(obj, (str, int, float, bool, type(None))): - if isinstance(obj, int) and abs(obj) >= 2**63: - # Silently clamp very large ints to max_float, to avoid - # OverflowError when casting to float. + if isinstance(obj, int) and not isinstance(obj, bool) and abs(obj) >= 2**63: + # Silently clamp very large ints to max_float, to avoid OverflowError when + # casting to float. 
(but avoid adding more constraints to symbolic values) + if avoid_realization: + return "<symbolic>" obj = clamp(-sys.float_info.max, obj, sys.float_info.max) return float(obj) return obj + if avoid_realization: + return "<symbolic>" + recur = partial(to_jsonable, avoid_realization=avoid_realization) if isinstance(obj, (list, tuple, set, frozenset)): if isinstance(obj, tuple) and hasattr(obj, "_asdict"): - return to_jsonable(obj._asdict()) # treat namedtuples as dicts - return [to_jsonable(x) for x in obj] + return recur(obj._asdict()) # treat namedtuples as dicts + return [recur(x) for x in obj] if isinstance(obj, dict): return { - k if isinstance(k, str) else pretty(k): to_jsonable(v) - for k, v in obj.items() + k if isinstance(k, str) else pretty(k): recur(v) for k, v in obj.items() } # Hey, might as well try calling a .to_json() method - it works for Pandas! # We try this before the below general-purpose handlers to give folks a # chance to control this behavior on their custom classes. try: - return to_jsonable(obj.to_json()) # type: ignore + return recur(obj.to_json()) # type: ignore except Exception: pass @@ -195,11 +199,11 @@ def to_jsonable(obj: object) -> object: and dcs.is_dataclass(obj) and not isinstance(obj, type) ): - return to_jsonable(dataclass_asdict(obj)) + return recur(dataclass_asdict(obj)) if attr.has(type(obj)): - return to_jsonable(attr.asdict(obj, recurse=False)) # type: ignore + return recur(attr.asdict(obj, recurse=False)) # type: ignore if (pyd := sys.modules.get("pydantic")) and isinstance(obj, pyd.BaseModel): - return to_jsonable(obj.model_dump()) + return recur(obj.model_dump()) # If all else fails, we'll just pretty-print as a string. 
return pretty(obj) diff --git a/contrib/python/hypothesis/py3/hypothesis/utils/conventions.py b/contrib/python/hypothesis/py3/hypothesis/utils/conventions.py index ec01326b49d..7da7e200328 100644 --- a/contrib/python/hypothesis/py3/hypothesis/utils/conventions.py +++ b/contrib/python/hypothesis/py3/hypothesis/utils/conventions.py @@ -12,10 +12,10 @@ class UniqueIdentifier: """A factory for sentinel objects with nice reprs.""" - def __init__(self, identifier): + def __init__(self, identifier: str) -> None: self.identifier = identifier - def __repr__(self): + def __repr__(self) -> str: return self.identifier diff --git a/contrib/python/hypothesis/py3/hypothesis/utils/terminal.py b/contrib/python/hypothesis/py3/hypothesis/utils/terminal.py index afe6779a07d..7c45df19e2f 100644 --- a/contrib/python/hypothesis/py3/hypothesis/utils/terminal.py +++ b/contrib/python/hypothesis/py3/hypothesis/utils/terminal.py @@ -9,9 +9,10 @@ # obtain one at https://mozilla.org/MPL/2.0/. import os +from typing import Literal -def guess_background_color(): +def guess_background_color() -> Literal["light", "dark", "unknown"]: """Returns one of "dark", "light", or "unknown". This is basically just guessing, but better than always guessing "dark"! 
@@ -24,7 +25,7 @@ def guess_background_color(): return theme # Guessing based on the $COLORFGBG environment variable try: - fg, *_, bg = os.getenv("COLORFGBG").split(";") + fg, *_, bg = os.getenv("COLORFGBG", "").split(";") except Exception: pass else: diff --git a/contrib/python/hypothesis/py3/hypothesis/vendor/pretty.py b/contrib/python/hypothesis/py3/hypothesis/vendor/pretty.py index 0f0326c435b..b31404b8267 100644 --- a/contrib/python/hypothesis/py3/hypothesis/vendor/pretty.py +++ b/contrib/python/hypothesis/py3/hypothesis/vendor/pretty.py @@ -69,20 +69,33 @@ import struct import sys import types import warnings -from collections import defaultdict, deque +from collections import Counter, OrderedDict, defaultdict, deque +from collections.abc import Generator, Iterable, Sequence from contextlib import contextmanager, suppress -from enum import Flag -from io import StringIO +from enum import Enum, Flag +from functools import partial +from io import StringIO, TextIOBase from math import copysign, isnan +from typing import TYPE_CHECKING, Any, Callable, Optional, TypeVar, Union + +if TYPE_CHECKING: + from typing import TypeAlias + + from hypothesis.control import BuildContext + +# ruff: noqa: FBT001 + +T = TypeVar("T") +PrettyPrintFunction: "TypeAlias" = Callable[[Any, "RepresentationPrinter", bool], None] __all__ = [ - "pretty", "IDKey", "RepresentationPrinter", + "pretty", ] -def _safe_getattr(obj, attr, default=None): +def _safe_getattr(obj: object, attr: str, default: Optional[Any] = None) -> Any: """Safe version of getattr. 
Same as getattr, but will return ``default`` on any Exception, @@ -95,7 +108,7 @@ def _safe_getattr(obj, attr, default=None): return default -def pretty(obj): +def pretty(obj: object) -> str: """Pretty print the object's representation.""" printer = RepresentationPrinter() printer.pretty(obj) @@ -103,7 +116,7 @@ def pretty(obj): class IDKey: - def __init__(self, value): + def __init__(self, value: object): self.value = value def __hash__(self) -> int: @@ -123,30 +136,35 @@ class RepresentationPrinter: """ - def __init__(self, output=None, *, context=None): - """Pass the output stream, and optionally the current build context. + def __init__( + self, + output: Optional[TextIOBase] = None, + *, + context: Optional["BuildContext"] = None, + ) -> None: + """Optionally pass the output stream and the current build context. We use the context to represent objects constructed by strategies by showing *how* they were constructed, and add annotations showing which parts of the minimal failing example can vary without changing the test result. 
""" - self.broken = False - self.output = StringIO() if output is None else output - self.max_width = 79 - self.max_seq_length = 1000 - self.output_width = 0 - self.buffer_width = 0 - self.buffer = deque() + self.broken: bool = False + self.output: TextIOBase = StringIO() if output is None else output + self.max_width: int = 79 + self.max_seq_length: int = 1000 + self.output_width: int = 0 + self.buffer_width: int = 0 + self.buffer: deque[Union[Breakable, Text]] = deque() root_group = Group(0) self.group_stack = [root_group] self.group_queue = GroupQueue(root_group) - self.indentation = 0 + self.indentation: int = 0 - self.stack = [] - self.singleton_pprinters = {} - self.type_pprinters = {} - self.deferred_pprinters = {} + self.stack: list[int] = [] + self.singleton_pprinters: dict[int, PrettyPrintFunction] = {} + self.type_pprinters: dict[type, PrettyPrintFunction] = {} + self.deferred_pprinters: dict[tuple[str, str], PrettyPrintFunction] = {} # If IPython has been imported, load up their pretty-printer registry if "IPython.lib.pretty" in sys.modules: ipp = sys.modules["IPython.lib.pretty"] @@ -163,6 +181,8 @@ class RepresentationPrinter: # but we report each separately so that's someone else's problem here. # Invocations of self.repr_call() can report the slice for each argument, # which will then be used to look up the relevant comment if any. 
+ self.known_object_printers: dict[IDKey, list[PrettyPrintFunction]] + self.slice_comments: dict[tuple[int, int], str] if context is None: self.known_object_printers = defaultdict(list) self.slice_comments = {} @@ -171,7 +191,7 @@ class RepresentationPrinter: self.slice_comments = context.data.slice_comments assert all(isinstance(k, IDKey) for k in self.known_object_printers) - def pretty(self, obj): + def pretty(self, obj: object) -> None: """Pretty print the given object.""" obj_id = id(obj) cycle = obj_id in self.stack @@ -260,7 +280,7 @@ class RepresentationPrinter: finally: self.stack.pop() - def _break_outer_groups(self): + def _break_outer_groups(self) -> None: while self.max_width < self.output_width + self.buffer_width: group = self.group_queue.deq() if not group: @@ -274,7 +294,7 @@ class RepresentationPrinter: self.output_width = x.output(self.output, self.output_width) self.buffer_width -= x.width - def text(self, obj): + def text(self, obj: str) -> None: """Add literal text to the output.""" width = len(obj) if self.buffer: @@ -289,7 +309,7 @@ class RepresentationPrinter: self.output.write(obj) self.output_width += width - def breakable(self, sep=" "): + def breakable(self, sep: str = " ") -> None: """Add a breakable separator to the output. This does not mean that it will automatically break here. 
If no @@ -309,7 +329,7 @@ class RepresentationPrinter: self.buffer_width += width self._break_outer_groups() - def break_(self): + def break_(self) -> None: """Explicitly insert a newline into the output, maintaining correct indentation.""" self.flush() @@ -318,7 +338,7 @@ class RepresentationPrinter: self.buffer_width = 0 @contextmanager - def indent(self, indent): + def indent(self, indent: int) -> Generator[None, None, None]: """`with`-statement support for indenting/dedenting.""" self.indentation += indent try: @@ -327,7 +347,9 @@ class RepresentationPrinter: self.indentation -= indent @contextmanager - def group(self, indent=0, open="", close=""): + def group( + self, indent: int = 0, open: str = "", close: str = "" + ) -> Generator[None, None, None]: """Context manager for an indented group. with p.group(1, '{', '}'): @@ -342,7 +364,7 @@ class RepresentationPrinter: finally: self.end_group(dedent=indent, close=close) - def begin_group(self, indent=0, open=""): + def begin_group(self, indent: int = 0, open: str = "") -> None: """Use the `with group(...) context manager instead. 
The begin_group() and end_group() methods are for IPython compatibility only; @@ -355,7 +377,7 @@ class RepresentationPrinter: self.group_queue.enq(group) self.indentation += indent - def end_group(self, dedent=0, close=""): + def end_group(self, dedent: int = 0, close: str = "") -> None: """See begin_group().""" self.indentation -= dedent group = self.group_stack.pop() @@ -364,7 +386,7 @@ class RepresentationPrinter: if close: self.text(close) - def _enumerate(self, seq): + def _enumerate(self, seq: Iterable[T]) -> Generator[tuple[int, T], None, None]: """Like enumerate, but with an upper limit on the number of items.""" for idx, x in enumerate(seq): if self.max_seq_length and idx >= self.max_seq_length: @@ -374,19 +396,26 @@ class RepresentationPrinter: return yield idx, x - def flush(self): + def flush(self) -> None: """Flush data that is left in the buffer.""" for data in self.buffer: self.output_width += data.output(self.output, self.output_width) self.buffer.clear() self.buffer_width = 0 - def getvalue(self): + def getvalue(self) -> str: assert isinstance(self.output, StringIO) self.flush() return self.output.getvalue() - def maybe_repr_known_object_as_call(self, obj, cycle, name, args, kwargs): + def maybe_repr_known_object_as_call( + self, + obj: object, + cycle: bool, + name: str, + args: Sequence[object], + kwargs: dict[str, object], + ) -> None: # pprint this object as a call, _unless_ the call would be invalid syntax # and the repr would be valid and there are not comments on arguments. 
if cycle: @@ -411,14 +440,15 @@ class RepresentationPrinter: def repr_call( self, - func_name, - args, - kwargs, + func_name: str, + args: Sequence[object], + kwargs: dict[str, object], *, - force_split=None, - arg_slices=None, - leading_comment=None, - ): + force_split: Optional[bool] = None, + arg_slices: Optional[dict[str, tuple[int, int]]] = None, + leading_comment: Optional[str] = None, + avoid_realization: bool = False, + ) -> None: """Helper function to represent a function call. - func_name, args, and kwargs should all be pretty obvious. @@ -432,7 +462,9 @@ class RepresentationPrinter: func_name = f"({func_name})" self.text(func_name) all_args = [(None, v) for v in args] + list(kwargs.items()) - comments = { + # int indicates the position of a positional argument, rather than a keyword + # argument. Currently no callers use this; see #3624. + comments: dict[Union[int, str], object] = { k: self.slice_comments[v] for k, v in (arg_slices or {}).items() if v in self.slice_comments @@ -463,11 +495,15 @@ class RepresentationPrinter: self.breakable(" " if i else "") if k: self.text(f"{k}=") - self.pretty(v) + if avoid_realization: + self.text("<symbolic>") + else: + self.pretty(v) if force_split or i + 1 < len(all_args): self.text(",") - # Optional comments are used to annotate which-parts-matter - comment = comments.get(i) or comments.get(k) + comment = None + if k is not None: + comment = comments.get(i) or comments.get(k) if comment: self.text(f" # {comment}") if all_args and force_split: @@ -476,27 +512,27 @@ class RepresentationPrinter: class Printable: - def output(self, stream, output_width): # pragma: no cover + def output(self, stream: TextIOBase, output_width: int) -> int: # pragma: no cover raise NotImplementedError class Text(Printable): - def __init__(self): - self.objs = [] - self.width = 0 + def __init__(self) -> None: + self.objs: list[str] = [] + self.width: int = 0 - def output(self, stream, output_width): + def output(self, stream: TextIOBase, 
output_width: int) -> int: for obj in self.objs: stream.write(obj) return output_width + self.width - def add(self, obj, width): + def add(self, obj: str, width: int) -> None: self.objs.append(obj) self.width += width class Breakable(Printable): - def __init__(self, seq, width, pretty): + def __init__(self, seq: str, width: int, pretty: RepresentationPrinter) -> None: self.obj = seq self.width = width self.pretty = pretty @@ -504,7 +540,7 @@ class Breakable(Printable): self.group = pretty.group_stack[-1] self.group.breakables.append(self) - def output(self, stream, output_width): + def output(self, stream: TextIOBase, output_width: int) -> int: self.group.breakables.popleft() if self.group.want_break: stream.write("\n" + " " * self.indentation) @@ -516,25 +552,25 @@ class Breakable(Printable): class Group(Printable): - def __init__(self, depth): + def __init__(self, depth: int) -> None: self.depth = depth - self.breakables = deque() - self.want_break = False + self.breakables: deque[Breakable] = deque() + self.want_break: bool = False class GroupQueue: - def __init__(self, *groups): - self.queue = [] + def __init__(self, *groups: Group) -> None: + self.queue: list[list[Group]] = [] for group in groups: self.enq(group) - def enq(self, group): + def enq(self, group: Group) -> None: depth = group.depth while depth > len(self.queue) - 1: self.queue.append([]) self.queue[depth].append(group) - def deq(self): + def deq(self) -> Optional[Group]: for stack in self.queue: for idx, group in enumerate(reversed(stack)): if group.breakables: @@ -544,26 +580,29 @@ class GroupQueue: for group in stack: group.want_break = True del stack[:] + return None - def remove(self, group): + def remove(self, group: Group) -> None: try: self.queue[group.depth].remove(group) except ValueError: pass -def _seq_pprinter_factory(start, end, basetype): +def _seq_pprinter_factory(start: str, end: str, basetype: type) -> PrettyPrintFunction: """Factory that returns a pprint function useful for 
sequences. Used by the default pprint for tuples, dicts, and lists. """ - def inner(obj, p, cycle): + def inner( + obj: Union[tuple[object], list[object]], p: RepresentationPrinter, cycle: bool + ) -> None: typ = type(obj) if ( basetype is not None and typ is not basetype - and typ.__repr__ != basetype.__repr__ + and typ.__repr__ != basetype.__repr__ # type: ignore[comparison-overlap] ): # If the subclass provides its own repr, use it instead. return p.text(typ.__repr__(obj)) @@ -584,15 +623,23 @@ def _seq_pprinter_factory(start, end, basetype): return inner -def get_class_name(cls): - return _safe_getattr(cls, "__qualname__", cls.__name__) +def get_class_name(cls: type[object]) -> str: + class_name = _safe_getattr(cls, "__qualname__", cls.__name__) + assert isinstance(class_name, str) + return class_name -def _set_pprinter_factory(start, end, basetype): +def _set_pprinter_factory( + start: str, end: str, basetype: type[object] +) -> PrettyPrintFunction: """Factory that returns a pprint function useful for sets and frozensets.""" - def inner(obj, p, cycle): + def inner( + obj: Union[set[Any], frozenset[Any]], + p: RepresentationPrinter, + cycle: bool, + ) -> None: typ = type(obj) if ( basetype is not None @@ -611,7 +658,7 @@ def _set_pprinter_factory(start, end, basetype): step = len(start) with p.group(step, start, end): # Like dictionary keys, try to sort the items if there aren't too many - items = obj + items: Iterable[object] = obj if not (p.max_seq_length and len(obj) >= p.max_seq_length): try: items = sorted(obj) @@ -627,11 +674,13 @@ def _set_pprinter_factory(start, end, basetype): return inner -def _dict_pprinter_factory(start, end, basetype=None): +def _dict_pprinter_factory( + start: str, end: str, basetype: Optional[type[object]] = None +) -> PrettyPrintFunction: """Factory that returns a pprint function used by the default pprint of dicts and dict proxies.""" - def inner(obj, p, cycle): + def inner(obj: dict[object, object], p: RepresentationPrinter, 
cycle: bool) -> None: typ = type(obj) if ( basetype is not None @@ -661,7 +710,7 @@ def _dict_pprinter_factory(start, end, basetype=None): return inner -def _super_pprint(obj, p, cycle): +def _super_pprint(obj: Any, p: RepresentationPrinter, cycle: bool) -> None: """The pprint for the super type.""" with p.group(8, "<super: ", ">"): p.pretty(obj.__thisclass__) @@ -670,7 +719,7 @@ def _super_pprint(obj, p, cycle): p.pretty(obj.__self__) -def _re_pattern_pprint(obj, p, cycle): +def _re_pattern_pprint(obj: re.Pattern, p: RepresentationPrinter, cycle: bool) -> None: """The pprint function for regular expression patterns.""" p.text("re.compile(") pattern = repr(obj.pattern) @@ -703,14 +752,14 @@ def _re_pattern_pprint(obj, p, cycle): p.text(")") -def _type_pprint(obj, p, cycle): +def _type_pprint(obj: type[object], p: RepresentationPrinter, cycle: bool) -> None: """The pprint for classes and types.""" # Heap allocated types might not have the module attribute, # and others may set it to None. # Checks for a __repr__ override in the metaclass # != rather than is not because pypy compatibility - if type(obj).__repr__ != type.__repr__: + if type(obj).__repr__ != type.__repr__: # type: ignore[comparison-overlap] _repr_pprint(obj, p, cycle) return @@ -728,7 +777,7 @@ def _type_pprint(obj, p, cycle): p.text(mod + "." 
+ name) -def _repr_pprint(obj, p, cycle): +def _repr_pprint(obj: object, p: RepresentationPrinter, cycle: bool) -> None: """A pprint that just redirects to the normal repr function.""" # Find newlines and replace them with p.break_() output = repr(obj) @@ -738,7 +787,9 @@ def _repr_pprint(obj, p, cycle): p.text(output_line) -def pprint_fields(obj, p, cycle, fields): +def pprint_fields( + obj: object, p: RepresentationPrinter, cycle: bool, fields: Iterable[str] +) -> None: name = get_class_name(obj.__class__) if cycle: return p.text(f"{name}(...)") @@ -752,14 +803,20 @@ def pprint_fields(obj, p, cycle, fields): p.pretty(getattr(obj, field)) -def _function_pprint(obj, p, cycle): +def _function_pprint( + obj: Union[types.FunctionType, types.BuiltinFunctionType, types.MethodType], + p: RepresentationPrinter, + cycle: bool, +) -> None: """Base pprint for all functions and builtin functions.""" from hypothesis.internal.reflection import get_pretty_function_description p.text(get_pretty_function_description(obj)) -def _exception_pprint(obj, p, cycle): +def _exception_pprint( + obj: BaseException, p: RepresentationPrinter, cycle: bool +) -> None: """Base pprint for all exceptions.""" name = getattr(obj.__class__, "__qualname__", obj.__class__.__name__) if obj.__class__.__module__ not in ("exceptions", "builtins"): @@ -773,7 +830,7 @@ def _exception_pprint(obj, p, cycle): p.pretty(arg) -def _repr_integer(obj, p, cycle): +def _repr_integer(obj: int, p: RepresentationPrinter, cycle: bool) -> None: if abs(obj) < 1_000_000_000: p.text(repr(obj)) elif abs(obj) < 10**640: @@ -785,7 +842,9 @@ def _repr_integer(obj, p, cycle): p.text(f"{obj:#_x}") -def _repr_float_counting_nans(obj, p, cycle): +def _repr_float_counting_nans( + obj: float, p: RepresentationPrinter, cycle: bool +) -> None: if isnan(obj): if struct.pack("!d", abs(obj)) != struct.pack("!d", float("nan")): show = hex(*struct.unpack("Q", struct.pack("d", obj))) @@ -796,7 +855,7 @@ def _repr_float_counting_nans(obj, p, 
cycle): #: printers for builtin types -_type_pprinters = { +_type_pprinters: dict[type, PrettyPrintFunction] = { int: _repr_integer, float: _repr_float_counting_nans, str: _repr_pprint, @@ -820,10 +879,12 @@ _type_pprinters = { } #: printers for types specified by name -_deferred_type_pprinters = {} # type: ignore +_deferred_type_pprinters: dict[tuple[str, str], PrettyPrintFunction] = {} -def for_type_by_name(type_module, type_name, func): +def for_type_by_name( + type_module: str, type_name: str, func: PrettyPrintFunction +) -> Optional[PrettyPrintFunction]: """Add a pretty printer for a type specified by the module and name of a type rather than the type object itself.""" key = (type_module, type_name) @@ -833,12 +894,14 @@ def for_type_by_name(type_module, type_name, func): #: printers for the default singletons -_singleton_pprinters = dict.fromkeys( +_singleton_pprinters: dict[int, PrettyPrintFunction] = dict.fromkeys( map(id, [None, True, False, Ellipsis, NotImplemented]), _repr_pprint ) -def _defaultdict_pprint(obj, p, cycle): +def _defaultdict_pprint( + obj: defaultdict[object, object], p: RepresentationPrinter, cycle: bool +) -> None: name = obj.__class__.__name__ with p.group(len(name) + 1, name + "(", ")"): if cycle: @@ -850,7 +913,9 @@ def _defaultdict_pprint(obj, p, cycle): p.pretty(dict(obj)) -def _ordereddict_pprint(obj, p, cycle): +def _ordereddict_pprint( + obj: OrderedDict[object, object], p: RepresentationPrinter, cycle: bool +) -> None: name = obj.__class__.__name__ with p.group(len(name) + 1, name + "(", ")"): if cycle: @@ -859,7 +924,7 @@ def _ordereddict_pprint(obj, p, cycle): p.pretty(list(obj.items())) -def _deque_pprint(obj, p, cycle): +def _deque_pprint(obj: deque[object], p: RepresentationPrinter, cycle: bool) -> None: name = obj.__class__.__name__ with p.group(len(name) + 1, name + "(", ")"): if cycle: @@ -868,7 +933,9 @@ def _deque_pprint(obj, p, cycle): p.pretty(list(obj)) -def _counter_pprint(obj, p, cycle): +def _counter_pprint( + 
obj: Counter[object], p: RepresentationPrinter, cycle: bool +) -> None: name = obj.__class__.__name__ with p.group(len(name) + 1, name + "(", ")"): if cycle: @@ -877,14 +944,16 @@ def _counter_pprint(obj, p, cycle): p.pretty(dict(obj)) -def _repr_dataframe(obj, p, cycle): # pragma: no cover +def _repr_dataframe( + obj: object, p: RepresentationPrinter, cycle: bool +) -> None: # pragma: no cover with p.indent(4): p.break_() _repr_pprint(obj, p, cycle) p.break_() -def _repr_enum(obj, p, cycle): +def _repr_enum(obj: Enum, p: RepresentationPrinter, cycle: bool) -> None: tname = get_class_name(type(obj)) if isinstance(obj, Flag): p.text( @@ -900,7 +969,7 @@ class _ReprDots: return "..." -def _repr_partial(obj, p, cycle): +def _repr_partial(obj: partial[Any], p: RepresentationPrinter, cycle: bool) -> None: args, kw = obj.args, obj.keywords if cycle: args, kw = (_ReprDots(),), {} diff --git a/contrib/python/hypothesis/py3/hypothesis/vendor/tlds-alpha-by-domain.txt b/contrib/python/hypothesis/py3/hypothesis/vendor/tlds-alpha-by-domain.txt index a5776075a13..b534e60ce4d 100644 --- a/contrib/python/hypothesis/py3/hypothesis/vendor/tlds-alpha-by-domain.txt +++ b/contrib/python/hypothesis/py3/hypothesis/vendor/tlds-alpha-by-domain.txt @@ -1,4 +1,4 @@ -# Version 2024092800, Last Updated Sat Sep 28 07:07:01 2024 UTC +# Version 2025030100, Last Updated Sat Mar 1 07:07:02 2025 UTC AAA AARP ABB @@ -627,7 +627,6 @@ KAUFEN KDDI KE KERRYHOTELS -KERRYLOGISTICS KERRYPROPERTIES KFH KG @@ -691,7 +690,6 @@ LIMITED LIMO LINCOLN LINK -LIPSY LIVE LIVING LK diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py index 7595a898e0c..28ac392f296 100644 --- a/contrib/python/hypothesis/py3/hypothesis/version.py +++ b/contrib/python/hypothesis/py3/hypothesis/version.py @@ -8,5 +8,5 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
-__version_info__ = (6, 120, 0) +__version_info__ = (6, 130, 13) __version__ = ".".join(map(str, __version_info__)) diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make index ada2d6d8e20..8750c13d8bc 100644 --- a/contrib/python/hypothesis/py3/ya.make +++ b/contrib/python/hypothesis/py3/ya.make @@ -2,7 +2,7 @@ PY3_LIBRARY() -VERSION(6.120.0) +VERSION(6.130.13) LICENSE(MPL-2.0) @@ -55,7 +55,7 @@ PY_SRCS( hypothesis/internal/charmap.py hypothesis/internal/compat.py hypothesis/internal/conjecture/__init__.py - hypothesis/internal/conjecture/choicetree.py + hypothesis/internal/conjecture/choice.py hypothesis/internal/conjecture/data.py hypothesis/internal/conjecture/datatree.py hypothesis/internal/conjecture/dfa/__init__.py @@ -65,9 +65,11 @@ PY_SRCS( hypothesis/internal/conjecture/junkdrawer.py hypothesis/internal/conjecture/optimiser.py hypothesis/internal/conjecture/pareto.py + hypothesis/internal/conjecture/providers.py hypothesis/internal/conjecture/shrinker.py hypothesis/internal/conjecture/shrinking/__init__.py hypothesis/internal/conjecture/shrinking/bytes.py + hypothesis/internal/conjecture/shrinking/choicetree.py hypothesis/internal/conjecture/shrinking/collection.py hypothesis/internal/conjecture/shrinking/common.py hypothesis/internal/conjecture/shrinking/floats.py @@ -75,6 +77,7 @@ PY_SRCS( hypothesis/internal/conjecture/shrinking/ordering.py hypothesis/internal/conjecture/shrinking/string.py hypothesis/internal/conjecture/utils.py + hypothesis/internal/constants_ast.py hypothesis/internal/coverage.py hypothesis/internal/detection.py hypothesis/internal/entropy.py |
