Intermediate changes

author: robot-piglet <robot-piglet@yandex-team.com> 2024-05-25 20:04:50 +0300
committer: robot-piglet <robot-piglet@yandex-team.com> 2024-05-25 20:20:36 +0300
commit: c981104c914df8a13dd4fc7393a2c83b992d4f19 (patch)
tree: 0acb283ab513417ce1a89e5620a9fdc0264bbe4f /contrib/python/hypothesis
parent: 583af11cf703cccf01c0139eca58bef0d745d9d4 (diff)
download: ydb-c981104c914df8a13dd4fc7393a2c83b992d4f19.tar.gz
7 files changed, 207 insertions, 29 deletions
diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA
index f405fc380f..e9db66d500 100644
--- a/contrib/python/hypothesis/py3/.dist-info/METADATA
+++ b/contrib/python/hypothesis/py3/.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hypothesis
-Version: 6.100.5
+Version: 6.100.6
 Summary: A library for property-based testing
 Home-page: https://hypothesis.works
 Author: David R. MacIver and Zac Hatfield-Dodds
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
index d3b7ba9a8a..99b00ebbca 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py
@@ -136,6 +136,7 @@ IRKWargsType: TypeAlias = Union[
     IntegerKWargs, FloatKWargs, StringKWargs, BytesKWargs, BooleanKWargs
 ]
 IRTypeName: TypeAlias = Literal["integer", "string", "boolean", "float", "bytes"]
+InvalidAt: TypeAlias = Tuple[IRTypeName, IRKWargsType]
 
 
 class ExtraInformation:
@@ -956,6 +957,9 @@ class DataObserver:
     ) -> None:
         pass
 
+    def mark_invalid(self, invalid_at: InvalidAt) -> None:
+        pass
+
 
 @attr.s(slots=True, repr=False, eq=False)
 class IRNode:
@@ -1048,6 +1052,16 @@ class IRNode:
             and self.was_forced == other.was_forced
         )
 
+    def __hash__(self):
+        return hash(
+            (
+                self.ir_type,
+                ir_value_key(self.ir_type, self.value),
+                ir_kwargs_key(self.ir_type, self.kwargs),
+                self.was_forced,
+            )
+        )
+
     def __repr__(self):
         # repr to avoid "BytesWarning: str() on a bytes instance" for bytes nodes
         forced_marker = " [forced]" if self.was_forced else ""
@@ -1087,22 +1101,36 @@ def ir_value_permitted(value, ir_type, kwargs):
     raise NotImplementedError(f"unhandled type {type(value)} of ir value {value}")
 
 
+def ir_value_key(ir_type, v):
+    if ir_type == "float":
+        return float_to_int(v)
+    return v
+
+
+def ir_kwargs_key(ir_type, kwargs):
+    if ir_type == "float":
+        return (
+            float_to_int(kwargs["min_value"]),
+            float_to_int(kwargs["max_value"]),
+            kwargs["allow_nan"],
+            kwargs["smallest_nonzero_magnitude"],
+        )
+    if ir_type == "integer":
+        return (
+            kwargs["min_value"],
+            kwargs["max_value"],
+            None if kwargs["weights"] is None else tuple(kwargs["weights"]),
+            kwargs["shrink_towards"],
+        )
+    return tuple(kwargs[key] for key in sorted(kwargs))
+
+
 def ir_value_equal(ir_type, v1, v2):
-    if ir_type != "float":
-        return v1 == v2
-    return float_to_int(v1) == float_to_int(v2)
+    return ir_value_key(ir_type, v1) == ir_value_key(ir_type, v2)
 
 
 def ir_kwargs_equal(ir_type, kwargs1, kwargs2):
-    if ir_type != "float":
-        return kwargs1 == kwargs2
-    return (
-        float_to_int(kwargs1["min_value"]) == float_to_int(kwargs2["min_value"])
-        and float_to_int(kwargs1["max_value"]) == float_to_int(kwargs2["max_value"])
-        and kwargs1["allow_nan"] == kwargs2["allow_nan"]
-        and kwargs1["smallest_nonzero_magnitude"]
-        == kwargs2["smallest_nonzero_magnitude"]
-    )
+    return ir_kwargs_key(ir_type, kwargs1) == ir_kwargs_key(ir_type, kwargs2)
 
 
 @dataclass_transform()
@@ -1125,6 +1153,7 @@ class ConjectureResult:
     examples: Examples = attr.ib(repr=False)
     arg_slices: Set[Tuple[int, int]] = attr.ib(repr=False)
     slice_comments: Dict[Tuple[int, int], str] = attr.ib(repr=False)
+    invalid_at: Optional[InvalidAt] = attr.ib(repr=False)
 
     index: int = attr.ib(init=False)
 
@@ -1977,6 +2006,7 @@ class ConjectureData:
         self.extra_information = ExtraInformation()
 
         self.ir_tree_nodes = ir_tree_prefix
+        self.invalid_at: Optional[InvalidAt] = None
         self._node_index = 0
         self.start_example(TOP_LABEL)
 
@@ -2279,7 +2309,6 @@ class ConjectureData:
             self.mark_overrun()
 
         node = self.ir_tree_nodes[self._node_index]
-        self._node_index += 1
         # If we're trying to draw a different ir type at the same location, then
         # this ir tree has become badly misaligned. We don't have many good/simple
         # options here for realigning beyond giving up.
@@ -2292,14 +2321,21 @@ class ConjectureData:
         # (in fact, it is possible that giving up early here results in more time
         # for useful shrinks to run).
         if node.ir_type != ir_type:
+            invalid_at = (ir_type, kwargs)
+            self.invalid_at = invalid_at
+            self.observer.mark_invalid(invalid_at)
             self.mark_invalid(f"(internal) want a {ir_type} but have a {node.ir_type}")
 
         # if a node has different kwargs (and so is misaligned), but has a value
         # that is allowed by the expected kwargs, then we can coerce this node
         # into an aligned one by using its value. It's unclear how useful this is.
         if not ir_value_permitted(node.value, node.ir_type, kwargs):
+            invalid_at = (ir_type, kwargs)
+            self.invalid_at = invalid_at
+            self.observer.mark_invalid(invalid_at)
             self.mark_invalid(f"(internal) got a {ir_type} but outside the valid range")
 
+        self._node_index += 1
         return node
 
     def as_result(self) -> Union[ConjectureResult, _Overrun]:
@@ -2328,6 +2364,7 @@ class ConjectureData:
                 forced_indices=frozenset(self.forced_indices),
                 arg_slices=self.arg_slices,
                 slice_comments=self.slice_comments,
+                invalid_at=self.invalid_at,
             )
             assert self.__result is not None
             self.blocks.transfer_ownership(self.__result)
@@ -2475,7 +2512,34 @@ class ConjectureData:
         self.frozen = True
 
         self.buffer = bytes(self.buffer)
-        self.observer.conclude_test(self.status, self.interesting_origin)
+
+        # if we were invalid because of a misalignment in the tree, we don't
+        # want to tell the DataTree that. Doing so would lead to inconsistent behavior.
+        # Given an empty DataTree
+        #               ┌──────┐
+        #               │ root │
+        #               └──────┘
+        # and supposing the very first draw is misaligned, concluding here would
+        # tell the datatree that the *only* possibility at the root node is Status.INVALID:
+        #               ┌──────┐
+        #               │ root │
+        #               └──┬───┘
+        #      ┌───────────┴───────────────┐
+        #      │ Conclusion(Status.INVALID)│
+        #      └───────────────────────────┘
+        # when in fact this is only the case when we try to draw a misaligned node.
+        # For instance, suppose we come along in the second test case and try a
+        # valid node as the first draw from the root. The DataTree thinks this
+        # is flaky (because root must lead to Status.INVALID in the tree) while
+        # in fact nothing in the test function has changed and the only change
+        # is in the ir tree prefix we are supplying.
+        #
+        # From the perspective of DataTree, it is safe to not conclude here. This
+        # tells the datatree that we don't know what happens after this node - which
+        # is true! We are aborting early here because the ir tree became misaligned,
+        # which is a semantically different invalidity than an assume or filter failing.
+        if self.invalid_at is None:
+            self.observer.conclude_test(self.status, self.interesting_origin)
 
     def choice(
         self,
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py
index ac79b42e6e..c4aded558c 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/datatree.py
@@ -24,6 +24,7 @@ from hypothesis.internal.conjecture.data import (
     DataObserver,
     FloatKWargs,
     IntegerKWargs,
+    InvalidAt,
     IRKWargsType,
     IRType,
     IRTypeName,
@@ -422,6 +423,8 @@ class TreeNode:
     #   be explored when generating novel prefixes)
     transition: Union[None, Branch, Conclusion, Killed] = attr.ib(default=None)
 
+    invalid_at: Optional[InvalidAt] = attr.ib(default=None)
+
     # A tree node is exhausted if every possible sequence of draws below it has
     # been explored. We only update this when performing operations that could
     # change the answer.
@@ -475,6 +478,8 @@ class TreeNode:
         del self.ir_types[i:]
         del self.values[i:]
         del self.kwargs[i:]
+        # we have a transition now, so we don't need to carry around invalid_at.
+        self.invalid_at = None
         assert len(self.values) == len(self.kwargs) == len(self.ir_types) == i
 
     def check_exhausted(self):
@@ -811,6 +816,9 @@ class DataTree:
         node = self.root
 
         def draw(ir_type, kwargs, *, forced=None):
+            if ir_type == "float" and forced is not None:
+                forced = int_to_float(forced)
+
             draw_func = getattr(data, f"draw_{ir_type}")
             value = draw_func(**kwargs, forced=forced)
 
@@ -832,6 +840,13 @@ class DataTree:
                     t = node.transition
                     data.conclude_test(t.status, t.interesting_origin)
                 elif node.transition is None:
+                    if node.invalid_at is not None:
+                        (ir_type, kwargs) = node.invalid_at
+                        try:
+                            draw(ir_type, kwargs)
+                        except StopTest:
+                            if data.invalid_at is not None:
+                                raise
                     raise PreviouslyUnseenBehaviour
                 elif isinstance(node.transition, Branch):
                     v = draw(node.transition.ir_type, node.transition.kwargs)
@@ -977,6 +992,9 @@ class TreeRecordingObserver(DataObserver):
     ) -> None:
         self.draw_value("boolean", value, was_forced=was_forced, kwargs=kwargs)
 
+    def mark_invalid(self, invalid_at: InvalidAt) -> None:
+        self.__current_node.invalid_at = invalid_at
+
     def draw_value(
         self,
         ir_type: IRTypeName,
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py
index ddf8c0e090..d154fdc28e 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py
@@ -34,6 +34,8 @@ from hypothesis.internal.conjecture.data import (
     Overrun,
     PrimitiveProvider,
     Status,
+    ir_kwargs_key,
+    ir_value_key,
 )
 from hypothesis.internal.conjecture.datatree import DataTree, PreviouslyUnseenBehaviour
 from hypothesis.internal.conjecture.junkdrawer import clamp, ensure_free_stackframes
@@ -186,6 +188,7 @@ class ConjectureRunner:
         # shrinking where we need to know about the structure of the
         # executed test case.
         self.__data_cache = LRUReusedCache(CACHE_SIZE)
+        self.__data_cache_ir = LRUReusedCache(CACHE_SIZE)
 
         self.__pending_call_explanation = None
         self._switch_to_hypothesis_provider = False
@@ -239,10 +242,70 @@ class ConjectureRunner:
                     # correct engine.
                     raise
 
-    def ir_tree_to_data(self, ir_tree_nodes):
-        data = ConjectureData.for_ir_tree(ir_tree_nodes)
-        self.__stoppable_test_function(data)
-        return data
+    def _cache_key_ir(self, *, nodes=None, data=None):
+        assert (nodes is not None) ^ (data is not None)
+        extension = []
+        if data is not None:
+            nodes = data.examples.ir_tree_nodes
+            if data.invalid_at is not None:
+                # if we're invalid then we should have at least one node left (the invalid one).
+                assert data._node_index < len(data.ir_tree_nodes)
+                extension = [data.ir_tree_nodes[data._node_index]]
+
+        # intentionally drop was_forced from equality here, because the was_forced
+        # of node prefixes on ConjectureData has no impact on that data's result
+        return tuple(
+            (
+                node.ir_type,
+                ir_value_key(node.ir_type, node.value),
+                ir_kwargs_key(node.ir_type, node.kwargs),
+            )
+            for node in nodes + extension
+        )
+
+    def _cache(self, data):
+        result = data.as_result()
+        # when we shrink, we try out of bounds things, which can lead to the same
+        # data.buffer having multiple outcomes. eg data.buffer=b'' is Status.OVERRUN
+        # in normal circumstances, but a data with
+        # ir_nodes=[integer -5 {min_value: 0, max_value: 10}] will also have
+        # data.buffer=b'' but will be Status.INVALID instead. We do not want to
+        # change the cached value to INVALID in this case.
+        #
+        # We handle this specially for the ir cache by keying off the misaligned node
+        # as well, but we cannot do the same for buffers as we do not know ahead of
+        # time what buffer a node maps to. I think it's largely fine that we don't
+        # write to the buffer cache here as we move more things to the ir cache.
+        if data.invalid_at is None:
+            self.__data_cache[data.buffer] = result
+        key = self._cache_key_ir(data=data)
+        self.__data_cache_ir[key] = result
+
+    def cached_test_function_ir(self, nodes):
+        key = self._cache_key_ir(nodes=nodes)
+        try:
+            return self.__data_cache_ir[key]
+        except KeyError:
+            pass
+
+        try:
+            trial_data = self.new_conjecture_data_ir(nodes)
+            self.tree.simulate_test_function(trial_data)
+        except PreviouslyUnseenBehaviour:
+            pass
+        else:
+            trial_data.freeze()
+            key = self._cache_key_ir(data=trial_data)
+            try:
+                return self.__data_cache_ir[key]
+            except KeyError:
+                pass
+
+        data = self.new_conjecture_data_ir(nodes)
+        # note that calling test_function caches `data` for us, for both an ir
+        # tree key and a buffer key.
+        self.test_function(data)
+        return data.as_result()
 
     def test_function(self, data):
         if self.__pending_call_explanation is not None:
@@ -274,7 +337,7 @@ class ConjectureRunner:
                     ),
                 }
                 self.stats_per_test_case.append(call_stats)
-                self.__data_cache[data.buffer] = data.as_result()
+                self._cache(data)
 
         self.debug_data(data)
 
@@ -321,8 +384,9 @@ class ConjectureRunner:
 
                 # drive the ir tree through the test function to convert it
                 # to a buffer
-                data = self.ir_tree_to_data(data.examples.ir_tree_nodes)
-                self.__data_cache[data.buffer] = data.as_result()
+                data = ConjectureData.for_ir_tree(data.examples.ir_tree_nodes)
+                self.__stoppable_test_function(data)
+                self._cache(data)
 
             key = data.interesting_origin
             changed = False
@@ -983,6 +1047,18 @@ class ConjectureRunner:
             self.shrink_interesting_examples()
         self.exit_with(ExitReason.finished)
 
+    def new_conjecture_data_ir(self, ir_tree_prefix, *, observer=None):
+        provider = (
+            HypothesisProvider if self._switch_to_hypothesis_provider else self.provider
+        )
+        observer = observer or self.tree.new_observer()
+        if self.settings.backend != "hypothesis":
+            observer = DataObserver()
+
+        return ConjectureData.for_ir_tree(
+            ir_tree_prefix, observer=observer, provider=provider
+        )
+
     def new_conjecture_data(self, prefix, max_length=BUFFER_SIZE, observer=None):
         provider = (
             HypothesisProvider if self._switch_to_hypothesis_provider else self.provider
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py
index 04bbe079a3..dc1b3397f0 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py
@@ -381,9 +381,31 @@ class Shrinker:
         test function."""
         return self.engine.call_count
 
+    def check_calls(self):
+        if self.calls - self.calls_at_last_shrink >= self.max_stall:
+            raise StopShrinking
+
+    def cached_test_function_ir(self, tree):
+        result = self.engine.cached_test_function_ir(tree)
+        self.incorporate_test_data(result)
+        self.check_calls()
+        return result
+
     def consider_new_tree(self, tree):
-        data = self.engine.ir_tree_to_data(tree)
-        return self.consider_new_buffer(data.buffer)
+        tree = tree[: len(self.nodes)]
+
+        def startswith(t1, t2):
+            return t1[: len(t2)] == t2
+
+        if startswith(tree, self.nodes):
+            return True
+
+        if startswith(self.nodes, tree):
+            return False
+
+        previous = self.shrink_target
+        self.cached_test_function_ir(tree)
+        return previous is not self.shrink_target
 
     def consider_new_buffer(self, buffer):
         """Returns True if after running this buffer the result would be
@@ -430,12 +452,10 @@ class Shrinker:
         that the result is either an Overrun object (if the buffer is
         too short to be a valid test case) or a ConjectureData object
         with status >= INVALID that would result from running this buffer."""
-
         buffer = bytes(buffer)
         result = self.engine.cached_test_function(buffer, extend=self.__extend)
         self.incorporate_test_data(result)
-        if self.calls - self.calls_at_last_shrink >= self.max_stall:
-            raise StopShrinking
+        self.check_calls()
         return result
 
     def debug(self, msg):
diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py
index 5040febc7d..659ee4dc34 100644
--- a/contrib/python/hypothesis/py3/hypothesis/version.py
+++ b/contrib/python/hypothesis/py3/hypothesis/version.py
@@ -8,5 +8,5 @@
 # v. 2.0. If a copy of the MPL was not distributed with this file, You can
 # obtain one at https://mozilla.org/MPL/2.0/.
 
-__version_info__ = (6, 100, 5)
+__version_info__ = (6, 100, 6)
 __version__ = ".".join(map(str, __version_info__))
diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make
index b5f51bc88f..a54ba569eb 100644
--- a/contrib/python/hypothesis/py3/ya.make
+++ b/contrib/python/hypothesis/py3/ya.make
@@ -2,7 +2,7 @@
 
 PY3_LIBRARY()
 
-VERSION(6.100.5)
+VERSION(6.100.6)
 
 LICENSE(MPL-2.0)
author	robot-piglet <robot-piglet@yandex-team.com>	2024-05-25 20:04:50 +0300
committer	robot-piglet <robot-piglet@yandex-team.com>	2024-05-25 20:20:36 +0300
commit	c981104c914df8a13dd4fc7393a2c83b992d4f19 (patch)
tree	0acb283ab513417ce1a89e5620a9fdc0264bbe4f /contrib/python/hypothesis
parent	583af11cf703cccf01c0139eca58bef0d745d9d4 (diff)
download	ydb-c981104c914df8a13dd4fc7393a2c83b992d4f19.tar.gz