author | robot-piglet <robot-piglet@yandex-team.com> | 2024-06-13 11:00:53 +0300
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-06-13 11:13:03 +0300
commit | 794883bf8e7f044ecf319b0dac8b773a8db1383f (patch)
tree | 500684bfb727054a50cf29f67c611616c8a66861 /contrib
parent | f95df48ba1824e3a51c19c15499eaa71905260aa (diff)
download | ydb-794883bf8e7f044ecf319b0dac8b773a8db1383f.tar.gz
Intermediate changes
Diffstat (limited to 'contrib')
14 files changed, 534 insertions, 823 deletions
diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA index fa62bb86a3..0d4287365b 100644 --- a/contrib/python/hypothesis/py3/.dist-info/METADATA +++ b/contrib/python/hypothesis/py3/.dist-info/METADATA @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: hypothesis -Version: 6.102.6 +Version: 6.103.0 Summary: A library for property-based testing Home-page: https://hypothesis.works Author: David R. MacIver and Zac Hatfield-Dodds diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py index de6372d2fe..3fc6658e08 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/data.py @@ -779,10 +779,6 @@ class Blocks: """Equivalent to self[i].end.""" return self.endpoints[i] - def bounds(self, i: int) -> Tuple[int, int]: - """Equivalent to self[i].bounds.""" - return (self.start(i), self.end(i)) - def all_bounds(self) -> Iterable[Tuple[int, int]]: """Equivalent to [(b.start, b.end) for b in self].""" prev = 0 @@ -970,7 +966,12 @@ class IRNode: was_forced: bool = attr.ib() index: Optional[int] = attr.ib(default=None) - def copy(self, *, with_value: IRType) -> "IRNode": + def copy( + self, + *, + with_value: Optional[IRType] = None, + with_kwargs: Optional[IRKWargsType] = None, + ) -> "IRNode": # we may want to allow this combination in the future, but for now it's # a footgun. assert not self.was_forced, "modifying a forced node doesn't make sense" @@ -979,8 +980,8 @@ class IRNode: # after copying. return IRNode( ir_type=self.ir_type, - value=with_value, - kwargs=self.kwargs, + value=self.value if with_value is None else with_value, + kwargs=self.kwargs if with_kwargs is None else with_kwargs, was_forced=self.was_forced, ) @@ -1071,9 +1072,17 @@ class IRNode: def ir_value_permitted(value, ir_type, kwargs): if ir_type == "integer": - if kwargs["min_value"] is not None and value < kwargs["min_value"]: + min_value = kwargs["min_value"] + max_value = kwargs["max_value"] + shrink_towards = kwargs["shrink_towards"] + if min_value is not None and value < min_value: return False - if kwargs["max_value"] is not None and value > kwargs["max_value"]: + if max_value is not None and value > max_value: + return False + + if (max_value is None or min_value is None) and ( + value - shrink_towards + ).bit_length() >= 128: return False return True @@ -1144,14 +1153,22 @@ class ConjectureResult: status: Status = attr.ib() interesting_origin: Optional[InterestingOrigin] = attr.ib() buffer: bytes = attr.ib() - blocks: Blocks = attr.ib() + # some ConjectureDatas pass through the ir and some pass through buffers. + # the ir does not drive its result through the buffer, which means blocks/examples + # may differ (I think for forced values?) even when the buffer is the same. + # I don't *think* anything was relying on anything but .buffer for result equality, + # though that assumption may be leaning on flakiness detection invariants. + # + # If we consider blocks or examples in equality checks, multiple semantically equal + # results get stored in e.g. the pareto front. 
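For illustration, a minimal sketch of what the `eq=False` change above buys, using the attrs library Hypothesis already relies on (the `Result` class and the field values here are made up, not Hypothesis's real ones):

```python
import attr


@attr.s(frozen=True)
class Result:
    buffer: bytes = attr.ib()
    # excluded from __eq__, mirroring the eq=False on blocks/examples above
    blocks: tuple = attr.ib(eq=False)


# same buffer, different block layout: the two results now compare equal,
# so containers such as the pareto front keep only one of them
a = Result(buffer=b"\x00\x02", blocks=((0, 1), (1, 2)))
b = Result(buffer=b"\x00\x02", blocks=((0, 2),))
assert a == b
```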
+ blocks: Blocks = attr.ib(eq=False) output: str = attr.ib() extra_information: Optional[ExtraInformation] = attr.ib() has_discards: bool = attr.ib() target_observations: TargetObservations = attr.ib() tags: FrozenSet[StructuralCoverageTag] = attr.ib() forced_indices: FrozenSet[int] = attr.ib(repr=False) - examples: Examples = attr.ib(repr=False) + examples: Examples = attr.ib(repr=False, eq=False) arg_slices: Set[Tuple[int, int]] = attr.ib(repr=False) slice_comments: Dict[Tuple[int, int], str] = attr.ib(repr=False) invalid_at: Optional[InvalidAt] = attr.ib(repr=False) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py index f25c0833e8..89a8c7829c 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/engine.py @@ -342,7 +342,7 @@ class ConjectureRunner: for node in nodes + extension ) - def _cache(self, data: Union[ConjectureData, ConjectureResult]) -> None: + def _cache(self, data: ConjectureData) -> None: result = data.as_result() # when we shrink, we try out of bounds things, which can lead to the same # data.buffer having multiple outcomes. eg data.buffer=b'' is Status.OVERRUN @@ -357,8 +357,25 @@ class ConjectureRunner: # write to the buffer cache here as we move more things to the ir cache. if data.invalid_at is None: self.__data_cache[data.buffer] = result - key = self._cache_key_ir(data=data) - self.__data_cache_ir[key] = result + + # interesting buffer-based data can mislead the shrinker if we cache them. + # + # @given(st.integers()) + # def f(n): + # assert n < 100 + # + # may generate two counterexamples, n=101 and n=m > 101, in that order, + # where the buffer corresponding to n is large due to eg failed probes. + # We shrink m and eventually try n=101, but it is cached to a large buffer + # and so the best we can do is n=102, a non-ideal shrink. + # + # We can cache ir-based buffers fine, which always correspond to the + # smallest buffer via forced=. The overhead here is small because almost + # all interesting data are ir-based via the shrinker (and that overhead + # will tend towards zero as we move generation to the ir). + if data.ir_tree_nodes is not None or data.status < Status.INTERESTING: + key = self._cache_key_ir(data=data) + self.__data_cache_ir[key] = result def cached_test_function_ir( self, nodes: List[IRNode] @@ -1218,7 +1235,7 @@ class ConjectureRunner: self.interesting_examples.values(), key=lambda d: sort_key(d.buffer) ): assert prev_data.status == Status.INTERESTING - data = self.new_conjecture_data_for_buffer(prev_data.buffer) + data = self.new_conjecture_data_ir(prev_data.examples.ir_tree_nodes) self.test_function(data) if data.status != Status.INTERESTING: self.exit_with(ExitReason.flaky) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py index 4e2ca9a49b..a004338372 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinker.py @@ -8,7 +8,6 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
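A condensed sketch of the caching rule introduced in the engine.py hunk above (names are simplified; the `Status` ordering mirrors Hypothesis's internal enum):

```python
from enum import IntEnum


class Status(IntEnum):  # same ordering as Hypothesis's internal Status enum
    OVERRUN = 0
    INVALID = 1
    VALID = 2
    INTERESTING = 3


def should_cache_in_ir_cache(came_from_ir: bool, status: Status) -> bool:
    # ir-based runs always correspond to the minimal buffer for their choices,
    # so they are safe to cache unconditionally. Buffer-based runs are cached
    # only when they are *not* interesting, so a bloated buffer for a
    # counterexample can never shadow a smaller ir-equivalent one while shrinking.
    return came_from_ir or status < Status.INTERESTING


assert should_cache_in_ir_cache(True, Status.INTERESTING)
assert not should_cache_in_ir_cache(False, Status.INTERESTING)
assert should_cache_in_ir_cache(False, Status.VALID)
```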
-import math from collections import defaultdict from typing import TYPE_CHECKING, Callable, Dict, Optional, Union @@ -25,17 +24,18 @@ from hypothesis.internal.conjecture.data import ( ConjectureResult, Status, bits_to_bytes, + ir_value_equal, + ir_value_key, ir_value_permitted, ) -from hypothesis.internal.conjecture.dfa import ConcreteDFA -from hypothesis.internal.conjecture.floats import is_simple -from hypothesis.internal.conjecture.junkdrawer import ( - binary_search, - find_integer, - replace_all, +from hypothesis.internal.conjecture.junkdrawer import find_integer, replace_all +from hypothesis.internal.conjecture.shrinking import ( + Bytes, + Float, + Integer, + Ordering, + String, ) -from hypothesis.internal.conjecture.shrinking import Float, Integer, Lexical, Ordering -from hypothesis.internal.conjecture.shrinking.learned_dfas import SHRINKING_DFAS if TYPE_CHECKING: from hypothesis.internal.conjecture.engine import ConjectureRunner @@ -311,10 +311,6 @@ class Shrinker: self.passes_by_name: Dict[str, ShrinkPass] = {} - # Extra DFAs that may be installed. This is used solely for - # testing and learning purposes. - self.extra_dfas: Dict[str, ConcreteDFA] = {} - # Because the shrinker is also used to `pareto_optimise` in the target phase, # we sometimes want to allow extending buffers instead of aborting at the end. if in_target_phase: @@ -360,24 +356,6 @@ class Shrinker: self.add_new_pass(name) return self.passes_by_name[name] - @derived_value # type: ignore - def match_cache(self): - return {} - - def matching_regions(self, dfa): - """Returns all pairs (u, v) such that self.buffer[u:v] is accepted - by this DFA.""" - - try: - return self.match_cache[dfa] - except KeyError: - pass - results = dfa.all_matching_regions(self.buffer) - results.sort(key=lambda t: (t[1] - t[0], t[1])) - assert all(dfa.matches(self.buffer[u:v]) for u, v in results) - self.match_cache[dfa] = results - return results - @property def calls(self): """Return the number of calls that have been made to the underlying @@ -393,6 +371,12 @@ class Shrinker: raise StopShrinking def cached_test_function_ir(self, tree): + # sometimes our shrinking passes try obviously invalid things. We handle + # discarding them in one place here. + for node in tree: + if not ir_value_permitted(node.value, node.ir_type, node.kwargs): + return None + result = self.engine.cached_test_function_ir(tree) self.incorporate_test_data(result) self.check_calls() @@ -492,6 +476,15 @@ class Shrinker: self.explain() return + # There are multiple buffers that represent the same counterexample, eg + # n=2 (from the 16 bit integer bucket) and n=2 (from the 32 bit integer + # bucket). Before we start shrinking, we need to normalize to the minimal + # such buffer, else a buffer-smaller but ir-larger value may be chosen + # as the minimal counterexample. 
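The `ir_value_permitted` pre-check added to `cached_test_function_ir` above can be pictured with a stripped-down stand-in; only the integer-bounds branch is sketched here, while the real function also covers floats, strings, booleans and bytes:

```python
def integer_permitted(value, *, min_value=None, max_value=None, shrink_towards=0):
    # reject values outside the declared bounds, and cap unbounded draws at
    # 128 bits around shrink_towards so shrink attempts cannot wander
    # arbitrarily far from the origin
    if min_value is not None and value < min_value:
        return False
    if max_value is not None and value > max_value:
        return False
    if (min_value is None or max_value is None) and (
        value - shrink_towards
    ).bit_length() >= 128:
        return False
    return True


assert integer_permitted(5, min_value=0, max_value=10)
assert not integer_permitted(-1, min_value=0)
assert not integer_permitted(2**130)  # unbounded, but too far from shrink_towards
```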
+ data = self.engine.new_conjecture_data_ir(self.nodes) + self.engine.test_function(data) + self.incorporate_test_data(data.as_result()) + try: self.greedy_shrink() except StopShrinking: @@ -675,22 +668,18 @@ class Shrinker: """ self.fixate_shrink_passes( [ - block_program("X" * 5), - block_program("X" * 4), - block_program("X" * 3), - block_program("X" * 2), - block_program("X" * 1), + node_program("X" * 5), + node_program("X" * 4), + node_program("X" * 3), + node_program("X" * 2), + node_program("X" * 1), "pass_to_descendant", "reorder_examples", - "minimize_floats", - "minimize_duplicated_blocks", - block_program("-XX"), - "minimize_individual_blocks", - block_program("--X"), + "minimize_duplicated_nodes", + "minimize_individual_nodes", "redistribute_block_pairs", "lower_blocks_together", ] - + [dfa_replacement(n) for n in SHRINKING_DFAS] ) @derived_value # type: ignore @@ -809,9 +798,6 @@ class Shrinker: def examples(self): return self.shrink_target.examples - def all_block_bounds(self): - return self.shrink_target.blocks.all_bounds() - @derived_value # type: ignore def examples_by_label(self): """An index of all examples grouped by their label, with @@ -881,9 +867,9 @@ class Shrinker: + self.nodes[ancestor.ir_end :] ) - def lower_common_block_offset(self): + def lower_common_node_offset(self): """Sometimes we find ourselves in a situation where changes to one part - of the byte stream unlock changes to other parts. Sometimes this is + of the choice sequence unlock changes to other parts. Sometimes this is good, but sometimes this can cause us to exhibit exponential slow downs! @@ -900,9 +886,9 @@ class Shrinker: This will take us O(m) iterations to complete, which is exponential in the data size, as we gradually zig zag our way towards zero. - This can only happen if we're failing to reduce the size of the byte - stream: The number of iterations that reduce the length of the byte - stream is bounded by that length. + This can only happen if we're failing to reduce the size of the choice + sequence: The number of iterations that reduce the length of the choice + sequence is bounded by that length. So what we do is this: We keep track of which blocks are changing, and then if there's some non-zero common offset to them we try and minimize @@ -914,91 +900,83 @@ class Shrinker: but it fails fast when it doesn't work and gets us out of a really nastily slow case when it does. 
""" - if len(self.__changed_blocks) <= 1: + if len(self.__changed_nodes) <= 1: return - current = self.shrink_target - - blocked = [current.buffer[u:v] for u, v in self.all_block_bounds()] - - changed = [ - i - for i in sorted(self.__changed_blocks) - if not self.shrink_target.blocks[i].trivial - ] + changed = [] + for i in sorted(self.__changed_nodes): + node = self.nodes[i] + if node.trivial or node.ir_type != "integer": + continue + changed.append(node) if not changed: return - ints = [int_from_bytes(blocked[i]) for i in changed] + ints = [abs(node.value - node.kwargs["shrink_towards"]) for node in changed] offset = min(ints) assert offset > 0 for i in range(len(ints)): ints[i] -= offset - def reoffset(o): - new_blocks = list(blocked) - for i, v in zip(changed, ints): - new_blocks[i] = int_to_bytes(v + o, len(blocked[i])) - return self.incorporate_new_buffer(b"".join(new_blocks)) + st = self.shrink_target + + def offset_node(node, n): + return ( + node.index, + node.index + 1, + [node.copy(with_value=node.kwargs["shrink_towards"] + n)], + ) - Integer.shrink(offset, reoffset) + def consider(n, sign): + return self.consider_new_tree( + replace_all( + st.examples.ir_tree_nodes, + [ + offset_node(node, sign * (n + v)) + for node, v in zip(changed, ints) + ], + ) + ) + + # shrink from both sides + Integer.shrink(offset, lambda n: consider(n, 1)) + Integer.shrink(offset, lambda n: consider(n, -1)) self.clear_change_tracking() def clear_change_tracking(self): self.__last_checked_changed_at = self.shrink_target - self.__all_changed_blocks = set() + self.__all_changed_nodes = set() def mark_changed(self, i): - self.__changed_blocks.add(i) + self.__changed_nodes.add(i) @property - def __changed_blocks(self): - if self.__last_checked_changed_at is not self.shrink_target: - prev_target = self.__last_checked_changed_at - new_target = self.shrink_target - assert prev_target is not new_target - prev = prev_target.buffer - new = new_target.buffer - assert sort_key(new) < sort_key(prev) - - if ( - len(new_target.blocks) != len(prev_target.blocks) - or new_target.blocks.endpoints != prev_target.blocks.endpoints - ): - self.__all_changed_blocks = set() - else: - blocks = new_target.blocks - - # Index of last block whose contents have been modified, found - # by checking if the tail past this point has been modified. - last_changed = binary_search( - 0, - len(blocks), - lambda i: prev[blocks.start(i) :] != new[blocks.start(i) :], - ) - - # Index of the first block whose contents have been changed, - # because we know that this predicate is true for zero (because - # the prefix from the start is empty), so the result must be True - # for the bytes from the start of this block and False for the - # bytes from the end, hence the change is in this block. - first_changed = binary_search( - 0, - len(blocks), - lambda i: prev[: blocks.start(i)] == new[: blocks.start(i)], - ) + def __changed_nodes(self): + if self.__last_checked_changed_at is self.shrink_target: + return self.__all_changed_nodes + + prev_target = self.__last_checked_changed_at + new_target = self.shrink_target + assert prev_target is not new_target + prev_nodes = prev_target.examples.ir_tree_nodes + new_nodes = new_target.examples.ir_tree_nodes + assert sort_key(new_target.buffer) < sort_key(prev_target.buffer) + + if len(prev_nodes) != len(new_nodes) or any( + n1.ir_type != n2.ir_type for n1, n2 in zip(prev_nodes, new_nodes) + ): + # should we check kwargs are equal as well? 
+ self.__all_changed_nodes = set() + else: + assert len(prev_nodes) == len(new_nodes) + for i, (n1, n2) in enumerate(zip(prev_nodes, new_nodes)): + assert n1.ir_type == n2.ir_type + if not ir_value_equal(n1.ir_type, n1.value, n2.value): + self.__all_changed_nodes.add(i) - # Between these two changed regions we now do a linear scan to - # check if any specific block values have changed. - for i in range(first_changed, last_changed + 1): - u, v = blocks.bounds(i) - if i not in self.__all_changed_blocks and prev[u:v] != new[u:v]: - self.__all_changed_blocks.add(i) - self.__last_checked_changed_at = new_target - assert self.__last_checked_changed_at is self.shrink_target - return self.__all_changed_blocks + return self.__all_changed_nodes def update_shrink_target(self, new_target): assert isinstance(new_target, ConjectureResult) @@ -1016,94 +994,141 @@ class Shrinker: self.shrink_target = new_target self.__derived_values = {} - def try_shrinking_blocks(self, blocks, b): - """Attempts to replace each block in the blocks list with b. Returns + def try_shrinking_nodes(self, nodes, n): + """Attempts to replace each node in the nodes list with n. Returns True if it succeeded (which may include some additional modifications to shrink_target). - In current usage it is expected that each of the blocks currently have - the same value, although this is not essential. Note that b must be - < the block at min(blocks) or this is not a valid shrink. + In current usage it is expected that each of the nodes currently have + the same value and ir type, although this is not essential. Note that + n must be < the node at min(nodes) or this is not a valid shrink. This method will attempt to do some small amount of work to delete data - that occurs after the end of the blocks. This is useful for cases where - there is some size dependency on the value of a block. + that occurs after the end of the nodes. This is useful for cases where + there is some size dependency on the value of a node. """ - initial_attempt = bytearray(self.shrink_target.buffer) - for i, block in enumerate(blocks): - if block >= len(self.blocks): - blocks = blocks[:i] - break - u, v = self.blocks[block].bounds - n = min(self.blocks[block].length, len(b)) - initial_attempt[v - n : v] = b[-n:] + # If the length of the shrink target has changed from under us such that + # the indices are out of bounds, give up on the replacement. + # TODO_BETTER_SHRINK: we probably want to narrow down the root cause here at some point. + if any(node.index >= len(self.nodes) for node in nodes): + return # pragma: no cover - if not blocks: - return False + initial_attempt = replace_all( + self.nodes, + [(node.index, node.index + 1, [node.copy(with_value=n)]) for node in nodes], + ) - start = self.shrink_target.blocks[blocks[0]].start - end = self.shrink_target.blocks[blocks[-1]].end + attempt = self.cached_test_function_ir(initial_attempt) - initial_data = self.cached_test_function(initial_attempt) + if attempt is None: + return False - if initial_data is self.shrink_target: - self.lower_common_block_offset() + if attempt is self.shrink_target: + # if the initial shrink was a success, try lowering offsets. + self.lower_common_node_offset() return True # If this produced something completely invalid we ditch it # here rather than trying to persevere. - if initial_data.status < Status.VALID: + if attempt.status is Status.OVERRUN: return False - # We've shrunk inside our group of blocks, so we have no way to - # continue. 
(This only happens when shrinking more than one block at - # a time). - if len(initial_data.buffer) < v: + if attempt.status is Status.INVALID and attempt.invalid_at is None: return False - lost_data = len(self.shrink_target.buffer) - len(initial_data.buffer) + if attempt.status is Status.INVALID and attempt.invalid_at is not None: + # we're invalid due to a misalignment in the tree. We'll try to fix + # a very specific type of misalignment here: where we have a node of + # {"size": n} and tried to draw the same node, but with {"size": m < n}. + # This can occur with eg + # + # n = data.draw_integer() + # s = data.draw_string(min_size=n) + # + # where we try lowering n, resulting in the test_function drawing a lower + # min_size than our attempt had for the draw_string node. + # + # We'll now try realigning this tree by: + # * replacing the kwargs in our attempt with what test_function tried + # to draw in practice + # * truncating the value of that node to match min_size + # + # This helps in the specific case of drawing a value and then drawing + # a collection of that size...and not much else. In practice this + # helps because this antipattern is fairly common. + + # TODO we'll probably want to apply the same trick as in the valid + # case of this function of preserving from the right instead of + # preserving from the left. see test_can_shrink_variable_string_draws. + + node = self.nodes[len(attempt.examples.ir_tree_nodes)] + (attempt_ir_type, attempt_kwargs, _attempt_forced) = attempt.invalid_at + if node.ir_type != attempt_ir_type: + return False + if node.was_forced: + return False # pragma: no cover - # If this did not in fact cause the data size to shrink we - # bail here because it's not worth trying to delete stuff from - # the remainder. - if lost_data <= 0: + if node.ir_type == "string": + # if the size *increased*, we would have to guess what to pad with + # in order to try fixing up this attempt. Just give up. + if node.kwargs["min_size"] <= attempt_kwargs["min_size"]: + return False + # the size decreased in our attempt. Try again, but replace with + # the min_size that we would have gotten, and truncate the value + # to that size by removing any elements past min_size. + return self.consider_new_tree( + initial_attempt[: node.index] + + [ + initial_attempt[node.index].copy( + with_kwargs=attempt_kwargs, + with_value=initial_attempt[node.index].value[ + : attempt_kwargs["min_size"] + ], + ) + ] + + initial_attempt[node.index :] + ) + if node.ir_type == "bytes": + if node.kwargs["size"] <= attempt_kwargs["size"]: + return False + return self.consider_new_tree( + initial_attempt[: node.index] + + [ + initial_attempt[node.index].copy( + with_kwargs=attempt_kwargs, + with_value=initial_attempt[node.index].value[ + : attempt_kwargs["size"] + ], + ) + ] + + initial_attempt[node.index :] + ) + + lost_nodes = len(self.nodes) - len(attempt.examples.ir_tree_nodes) + if lost_nodes <= 0: return False + start = nodes[0].index + end = nodes[-1].index + 1 # We now look for contiguous regions to delete that might help fix up # this failed shrink. We only look for contiguous regions of the right # lengths because doing anything more than that starts to get very # expensive. See minimize_individual_blocks for where we # try to be more aggressive. 
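The string/bytes fix-up above amounts to "truncate the recorded value to the smaller size the test function actually asked for". A hedged, stand-alone sketch of that repair step, with plain arguments standing in for nodes and kwargs:

```python
def repair_string_node(value, min_size, attempt_min_size):
    # the attempt drew this node with a smaller min_size than we recorded;
    # truncate the recorded value to fit and adopt the attempt's size
    if min_size <= attempt_min_size:
        return None  # size did not shrink; padding would be guesswork, give up
    return value[:attempt_min_size], attempt_min_size


# n = data.draw_integer(); s = data.draw_string(min_size=n)
# after lowering n from 5 to 3, the recorded 5-character draw is cut to 3
assert repair_string_node("abcde", 5, 3) == ("abc", 3)
assert repair_string_node("abc", 3, 3) is None
```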
- regions_to_delete = {(end, end + lost_data)} - - for j in (blocks[-1] + 1, blocks[-1] + 2): - if j >= min(len(initial_data.blocks), len(self.blocks)): - continue - # We look for a block very shortly after the last one that has - # lost some of its size, and try to delete from the beginning so - # that it retains the same integer value. This is a bit of a hyper - # specific trick designed to make our integers() strategy shrink - # well. - r1, s1 = self.shrink_target.blocks[j].bounds - r2, s2 = initial_data.blocks[j].bounds - lost = (s1 - r1) - (s2 - r2) - # Apparently a coverage bug? An assert False in the body of this - # will reliably fail, but it shows up as uncovered. - if lost <= 0 or r1 != r2: # pragma: no cover - continue - regions_to_delete.add((r1, r1 + lost)) + regions_to_delete = {(end, end + lost_nodes)} - for ex in self.shrink_target.examples: - if ex.start > start: + for ex in self.examples: + if ex.ir_start > start: continue - if ex.end <= end: + if ex.ir_end <= end: continue - replacement = initial_data.examples[ex.index] - - in_original = [c for c in ex.children if c.start >= end] + if ex.index >= len(attempt.examples): + continue # pragma: no cover - in_replaced = [c for c in replacement.children if c.start >= end] + replacement = attempt.examples[ex.index] + in_original = [c for c in ex.children if c.ir_start >= end] + in_replaced = [c for c in replacement.children if c.ir_start >= end] if len(in_replaced) >= len(in_original) or not in_replaced: continue @@ -1115,14 +1140,14 @@ class Shrinker: # important, so we try to arrange it so that it retains its # rightmost children instead of its leftmost. regions_to_delete.add( - (in_original[0].start, in_original[-len(in_replaced)].start) + (in_original[0].ir_start, in_original[-len(in_replaced)].ir_start) ) for u, v in sorted(regions_to_delete, key=lambda x: x[1] - x[0], reverse=True): - try_with_deleted = bytearray(initial_attempt) - del try_with_deleted[u:v] - if self.incorporate_new_buffer(try_with_deleted): + try_with_deleted = initial_attempt[:u] + initial_attempt[v:] + if self.consider_new_tree(try_with_deleted): return True + return False def remove_discarded(self): @@ -1168,26 +1193,17 @@ class Shrinker: return True @derived_value # type: ignore - def blocks_by_non_zero_suffix(self): - """Returns a list of blocks grouped by their non-zero suffix, - as a list of (suffix, indices) pairs, skipping all groupings - where there is only one index. - - This is only used for the arguments of minimize_duplicated_blocks. - """ + def duplicated_nodes(self): + """Returns a list of nodes grouped (ir_type, value).""" duplicates = defaultdict(list) - for block in self.blocks: - duplicates[non_zero_suffix(self.buffer[block.start : block.end])].append( - block.index + for node in self.nodes: + duplicates[(node.ir_type, ir_value_key(node.ir_type, node.value))].append( + node ) - return duplicates - - @derived_value # type: ignore - def duplicated_block_suffixes(self): - return sorted(self.blocks_by_non_zero_suffix) + return list(duplicates.values()) @defines_shrink_pass() - def minimize_duplicated_blocks(self, chooser): + def minimize_duplicated_nodes(self, chooser): """Find blocks that have been duplicated in multiple places and attempt to minimize all of the duplicates simultaneously. @@ -1207,57 +1223,17 @@ class Shrinker: of the blocks doesn't matter very much because it allows us to replace more values at once. 
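The `duplicated_nodes` index above groups draws by `(ir_type, canonical value)` so that equal values drawn in different places can be lowered together. A stand-alone sketch of the grouping, with tuples standing in for `IRNode` objects and the value itself standing in for `ir_value_key`:

```python
from collections import defaultdict


def duplicated_groups(nodes):
    # nodes are (ir_type, value) pairs; equal draws are grouped so
    # minimize_duplicated_nodes can shrink them simultaneously
    groups = defaultdict(list)
    for ir_type, value in nodes:
        groups[(ir_type, value)].append((ir_type, value))
    return [group for group in groups.values() if len(group) > 1]


nodes = [("integer", 7), ("string", "abc"), ("integer", 7), ("integer", 3)]
assert duplicated_groups(nodes) == [[("integer", 7), ("integer", 7)]]
```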
""" - block = chooser.choose(self.duplicated_block_suffixes) - targets = self.blocks_by_non_zero_suffix[block] - if len(targets) <= 1: + nodes = chooser.choose(self.duplicated_nodes) + if len(nodes) <= 1: return - Lexical.shrink( - block, - lambda b: self.try_shrinking_blocks(targets, b), - ) - @defines_shrink_pass() - def minimize_floats(self, chooser): - """Some shrinks that we employ that only really make sense for our - specific floating point encoding that are hard to discover from any - sort of reasonable general principle. This allows us to make - transformations like replacing a NaN with an Infinity or replacing - a float with its nearest integers that we would otherwise not be - able to due to them requiring very specific transformations of - the bit sequence. - - We only apply these transformations to blocks that "look like" our - standard float encodings because they are only really meaningful - there. The logic for detecting this is reasonably precise, but - it doesn't matter if it's wrong. These are always valid - transformations to make, they just don't necessarily correspond to - anything particularly meaningful for non-float values. - """ - - node = chooser.choose( - self.nodes, - lambda node: node.ir_type == "float" and not node.trivial - # avoid shrinking integer-valued floats. In our current ordering, these - # are already simpler than all other floats, so it's better to shrink - # them in other passes. - and not is_simple(node.value), - ) + # no point in lowering nodes together if one is already trivial. + # TODO_BETTER_SHRINK: we could potentially just drop the trivial nodes + # here and carry on with nontrivial ones? + if any(node.trivial for node in nodes): + return - # the Float shrinker was only built to handle positive floats. We'll - # shrink the positive portion and reapply the sign after, which is - # equivalent to this shrinker's previous behavior. We'll want to refactor - # Float to handle negative floats natively in the future. (likely a pure - # code quality change, with no shrinking impact.) - sign = math.copysign(1.0, node.value) - Float.shrink( - abs(node.value), - lambda val: self.consider_new_tree( - self.nodes[: node.index] - + [node.copy(with_value=sign * val)] - + self.nodes[node.index + 1 :] - ), - node=node, - ) + self.minimize_nodes(nodes) @defines_shrink_pass() def redistribute_block_pairs(self, chooser): @@ -1303,10 +1279,6 @@ class Shrinker: node_value = m - k next_node_value = n + k - if (not ir_value_permitted(node_value, "integer", node.kwargs)) or ( - not ir_value_permitted(next_node_value, "integer", next_node.kwargs) - ): - return False return self.consider_new_tree( self.nodes[: node.index] @@ -1351,9 +1323,68 @@ class Shrinker: find_integer(lower) + def minimize_nodes(self, nodes): + ir_type = nodes[0].ir_type + value = nodes[0].value + # unlike ir_type and value, kwargs are *not* guaranteed to be equal among all + # passed nodes. We arbitrarily use the kwargs of the first node. I think + # this is unsound (= leads to us trying shrinks that could not have been + # generated), but those get discarded at test-time, and this enables useful + # slips where kwargs are not equal but are close enough that doing the + # same operation on both basically just works. + kwargs = nodes[0].kwargs + assert all( + node.ir_type == ir_type and ir_value_equal(ir_type, node.value, value) + for node in nodes + ) + + if ir_type == "integer": + shrink_towards = kwargs["shrink_towards"] + # try shrinking from both sides towards shrink_towards. 
+ # we're starting from n = abs(shrink_towards - value). Because the + # shrinker will not check its starting value, we need to try + # shrinking to n first. + self.try_shrinking_nodes(nodes, abs(shrink_towards - value)) + Integer.shrink( + abs(shrink_towards - value), + lambda n: self.try_shrinking_nodes(nodes, shrink_towards + n), + ) + Integer.shrink( + abs(shrink_towards - value), + lambda n: self.try_shrinking_nodes(nodes, shrink_towards - n), + ) + elif ir_type == "float": + self.try_shrinking_nodes(nodes, abs(value)) + Float.shrink( + abs(value), + lambda val: self.try_shrinking_nodes(nodes, val), + ) + Float.shrink( + abs(value), + lambda val: self.try_shrinking_nodes(nodes, -val), + ) + elif ir_type == "boolean": + # must be True, otherwise would be trivial and not selected. + assert value is True + # only one thing to try: false! + self.try_shrinking_nodes(nodes, False) + elif ir_type == "bytes": + Bytes.shrink( + value, + lambda val: self.try_shrinking_nodes(nodes, val), + ) + elif ir_type == "string": + String.shrink( + value, + lambda val: self.try_shrinking_nodes(nodes, val), + intervals=kwargs["intervals"], + ) + else: + raise NotImplementedError + @defines_shrink_pass() - def minimize_individual_blocks(self, chooser): - """Attempt to minimize each block in sequence. + def minimize_individual_nodes(self, chooser): + """Attempt to minimize each node in sequence. This is the pass that ensures that e.g. each integer we draw is a minimum value. So it's the part that guarantees that if we e.g. do @@ -1363,73 +1394,94 @@ class Shrinker: then in our shrunk example, x = 10 rather than say 97. - If we are unsuccessful at minimizing a block of interest we then + If we are unsuccessful at minimizing a node of interest we then check if that's because it's changing the size of the test case and, if so, we also make an attempt to delete parts of the test case to see if that fixes it. - We handle most of the common cases in try_shrinking_blocks which is + We handle most of the common cases in try_shrinking_nodes which is pretty good at clearing out large contiguous blocks of dead space, but it fails when there is data that has to stay in particular places in the list. """ - block = chooser.choose(self.blocks, lambda b: not b.trivial) + node = chooser.choose(self.nodes, lambda node: not node.trivial) + initial_target = self.shrink_target - initial = self.shrink_target - u, v = block.bounds - i = block.index - Lexical.shrink( - self.shrink_target.buffer[u:v], - lambda b: self.try_shrinking_blocks((i,), b), - ) + self.minimize_nodes([node]) + if self.shrink_target is not initial_target: + # the shrink target changed, so our shrink worked. Defer doing + # anything more intelligent until this shrink fails. + return - if self.shrink_target is not initial: + # the shrink failed. One particularly common case where minimizing a + # node can fail is the antipattern of drawing a size and then drawing a + # collection of that size, or more generally when there is a size + # dependency on some single node. We'll explicitly try and fix up this + # common case here: if decreasing an integer node by one would reduce + # the size of the generated input, we'll try deleting things after that + # node and see if the resulting attempt works. + + if node.ir_type != "integer": + # Only try this fixup logic on integer draws. Almost all size + # dependencies are on integer draws, and if it's not, it's doing + # something convoluted enough that it is unlikely to shrink well anyway. + # TODO: extent to floats? 
we probably currently fail on the following, + # albeit convoluted example: + # n = int(data.draw(st.floats())) + # s = data.draw(st.lists(st.integers(), min_size=n, max_size=n)) return lowered = ( - self.buffer[: block.start] - + int_to_bytes( - int_from_bytes(self.buffer[block.start : block.end]) - 1, block.length - ) - + self.buffer[block.end :] + self.nodes[: node.index] + + [node.copy(with_value=node.value - 1)] + + self.nodes[node.index + 1 :] ) - attempt = self.cached_test_function(lowered) + attempt = self.cached_test_function_ir(lowered) if ( - attempt.status < Status.VALID - or len(attempt.buffer) == len(self.buffer) - or len(attempt.buffer) == block.end + attempt is None + or attempt.status < Status.VALID + or len(attempt.examples.ir_tree_nodes) == len(self.nodes) + or len(attempt.examples.ir_tree_nodes) == node.index + 1 ): + # no point in trying our size-dependency-logic if our attempt at + # lowering the node resulted in: + # * an invalid conjecture data + # * the same number of nodes as before + # * no nodes beyond the lowered node (nothing to try to delete afterwards) return - # If it were then the lexical shrink should have worked and we could + # If it were then the original shrink should have worked and we could # never have got here. assert attempt is not self.shrink_target - @self.cached(block.index) - def first_example_after_block(): + @self.cached(node.index) + def first_example_after_node(): lo = 0 hi = len(self.examples) while lo + 1 < hi: mid = (lo + hi) // 2 ex = self.examples[mid] - if ex.start >= block.end: + if ex.ir_start >= node.index: hi = mid else: lo = mid return hi - ex = self.examples[ - chooser.choose( - range(first_example_after_block, len(self.examples)), - lambda i: self.examples[i].length > 0, - ) - ] - - u, v = block.bounds - - buf = bytearray(lowered) - del buf[ex.start : ex.end] - self.incorporate_new_buffer(buf) + # we try deleting both entire examples, and single nodes. + # If we wanted to get more aggressive, we could try deleting n + # consecutive nodes (that don't cross an example boundary) for say + # n <= 2 or n <= 3. + if chooser.choose([True, False]): + ex = self.examples[ + chooser.choose( + range(first_example_after_node, len(self.examples)), + lambda i: self.examples[i].ir_length > 0, + ) + ] + self.consider_new_tree(lowered[: ex.ir_start] + lowered[ex.ir_end :]) + else: + node = self.nodes[chooser.choose(range(node.index + 1, len(self.nodes)))] + self.consider_new_tree(lowered[: node.index] + lowered[node.index + 1 :]) @defines_shrink_pass() def reorder_examples(self, chooser): @@ -1480,45 +1532,36 @@ class Shrinker: key=lambda i: st.buffer[examples[i].start : examples[i].end], ) - def run_block_program(self, i, description, original, repeats=1): - """Block programs are a mini-DSL for block rewriting, defined as a sequence - of commands that can be run at some index into the blocks + def run_node_program(self, i, description, original, repeats=1): + """Node programs are a mini-DSL for node rewriting, defined as a sequence + of commands that can be run at some index into the nodes Commands are: - * "-", subtract one from this block. - * "X", delete this block - - If a command does not apply (currently only because it's - on a zero - block) the block will be silently skipped over. + * "X", delete this node - This method runs the block program in ``description`` at block index + This method runs the node program in ``description`` at node index ``i`` on the ConjectureData ``original``. 
If ``repeats > 1`` then it will attempt to approximate the results of running it that many times. Returns True if this successfully changes the underlying shrink target, else False. """ - if i + len(description) > len(original.blocks) or i < 0: + if i + len(description) > len(original.examples.ir_tree_nodes) or i < 0: return False - attempt = bytearray(original.buffer) + attempt = list(original.examples.ir_tree_nodes) for _ in range(repeats): - for k, d in reversed(list(enumerate(description))): + for k, command in reversed(list(enumerate(description))): j = i + k - u, v = original.blocks[j].bounds - if v > len(attempt): + if j >= len(attempt): return False - if d == "-": - value = int_from_bytes(attempt[u:v]) - if value == 0: - return False - else: - attempt[u:v] = int_to_bytes(value - 1, v - u) - elif d == "X": - del attempt[u:v] + + if command == "X": + del attempt[j] else: - raise NotImplementedError(f"Unrecognised command {d!r}") - return self.incorporate_new_buffer(attempt) + raise NotImplementedError(f"Unrecognised command {command!r}") + + return self.consider_new_tree(attempt) def shrink_pass_family(f): @@ -1538,33 +1581,20 @@ def shrink_pass_family(f): @shrink_pass_family -def block_program(self, chooser, description): - """Mini-DSL for block rewriting. A sequence of commands that will be run - over all contiguous sequences of blocks of the description length in order. - Commands are: - - * ".", keep this block unchanged - * "-", subtract one from this block. - * "0", replace this block with zero - * "X", delete this block - - If a command does not apply (currently only because it's - on a zero - block) the block will be silently skipped over. As a side effect of - running a block program its score will be updated. - """ +def node_program(self, chooser, description): n = len(description) + # Adaptively attempt to run the node program at the current + # index. If this successfully applies the node program ``k`` times + # then this runs in ``O(log(k))`` test function calls. + i = chooser.choose(range(len(self.nodes) - n + 1)) - """Adaptively attempt to run the block program at the current - index. If this successfully applies the block program ``k`` times - then this runs in ``O(log(k))`` test function calls.""" - i = chooser.choose(range(len(self.shrink_target.blocks) - n)) - # First, run the block program at the chosen index. If this fails, + # First, run the node program at the chosen index. If this fails, # don't do any extra work, so that failure is as cheap as possible. - if not self.run_block_program(i, description, original=self.shrink_target): + if not self.run_node_program(i, description, original=self.shrink_target): return # Because we run in a random order we will often find ourselves in the middle - # of a region where we could run the block program. We thus start by moving + # of a region where we could run the node program. We thus start by moving # left to the beginning of that region if possible in order to to start from # the beginning of that region. def offset_left(k): @@ -1572,47 +1602,19 @@ def block_program(self, chooser, description): i = offset_left( find_integer( - lambda k: self.run_block_program( + lambda k: self.run_node_program( offset_left(k), description, original=self.shrink_target ) ) ) original = self.shrink_target - # Now try to run the block program multiple times here. 
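The node-program DSL above now supports only deletion. A tiny sketch of what running the program "XX" at index `i` amounts to, with plain lists standing in for IRNode sequences:

```python
def run_node_program(nodes, i, description):
    # apply the commands right to left so earlier deletions don't shift the
    # indices of later ones; "X" (delete node i + k) is the only command left
    attempt = list(nodes)
    for k, command in reversed(list(enumerate(description))):
        j = i + k
        if j >= len(attempt):
            return None
        if command == "X":
            del attempt[j]
        else:
            raise NotImplementedError(f"Unrecognised command {command!r}")
    return attempt


assert run_node_program(["a", "b", "c", "d"], 1, "XX") == ["a", "d"]
```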
find_integer( - lambda k: self.run_block_program(i, description, original=original, repeats=k) + lambda k: self.run_node_program(i, description, original=original, repeats=k) ) -@shrink_pass_family -def dfa_replacement(self, chooser, dfa_name): - """Use one of our previously learned shrinking DFAs to reduce - the current test case. This works by finding a match of the DFA in the - current buffer that is not already minimal and attempting to replace it - with the minimal string matching that DFA. - """ - - try: - dfa = SHRINKING_DFAS[dfa_name] - except KeyError: - dfa = self.extra_dfas[dfa_name] - - matching_regions = self.matching_regions(dfa) - minimal = next(dfa.all_matching_strings()) - u, v = chooser.choose( - matching_regions, lambda t: self.buffer[t[0] : t[1]] != minimal - ) - p = self.buffer[u:v] - assert sort_key(minimal) < sort_key(p) - replaced = self.buffer[:u] + minimal + self.buffer[v:] - - assert sort_key(replaced) < sort_key(self.buffer) - - self.consider_new_buffer(replaced) - - @attr.s(slots=True, eq=False) class ShrinkPass: run_with_chooser = attr.ib() @@ -1660,14 +1662,5 @@ class ShrinkPass: return self.run_with_chooser.__name__ -def non_zero_suffix(b): - """Returns the longest suffix of b that starts with a non-zero - byte.""" - i = 0 - while i < len(b) and b[i] == 0: - i += 1 - return b[i:] - - class StopShrinking(Exception): pass diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/__init__.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/__init__.py index 556e77461e..46cc166000 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/__init__.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/__init__.py @@ -8,9 +8,11 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. +from hypothesis.internal.conjecture.shrinking.bytes import Bytes +from hypothesis.internal.conjecture.shrinking.collection import Collection from hypothesis.internal.conjecture.shrinking.floats import Float from hypothesis.internal.conjecture.shrinking.integer import Integer -from hypothesis.internal.conjecture.shrinking.lexical import Lexical from hypothesis.internal.conjecture.shrinking.ordering import Ordering +from hypothesis.internal.conjecture.shrinking.string import String -__all__ = ["Lexical", "Integer", "Ordering", "Float"] +__all__ = ["Integer", "Ordering", "Float", "Collection", "String", "Bytes"] diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/bytes.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/bytes.py new file mode 100644 index 0000000000..3ba75a2719 --- /dev/null +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/bytes.py @@ -0,0 +1,24 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. 
+ +from hypothesis.internal.compat import int_from_bytes, int_to_bytes +from hypothesis.internal.conjecture.shrinking.integer import Integer + + +class Bytes(Integer): + def __init__(self, initial, predicate, **kwargs): + # shrink by interpreting the bytes as an integer. + # move to Collection.shrink when we support variable-size bytes, + # because b'\x00\x02' could shrink to either b'\x00\x01' or b'\x02'. + super().__init__( + int_from_bytes(initial), + lambda n: predicate(int_to_bytes(n, len(initial))), + **kwargs, + ) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/collection.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/collection.py new file mode 100644 index 0000000000..bc96e14d48 --- /dev/null +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/collection.py @@ -0,0 +1,60 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from hypothesis.internal.conjecture.shrinking.common import Shrinker +from hypothesis.internal.conjecture.shrinking.ordering import Ordering + + +def identity(v): + return v + + +class Collection(Shrinker): + def setup(self, *, ElementShrinker, to_order=identity, from_order=identity): + self.ElementShrinker = ElementShrinker + self.to_order = to_order + self.from_order = from_order + + def make_immutable(self, value): + return tuple(value) + + def left_is_better(self, left, right): + if len(left) < len(right): + return True + + # examine elements one by one from the left until an element differs. + for v1, v2 in zip(left, right): + if self.to_order(v1) == self.to_order(v2): + continue + return self.to_order(v1) < self.to_order(v2) + + # equal length and all values were equal by our ordering, so must be equal + # by our ordering. + assert list(map(self.to_order, left)) == list(map(self.to_order, right)) + return False + + def run_step(self): + # try deleting each element in turn, starting from the back + # TODO_BETTER_SHRINK: adaptively delete here by deleting larger chunks at once + # if early deletes succeed. use find_integer. turns O(n) into O(log(n)) + for i in reversed(range(len(self.current))): + self.consider(self.current[:i] + self.current[i + 1 :]) + + # then try reordering + Ordering.shrink(self.current, self.consider, key=self.to_order) + + # then try minimizing each element in turn + for i, val in enumerate(self.current): + self.ElementShrinker.shrink( + self.to_order(val), + lambda v: self.consider( + self.current[:i] + (self.from_order(v),) + self.current[i + 1 :] + ), + ) diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/dfas.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/dfas.py deleted file mode 100644 index 050672c5da..0000000000 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/dfas.py +++ /dev/null @@ -1,338 +0,0 @@ -# This file is part of Hypothesis, which may be found at -# https://github.com/HypothesisWorks/hypothesis/ -# -# Copyright the Hypothesis Authors. -# Individual contributors are listed in AUTHORS.rst and the git log. 
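The new `Bytes` shrinker above reuses `Integer` by round-tripping through `int_from_bytes`/`int_to_bytes`. A self-contained sketch of the same idea, with a naive linear scan standing in for `Integer.shrink`'s smarter search:

```python
def int_from_bytes(data):
    return int.from_bytes(data, "big")


def int_to_bytes(i, size):
    return i.to_bytes(size, "big")


def shrink_bytes(initial, predicate):
    # fixed-size bytes shrink by reinterpreting the buffer as a big-endian
    # integer, shrinking that, and re-encoding at the original width
    best = initial
    for candidate in range(int_from_bytes(initial)):
        attempt = int_to_bytes(candidate, len(initial))
        if predicate(attempt):
            best = attempt
            break
    return best


assert shrink_bytes(b"\x00\x07", lambda b: b[-1] >= 3) == b"\x00\x03"
```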
-# -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. If a copy of the MPL was not distributed with this file, You can -# obtain one at https://mozilla.org/MPL/2.0/. - -""" -This is a module for learning new DFAs that help normalize test -functions. That is, given a test function that sometimes shrinks -to one thing and sometimes another, this module is designed to -help learn new DFA-based shrink passes that will cause it to -always shrink to the same thing. -""" - -import hashlib -import math -from itertools import islice -from pathlib import Path - -from hypothesis import HealthCheck, settings -from hypothesis.errors import HypothesisException -from hypothesis.internal.conjecture.data import ConjectureResult, Status -from hypothesis.internal.conjecture.dfa.lstar import LStar -from hypothesis.internal.conjecture.shrinking.learned_dfas import ( - SHRINKING_DFAS, - __file__ as _learned_dfa_file, -) - -learned_dfa_file = Path(_learned_dfa_file) - - -class FailedToNormalise(HypothesisException): - pass - - -def update_learned_dfas(): - """Write any modifications to the SHRINKING_DFAS dictionary - back to the learned DFAs file.""" - - source = learned_dfa_file.read_text(encoding="utf-8") - - lines = source.splitlines() - - i = lines.index("# AUTOGENERATED BEGINS") - - del lines[i + 1 :] - - lines.append("") - lines.append("# fmt: off") - lines.append("") - - for k, v in sorted(SHRINKING_DFAS.items()): - lines.append(f"SHRINKING_DFAS[{k!r}] = {v!r} # noqa: E501") - - lines.append("") - lines.append("# fmt: on") - - new_source = "\n".join(lines) + "\n" - - if new_source != source: - learned_dfa_file.write_text(new_source, encoding="utf-8") - - -def learn_a_new_dfa(runner, u, v, predicate): - """Given two buffers ``u`` and ``v```, learn a DFA that will - allow the shrinker to normalise them better. ``u`` and ``v`` - should not currently shrink to the same test case when calling - this function.""" - from hypothesis.internal.conjecture.shrinker import dfa_replacement, sort_key - - assert predicate(runner.cached_test_function(u)) - assert predicate(runner.cached_test_function(v)) - - u_shrunk = fully_shrink(runner, u, predicate) - v_shrunk = fully_shrink(runner, v, predicate) - - u, v = sorted((u_shrunk.buffer, v_shrunk.buffer), key=sort_key) - - assert u != v - - assert not v.startswith(u) - - # We would like to avoid using LStar on large strings as its - # behaviour can be quadratic or worse. In order to help achieve - # this we peel off a common prefix and suffix of the two final - # results and just learn the internal bit where they differ. - # - # This potentially reduces the length quite far if there's - # just one tricky bit of control flow we're struggling to - # reduce inside a strategy somewhere and the rest of the - # test function reduces fine. 
- if v.endswith(u): - prefix = b"" - suffix = u - u_core = b"" - assert len(u) > 0 - v_core = v[: -len(u)] - else: - i = 0 - while u[i] == v[i]: - i += 1 - prefix = u[:i] - assert u.startswith(prefix) - assert v.startswith(prefix) - - i = 1 - while u[-i] == v[-i]: - i += 1 - - suffix = u[max(len(prefix), len(u) + 1 - i) :] - assert u.endswith(suffix) - assert v.endswith(suffix) - - u_core = u[len(prefix) : len(u) - len(suffix)] - v_core = v[len(prefix) : len(v) - len(suffix)] - - assert u == prefix + u_core + suffix, (list(u), list(v)) - assert v == prefix + v_core + suffix, (list(u), list(v)) - - better = runner.cached_test_function(u) - worse = runner.cached_test_function(v) - - allow_discards = worse.has_discards or better.has_discards - - def is_valid_core(s): - if not (len(u_core) <= len(s) <= len(v_core)): - return False - buf = prefix + s + suffix - result = runner.cached_test_function(buf) - return ( - predicate(result) - # Because we're often using this to learn strategies - # rather than entire complex test functions, it's - # important that our replacements are precise and - # don't leave the rest of the test case in a weird - # state. - and result.buffer == buf - # Because the shrinker is good at removing discarded - # data, unless we need discards to allow one or both - # of u and v to result in valid shrinks, we don't - # count attempts that have them as valid. This will - # cause us to match fewer strings, which will make - # the resulting shrink pass more efficient when run - # on test functions it wasn't really intended for. - and (allow_discards or not result.has_discards) - ) - - assert sort_key(u_core) < sort_key(v_core) - - assert is_valid_core(u_core) - assert is_valid_core(v_core) - - learner = LStar(is_valid_core) - - prev = -1 - while learner.generation != prev: - prev = learner.generation - learner.learn(u_core) - learner.learn(v_core) - - # L* has a tendency to learn DFAs which wrap around to - # the beginning. We don't want to it to do that unless - # it's accurate, so we use these as examples to show - # check going around the DFA twice. - learner.learn(u_core * 2) - learner.learn(v_core * 2) - - if learner.dfa.max_length(learner.dfa.start) > len(v_core): - # The language we learn is finite and bounded above - # by the length of v_core. This is important in order - # to keep our shrink passes reasonably efficient - - # otherwise they can match far too much. So whenever - # we learn a DFA that could match a string longer - # than len(v_core) we fix it by finding the first - # string longer than v_core and learning that as - # a correction. - x = next(learner.dfa.all_matching_strings(min_length=len(v_core) + 1)) - assert not is_valid_core(x) - learner.learn(x) - assert not learner.dfa.matches(x) - assert learner.generation != prev - else: - # We mostly care about getting the right answer on the - # minimal test case, but because we're doing this offline - # anyway we might as well spend a little more time trying - # small examples to make sure the learner gets them right. - for x in islice(learner.dfa.all_matching_strings(), 100): - if not is_valid_core(x): - learner.learn(x) - assert learner.generation != prev - break - - # We've now successfully learned a DFA that works for shrinking - # our failed normalisation further. Canonicalise it into a concrete - # DFA so we can save it for later. 
- new_dfa = learner.dfa.canonicalise() - - assert math.isfinite(new_dfa.max_length(new_dfa.start)) - - shrinker = runner.new_shrinker(runner.cached_test_function(v), predicate) - - assert (len(prefix), len(v) - len(suffix)) in shrinker.matching_regions(new_dfa) - - name = "tmp-dfa-" + repr(new_dfa) - - shrinker.extra_dfas[name] = new_dfa - - shrinker.fixate_shrink_passes([dfa_replacement(name)]) - - assert sort_key(shrinker.buffer) < sort_key(v) - - return new_dfa - - -def fully_shrink(runner, test_case, predicate): - if not isinstance(test_case, ConjectureResult): - test_case = runner.cached_test_function(test_case) - while True: - shrunk = runner.shrink(test_case, predicate) - if shrunk.buffer == test_case.buffer: - break - test_case = shrunk - return test_case - - -def normalize( - base_name, - test_function, - *, - required_successes=100, - allowed_to_update=False, - max_dfas=10, - random=None, -): - """Attempt to ensure that this test function successfully normalizes - i.e. - whenever it declares a test case to be interesting, we are able - to shrink that to the same interesting test case (which logically should - be the shortlex minimal interesting test case, though we may not be able - to detect if it is). - - Will run until we have seen ``required_successes`` many interesting test - cases in a row normalize to the same value. - - If ``allowed_to_update`` is True, whenever we fail to normalize we will - learn a new DFA-based shrink pass that allows us to make progress. Any - learned DFAs will be written back into the learned DFA file at the end - of this function. If ``allowed_to_update`` is False, this will raise an - error as soon as it encounters a failure to normalize. - - Additionally, if more than ``max_dfas` DFAs are required to normalize - this test function, this function will raise an error - it's essentially - designed for small patches that other shrink passes don't cover, and - if it's learning too many patches then you need a better shrink pass - than this can provide. - """ - # Need import inside the function to avoid circular imports - from hypothesis.internal.conjecture.engine import BUFFER_SIZE, ConjectureRunner - - runner = ConjectureRunner( - test_function, - settings=settings(database=None, suppress_health_check=list(HealthCheck)), - ignore_limits=True, - random=random, - ) - - seen = set() - - dfas_added = 0 - - found_interesting = False - consecutive_successes = 0 - failures_to_find_interesting = 0 - while consecutive_successes < required_successes: - attempt = runner.cached_test_function(b"", extend=BUFFER_SIZE) - if attempt.status < Status.INTERESTING: - failures_to_find_interesting += 1 - assert ( - found_interesting or failures_to_find_interesting <= 1000 - ), "Test function seems to have no interesting test cases" - continue - - found_interesting = True - - target = attempt.interesting_origin - - def shrinking_predicate(d): - return d.status == Status.INTERESTING and d.interesting_origin == target - - if target not in seen: - seen.add(target) - runner.shrink(attempt, shrinking_predicate) - continue - - previous = fully_shrink( - runner, runner.interesting_examples[target], shrinking_predicate - ) - current = fully_shrink(runner, attempt, shrinking_predicate) - - if current.buffer == previous.buffer: - consecutive_successes += 1 - continue - - consecutive_successes = 0 - - if not allowed_to_update: - raise FailedToNormalise( - f"Shrinker failed to normalize {previous.buffer!r} to " - f"{current.buffer!r} and we are not allowed to learn new DFAs." 
- ) - - if dfas_added >= max_dfas: - raise FailedToNormalise( - f"Test function is too hard to learn: Added {dfas_added} " - "DFAs and still not done." - ) - - dfas_added += 1 - - new_dfa = learn_a_new_dfa( - runner, previous.buffer, current.buffer, shrinking_predicate - ) - - name = base_name + "-" + hashlib.sha256(repr(new_dfa).encode()).hexdigest()[:10] - - # If there is a name collision this DFA should already be being - # used for shrinking, so we should have already been able to shrink - # v further. - assert name not in SHRINKING_DFAS - SHRINKING_DFAS[name] = new_dfa - - if dfas_added > 0: - # We've learned one or more DFAs in the course of normalising, so now - # we update the file to record those for posterity. - update_learned_dfas() diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/floats.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/floats.py index acc878b7b4..4802153502 100644 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/floats.py +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/floats.py @@ -11,7 +11,6 @@ import math import sys -from hypothesis.internal.conjecture.data import ir_value_permitted from hypothesis.internal.conjecture.floats import float_to_lex from hypothesis.internal.conjecture.shrinking.common import Shrinker from hypothesis.internal.conjecture.shrinking.integer import Integer @@ -20,16 +19,9 @@ MAX_PRECISE_INTEGER = 2**53 class Float(Shrinker): - def setup(self, node): + def setup(self): self.NAN = math.nan self.debugging_enabled = True - self.node = node - - def consider(self, value): - if not ir_value_permitted(value, "float", self.node.kwargs): - self.debug(f"rejecting {value} as disallowed for {self.node.kwargs}") - return False - return super().consider(value) def make_immutable(self, f): f = float(f) @@ -60,11 +52,16 @@ class Float(Shrinker): if not math.isfinite(self.current): return True - # If its too large to represent as an integer, bail out here. It's - # better to try shrinking it in the main representation. - return self.current >= MAX_PRECISE_INTEGER - def run_step(self): + # above MAX_PRECISE_INTEGER, all floats are integers. Shrink like one. + # TODO_BETTER_SHRINK: at 2 * MAX_PRECISE_INTEGER, n - 1 == n - 2, and + # Integer.shrink will likely perform badly. We should have a specialized + # big-float shrinker, which mostly follows Integer.shrink but replaces + # n - 1 with next_down(n). + if self.current > MAX_PRECISE_INTEGER: + self.delegate(Integer, convert_to=int, convert_from=float) + return + # Finally we get to the important bit: Each of these is a small change # to the floating point number that corresponds to a large change in # the lexical representation. Trying these ensures that our floating diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/learned_dfas.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/learned_dfas.py deleted file mode 100644 index 3a414de534..0000000000 --- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/learned_dfas.py +++ /dev/null @@ -1,32 +0,0 @@ -# This file is part of Hypothesis, which may be found at -# https://github.com/HypothesisWorks/hypothesis/ -# -# Copyright the Hypothesis Authors. -# Individual contributors are listed in AUTHORS.rst and the git log. -# -# This Source Code Form is subject to the terms of the Mozilla Public License, -# v. 2.0. 
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/learned_dfas.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/learned_dfas.py
deleted file mode 100644
index 3a414de534..0000000000
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/learned_dfas.py
+++ /dev/null
@@ -1,32 +0,0 @@
-# This file is part of Hypothesis, which may be found at
-# https://github.com/HypothesisWorks/hypothesis/
-#
-# Copyright the Hypothesis Authors.
-# Individual contributors are listed in AUTHORS.rst and the git log.
-#
-# This Source Code Form is subject to the terms of the Mozilla Public License,
-# v. 2.0. If a copy of the MPL was not distributed with this file, You can
-# obtain one at https://mozilla.org/MPL/2.0/.
-
-from hypothesis.internal.conjecture.dfa import ConcreteDFA
-
-SHRINKING_DFAS = {}
-
-# Note: Everything below the following line is auto generated.
-# Any code added after this point will be deleted by an automated
-# process. Don't write code below this point.
-#
-# AUTOGENERATED BEGINS
-
-# fmt: off
-
-SHRINKING_DFAS['datetimes()-d66625c3b7'] = ConcreteDFA([[(0, 1), (1, 255, 2)], [(0, 3), (1, 255, 4)], [(0, 255, 4)], [(0, 5), (1, 255, 6)], [(0, 255, 6)], [(5, 255, 7)], [(0, 255, 7)], []], {7})  # noqa: E501
-SHRINKING_DFAS['emails()-fde8f71142'] = ConcreteDFA([[(0, 1), (1, 255, 2)], [(0, 255, 2)], []], {2})  # noqa: E501
-SHRINKING_DFAS['floats()-58ab5aefc9'] = ConcreteDFA([[(1, 1), (2, 255, 2)], [(1, 3)], [(0, 1, 3)], []], {3})  # noqa: E501
-SHRINKING_DFAS['floats()-6b86629f89'] = ConcreteDFA([[(3, 1), (4, 255, 2)], [(1, 3)], [(0, 1, 3)], []], {3})  # noqa: E501
-SHRINKING_DFAS['floats()-aa8aef1e72'] = ConcreteDFA([[(2, 1), (3, 255, 2)], [(1, 3)], [(0, 1, 3)], []], {3})  # noqa: E501
-SHRINKING_DFAS['floats()-bf71ffe70f'] = ConcreteDFA([[(4, 1), (5, 255, 2)], [(1, 3)], [(0, 1, 3)], []], {3})  # noqa: E501
-SHRINKING_DFAS['text()-05c917b389'] = ConcreteDFA([[(0, 1), (1, 8, 2)], [(9, 255, 3)], [(0, 255, 4)], [], [(0, 255, 5)], [(0, 255, 3)]], {3})  # noqa: E501
-SHRINKING_DFAS['text()-807e5f9650'] = ConcreteDFA([[(0, 8, 1), (9, 255, 2)], [(1, 8, 3)], [(1, 8, 3)], [(0, 4)], [(0, 255, 5)], []], {2, 5})  # noqa: E501
-
-# fmt: on
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/lexical.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/lexical.py
deleted file mode 100644
index 2f69f1fee3..0000000000
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/lexical.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# This file is part of Hypothesis, which may be found at
-# https://github.com/HypothesisWorks/hypothesis/
-#
-# Copyright the Hypothesis Authors.
-# Individual contributors are listed in AUTHORS.rst and the git log.
-#
-# This Source Code Form is subject to the terms of the Mozilla Public License,
-# v. 2.0. If a copy of the MPL was not distributed with this file, You can
-# obtain one at https://mozilla.org/MPL/2.0/.
-
-from hypothesis.internal.compat import int_from_bytes, int_to_bytes
-from hypothesis.internal.conjecture.shrinking.common import Shrinker
-from hypothesis.internal.conjecture.shrinking.integer import Integer
-from hypothesis.internal.conjecture.shrinking.ordering import Ordering
-
-"""
-This module implements a lexicographic minimizer for blocks of bytes.
-""" - - -class Lexical(Shrinker): - def make_immutable(self, value): - return bytes(value) - - @property - def size(self): - return len(self.current) - - def check_invariants(self, value): - assert len(value) == self.size - - def left_is_better(self, left, right): - return left < right - - def incorporate_int(self, i): - return self.incorporate(int_to_bytes(i, self.size)) - - @property - def current_int(self): - return int_from_bytes(self.current) - - def minimize_as_integer(self): - Integer.shrink( - self.current_int, - lambda c: c == self.current_int or self.incorporate_int(c), - ) - - def partial_sort(self): - Ordering.shrink(self.current, self.consider) - - def run_step(self): - self.minimize_as_integer() - self.partial_sort() diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/string.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/string.py new file mode 100644 index 0000000000..bbb82523ff --- /dev/null +++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/shrinking/string.py @@ -0,0 +1,24 @@ +# This file is part of Hypothesis, which may be found at +# https://github.com/HypothesisWorks/hypothesis/ +# +# Copyright the Hypothesis Authors. +# Individual contributors are listed in AUTHORS.rst and the git log. +# +# This Source Code Form is subject to the terms of the Mozilla Public License, +# v. 2.0. If a copy of the MPL was not distributed with this file, You can +# obtain one at https://mozilla.org/MPL/2.0/. + +from hypothesis.internal.conjecture.shrinking.collection import Collection +from hypothesis.internal.conjecture.shrinking.integer import Integer + + +class String(Collection): + def __init__(self, initial, predicate, *, intervals, **kwargs): + super().__init__( + list(initial), + lambda val: predicate("".join(val)), + to_order=intervals.index_from_char_in_shrink_order, + from_order=intervals.char_in_shrink_order, + ElementShrinker=Integer, + **kwargs, + ) diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py index 6b66ca6a37..a3a5493aa1 100644 --- a/contrib/python/hypothesis/py3/hypothesis/version.py +++ b/contrib/python/hypothesis/py3/hypothesis/version.py @@ -8,5 +8,5 @@ # v. 2.0. If a copy of the MPL was not distributed with this file, You can # obtain one at https://mozilla.org/MPL/2.0/. 
diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py
index 6b66ca6a37..a3a5493aa1 100644
--- a/contrib/python/hypothesis/py3/hypothesis/version.py
+++ b/contrib/python/hypothesis/py3/hypothesis/version.py
@@ -8,5 +8,5 @@
 # v. 2.0. If a copy of the MPL was not distributed with this file, You can
 # obtain one at https://mozilla.org/MPL/2.0/.
 
-__version_info__ = (6, 102, 6)
+__version_info__ = (6, 103, 0)
 __version__ = ".".join(map(str, __version_info__))
diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make
index 57db535574..35e83e2000 100644
--- a/contrib/python/hypothesis/py3/ya.make
+++ b/contrib/python/hypothesis/py3/ya.make
@@ -2,7 +2,7 @@
 
 PY3_LIBRARY()
 
-VERSION(6.102.6)
+VERSION(6.103.0)
 
 LICENSE(MPL-2.0)
 
@@ -67,13 +67,13 @@ PY_SRCS(
     hypothesis/internal/conjecture/pareto.py
     hypothesis/internal/conjecture/shrinker.py
     hypothesis/internal/conjecture/shrinking/__init__.py
+    hypothesis/internal/conjecture/shrinking/bytes.py
+    hypothesis/internal/conjecture/shrinking/collection.py
     hypothesis/internal/conjecture/shrinking/common.py
-    hypothesis/internal/conjecture/shrinking/dfas.py
     hypothesis/internal/conjecture/shrinking/floats.py
    hypothesis/internal/conjecture/shrinking/integer.py
-    hypothesis/internal/conjecture/shrinking/learned_dfas.py
-    hypothesis/internal/conjecture/shrinking/lexical.py
     hypothesis/internal/conjecture/shrinking/ordering.py
+    hypothesis/internal/conjecture/shrinking/string.py
     hypothesis/internal/conjecture/utils.py
     hypothesis/internal/coverage.py
     hypothesis/internal/detection.py
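For context on the dfas.py code removed earlier in this diff: fully_shrink and normalize implement "shrink to a fixpoint, then check that every interesting input reaches the same minimal result". That control flow can be restated without any Hypothesis machinery; in the sketch below, shrink_once and is_interesting are toy, hypothetical stand-ins for the runner's shrink pass and the interestingness predicate.

    # Toy restatement of the removed normalisation check; illustrative only.
    def is_interesting(value: int) -> bool:
        return value >= 10

    def shrink_once(value: int) -> int:
        # One shrink pass: step down by one if the result stays interesting.
        return value - 1 if is_interesting(value - 1) else value

    def fully_shrink(value: int) -> int:
        # Shrink until a fixpoint, mirroring the deleted fully_shrink().
        while True:
            shrunk = shrink_once(value)
            if shrunk == value:
                return value
            value = shrunk

    def normalizes(candidates) -> bool:
        # The test "normalizes" if every interesting input shrinks to one value.
        minima = {fully_shrink(c) for c in candidates if is_interesting(c)}
        return len(minima) <= 1

    print(fully_shrink(100), normalizes([37, 99, 12]))  # -> 10 True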