summaryrefslogtreecommitdiffstats
path: root/contrib/python/hypothesis
diff options
context:
space:
mode:
authorrobot-piglet <[email protected]>2026-03-05 14:39:59 +0300
committerrobot-piglet <[email protected]>2026-03-05 15:19:33 +0300
commit121700c36064dc750373582e9ef644d994432c6a (patch)
tree7642d1d52994fc91c65200f439ad88354d3d8cee /contrib/python/hypothesis
parent0c847097dd0fb94196ca6286b3f7968f0d7ac0ec (diff)
Intermediate changes
commit_hash:1acacac341994cb2dfa3e049023f42b377fa05bc
Diffstat (limited to 'contrib/python/hypothesis')
-rw-r--r--contrib/python/hypothesis/py3/.dist-info/METADATA2
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/__init__.py674
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/lstar.py497
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py43
-rw-r--r--contrib/python/hypothesis/py3/hypothesis/version.py2
-rw-r--r--contrib/python/hypothesis/py3/ya.make4
6 files changed, 3 insertions, 1219 deletions
diff --git a/contrib/python/hypothesis/py3/.dist-info/METADATA b/contrib/python/hypothesis/py3/.dist-info/METADATA
index 33587591b67..84b1acc1606 100644
--- a/contrib/python/hypothesis/py3/.dist-info/METADATA
+++ b/contrib/python/hypothesis/py3/.dist-info/METADATA
@@ -1,6 +1,6 @@
Metadata-Version: 2.4
Name: hypothesis
-Version: 6.151.8
+Version: 6.151.9
Summary: The property-based testing library for Python
Author-email: "David R. MacIver and Zac Hatfield-Dodds" <[email protected]>
License-Expression: MPL-2.0
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/__init__.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/__init__.py
deleted file mode 100644
index f30602c7394..00000000000
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/__init__.py
+++ /dev/null
@@ -1,674 +0,0 @@
-# This file is part of Hypothesis, which may be found at
-# https://github.com/HypothesisWorks/hypothesis/
-#
-# Copyright the Hypothesis Authors.
-# Individual contributors are listed in AUTHORS.rst and the git log.
-#
-# This Source Code Form is subject to the terms of the Mozilla Public License,
-# v. 2.0. If a copy of the MPL was not distributed with this file, You can
-# obtain one at https://mozilla.org/MPL/2.0/.
-
-import threading
-from collections import Counter, defaultdict, deque
-from math import inf
-
-from hypothesis.internal.reflection import proxies
-
-
-def cached(fn):
- @proxies(fn)
- def wrapped(self, *args):
- cache = self._DFA__cache(fn.__name__)
- try:
- return cache[args]
- except KeyError:
- return cache.setdefault(args, fn(self, *args))
-
- return wrapped
-
-
-class DFA:
- """Base class for implementations of deterministic finite
- automata.
-
- This is abstract to allow for the possibility of states
- being calculated lazily as we traverse the DFA (which
- we make heavy use of in our L* implementation - see
- lstar.py for details).
-
- States can be of any hashable type.
- """
-
- def __init__(self):
- self.__caches = threading.local()
-
- def __cache(self, name):
- try:
- cache = getattr(self.__caches, name)
- except AttributeError:
- cache = {}
- setattr(self.__caches, name, cache)
- return cache
-
- @property
- def start(self):
- """Returns the starting state."""
- raise NotImplementedError
-
- def is_accepting(self, i):
- """Returns if state ``i`` is an accepting one."""
- raise NotImplementedError
-
- def transition(self, i, c):
- """Returns the state that i transitions to on reading
- character c from a string."""
- raise NotImplementedError
-
- @property
- def alphabet(self):
- return range(256)
-
- def transitions(self, i):
- """Iterates over all pairs (byte, state) of transitions
- which do not lead to dead states."""
- for c, j in self.raw_transitions(i):
- if not self.is_dead(j):
- yield c, j
-
- @cached
- def transition_counts(self, state):
- counts = Counter()
- for _, j in self.transitions(state):
- counts[j] += 1
- return list(counts.items())
-
- def matches(self, s):
- """Returns whether the string ``s`` is accepted
- by this automaton."""
- i = self.start
- for c in s:
- i = self.transition(i, c)
- return self.is_accepting(i)
-
- def all_matching_regions(self, string):
- """Return all pairs ``(u, v)`` such that ``self.matches(string[u:v])``."""
-
- # Stack format: (k, state, indices). After reading ``k`` characters
- # starting from any i in ``indices`` the DFA would be at ``state``.
- stack = [(0, self.start, range(len(string)))]
-
- results = []
-
- while stack:
- k, state, indices = stack.pop()
-
- # If the state is dead, abort early - no point continuing on
- # from here where there will be no more matches.
- if self.is_dead(state):
- continue
-
- # If the state is accepting, then every one of these indices
- # has a matching region of length ``k`` starting from it.
- if self.is_accepting(state):
- results.extend([(i, i + k) for i in indices])
-
- next_by_state = defaultdict(list)
-
- for i in indices:
- if i + k < len(string):
- c = string[i + k]
- next_by_state[self.transition(state, c)].append(i)
- for next_state, next_indices in next_by_state.items():
- stack.append((k + 1, next_state, next_indices))
- return results
-
- def max_length(self, i):
- """Returns the maximum length of a string that is
- accepted when starting from i."""
- if self.is_dead(i):
- return 0
-
- cache = self.__cache("max_length")
-
- try:
- return cache[i]
- except KeyError:
- pass
-
- # Naively we can calculate this as 1 longer than the
- # max length of the non-dead states this can immediately
- # transition to, but a) We don't want unbounded recursion
- # because that's how you get RecursionErrors and b) This
- # makes it hard to look for cycles. So we basically do
- # the recursion explicitly with a stack, but we maintain
- # a parallel set that tracks what's already on the stack
- # so that when we encounter a loop we can immediately
- # determine that the max length here is infinite.
-
- stack = [i]
- stack_set = {i}
-
- def pop():
- """Remove the top element from the stack, maintaining
- the stack set appropriately."""
- assert len(stack) == len(stack_set)
- j = stack.pop()
- stack_set.remove(j)
- assert len(stack) == len(stack_set)
-
- while stack:
- j = stack[-1]
- assert not self.is_dead(j)
- # If any of the children have infinite max_length we don't
- # need to check all of them to know that this state does
- # too.
- if any(cache.get(k) == inf for k in self.successor_states(j)):
- cache[j] = inf
- pop()
- continue
-
- # Recurse to the first child node that we have not yet
- # calculated max_length for.
- for k in self.successor_states(j):
- if k in stack_set:
- # k is part of a loop and is known to be live
- # (since we never push dead states on the stack),
- # so it can reach strings of unbounded length.
- assert not self.is_dead(k)
- cache[k] = inf
- break
- if k not in cache and not self.is_dead(k):
- stack.append(k)
- stack_set.add(k)
- break
- else:
- # All of j's successors have a known max_length or are dead,
- # so we can now compute a max_length for j itself.
- cache[j] = max(
- (
- 1 + cache[k]
- for k in self.successor_states(j)
- if not self.is_dead(k)
- ),
- default=0,
- )
-
- # j is live so it must either be accepting or have a live child.
- assert self.is_accepting(j) or cache[j] > 0
- pop()
- return cache[i]
-
- @cached
- def has_strings(self, state, length):
- """Returns if any strings of length ``length`` are accepted when
- starting from state ``state``."""
- assert length >= 0
-
- cache = self.__cache("has_strings")
-
- try:
- return cache[state, length]
- except KeyError:
- pass
-
- pending = [(state, length)]
- seen = set()
- i = 0
-
- while i < len(pending):
- s, n = pending[i]
- i += 1
- if n > 0:
- for t in self.successor_states(s):
- key = (t, n - 1)
- if key not in cache and key not in seen:
- pending.append(key)
- seen.add(key)
-
- while pending:
- s, n = pending.pop()
- if n == 0:
- cache[s, n] = self.is_accepting(s)
- else:
- cache[s, n] = any(
- cache.get((t, n - 1)) for t in self.successor_states(s)
- )
-
- return cache[state, length]
-
- def count_strings(self, state, length):
- """Returns the number of strings of length ``length``
- that are accepted when starting from state ``state``."""
- assert length >= 0
- cache = self.__cache("count_strings")
-
- try:
- return cache[state, length]
- except KeyError:
- pass
-
- pending = [(state, length)]
- seen = set()
- i = 0
-
- while i < len(pending):
- s, n = pending[i]
- i += 1
- if n > 0:
- for t in self.successor_states(s):
- key = (t, n - 1)
- if key not in cache and key not in seen:
- pending.append(key)
- seen.add(key)
-
- while pending:
- s, n = pending.pop()
- if n == 0:
- cache[s, n] = int(self.is_accepting(s))
- else:
- cache[s, n] = sum(
- cache[t, n - 1] * k for t, k in self.transition_counts(s)
- )
-
- return cache[state, length]
-
- @cached
- def successor_states(self, state):
- """Returns all of the distinct states that can be reached via one
- transition from ``state``, in the lexicographic order of the
- smallest character that reaches them."""
- seen = set()
- result = []
- for _, j in self.raw_transitions(state):
- if j not in seen:
- seen.add(j)
- result.append(j)
- return tuple(result)
-
- def is_dead(self, state):
- """Returns True if no strings can be accepted
- when starting from ``state``."""
- return not self.is_live(state)
-
- def is_live(self, state):
- """Returns True if any strings can be accepted
- when starting from ``state``."""
- if self.is_accepting(state):
- return True
-
- # We work this out by calculating is_live for all nodes
- # reachable from state which have not already had it calculated.
- cache = self.__cache("is_live")
- try:
- return cache[state]
- except KeyError:
- pass
-
- # roots are states that we know already must be live,
- # either because we have previously calculated them to
- # be or because they are an accepting state.
- roots = set()
-
- # We maintain a backwards graph where ``j in backwards_graph[k]``
- # if there is a transition from j to k. Thus if a key in this
- # graph is live, so must all its values be.
- backwards_graph = defaultdict(set)
-
- # First we find all reachable nodes from i which have not
- # already been cached, noting any which are roots and
- # populating the backwards graph.
-
- explored = set()
- queue = deque([state])
- while queue:
- j = queue.popleft()
- if cache.get(j, self.is_accepting(j)):
- # If j can be immediately determined to be live
- # then there is no point in exploring beneath it,
- # because any effect of states below it is screened
- # off by the known answer for j.
- roots.add(j)
- continue
-
- if j in cache:
- # Likewise if j is known to be dead then there is
- # no point exploring beneath it because we know
- # that all nodes reachable from it must be dead.
- continue
-
- if j in explored:
- continue
- explored.add(j)
-
- for k in self.successor_states(j):
- backwards_graph[k].add(j)
- queue.append(k)
-
- marked_live = set()
- queue = deque(roots)
- while queue:
- j = queue.popleft()
- if j in marked_live:
- continue
- marked_live.add(j)
- for k in backwards_graph[j]:
- queue.append(k)
- for j in explored:
- cache[j] = j in marked_live
-
- return cache[state]
-
- def all_matching_strings_of_length(self, k):
- """Yields all matching strings whose length is ``k``, in ascending
- lexicographic order."""
- if k == 0:
- if self.is_accepting(self.start):
- yield b""
- return
-
- if not self.has_strings(self.start, k):
- return
-
- # This tracks a path through the DFA. We alternate between growing
- # it until it has length ``k`` and is in an accepting state, then
- # yielding that as a result, then modifying it so that the next
- # time we do that it will yield the lexicographically next matching
- # string.
- path = bytearray()
-
- # Tracks the states that are visited by following ``path`` from the
- # starting point.
- states = [self.start]
-
- while True:
- # First we build up our current best prefix to the lexicographically
- # first string starting with it.
- while len(path) < k:
- state = states[-1]
- for c, j in self.transitions(state):
- if self.has_strings(j, k - len(path) - 1):
- states.append(j)
- path.append(c)
- break
- else:
- raise NotImplementedError("Should be unreachable")
- assert self.is_accepting(states[-1])
- assert len(states) == len(path) + 1
- yield bytes(path)
-
- # Now we want to replace this string with the prefix that will
- # cause us to extend to its lexicographic successor. This can
- # be thought of as just repeatedly moving to the next lexicographic
- # successor until we find a matching string, but we're able to
- # use our length counts to jump over long sequences where there
- # cannot be a match.
- while True:
- # As long as we are in this loop we are trying to move to
- # the successor of the current string.
-
- # If we've removed the entire prefix then we're done - no
- # successor is possible.
- if not path:
- return
-
- if path[-1] == 255:
- # If our last element is maximal then the we have to "carry
- # the one" - our lexicographic successor must be incremented
- # earlier than this.
- path.pop()
- states.pop()
- else:
- # Otherwise increment by one.
- path[-1] += 1
- states[-1] = self.transition(states[-2], path[-1])
-
- # If there are no strings of the right length starting from
- # this prefix we need to keep going. Otherwise, this is
- # the right place to be and we break out of our loop of
- # trying to find the successor because it starts here.
- if self.count_strings(states[-1], k - len(path)) > 0:
- break
-
- def all_matching_strings(self, min_length=0):
- """Iterate over all strings matched by this automaton
- in shortlex-ascending order."""
- # max_length might be infinite, hence the while loop
- max_length = self.max_length(self.start)
- length = min_length
- while length <= max_length:
- yield from self.all_matching_strings_of_length(length)
- length += 1
-
- def raw_transitions(self, i):
- for c in self.alphabet:
- j = self.transition(i, c)
- yield c, j
-
- def canonicalise(self):
- """Return a canonical version of ``self`` as a ConcreteDFA.
-
- The DFA is not minimized, but nodes are sorted and relabelled
- and dead nodes are pruned, so two minimized DFAs for the same
- language will end up with identical canonical representatives.
- This is mildly important because it means that the output of
- L* should produce the same canonical DFA regardless of what
- order we happen to have run it in.
- """
- # We map all states to their index of appearance in depth
- # first search. This both is useful for canonicalising and
- # also allows for states that aren't integers.
- state_map = {}
- reverse_state_map = []
- accepting = set()
-
- seen = set()
-
- queue = deque([self.start])
- while queue:
- state = queue.popleft()
- if state in state_map:
- continue
- i = len(reverse_state_map)
- if self.is_accepting(state):
- accepting.add(i)
- reverse_state_map.append(state)
- state_map[state] = i
- for _, j in self.transitions(state):
- if j in seen:
- continue
- seen.add(j)
- queue.append(j)
-
- transitions = [
- {c: state_map[s] for c, s in self.transitions(t)} for t in reverse_state_map
- ]
-
- result = ConcreteDFA(transitions, accepting)
- assert self.equivalent(result)
- return result
-
- def equivalent(self, other):
- """Checks whether this DFA and other match precisely the same
- language.
-
- Uses the classic algorithm of Hopcroft and Karp (more or less):
- Hopcroft, John E. A linear algorithm for testing equivalence
- of finite automata. Vol. 114. Defense Technical Information Center, 1971.
- """
-
- # The basic idea of this algorithm is that we repeatedly
- # merge states that would be equivalent if the two start
- # states were. This starts by merging the two start states,
- # and whenever we merge two states merging all pairs of
- # states that are reachable by following the same character
- # from that point.
- #
- # Whenever we merge two states, we check if one of them
- # is accepting and the other non-accepting. If so, we have
- # obtained a contradiction and have made a bad merge, so
- # the two start states must not have been equivalent in the
- # first place and we return False.
- #
- # If the languages matched are different then some string
- # is contained in one but not the other. By looking at
- # the pairs of states visited by traversing the string in
- # each automaton in parallel, we eventually come to a pair
- # of states that would have to be merged by this algorithm
- # where one is accepting and the other is not. Thus this
- # algorithm always returns False as a result of a bad merge
- # if the two languages are not the same.
- #
- # If we successfully complete all merges without a contradiction
- # we can thus safely return True.
-
- # We maintain a union/find table for tracking merges of states.
- table = {}
-
- def find(s):
- trail = [s]
- while trail[-1] in table and table[trail[-1]] != trail[-1]:
- trail.append(table[trail[-1]])
-
- for t in trail:
- table[t] = trail[-1]
-
- return trail[-1]
-
- def union(s, t):
- s = find(s)
- t = find(t)
- table[s] = t
-
- alphabet = sorted(set(self.alphabet) | set(other.alphabet))
-
- queue = deque([(self.start, other.start)])
- while queue:
- self_state, other_state = queue.popleft()
-
- # We use a DFA/state pair for keys because the same value
- # may represent a different state in each DFA.
- self_key = (self, self_state)
- other_key = (other, other_state)
-
- # We have already merged these, no need to remerge.
- if find(self_key) == find(other_key):
- continue
-
- # We have found a contradiction, therefore the two DFAs must
- # not be equivalent.
- if self.is_accepting(self_state) != other.is_accepting(other_state):
- return False
-
- # Merge the two states
- union(self_key, other_key)
-
- # And also queue any logical consequences of merging those
- # two states for merging.
- for c in alphabet:
- queue.append(
- (self.transition(self_state, c), other.transition(other_state, c))
- )
- return True
-
-
-DEAD = "DEAD"
-
-
-class ConcreteDFA(DFA):
- """A concrete representation of a DFA in terms of an explicit list
- of states."""
-
- def __init__(self, transitions, accepting, start=0):
- """
- * ``transitions`` is a list where transitions[i] represents the
- valid transitions out of state ``i``. Elements may be either dicts
- (in which case they map characters to other states) or lists. If they
- are a list they may contain tuples of length 2 or 3. A tuple ``(c, j)``
- indicates that this state transitions to state ``j`` given ``c``. A
- tuple ``(u, v, j)`` indicates this state transitions to state ``j``
- given any ``c`` with ``u <= c <= v``.
- * ``accepting`` is a set containing the integer labels of accepting
- states.
- * ``start`` is the integer label of the starting state.
- """
- super().__init__()
- self.__start = start
- self.__accepting = accepting
- self.__transitions = list(transitions)
-
- def __repr__(self):
- transitions = []
- # Particularly for including in source code it's nice to have the more
- # compact repr, so where possible we convert to the tuple based representation
- # which can represent ranges more compactly.
- for i in range(len(self.__transitions)):
- table = []
- for c, j in self.transitions(i):
- if not table or j != table[-1][-1] or c != table[-1][1] + 1:
- table.append([c, c, j])
- else:
- table[-1][1] = c
- transitions.append([(u, j) if u == v else (u, v, j) for u, v, j in table])
-
- start = "" if self.__start == 0 else f", start={self.__start!r}"
- return f"ConcreteDFA({transitions!r}, {self.__accepting!r}{start})"
-
- @property
- def start(self):
- return self.__start
-
- def is_accepting(self, i):
- return i in self.__accepting
-
- def transition(self, state, char):
- """Returns the state that i transitions to on reading
- character c from a string."""
- if state == DEAD:
- return DEAD
-
- table = self.__transitions[state]
-
- # Given long transition tables we convert them to
- # dictionaries for more efficient lookup.
- if not isinstance(table, dict) and len(table) >= 5:
- new_table = {}
- for t in table:
- if len(t) == 2:
- new_table[t[0]] = t[1]
- else:
- u, v, j = t
- for c in range(u, v + 1):
- new_table[c] = j
- self.__transitions[state] = new_table
- table = new_table
-
- if isinstance(table, dict):
- try:
- return self.__transitions[state][char]
- except KeyError:
- return DEAD
- else:
- for t in table:
- if len(t) == 2:
- if t[0] == char:
- return t[1]
- else:
- u, v, j = t
- if u <= char <= v:
- return j
- return DEAD
-
- def raw_transitions(self, i):
- if i == DEAD:
- return
- transitions = self.__transitions[i]
- if isinstance(transitions, dict):
- yield from sorted(transitions.items())
- else:
- for t in transitions:
- if len(t) == 2:
- yield t
- else:
- u, v, j = t
- for c in range(u, v + 1):
- yield c, j
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/lstar.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/lstar.py
deleted file mode 100644
index 25e638637b2..00000000000
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/dfa/lstar.py
+++ /dev/null
@@ -1,497 +0,0 @@
-# This file is part of Hypothesis, which may be found at
-# https://github.com/HypothesisWorks/hypothesis/
-#
-# Copyright the Hypothesis Authors.
-# Individual contributors are listed in AUTHORS.rst and the git log.
-#
-# This Source Code Form is subject to the terms of the Mozilla Public License,
-# v. 2.0. If a copy of the MPL was not distributed with this file, You can
-# obtain one at https://mozilla.org/MPL/2.0/.
-
-from bisect import bisect_right, insort
-from collections import Counter
-from dataclasses import dataclass, field
-
-from hypothesis.errors import InvalidState
-from hypothesis.internal.conjecture.dfa import DFA, cached
-from hypothesis.internal.conjecture.junkdrawer import (
- IntList,
- NotFound,
- SelfOrganisingList,
- find_integer,
-)
-
-"""
-This module contains an implementation of the L* algorithm
-for learning a deterministic finite automaton based on an
-unknown membership function and a series of examples of
-strings that may or may not satisfy it.
-
-The two relevant papers for understanding this are:
-
-* Angluin, Dana. "Learning regular sets from queries and counterexamples."
- Information and computation 75.2 (1987): 87-106.
-* Rivest, Ronald L., and Robert E. Schapire. "Inference of finite automata
- using homing sequences." Information and Computation 103.2 (1993): 299-347.
- Note that we only use the material from section 4.5 "Improving Angluin's L*
- algorithm" (page 318), and all of the rest of the material on homing
- sequences can be skipped.
-
-The former explains the core algorithm, the latter a modification
-we use (which we have further modified) which allows it to
-be implemented more efficiently.
-
-Although we continue to call this L*, we in fact depart heavily from it to the
-point where honestly this is an entirely different algorithm and we should come
-up with a better name.
-
-We have several major departures from the papers:
-
-1. We learn the automaton lazily as we traverse it. This is particularly
- valuable because if we make many corrections on the same string we only
- have to learn the transitions that correspond to the string we are
- correcting on.
-2. We make use of our ``find_integer`` method rather than a binary search
- as proposed in the Rivest and Schapire paper, as we expect that
- usually most strings will be mispredicted near the beginning.
-3. We try to learn a smaller alphabet of "interestingly distinct"
- values. e.g. if all bytes larger than two result in an invalid
- string, there is no point in distinguishing those bytes. In aid
- of this we learn a single canonicalisation table which maps integers
- to smaller integers that we currently think are equivalent, and learn
- their inequivalence where necessary. This may require more learning
- steps, as at each stage in the process we might learn either an
- inequivalent pair of integers or a new experiment, but it may greatly
- reduce the number of membership queries we have to make.
-
-
-In addition, we have a totally different approach for mapping a string to its
-canonical representative, which will be explained below inline. The general gist
-is that our implementation is much more willing to make mistakes: It will often
-create a DFA that is demonstrably wrong, based on information that it already
-has, but where it is too expensive to discover that before it causes us to
-make a mistake.
-
-A note on performance: This code is not really fast enough for
-us to ever want to run in production on large strings, and this
-is somewhat intrinsic. We should only use it in testing or for
-learning languages offline that we can record for later use.
-
-"""
-
-
-@dataclass(slots=True, frozen=False)
-class DistinguishedState:
- """Relevant information for a state that we have witnessed as definitely
- distinct from ones we have previously seen so far."""
-
- # Index of this state in the learner's list of states
- index: int
-
- # A string that witnesses this state (i.e. when starting from the origin
- # and following this string you will end up in this state).
- label: str
-
- # A boolean as to whether this is an accepting state.
- accepting: bool
-
- # A list of experiments that it is necessary to run to determine whether
- # a string is in this state. This is stored as a dict mapping experiments
- # to their expected result. A string is only considered to lead to this
- # state if ``all(learner.member(s + experiment) == result for experiment,
- # result in self.experiments.items())``.
- experiments: dict
-
- # A cache of transitions out of this state, mapping bytes to the states
- # that they lead to.
- transitions: dict = field(default_factory=dict)
-
-
-class LStar:
- """This class holds the state for learning a DFA. The current DFA can be
- accessed as the ``dfa`` member of this class. Such a DFA becomes invalid
- as soon as ``learn`` has been called, and should only be used until the
- next call to ``learn``.
-
- Note that many of the DFA methods are on this class, but it is not itself
- a DFA. The reason for this is that it stores mutable state which can cause
- the structure of the learned DFA to change in potentially arbitrary ways,
- making all cached properties become nonsense.
- """
-
- def __init__(self, member):
- self.experiments = []
- self.__experiment_set = set()
- self.normalizer = IntegerNormalizer()
-
- self.__member_cache = {}
- self.__member = member
- self.__generation = 0
-
- # A list of all state objects that correspond to strings we have
- # seen and can demonstrate map to unique states.
- self.__states = [
- DistinguishedState(
- index=0,
- label=b"",
- accepting=self.member(b""),
- experiments={b"": self.member(b"")},
- )
- ]
-
- # When we're trying to figure out what state a string leads to we will
- # end up searching to find a suitable candidate. By putting states in
- # a self-organising list we ideally minimise the number of lookups.
- self.__self_organising_states = SelfOrganisingList(self.__states)
-
- self.start = 0
-
- self.__dfa_changed()
-
- def __dfa_changed(self):
- """Note that something has changed, updating the generation
- and resetting any cached state."""
- self.__generation += 1
- self.dfa = LearnedDFA(self)
-
- def is_accepting(self, i):
- """Equivalent to ``self.dfa.is_accepting(i)``"""
- return self.__states[i].accepting
-
- def label(self, i):
- """Returns the string label for state ``i``."""
- return self.__states[i].label
-
- def transition(self, i, c):
- """Equivalent to ``self.dfa.transition(i, c)```"""
- c = self.normalizer.normalize(c)
- state = self.__states[i]
- try:
- return state.transitions[c]
- except KeyError:
- pass
-
- # The state that we transition to when reading ``c`` is reached by
- # this string, because this state is reached by state.label. We thus
- # want our candidate for the transition to be some state with a label
- # equivalent to this string.
- #
- # We find such a state by looking for one such that all of its listed
- # experiments agree on the result for its state label and this string.
- string = state.label + bytes([c])
-
- # We keep track of some useful experiments for distinguishing this
- # string from other states, as this both allows us to more accurately
- # select the state to map to and, if necessary, create the new state
- # that this string corresponds to with a decent set of starting
- # experiments.
- accumulated = {}
- counts = Counter()
-
- def equivalent(t):
- """Checks if ``string`` could possibly lead to state ``t``."""
- for e, expected in accumulated.items():
- if self.member(t.label + e) != expected:
- counts[e] += 1
- return False
-
- for e, expected in t.experiments.items():
- result = self.member(string + e)
- if result != expected:
- # We expect most experiments to return False so if we add
- # only True ones to our collection of essential experiments
- # we keep the size way down and select only ones that are
- # likely to provide useful information in future.
- if result:
- accumulated[e] = result
- return False
- return True
-
- try:
- destination = self.__self_organising_states.find(equivalent)
- except NotFound:
- i = len(self.__states)
- destination = DistinguishedState(
- index=i,
- label=string,
- experiments=accumulated,
- accepting=self.member(string),
- )
- self.__states.append(destination)
- self.__self_organising_states.add(destination)
- state.transitions[c] = destination.index
- return destination.index
-
- def member(self, s):
- """Check whether this string is a member of the language
- to be learned."""
- try:
- return self.__member_cache[s]
- except KeyError:
- result = self.__member(s)
- self.__member_cache[s] = result
- return result
-
- @property
- def generation(self):
- """Return an integer value that will be incremented
- every time the DFA we predict changes."""
- return self.__generation
-
- def learn(self, string):
- """Learn to give the correct answer on this string.
- That is, after this method completes we will have
- ``self.dfa.matches(s) == self.member(s)``.
-
- Note that we do not guarantee that this will remain
- true in the event that learn is called again with
- a different string. It is in principle possible that
- future learning will cause us to make a mistake on
- this string. However, repeatedly calling learn on
- each of a set of strings until the generation stops
- changing is guaranteed to terminate.
- """
- string = bytes(string)
- correct_outcome = self.member(string)
-
- # We don't want to check this inside the loop because it potentially
- # causes us to evaluate more of the states than we actually need to,
- # but if our model is mostly correct then this will be faster because
- # we only need to evaluate strings that are of the form
- # ``state + experiment``, which will generally be cached and/or needed
- # later.
- if self.dfa.matches(string) == correct_outcome:
- return
-
- # In the papers they assume that we only run this process
- # once, but this is silly - often when you've got a messy
- # string it will be wrong for many different reasons.
- #
- # Thus we iterate this to a fixed point where we repair
- # the DFA by repeatedly adding experiments until the DFA
- # agrees with the membership function on this string.
-
- # First we make sure that normalization is not the source of the
- # failure to match.
- while True:
- normalized = bytes(self.normalizer.normalize(c) for c in string)
- # We can correctly replace the string with its normalized version
- # so normalization is not the problem here.
- if self.member(normalized) == correct_outcome:
- string = normalized
- break
- alphabet = sorted(set(string), reverse=True)
- target = string
- for a in alphabet:
-
- def replace(b):
- if a == b:
- return target
- return bytes(b if c == a else c for c in target)
-
- self.normalizer.distinguish(a, lambda x: self.member(replace(x)))
- target = replace(self.normalizer.normalize(a))
- assert self.member(target) == correct_outcome
- assert target != normalized
- self.__dfa_changed()
-
- if self.dfa.matches(string) == correct_outcome:
- return
-
- # Now we know normalization is correct we can attempt to determine if
- # any of our transitions are wrong.
- while True:
- dfa = self.dfa
-
- states = [dfa.start]
-
- def seems_right(n):
- """After reading n characters from s, do we seem to be
- in the right state?
-
- We determine this by replacing the first n characters
- of s with the label of the state we expect to be in.
- If we are in the right state, that will replace a substring
- with an equivalent one so must produce the same answer.
- """
- if n > len(string):
- return False
-
- # Populate enough of the states list to know where we are.
- while n >= len(states):
- states.append(dfa.transition(states[-1], string[len(states) - 1]))
-
- return self.member(dfa.label(states[n]) + string[n:]) == correct_outcome
-
- assert seems_right(0)
-
- n = find_integer(seems_right)
-
- # We got to the end without ever finding ourself in a bad
- # state, so we must correctly match this string.
- if n == len(string):
- assert dfa.matches(string) == correct_outcome
- break
-
- # Reading n characters does not put us in a bad state but
- # reading n + 1 does. This means that the remainder of
- # the string that we have not read yet is an experiment
- # that allows us to distinguish the state that we ended
- # up in from the state that we should have ended up in.
-
- source = states[n]
- character = string[n]
- wrong_destination = states[n + 1]
-
- # We've made an error in transitioning from ``source`` to
- # ``wrong_destination`` via ``character``. We now need to update
- # the DFA so that this transition no longer occurs. Note that we
- # do not guarantee that the transition is *correct* after this,
- # only that we don't make this particular error.
- assert self.transition(source, character) == wrong_destination
-
- labels_wrong_destination = self.dfa.label(wrong_destination)
- labels_correct_destination = self.dfa.label(source) + bytes([character])
-
- ex = string[n + 1 :]
-
- assert self.member(labels_wrong_destination + ex) != self.member(
- labels_correct_destination + ex
- )
-
- # Adding this experiment causes us to distinguish the wrong
- # destination from the correct one.
- self.__states[wrong_destination].experiments[ex] = self.member(
- labels_wrong_destination + ex
- )
-
- # We now clear the cached details that caused us to make this error
- # so that when we recalculate this transition we get to a
- # (hopefully now correct) different state.
- del self.__states[source].transitions[character]
- self.__dfa_changed()
-
- # We immediately recalculate the transition so that we can check
- # that it has changed as we expect it to have.
- new_destination = self.transition(source, string[n])
- assert new_destination != wrong_destination
-
-
-class LearnedDFA(DFA):
- """This implements a lazily calculated DFA where states
- are labelled by some string that reaches them, and are
- distinguished by a membership test and a set of experiments."""
-
- def __init__(self, lstar):
- super().__init__()
- self.__lstar = lstar
- self.__generation = lstar.generation
-
- def __check_changed(self):
- if self.__generation != self.__lstar.generation:
- raise InvalidState(
- "The underlying L* model has changed, so this DFA is no longer valid. "
- "If you want to preserve a previously learned DFA for posterity, call "
- "canonicalise() on it first."
- )
-
- def label(self, i):
- self.__check_changed()
- return self.__lstar.label(i)
-
- @property
- def start(self):
- self.__check_changed()
- return self.__lstar.start
-
- def is_accepting(self, i):
- self.__check_changed()
- return self.__lstar.is_accepting(i)
-
- def transition(self, i, c):
- self.__check_changed()
-
- return self.__lstar.transition(i, c)
-
- @cached
- def successor_states(self, state):
- """Returns all of the distinct states that can be reached via one
- transition from ``state``, in the lexicographic order of the
- smallest character that reaches them."""
- seen = set()
- result = []
- for c in self.__lstar.normalizer.representatives():
- j = self.transition(state, c)
- if j not in seen:
- seen.add(j)
- result.append(j)
- return tuple(result)
-
-
-class IntegerNormalizer:
- """A class for replacing non-negative integers with a
- "canonical" value that is equivalent for all relevant
- purposes."""
-
- def __init__(self):
- # We store canonical values as a sorted list of integers
- # with each value being treated as equivalent to the largest
- # integer in the list that is below it.
- self.__values = IntList([0])
- self.__cache = {}
-
- def __repr__(self):
- return f"IntegerNormalizer({list(self.__values)!r})"
-
- def __copy__(self):
- result = IntegerNormalizer()
- result.__values = IntList(self.__values)
- return result
-
- def representatives(self):
- yield from self.__values
-
- def normalize(self, value):
- """Return the canonical integer considered equivalent
- to ``value``."""
- try:
- return self.__cache[value]
- except KeyError:
- pass
- i = bisect_right(self.__values, value) - 1
- assert i >= 0
- return self.__cache.setdefault(value, self.__values[i])
-
- def distinguish(self, value, test):
- """Checks whether ``test`` gives the same answer for
- ``value`` and ``self.normalize(value)``. If it does
- not, updates the list of canonical values so that
- it does.
-
- Returns True if and only if this makes a change to
- the underlying canonical values."""
- canonical = self.normalize(value)
- if canonical == value:
- return False
-
- value_test = test(value)
-
- if test(canonical) == value_test:
- return False
-
- self.__cache.clear()
-
- def can_lower(k):
- new_canon = value - k
- if new_canon <= canonical:
- return False
- return test(new_canon) == value_test
-
- new_canon = value - find_integer(can_lower)
-
- assert new_canon not in self.__values
-
- insort(self.__values, new_canon)
-
- assert self.normalize(value) == new_canon
- return True
diff --git a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py
index ef81176a902..6f356706def 100644
--- a/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py
+++ b/contrib/python/hypothesis/py3/hypothesis/internal/conjecture/junkdrawer.py
@@ -441,49 +441,6 @@ def find_integer(f: Callable[[int], bool]) -> int:
return lo
-class NotFound(Exception):
- pass
-
-
-class SelfOrganisingList(Generic[T]):
- """A self-organising list with the move-to-front heuristic.
-
- A self-organising list is a collection which we want to retrieve items
- that satisfy some predicate from. There is no faster way to do this than
- a linear scan (as the predicates may be arbitrary), but the performance
- of a linear scan can vary dramatically - if we happen to find a good item
- on the first try it's O(1) after all. The idea of a self-organising list is
- to reorder the list to try to get lucky this way as often as possible.
-
- There are various heuristics we could use for this, and it's not clear
- which are best. We use the simplest, which is that every time we find
- an item we move it to the "front" (actually the back in our implementation
- because we iterate in reverse) of the list.
-
- """
-
- def __init__(self, values: Iterable[T] = ()) -> None:
- self.__values = list(values)
-
- def __repr__(self) -> str:
- return f"SelfOrganisingList({self.__values!r})"
-
- def add(self, value: T) -> None:
- """Add a value to this list."""
- self.__values.append(value)
-
- def find(self, condition: Callable[[T], bool]) -> T:
- """Returns some value in this list such that ``condition(value)`` is
- True. If no such value exists raises ``NotFound``."""
- for i in range(len(self.__values) - 1, -1, -1):
- value = self.__values[i]
- if condition(value):
- del self.__values[i]
- self.__values.append(value)
- return value
- raise NotFound("No values satisfying condition")
-
-
_gc_initialized = False
_gc_start: float = 0
_gc_cumulative_time: float = 0
diff --git a/contrib/python/hypothesis/py3/hypothesis/version.py b/contrib/python/hypothesis/py3/hypothesis/version.py
index 9dccf2fedc7..42e857e2b94 100644
--- a/contrib/python/hypothesis/py3/hypothesis/version.py
+++ b/contrib/python/hypothesis/py3/hypothesis/version.py
@@ -8,5 +8,5 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
-__version_info__ = (6, 151, 8)
+__version_info__ = (6, 151, 9)
__version__ = ".".join(map(str, __version_info__))
diff --git a/contrib/python/hypothesis/py3/ya.make b/contrib/python/hypothesis/py3/ya.make
index c523e57783f..ad34b51c676 100644
--- a/contrib/python/hypothesis/py3/ya.make
+++ b/contrib/python/hypothesis/py3/ya.make
@@ -2,7 +2,7 @@
PY3_LIBRARY()
-VERSION(6.151.8)
+VERSION(6.151.9)
LICENSE(MPL-2.0)
@@ -57,8 +57,6 @@ PY_SRCS(
hypothesis/internal/conjecture/choice.py
hypothesis/internal/conjecture/data.py
hypothesis/internal/conjecture/datatree.py
- hypothesis/internal/conjecture/dfa/__init__.py
- hypothesis/internal/conjecture/dfa/lstar.py
hypothesis/internal/conjecture/engine.py
hypothesis/internal/conjecture/floats.py
hypothesis/internal/conjecture/junkdrawer.py