Prune parts of the data tree that have discards in them

HypothesisWorks · Nov 7, 2019 · 70f902d · 70f902d
1 parent e978f32
commit 70f902d
Show file tree

Hide file tree

Showing 4 changed files with 64 additions and 2 deletions.
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,7 @@
+RELEASE_TYPE: patch
+
+This release changes how Hypothesis manages its search space in cases where it
+generates redundant data. This should cause it to generate significantly fewer
+duplicated examples (especially with short integer ranges), and may cause it to
+produce more useful examples in some cases (especially ones where there is a
+significant amount of filtering).
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
@@ -684,6 +684,9 @@ def draw_bits(self, n_bits, forced, value):
         * ``value`` is the result that ``draw_bits`` returned.
         """
 
+    def kill_branch(self):
+        """Mark this part of the tree as not worth re-exploring. No-op here."""
+
 
 @attr.s(slots=True)
 class ConjectureResult(object):
@@ -897,6 +900,8 @@ def stop_example(self, discard=False):
                     self.mark_invalid()
             else:
                 self.consecutive_discard_counts[-1] = 0
+        if discard:
+            self.observer.kill_branch()
 
     def note_event(self, event):
         self.events.add(event)

diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py b/hypothesis-python/src/hypothesis/internal/conjecture/datatree.py
@@ -300,6 +300,7 @@ def __init__(self, tree):
         self.__current_node = tree.root
         self.__index_in_current_node = 0
         self.__trail = [self.__current_node]
+        self.__kill_point = None
 
     def draw_bits(self, n_bits, forced, value):
         i = self.__index_in_current_node
@@ -349,6 +350,12 @@ def draw_bits(self, n_bits, forced, value):
         if self.__trail[-1] is not self.__current_node:
             self.__trail.append(self.__current_node)
 
+    def kill_branch(self):
+        """Mark this part of the tree as not worth re-exploring."""
+        if self.__kill_point is None:  # only the first call has any effect
+            self.__kill_point = len(self.__trail)  # end bound used by conclude_test
+            self.__current_node.is_exhausted = True
+
     def conclude_test(self, status, interesting_origin):
         """Says that ``status`` occurred at node ``node``. This updates the
         node if necessary and checks for consistency."""
@@ -381,7 +388,12 @@ def conclude_test(self, status, interesting_origin):
         node.check_exhausted()
         assert len(node.values) > 0 or node.check_exhausted()
 
-        for t in reversed(self.__trail):
+        if self.__kill_point is None:
+            end = len(self.__trail)
+        else:
+            end = self.__kill_point
+
+        for t in reversed(self.__trail[:end]):
             # Any node we've traversed might have now become exhausted.
             # We check from the right. As soon as we hit a node that
             # isn't exhausted, this automatically implies that all of

diff --git a/hypothesis-python/tests/cover/test_conjecture_engine.py b/hypothesis-python/tests/cover/test_conjecture_engine.py
@@ -37,7 +37,11 @@
 )
 from hypothesis.internal.conjecture.shrinker import Shrinker, block_program
 from hypothesis.internal.conjecture.shrinking import Float
-from hypothesis.internal.conjecture.utils import Sampler, calc_label_from_name
+from hypothesis.internal.conjecture.utils import (
+    Sampler,
+    calc_label_from_name,
+    integer_range,
+)
 from hypothesis.internal.entropy import deterministic_PRNG
 from tests.common.strategies import SLOW, HardToShrink
 from tests.common.utils import no_shrink
@@ -1397,3 +1401,37 @@ def test_exhaust_space():
         runner.run()
         assert runner.tree.is_exhausted
         assert runner.valid_examples == 2
+
+
+def test_discards_kill_branches():
+    starts = set()  # every one-byte buffer observed so far
+
+    with deterministic_PRNG():
+
+        def test(data):
+            while True:
+                data.start_example(1)
+                b = data.draw_bits(8)
+                data.stop_example(b != 0)  # discard every non-zero draw
+                if len(data.buffer) == 1:
+                    s = hbytes(data.buffer)
+                    assert s not in starts  # no first byte may be tried twice
+                    starts.add(s)
+                if b == 0:
+                    break
+
+        runner = ConjectureRunner(test, settings=TEST_SETTINGS)
+        runner.run()
+        assert runner.call_count == 256  # 2 ** 8 possible values of the first draw
+
+
+@pytest.mark.parametrize("n", range(1, 32))
+def test_number_of_examples_in_integer_range_is_bounded(n):
+    """A run drawing integer_range(data, 0, n) makes at most 2 * n test calls."""
+
+    with deterministic_PRNG():
+        runner = ConjectureRunner(
+            lambda data: integer_range(data, 0, n), settings=TEST_SETTINGS
+        )
+        runner.run()
+        assert runner.call_count <= 2 * n