HypothesisWorks · Zac-HD · Jul 5, 2019 · Jul 5, 2019 · Jul 5, 2019 · Jul 2, 2019
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,6 @@
+RELEASE_TYPE: patch
+
+This release fixes :issue:`2027` by changing the way Hypothesis tries to generate distinct examples to be more efficient.
+
+This may result in a slightly different data distribution and should improve generation performance in general,
+but should otherwise have minimal user impact.
@@ -28,7 +28,7 @@
     StopTest,
     bits_to_bytes,
 )
-from hypothesis.internal.conjecture.junkdrawer import IntList, uniform
+from hypothesis.internal.conjecture.junkdrawer import IntList
 
 
 class PreviouslyUnseenBehaviour(HypothesisException):
@@ -196,31 +196,19 @@ def is_exhausted(self):
         described must have been fully explored."""
         return self.root.is_exhausted
 
-    def find_necessary_prefix_for_novelty(self):
-        """Finds a prefix that any novel example must start with.
-        This is currently only used for generate_novel_prefix, where
-        it allows us to significantly speed it up in the case where
-        we start with a very shallow tree.
-
-        For example, suppose we had a test function that looked like:
-
-        .. code-block:: python
-
-            def test_function(data):
-                while data.draw_bits(1):
-                    pass
+    def generate_novel_prefix(self, random):
+        """Generate a short random string that (after rewriting) is not
+        a prefix of any buffer previously added to the tree.
 
-        This has a unique example of size ``n`` for any ``n``, but we
-        only draw that example with probability ``2 ** (-n)`` through
-        random sampling, so we will very rapidly exhaust the search
-        space. By first searching to find the necessary sequence
-        that any novel example must satisfy, we can find novel
-        examples with probability 1 instead.
+        The resulting prefix is essentially arbitrary - it would be nice
+        for it to be uniform at random, but previous attempts to do that
+        have proven too expensive.
         """
-        necessary_prefix = bytearray()
+        assert not self.is_exhausted
+        novel_prefix = bytearray()
 
         def append_int(n_bits, value):
-            necessary_prefix.extend(int_to_bytes(value, bits_to_bytes(n_bits)))
+            novel_prefix.extend(int_to_bytes(value, bits_to_bytes(n_bits)))
 
         current_node = self.root
         while True:
@@ -231,56 +219,33 @@ def append_int(n_bits, value):
                 if i in current_node.forced:
                     append_int(n_bits, value)
                 else:
+                    while True:
+                        k = random.getrandbits(n_bits)
+                        if k != value:
+                            append_int(n_bits, k)
+                            break
                     # We've now found a value that is allowed to
                     # vary, so what follows is not fixed.
-                    return hbytes(necessary_prefix)
+                    return hbytes(novel_prefix)
             else:
                 assert not isinstance(current_node.transition, Conclusion)
                 if current_node.transition is None:
-                    return hbytes(necessary_prefix)
+                    return hbytes(novel_prefix)
                 branch = current_node.transition
                 assert isinstance(branch, Branch)
-                if len(branch.children) < branch.max_children:
-                    return hbytes(necessary_prefix)
-                else:
-                    choices = [
-                        (k, v) for k, v in branch.children.items() if not v.is_exhausted
-                    ]
-                    assert len(choices) > 0
-                    if len(choices) == 1:
-                        k, v = choices[0]
-                        append_int(branch.bit_length, k)
-                        current_node = v
-                    else:
-                        return hbytes(necessary_prefix)
-
-    def generate_novel_prefix(self, random):
-        """Generate a short random string that (after rewriting) is not
-        a prefix of any buffer previously added to the tree.
-
-        This is logically equivalent to generating the test case uniformly
-        at random and returning the first point at which we hit unknown
-        territory, but with an optimisation for the only common case where
-        that would be inefficient.
-        """
-        assert not self.is_exhausted
-
-        initial = self.find_necessary_prefix_for_novelty()
+                n_bits = branch.bit_length
 
-        while True:
-
-            def draw_bytes(data, n):
-                i = data.index
-                if i < len(initial):
-                    return initial[i : i + n]
-                else:
-                    return uniform(random, n)
-
-            data = ConjectureData(draw_bytes=draw_bytes, max_length=float("inf"))
-            try:
-                self.simulate_test_function(data)
-            except PreviouslyUnseenBehaviour:
-                return hbytes(data.buffer)
+                while True:
+                    k = random.getrandbits(n_bits)
+                    try:
+                        child = branch.children[k]
+                    except KeyError:
+                        append_int(n_bits, k)
+                        return hbytes(novel_prefix)
+                    if not child.is_exhausted:
+                        append_int(n_bits, k)
+                        current_node = child
+                        break
 
     def rewrite(self, buffer):
         """Use previously seen ConjectureData objects to return a tuple of

diff --git a/hypothesis-python/tests/common/debug.py b/hypothesis-python/tests/common/debug.py
@@ -17,8 +17,10 @@
 
 from __future__ import absolute_import, division, print_function
 
+import hypothesis.strategies as st
 from hypothesis import HealthCheck, Verbosity, given, settings as Settings
 from hypothesis.errors import NoSuchExample, Unsatisfiable
+from hypothesis.internal.conjecture.data import ConjectureData, StopTest
 from hypothesis.internal.reflection import get_pretty_function_description
 from tests.common.utils import no_shrink
 
@@ -95,3 +97,15 @@ def assert_examples(s):
         assert predicate(s), msg
 
     assert_examples()
+
+
+def assert_can_trigger_event(strategy, predicate):
+    def test(buf):
+        data = ConjectureData.for_buffer(buf)
+        try:
+            data.draw(strategy)
+        except StopTest:
+            pass
+        return any(predicate(e) for e in data.events)
+
+    find_any(st.binary(), test)
diff --git a/hypothesis-python/tests/cover/test_conjecture_data_tree.py b/hypothesis-python/tests/cover/test_conjecture_data_tree.py
@@ -341,20 +341,6 @@ def test_child_becomes_exhausted_after_split():
     assert tree.root.transition.children[0].is_exhausted
 
 
-def test_will_avoid_exhausted_branches_for_necessary_prefix():
-    tree = DataTree()
-    data = ConjectureData.for_buffer([0], observer=tree.new_observer())
-    data.draw_bits(1)
-    data.freeze()
-
-    data = ConjectureData.for_buffer([1, 1], observer=tree.new_observer())
-    data.draw_bits(1)
-    data.draw_bits(8)
-    data.freeze()
-
-    assert list(tree.find_necessary_prefix_for_novelty()) == [1]
-
-
 def test_will_generate_novel_prefix_to_avoid_exhausted_branches():
     tree = DataTree()
     data = ConjectureData.for_buffer([1], observer=tree.new_observer())

diff --git a/hypothesis-python/tests/cover/test_slippage.py b/hypothesis-python/tests/cover/test_slippage.py
@@ -167,7 +167,7 @@ def test_shrinks_both_failures():
     duds = set()
     second_target = [None]
 
-    @settings(database=None)
+    @settings(database=None, max_examples=1000)
     @given(st.integers(min_value=0).map(int))
     def test(i):
         if i >= 10000:

diff --git a/hypothesis-python/tests/datetime/test_pytz_timezones.py b/hypothesis-python/tests/datetime/test_pytz_timezones.py
@@ -26,7 +26,7 @@
 from hypothesis.errors import InvalidArgument
 from hypothesis.extra.pytz import timezones
 from hypothesis.strategies import datetimes, sampled_from, times
-from tests.common.debug import minimal
+from tests.common.debug import assert_can_trigger_event, minimal
 
 
 def test_utc_is_minimal():
@@ -97,3 +97,12 @@ def test_can_generate_non_utc():
 def test_time_bounds_must_be_naive(name, val):
     with pytest.raises(InvalidArgument):
         times(**{name: val}).validate()
+
+
+def test_can_trigger_error_in_draw_near_max_date():
+    assert_can_trigger_event(
+        datetimes(
+            min_value=dt.datetime.max - dt.timedelta(days=3), timezones=timezones()
+        ),
+        lambda event: "Failed to draw a datetime" in event,
+    )
diff --git a/hypothesis-python/tests/nocover/test_regressions.py b/hypothesis-python/tests/nocover/test_regressions.py
@@ -19,6 +19,8 @@
 
 import warnings
 
+import hypothesis.strategies as st
+from hypothesis import given
 from hypothesis._settings import note_deprecation
 from hypothesis.errors import HypothesisDeprecationWarning
 from hypothesis.strategies import composite, integers
@@ -42,3 +44,11 @@ def deprecated_strategy(draw):
     assert isinstance(record.message, HypothesisDeprecationWarning)
     assert record.message.args == (msg,)
     assert record.filename == __file__
+
+
+@given(
+    x=st.one_of(st.just(0) | st.just(1)),
+    y=st.one_of(st.just(0) | st.just(1) | st.just(2)),
+)
+def test_performance_issue_2027(x, y):
+    pass