From b88f88812b8c3242f19066db4d2f5f97dfb12f8a Mon Sep 17 00:00:00 2001
From: "David R. MacIver" <david@drmaciver.com>
Date: Tue, 2 Jul 2019 10:16:18 +0100
Subject: [PATCH] Bound the number of consecutive discards we allow,
 terminating all rejection sampling

---
 .../hypothesis/internal/conjecture/data.py    | 20 +++++++++++++++++++
 .../tests/cover/test_conjecture_test_data.py  | 13 ++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
index 97f4a13e56..c2a1ce585b 100644
--- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py
+++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
@@ -752,6 +752,7 @@ def __init__(self, max_length, draw_bytes, observer=None):
         self.draw_times = []
         self.max_depth = 0
         self.has_discards = False
+        self.consecutive_discard_counts = []
 
         self.__result = None
 
@@ -862,15 +863,34 @@ def start_example(self, label):
         if self.depth > self.max_depth:
             self.max_depth = self.depth
         self.__example_record.start_example(label)
+        self.consecutive_discard_counts.append(0)
 
     def stop_example(self, discard=False):
         if self.frozen:
             return
+        self.consecutive_discard_counts.pop()
         if discard:
             self.has_discards = True
         self.depth -= 1
         assert self.depth >= -1
         self.__example_record.stop_example(discard)
+        if self.consecutive_discard_counts:
+            # We block long runs of consecutive discards. This avoids performance
+            # problems with rejection sampling. In particular, tests that have a
+            # very small actual state space but use rejection sampling interact
+            # badly with generate_novel_prefix() in DataTree, and would otherwise
+            # produce very long tests made up of long runs of rejected samples.
+            if discard:
+                self.consecutive_discard_counts[-1] += 1
+                # 20 is a fairly arbitrary limit, chosen mostly so that all of the
+                # existing tests pass under it (the 21st discard in a row trips it).
+                # Essentially no reasonable generation should hit this in purely random
+                # mode, but unreasonable generation is fairly widespread, and our
+                # manipulation of the bitstream can make it more likely.
+                if self.consecutive_discard_counts[-1] > 20:
+                    self.mark_invalid()
+            else:
+                self.consecutive_discard_counts[-1] = 0
 
     def note_event(self, event):
         self.events.add(event)
diff --git a/hypothesis-python/tests/cover/test_conjecture_test_data.py b/hypothesis-python/tests/cover/test_conjecture_test_data.py
index 479d0b48d5..581d2c2ad4 100644
--- a/hypothesis-python/tests/cover/test_conjecture_test_data.py
+++ b/hypothesis-python/tests/cover/test_conjecture_test_data.py
@@ -470,3 +470,16 @@ def test_example_equality():
 
         assert not (ex == "hello")
         assert ex != "hello"
+
+
+def test_discarded_data_is_eventually_terminated():
+
+    data = ConjectureData.for_buffer(hbytes(100))
+
+    with pytest.raises(StopTest):
+        for _ in hrange(100):
+            data.start_example(1)
+            data.draw_bits(1)
+            data.stop_example(discard=True)
+
+    assert data.status == Status.INVALID