From b88f88812b8c3242f19066db4d2f5f97dfb12f8a Mon Sep 17 00:00:00 2001 From: "David R. MacIver" Date: Tue, 2 Jul 2019 10:16:18 +0100 Subject: [PATCH] Bound the number of consecutive discards we allow, terminating all rejection sampling --- .../hypothesis/internal/conjecture/data.py | 20 +++++++++++++++++++ .../tests/cover/test_conjecture_test_data.py | 13 ++++++++++++ 2 files changed, 33 insertions(+) diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py index 97f4a13e56..c2a1ce585b 100644 --- a/hypothesis-python/src/hypothesis/internal/conjecture/data.py +++ b/hypothesis-python/src/hypothesis/internal/conjecture/data.py @@ -752,6 +752,7 @@ def __init__(self, max_length, draw_bytes, observer=None): self.draw_times = [] self.max_depth = 0 self.has_discards = False + self.consecutive_discard_counts = [] self.__result = None @@ -862,15 +863,34 @@ def start_example(self, label): if self.depth > self.max_depth: self.max_depth = self.depth self.__example_record.start_example(label) + self.consecutive_discard_counts.append(0) def stop_example(self, discard=False): if self.frozen: return + self.consecutive_discard_counts.pop() if discard: self.has_discards = True self.depth -= 1 assert self.depth >= -1 self.__example_record.stop_example(discard) + if self.consecutive_discard_counts: + # We block long sequences of discards. This helps us avoid performance + # problems where there is rejection sampling. In particular tests which + # have a very small actual state space but use rejection sampling will + # play badly with generate_novel_prefix() in DataTree, and will end up + # generating very long tests with long runs of the rejection sample. + if discard: + self.consecutive_discard_counts[-1] += 1 + # 20 is a fairly arbitrary limit chosen mostly so that all of the + # existing tests passed under it. Essentially no reasonable + # generation should hit this limit when running in purely random + # mode, but unreasonable generation is fairly widespread, and our + # manipulation of the bitstream can make it more likely. + if self.consecutive_discard_counts[-1] > 20: + self.mark_invalid() + else: + self.consecutive_discard_counts[-1] = 0 def note_event(self, event): self.events.add(event) diff --git a/hypothesis-python/tests/cover/test_conjecture_test_data.py b/hypothesis-python/tests/cover/test_conjecture_test_data.py index 479d0b48d5..581d2c2ad4 100644 --- a/hypothesis-python/tests/cover/test_conjecture_test_data.py +++ b/hypothesis-python/tests/cover/test_conjecture_test_data.py @@ -470,3 +470,16 @@ def test_example_equality(): assert not (ex == "hello") assert ex != "hello" + + +def test_discarded_data_is_eventually_terminated(): + + data = ConjectureData.for_buffer(hbytes(100)) + + with pytest.raises(StopTest): + for _ in hrange(100): + data.start_example(1) + data.draw_bits(1) + data.stop_example(discard=True) + + assert data.status == Status.INVALID