Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve performance of generate_novel_prefix #2037

Merged
merged 5 commits into from Jul 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,6 @@
RELEASE_TYPE: patch

This release fixes :issue:`2027`, by changing the way Hypothesis tries to generate distinct examples to be more efficient.

This may result in a slightly different data distribution, and should improve generation performance in general,
but should otherwise have minimal user impact.
93 changes: 29 additions & 64 deletions hypothesis-python/src/hypothesis/internal/conjecture/datatree.py
Expand Up @@ -28,7 +28,7 @@
StopTest,
bits_to_bytes,
)
from hypothesis.internal.conjecture.junkdrawer import IntList, uniform
from hypothesis.internal.conjecture.junkdrawer import IntList


class PreviouslyUnseenBehaviour(HypothesisException):
Expand Down Expand Up @@ -196,31 +196,19 @@ def is_exhausted(self):
described must have been fully explored."""
return self.root.is_exhausted

def find_necessary_prefix_for_novelty(self):
"""Finds a prefix that any novel example must start with.
This is currently only used for generate_novel_prefix, where
it allows us to significantly speed it up in the case where
we start with a very shallow tree.

For example, suppose we had a test function that looked like:

.. code-block:: python

def test_function(data):
while data.draw_bits(1):
pass
def generate_novel_prefix(self, random):
"""Generate a short random string that (after rewriting) is not
a prefix of any buffer previously added to the tree.

This has a unique example of size ``n`` for any ``n``, but we
only draw that example with probability ``2 ** (-n)`` through
random sampling, so we will very rapidly exhaust the search
space. By first searching to find the necessary sequence
that any novel example must satisfy, we can find novel
examples with probability 1 instead.
The resulting prefix is essentially arbitrary - it would be nice
for it to be uniform at random, but previous attempts to do that
have proven too expensive.
"""
necessary_prefix = bytearray()
assert not self.is_exhausted
novel_prefix = bytearray()

def append_int(n_bits, value):
necessary_prefix.extend(int_to_bytes(value, bits_to_bytes(n_bits)))
novel_prefix.extend(int_to_bytes(value, bits_to_bytes(n_bits)))

current_node = self.root
while True:
Expand All @@ -231,56 +219,33 @@ def append_int(n_bits, value):
if i in current_node.forced:
append_int(n_bits, value)
else:
while True:
k = random.getrandbits(n_bits)
if k != value:
append_int(n_bits, k)
break
# We've now found a value that is allowed to
# vary, so what follows is not fixed.
return hbytes(necessary_prefix)
return hbytes(novel_prefix)
else:
assert not isinstance(current_node.transition, Conclusion)
if current_node.transition is None:
return hbytes(necessary_prefix)
return hbytes(novel_prefix)
branch = current_node.transition
assert isinstance(branch, Branch)
if len(branch.children) < branch.max_children:
return hbytes(necessary_prefix)
else:
choices = [
(k, v) for k, v in branch.children.items() if not v.is_exhausted
]
assert len(choices) > 0
if len(choices) == 1:
k, v = choices[0]
append_int(branch.bit_length, k)
current_node = v
else:
return hbytes(necessary_prefix)

def generate_novel_prefix(self, random):
"""Generate a short random string that (after rewriting) is not
a prefix of any buffer previously added to the tree.

This is logically equivalent to generating the test case uniformly
at random and returning the first point at which we hit unknown
territory, but with an optimisation for the only common case where
that would be inefficient.
"""
assert not self.is_exhausted

initial = self.find_necessary_prefix_for_novelty()
n_bits = branch.bit_length

while True:

def draw_bytes(data, n):
i = data.index
if i < len(initial):
return initial[i : i + n]
else:
return uniform(random, n)

data = ConjectureData(draw_bytes=draw_bytes, max_length=float("inf"))
try:
self.simulate_test_function(data)
except PreviouslyUnseenBehaviour:
return hbytes(data.buffer)
while True:
k = random.getrandbits(n_bits)
try:
child = branch.children[k]
except KeyError:
append_int(n_bits, k)
return hbytes(novel_prefix)
if not child.is_exhausted:
append_int(n_bits, k)
current_node = child
break

def rewrite(self, buffer):
"""Use previously seen ConjectureData objects to return a tuple of
Expand Down
14 changes: 14 additions & 0 deletions hypothesis-python/tests/common/debug.py
Expand Up @@ -17,8 +17,10 @@

from __future__ import absolute_import, division, print_function

import hypothesis.strategies as st
from hypothesis import HealthCheck, Verbosity, given, settings as Settings
from hypothesis.errors import NoSuchExample, Unsatisfiable
from hypothesis.internal.conjecture.data import ConjectureData, StopTest
from hypothesis.internal.reflection import get_pretty_function_description
from tests.common.utils import no_shrink

Expand Down Expand Up @@ -95,3 +97,15 @@ def assert_examples(s):
assert predicate(s), msg

assert_examples()


def assert_can_trigger_event(strategy, predicate):
    """Assert that drawing from ``strategy`` can record a matching event.

    Uses ``find_any`` over ``st.binary()`` to look for a byte buffer which,
    when replayed through ``ConjectureData.for_buffer`` and used to draw from
    ``strategy``, leaves at least one entry in the data's ``events`` for
    which ``predicate`` returns a truthy value.
    """

    def test(buf):
        replay = ConjectureData.for_buffer(buf)
        try:
            replay.draw(strategy)
        except StopTest:
            # StopTest is deliberately ignored: any events recorded before
            # it was raised are still inspected below.
            pass
        for event in replay.events:
            if predicate(event):
                return True
        return False

    find_any(st.binary(), test)
14 changes: 0 additions & 14 deletions hypothesis-python/tests/cover/test_conjecture_data_tree.py
Expand Up @@ -341,20 +341,6 @@ def test_child_becomes_exhausted_after_split():
assert tree.root.transition.children[0].is_exhausted


def test_will_avoid_exhausted_branches_for_necessary_prefix():
    """Check the necessary prefix after recording two runs in a DataTree.

    The first run draws a single bit from the buffer ``[0]``; the second
    draws one bit and then eight bits from ``[1, 1]``. Afterwards the tree's
    necessary prefix for novelty is expected to be exactly ``[1]``.
    """
    tree = DataTree()

    # First recorded run: a single one-bit draw.
    short_run = ConjectureData.for_buffer([0], observer=tree.new_observer())
    short_run.draw_bits(1)
    short_run.freeze()

    # Second recorded run: a one-bit draw followed by an eight-bit draw.
    long_run = ConjectureData.for_buffer([1, 1], observer=tree.new_observer())
    long_run.draw_bits(1)
    long_run.draw_bits(8)
    long_run.freeze()

    necessary_prefix = tree.find_necessary_prefix_for_novelty()
    assert list(necessary_prefix) == [1]


def test_will_generate_novel_prefix_to_avoid_exhausted_branches():
tree = DataTree()
data = ConjectureData.for_buffer([1], observer=tree.new_observer())
Expand Down
2 changes: 1 addition & 1 deletion hypothesis-python/tests/cover/test_slippage.py
Expand Up @@ -167,7 +167,7 @@ def test_shrinks_both_failures():
duds = set()
second_target = [None]

@settings(database=None)
@settings(database=None, max_examples=1000)
@given(st.integers(min_value=0).map(int))
def test(i):
if i >= 10000:
Expand Down
11 changes: 10 additions & 1 deletion hypothesis-python/tests/datetime/test_pytz_timezones.py
Expand Up @@ -26,7 +26,7 @@
from hypothesis.errors import InvalidArgument
from hypothesis.extra.pytz import timezones
from hypothesis.strategies import datetimes, sampled_from, times
from tests.common.debug import minimal
from tests.common.debug import assert_can_trigger_event, minimal


def test_utc_is_minimal():
Expand Down Expand Up @@ -97,3 +97,12 @@ def test_can_generate_non_utc():
def test_time_bounds_must_be_naive(name, val):
with pytest.raises(InvalidArgument):
times(**{name: val}).validate()


def test_can_trigger_error_in_draw_near_max_date():
    """Check that a "Failed to draw a datetime" event can be triggered.

    The strategy draws timezone-aware datetimes whose lower bound is three
    days before ``datetime.max``.
    """
    lower_bound = dt.datetime.max - dt.timedelta(days=3)
    near_max_datetimes = datetimes(min_value=lower_bound, timezones=timezones())
    assert_can_trigger_event(
        near_max_datetimes,
        lambda event: "Failed to draw a datetime" in event,
    )
10 changes: 10 additions & 0 deletions hypothesis-python/tests/nocover/test_regressions.py
Expand Up @@ -19,6 +19,8 @@

import warnings

import hypothesis.strategies as st
from hypothesis import given
from hypothesis._settings import note_deprecation
from hypothesis.errors import HypothesisDeprecationWarning
from hypothesis.strategies import composite, integers
Expand All @@ -42,3 +44,11 @@ def deprecated_strategy(draw):
assert isinstance(record.message, HypothesisDeprecationWarning)
assert record.message.args == (msg,)
assert record.filename == __file__


@given(
    x=st.one_of(st.just(0) | st.just(1)),
    y=st.one_of(st.just(0) | st.just(1) | st.just(2)),
)
def test_performance_issue_2027(x, y):
    """Regression test for issue 2027.

    ``x`` has two possible values and ``y`` has three. The body is
    intentionally empty: only example generation is exercised.
    """