From 220868a0ec3efce2c28862b748f4146ab3e25967 Mon Sep 17 00:00:00 2001
From: Zac-HD
Date: Wed, 24 Nov 2021 17:18:23 +1100
Subject: [PATCH] Fix average_size calculation

---
 hypothesis-python/RELEASE.rst | 3 ++
 .../src/hypothesis/extra/array_api.py | 5 ++-
 .../src/hypothesis/extra/numpy.py | 7 ++--
 .../hypothesis/internal/conjecture/utils.py | 34 +++++++++++++++++--
 .../tests/nocover/test_conjecture_utils.py | 10 ++++++
 5 files changed, 54 insertions(+), 5 deletions(-)
 create mode 100644 hypothesis-python/RELEASE.rst

diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
new file mode 100644
index 0000000000..125a49651c
--- /dev/null
+++ b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,3 @@
+RELEASE_TYPE: patch
+
+This release fixes some internal calculations related to collection sizes (:issue:`3143`).
diff --git a/hypothesis-python/src/hypothesis/extra/array_api.py b/hypothesis-python/src/hypothesis/extra/array_api.py
index adc71ecb14..922fa642f9 100644
--- a/hypothesis-python/src/hypothesis/extra/array_api.py
+++ b/hypothesis-python/src/hypothesis/extra/array_api.py
@@ -347,7 +347,10 @@ def do_draw(self, data):
                 # sqrt isn't chosen for any particularly principled reason. It
                 # just grows reasonably quickly but sublinearly, and for small
                 # arrays it represents a decent fraction of the array size.
-                average_size=math.sqrt(self.array_size),
+                average_size=min(
+                    0.9 * self.array_size, # ensure small arrays sometimes use fill
+                    max(10, math.sqrt(self.array_size)), # ...but *only* sometimes
+                ),
             )
 
             assigned = set()
diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py
index 71d88eece8..e8d37f5381 100644
--- a/hypothesis-python/src/hypothesis/extra/numpy.py
+++ b/hypothesis-python/src/hypothesis/extra/numpy.py
@@ -255,7 +255,10 @@ def do_draw(self, data):
                 # sqrt isn't chosen for any particularly principled reason. It
                 # just grows reasonably quickly but sublinearly, and for small
                 # arrays it represents a decent fraction of the array size.
-                average_size=math.sqrt(self.array_size),
+                average_size=min(
+                    0.9 * self.array_size, # ensure small arrays sometimes use fill
+                    max(10, math.sqrt(self.array_size)), # ...but *only* sometimes
+                ),
             )
 
             needs_fill = np.full(self.array_size, True)
@@ -312,7 +315,7 @@ def do_draw(self, data):
             if mismatch.any():
                 raise InvalidArgument(
                     "Array elements %r cannot be represented as dtype %r - instead "
-                    "they becomes %r. Use a more precise strategy, e.g. without "
+                    "they become %r. Use a more precise strategy, e.g. without "
                     "trailing null bytes, as this will be an error future versions."
                     % (result[mismatch], self.dtype, out[mismatch])
                 )
diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/utils.py b/hypothesis-python/src/hypothesis/internal/conjecture/utils.py
index aa8b074d49..9b6f1304bf 100644
--- a/hypothesis-python/src/hypothesis/internal/conjecture/utils.py
+++ b/hypothesis-python/src/hypothesis/internal/conjecture/utils.py
@@ -390,7 +390,9 @@ def __init__(self, data, min_size, max_size, average_size):
         self.min_size = min_size
         self.max_size = max_size
         self.data = data
-        self.stopping_value = 1 - 1.0 / (1 + average_size)
+        self.p_continue = self._calc_p_continue(
+            average_size - min_size, max_size - min_size
+        )
         self.count = 0
         self.rejections = 0
         self.drawn = False
@@ -418,7 +420,7 @@ def more(self):
             elif self.count >= self.max_size:
                 forced_result = False
             should_continue = biased_coin(
-                self.data, self.stopping_value, forced=forced_result
+                self.data, self.p_continue, forced=forced_result
             )
 
         if should_continue:
@@ -442,3 +444,31 @@ def reject(self):
                 self.data.mark_invalid()
             else:
                 self.force_stop = True
+
+    @staticmethod
+    def _calc_p_continue(desired_avg, max_size):
+        p_continue = 1 - 1.0 / (1 + desired_avg)
+        if p_continue == 0 or max_size == float("inf"):
+            return p_continue
+        # For small max_size, the infinite-series p_continue is a poor approximation,
+        # and while we can't solve the polynomial a few rounds of iteration quickly
+        # gets us a good approximate solution in almost all cases (sometimes exact!).
+        for i in range(5):
+            # Calculate the average_size resulting from our current p_continue,
+            # and how we would update p_continue if the relation was linear.
+            curr_avg = _p_continue_to_avg(p_continue, max_size)
+            assert 0 < curr_avg <= desired_avg or i == 0
+            inc = 0.5 * (desired_avg - curr_avg) / (desired_avg + curr_avg)
+            # When max_size is only a little larger than desired_avg, this can explode
+            # (if p_continue > 1) or just oscillate if we allow curr_avg > desired_avg.
+            # Avoid both by decrementing inc so that curr_avg <= desired_avg.
+            while desired_avg < _p_continue_to_avg(p_continue * (1 + inc), max_size):
+                inc /= 2
+            p_continue *= 1 + inc
+        assert 0 < p_continue < 1, p_continue
+        return p_continue
+
+
+def _p_continue_to_avg(p_continue, max_size):
+    """Return the average_size generated by this p_continue and max_size."""
+    return (1.0 / (1 - p_continue) - 1) * (1 - p_continue ** max_size)
diff --git a/hypothesis-python/tests/nocover/test_conjecture_utils.py b/hypothesis-python/tests/nocover/test_conjecture_utils.py
index 166744f746..c64bca08b7 100644
--- a/hypothesis-python/tests/nocover/test_conjecture_utils.py
+++ b/hypothesis-python/tests/nocover/test_conjecture_utils.py
@@ -15,6 +15,7 @@
 
 from fractions import Fraction
 
+from hypothesis import assume, given, strategies as st
 from hypothesis.internal.compat import int_to_bytes
 from hypothesis.internal.conjecture import utils as cu
 from hypothesis.internal.conjecture.data import ConjectureData, StopTest
@@ -44,3 +45,12 @@ def test_gives_the_correct_probabilities():
             i += 256
         else:
             i += 1
+
+
+@given(st.floats(0, 50), st.integers(0, 200))
+def test_p_continue(average_size, max_size):
+    assume(average_size <= max_size)
+    p = cu.many._calc_p_continue(average_size, max_size)
+    assert 0 <= p <= 1
+    lo = max(0, average_size - 1)
+    assert lo <= cu._p_continue_to_avg(p, max_size) <= average_size