Skip to content

Commit

Permalink
Fix average_size calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD committed Nov 24, 2021
1 parent e36ffe9 commit 220868a
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 5 deletions.
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,3 @@
RELEASE_TYPE: patch

This release fixes some internal calculations related to collection sizes (:issue:`3143`).
5 changes: 4 additions & 1 deletion hypothesis-python/src/hypothesis/extra/array_api.py
Expand Up @@ -347,7 +347,10 @@ def do_draw(self, data):
# sqrt isn't chosen for any particularly principled reason. It
# just grows reasonably quickly but sublinearly, and for small
# arrays it represents a decent fraction of the array size.
average_size=math.sqrt(self.array_size),
average_size=min(
0.9 * self.array_size, # ensure small arrays sometimes use fill
max(10, math.sqrt(self.array_size)), # ...but *only* sometimes
),
)

assigned = set()
Expand Down
7 changes: 5 additions & 2 deletions hypothesis-python/src/hypothesis/extra/numpy.py
Expand Up @@ -255,7 +255,10 @@ def do_draw(self, data):
# sqrt isn't chosen for any particularly principled reason. It
# just grows reasonably quickly but sublinearly, and for small
# arrays it represents a decent fraction of the array size.
average_size=math.sqrt(self.array_size),
average_size=min(
0.9 * self.array_size, # ensure small arrays sometimes use fill
max(10, math.sqrt(self.array_size)), # ...but *only* sometimes
),
)

needs_fill = np.full(self.array_size, True)
Expand Down Expand Up @@ -312,7 +315,7 @@ def do_draw(self, data):
if mismatch.any():
raise InvalidArgument(
"Array elements %r cannot be represented as dtype %r - instead "
"they becomes %r. Use a more precise strategy, e.g. without "
"they become %r. Use a more precise strategy, e.g. without "
"trailing null bytes, as this will be an error future versions."
% (result[mismatch], self.dtype, out[mismatch])
)
Expand Down
34 changes: 32 additions & 2 deletions hypothesis-python/src/hypothesis/internal/conjecture/utils.py
Expand Up @@ -390,7 +390,9 @@ def __init__(self, data, min_size, max_size, average_size):
self.min_size = min_size
self.max_size = max_size
self.data = data
self.stopping_value = 1 - 1.0 / (1 + average_size)
self.p_continue = self._calc_p_continue(
average_size - min_size, max_size - min_size
)
self.count = 0
self.rejections = 0
self.drawn = False
Expand Down Expand Up @@ -418,7 +420,7 @@ def more(self):
elif self.count >= self.max_size:
forced_result = False
should_continue = biased_coin(
self.data, self.stopping_value, forced=forced_result
self.data, self.p_continue, forced=forced_result
)

if should_continue:
Expand All @@ -442,3 +444,31 @@ def reject(self):
self.data.mark_invalid()
else:
self.force_stop = True

@staticmethod
def _calc_p_continue(desired_avg, max_size):
    """Return a p_continue giving an average size close to desired_avg.

    Both arguments are measured from the minimum size: the constructor
    passes ``average_size - min_size`` and ``max_size - min_size``.

    For an unbounded ``max_size`` (or a ``desired_avg`` of zero) the
    infinite-series solution ``1 - 1 / (1 + desired_avg)`` is returned
    directly.  Otherwise it is refined by a few rounds of iteration, and
    the iteration only ever accepts a value whose capped average, as
    computed by ``_p_continue_to_avg``, is at or below ``desired_avg``.
    """
    p_continue = 1 - 1.0 / (1 + desired_avg)
    if p_continue == 0 or max_size == float("inf"):
        return p_continue
    # Over the reals the capped average can never exceed desired_avg for the
    # infinite-series p_continue, but float rounding can push it a few ulps
    # over.  Left alone, that makes ``inc`` below a tiny negative number, so
    # that ``p_continue * (1 + inc) == p_continue`` and the halving loop
    # could never terminate.  Nudge p_continue down by a growing relative
    # step (starting from machine epsilon) until the invariant holds.
    shrink = 2.0 ** -52
    while 0 < p_continue and desired_avg < _p_continue_to_avg(p_continue, max_size):
        p_continue *= 1 - shrink
        shrink *= 2
    # For small max_size, the infinite-series p_continue is a poor approximation,
    # and while we can't solve the polynomial a few rounds of iteration quickly
    # gets us a good approximate solution in almost all cases (sometimes exact!).
    for i in range(5):
        # Calculate the average_size resulting from our current p_continue,
        # and how we would update p_continue if the relation was linear.
        curr_avg = _p_continue_to_avg(p_continue, max_size)
        assert 0 < curr_avg <= desired_avg or i == 0
        inc = 0.5 * (desired_avg - curr_avg) / (desired_avg + curr_avg)
        # When max_size is only a little larger than desired_avg, this can explode
        # (if p_continue > 1) or just oscillate if we allow curr_avg > desired_avg.
        # Avoid both by decrementing inc so that curr_avg <= desired_avg.
        # A candidate >= 1 is rejected without evaluating it (the average is
        # singular at exactly 1), and we stop as soon as the step is too small
        # to move p_continue at all, which bounds the number of halvings.
        candidate = p_continue * (1 + inc)
        while candidate != p_continue and (
            candidate >= 1
            or desired_avg < _p_continue_to_avg(candidate, max_size)
        ):
            inc /= 2
            candidate = p_continue * (1 + inc)
        p_continue = candidate
    assert 0 < p_continue < 1, p_continue
    return p_continue


def _p_continue_to_avg(p_continue, max_size):
"""Return the average_size generated by this p_continue and max_size."""
return (1.0 / (1 - p_continue) - 1) * (1 - p_continue ** max_size)
10 changes: 10 additions & 0 deletions hypothesis-python/tests/nocover/test_conjecture_utils.py
Expand Up @@ -15,6 +15,7 @@

from fractions import Fraction

from hypothesis import assume, given, strategies as st
from hypothesis.internal.compat import int_to_bytes
from hypothesis.internal.conjecture import utils as cu
from hypothesis.internal.conjecture.data import ConjectureData, StopTest
Expand Down Expand Up @@ -44,3 +45,12 @@ def test_gives_the_correct_probabilities():
i += 256
else:
i += 1


@given(st.floats(0, 50), st.integers(0, 200))
def test_p_continue(average_size, max_size):
    """Check the computed p_continue is a probability with a sane average.

    The average implied by the result must not exceed the requested
    average, and must fall short of it by at most one.
    """
    # Only combinations where the requested average fits under the cap.
    assume(average_size <= max_size)
    p_continue = cu.many._calc_p_continue(average_size, max_size)
    assert 0 <= p_continue <= 1
    lower_bound = max(0, average_size - 1)
    achieved_avg = cu._p_continue_to_avg(p_continue, max_size)
    assert lower_bound <= achieved_avg <= average_size

0 comments on commit 220868a

Please sign in to comment.