Fix average_size calculation #3160

Merged 1 commit on Nov 28, 2021
1 change: 1 addition & 0 deletions AUTHORS.rst
@@ -115,6 +115,7 @@ their individual contributions.
 * `Pierre-Jean Campigotto <https://github.com/PJCampi>`_
 * `Przemek Konopko <https://github.com/soutys>`_
 * `Richard Boulton <https://www.github.com/rboulton>`_ (richard@tartarus.org)
+* `Robert Howlett <https://github.com/jebob>`_
 * `Robert Knight <https://github.com/robertknight>`_ (robertknight@gmail.com)
 * `Rónán Carrigan <https://www.github.com/rcarriga>`_ (rcarriga@tcd.ie)
 * `Ruben Opdebeeck <https://github.com/ROpdebee>`_
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,3 @@
+RELEASE_TYPE: patch
+
+This release fixes some internal calculations related to collection sizes (:issue:`3143`).
5 changes: 4 additions & 1 deletion hypothesis-python/src/hypothesis/extra/array_api.py
@@ -347,7 +347,10 @@ def do_draw(self, data):
             # sqrt isn't chosen for any particularly principled reason. It
             # just grows reasonably quickly but sublinearly, and for small
             # arrays it represents a decent fraction of the array size.
-            average_size=math.sqrt(self.array_size),
+            average_size=min(
+                0.9 * self.array_size,  # ensure small arrays sometimes use fill
+                max(10, math.sqrt(self.array_size)),  # ...but *only* sometimes
+            ),
         )

         assigned = set()
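For intuition, here is a small standalone sketch (our own, not part of the diff; the helper name is ours) of the new average_size expression, which the numpy extra below adopts as well. Small arrays get an average just below their full size so fill values are still exercised, mid-sized arrays hit the floor of 10, and large arrays keep the sublinear sqrt growth:

import math

def average_size(array_size):
    # Mirrors the expression added in this PR to both extras.
    return min(
        0.9 * array_size,  # small arrays: stay below array_size so fill still occurs
        max(10, math.sqrt(array_size)),  # large arrays: sqrt growth, floored at 10
    )

print(average_size(4))  # 3.6   -> the 0.9 * array_size bound wins
print(average_size(100))  # 10.0  -> the floor of 10
print(average_size(10000))  # 100.0 -> sqrt(array_size)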
7 changes: 5 additions & 2 deletions hypothesis-python/src/hypothesis/extra/numpy.py
@@ -255,7 +255,10 @@ def do_draw(self, data):
             # sqrt isn't chosen for any particularly principled reason. It
             # just grows reasonably quickly but sublinearly, and for small
             # arrays it represents a decent fraction of the array size.
-            average_size=math.sqrt(self.array_size),
+            average_size=min(
+                0.9 * self.array_size,  # ensure small arrays sometimes use fill
+                max(10, math.sqrt(self.array_size)),  # ...but *only* sometimes
+            ),
         )

         needs_fill = np.full(self.array_size, True)
@@ -312,7 +315,7 @@ def do_draw(self, data):
         if mismatch.any():
             raise InvalidArgument(
                 "Array elements %r cannot be represented as dtype %r - instead "
-                "they becomes %r. Use a more precise strategy, e.g. without "
+                "they become %r. Use a more precise strategy, e.g. without "
                 "trailing null bytes, as this will be an error future versions."
                 % (result[mismatch], self.dtype, out[mismatch])
             )
42 changes: 40 additions & 2 deletions hypothesis-python/src/hypothesis/internal/conjecture/utils.py
@@ -19,6 +19,7 @@
 import math
 import sys
 from collections import OrderedDict, abc
+from functools import lru_cache

 from hypothesis.errors import InvalidArgument
 from hypothesis.internal.compat import floor, int_from_bytes
@@ -390,7 +391,7 @@ def __init__(self, data, min_size, max_size, average_size):
         self.min_size = min_size
         self.max_size = max_size
         self.data = data
-        self.stopping_value = 1 - 1.0 / (1 + average_size)
+        self.p_continue = _calc_p_continue(average_size - min_size, max_size - min_size)
         self.count = 0
         self.rejections = 0
         self.drawn = False
@@ -418,7 +419,7 @@ def more(self):
         elif self.count >= self.max_size:
             forced_result = False
         should_continue = biased_coin(
-            self.data, self.stopping_value, forced=forced_result
+            self.data, self.p_continue, forced=forced_result
         )

         if should_continue:
@@ -442,3 +443,40 @@ def reject(self):
             self.data.mark_invalid()
         else:
             self.force_stop = True
+
+
+@lru_cache()
+def _calc_p_continue(desired_avg, max_size):
+    """Return the p_continue which will generate the desired average size."""
+    if desired_avg == max_size:
+        return 1.0
+    p_continue = 1 - 1.0 / (1 + desired_avg)
+    if p_continue == 0 or max_size == float("inf"):
+        assert 0 <= p_continue < 1, p_continue
+        return p_continue
+    # For small max_size, the infinite-series p_continue is a poor approximation,
+    # and while we can't solve the polynomial, a few rounds of iteration quickly
+    # get us a good approximate solution in almost all cases (sometimes exact!).
+    while _p_continue_to_avg(p_continue, max_size) > desired_avg:
+        # This is impossible over the reals, but *can* happen with floats.
+        p_continue -= 0.0001

Contributor:
In this case I think we can just return p_continue (or do nothing, thus skipping the zeroth iteration of the while loop)? If 1 - 1.0 / (1 + desired_avg) is close enough to the correct answer that floating-point error causes p_continue to be an overestimate, then we're close enough for practical purposes.

Member (Author):
I agree that the estimate is close enough that we could just use the initial p_continue. However, it's cheap to make this adjustment, and it allows us to test that our binary search always maintains a strict upper bound.

+    # Let's binary-search our way to a better estimate! We tried fancier options
+    # like gradient descent, but this is numerically stable and works better.
+    hi = 1.0
+    while desired_avg - _p_continue_to_avg(p_continue, max_size) > 0.01:
+        assert p_continue < hi
+        mid = (p_continue + hi) / 2
+        if _p_continue_to_avg(mid, max_size) <= desired_avg:
+            p_continue = mid
+        else:
+            hi = mid
+    assert 0 < p_continue < 1
+    assert _p_continue_to_avg(p_continue, max_size) <= desired_avg
+    return p_continue
+
+
+def _p_continue_to_avg(p_continue, max_size):
+    """Return the average_size generated by this p_continue and max_size."""
+    if p_continue >= 1:
+        return max_size
+    return (1.0 / (1 - p_continue) - 1) * (1 - p_continue ** max_size)
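The closed form in _p_continue_to_avg is the mean of a geometric distribution truncated at max_size: each further element requires one more successful biased_coin draw, so P(count >= k) = p**k and the expected count is the sum of p**k for k = 1..max_size, i.e. (p / (1 - p)) * (1 - p**max_size), exactly the expression in the diff. A standalone sanity check of that identity (our sketch, not part of the PR):

import math

def p_continue_to_avg(p, max_size):
    # Closed form, as in the diff.
    if p >= 1:
        return max_size
    return (1.0 / (1 - p) - 1) * (1 - p ** max_size)

def brute_force_avg(p, max_size):
    # E[count] = sum of P(count >= k) for k = 1..max_size, where P(count >= k) = p**k.
    return sum(p ** k for k in range(1, max_size + 1))

for p in (0.1, 0.5, 0.9):
    for n in (1, 10, 100):
        assert math.isclose(p_continue_to_avg(p, n), brute_force_avg(p, n))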
2 changes: 1 addition & 1 deletion hypothesis-python/src/hypothesis/provisional.py
@@ -119,7 +119,7 @@ def do_draw(self, data):
         # with a max of 255, that leaves 3 characters for a TLD.
         # Allowing any more subdomains would not leave enough
         # characters for even the shortest possible TLDs.
-        elements = cu.many(data, min_size=1, average_size=1, max_size=126)
+        elements = cu.many(data, min_size=1, average_size=3, max_size=126)
         while elements.more():
             # Generate a new valid subdomain using the regex strategy.
             sub_domain = data.draw(st.from_regex(self.label_regex, fullmatch=True))
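The bump from average_size=1 to 3 matters because many now computes p_continue from average_size - min_size: with min_size=1 and an average of 1, the excess average is 0, p_continue would be 0, and the strategy would never draw a second subdomain label. One hedged way to observe the new behaviour (our snippet, not from the PR, assuming a Hypothesis install containing this fix; find and domains are public APIs):

from hypothesis import find
from hypothesis.provisional import domains

# Multi-label domains (several dots) should now be easy to generate.
print(find(domains(), lambda d: d.count(".") >= 3))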
4 changes: 4 additions & 0 deletions hypothesis-python/tests/conjecture/test_utils.py
@@ -320,3 +320,7 @@ def test_can_draw_arbitrary_fractions(p, b):
 def test_samples_from_a_range_directly():
     s = cu.check_sample(range(10 ** 1000), "")
     assert isinstance(s, range)
+
+
+def test_p_continue_to_average_saturates():
+    assert cu._p_continue_to_avg(1.1, 100) == 100

Contributor:
Can we replace this special case with an @example on test_p_continue_to_average?

Member (Author):
We do actually have such an example, but we also need this test to reliably get full coverage from our conjecture-coverage task (the other test is in nocover).

20 changes: 20 additions & 0 deletions hypothesis-python/tests/nocover/test_conjecture_utils.py
@@ -15,9 +15,11 @@

 from fractions import Fraction

+from hypothesis import assume, example, given, strategies as st, target
 from hypothesis.internal.compat import int_to_bytes
 from hypothesis.internal.conjecture import utils as cu
 from hypothesis.internal.conjecture.data import ConjectureData, StopTest
+from hypothesis.internal.conjecture.engine import BUFFER_SIZE


 def test_gives_the_correct_probabilities():
@@ -44,3 +46,21 @@ def test_gives_the_correct_probabilities():
             i += 256
         else:
             i += 1
+
+
+# BUFFER_SIZE divided by (2-byte coin + 1-byte element) gives the
+# maximum number of elements that we would ever be able to generate.
+@given(st.floats(0, BUFFER_SIZE // 3), st.integers(0, BUFFER_SIZE // 3))
+def test_p_continue(average_size, max_size):
+    assume(average_size <= max_size)
+    p = cu._calc_p_continue(average_size, max_size)
+    assert 0 <= target(p, label="p") <= 1
+    abs_err = abs(average_size - cu._p_continue_to_avg(p, max_size))
+    assert target(abs_err, label="abs_err") < 0.01
+
+
+@example(1.1, 10)
+@given(st.floats(0, 1), st.integers(0, BUFFER_SIZE // 3))
+def test_p_continue_to_average(p_continue, max_size):
+    average = cu._p_continue_to_avg(p_continue, max_size)
+    assert 0 <= average <= max_size
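Finally, a sketch (ours, not from the PR; these are private helpers, so the import path may change between versions) of the error the binary search removes. For max_size=10 and a desired average of 5, the old infinite-series formula yields a truncated average well short of 5, while _calc_p_continue lands within the 0.01 tolerance asserted above:

from hypothesis.internal.conjecture import utils as cu

desired_avg, max_size = 5, 10

old_p = 1 - 1.0 / (1 + desired_avg)  # the pre-PR estimate, ~0.833
print(cu._p_continue_to_avg(old_p, max_size))  # ~4.19, noticeably below 5
new_p = cu._calc_p_continue(desired_avg, max_size)
print(cu._p_continue_to_avg(new_p, max_size))  # within 0.01 of 5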