Merge pull request #3160 from Zac-HD/denser-small-arrays
Fix average_size calculation
Zac-HD committed Nov 28, 2021
2 parents d28ffc2 + 59521cd commit 84afe88
Showing 8 changed files with 78 additions and 6 deletions.
1 change: 1 addition & 0 deletions AUTHORS.rst
@@ -115,6 +115,7 @@ their individual contributions.
* `Pierre-Jean Campigotto <https://github.com/PJCampi>`_
* `Przemek Konopko <https://github.com/soutys>`_
* `Richard Boulton <https://www.github.com/rboulton>`_ (richard@tartarus.org)
+ * `Robert Howlett <https://github.com/jebob>`_
* `Robert Knight <https://github.com/robertknight>`_ (robertknight@gmail.com)
* `Rónán Carrigan <https://www.github.com/rcarriga>`_ (rcarriga@tcd.ie)
* `Ruben Opdebeeck <https://github.com/ROpdebee>`_
3 changes: 3 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,3 @@
RELEASE_TYPE: patch

This release fixes some internal calculations related to collection sizes (:issue:`3143`).
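For a concrete sense of the fix: the old code derived the continue probability from the desired average alone, which systematically under-shoots once max_size truncates the geometric process. A minimal sketch (the numbers are illustrative, not from the changelog; the helper mirrors _p_continue_to_avg, added in utils.py below):

    # Illustrative only: the old stopping_value vs. the average it actually produces.
    def p_continue_to_avg(p_continue, max_size):
        # Mirrors the _p_continue_to_avg helper added in this commit.
        if p_continue >= 1:
            return max_size
        return (1.0 / (1 - p_continue) - 1) * (1 - p_continue ** max_size)

    desired_avg, max_size = 3, 5
    old_p = 1 - 1.0 / (1 + desired_avg)        # 0.75; ignores max_size entirely
    print(p_continue_to_avg(old_p, max_size))  # ~2.29, noticeably below the desired 3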
5 changes: 4 additions & 1 deletion hypothesis-python/src/hypothesis/extra/array_api.py
@@ -347,7 +347,10 @@ def do_draw(self, data):
# sqrt isn't chosen for any particularly principled reason. It
# just grows reasonably quickly but sublinearly, and for small
# arrays it represents a decent fraction of the array size.
- average_size=math.sqrt(self.array_size),
+ average_size=min(
+     0.9 * self.array_size,  # ensure small arrays sometimes use fill
+     max(10, math.sqrt(self.array_size)),  # ...but *only* sometimes
+ ),
)

assigned = set()
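To see what the new clamp does, here is a quick sketch with illustrative sizes (the same expression is applied in numpy.py below; the function name is mine, pulled out of the diff for illustration). Small arrays keep average_size strictly below the array size so fill is still exercised, mid-sized arrays get a floor of 10 elements, and large arrays keep the old sqrt behaviour:

    import math

    def average_size(array_size):
        # The expression from the hunk above, extracted for illustration.
        return min(0.9 * array_size, max(10, math.sqrt(array_size)))

    print(average_size(4))       # 3.6    (0.9 * n wins: fill still gets exercised)
    print(average_size(50))      # 10.0   (the floor of 10 wins: denser than sqrt(50) ~= 7.1)
    print(average_size(10_000))  # 100.0  (sqrt wins, as before)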
7 changes: 5 additions & 2 deletions hypothesis-python/src/hypothesis/extra/numpy.py
@@ -255,7 +255,10 @@ def do_draw(self, data):
# sqrt isn't chosen for any particularly principled reason. It
# just grows reasonably quickly but sublinearly, and for small
# arrays it represents a decent fraction of the array size.
- average_size=math.sqrt(self.array_size),
+ average_size=min(
+     0.9 * self.array_size,  # ensure small arrays sometimes use fill
+     max(10, math.sqrt(self.array_size)),  # ...but *only* sometimes
+ ),
)

needs_fill = np.full(self.array_size, True)
@@ -312,7 +315,7 @@ def do_draw(self, data):
if mismatch.any():
raise InvalidArgument(
"Array elements %r cannot be represented as dtype %r - instead "
"they becomes %r. Use a more precise strategy, e.g. without "
"they become %r. Use a more precise strategy, e.g. without "
"trailing null bytes, as this will be an error future versions."
% (result[mismatch], self.dtype, out[mismatch])
)
42 changes: 40 additions & 2 deletions hypothesis-python/src/hypothesis/internal/conjecture/utils.py
@@ -19,6 +19,7 @@
import math
import sys
from collections import OrderedDict, abc
+ from functools import lru_cache

from hypothesis.errors import InvalidArgument
from hypothesis.internal.compat import floor, int_from_bytes
@@ -390,7 +391,7 @@ def __init__(self, data, min_size, max_size, average_size):
self.min_size = min_size
self.max_size = max_size
self.data = data
- self.stopping_value = 1 - 1.0 / (1 + average_size)
+ self.p_continue = _calc_p_continue(average_size - min_size, max_size - min_size)
self.count = 0
self.rejections = 0
self.drawn = False
@@ -418,7 +419,7 @@ def more(self):
elif self.count >= self.max_size:
forced_result = False
should_continue = biased_coin(
- self.data, self.stopping_value, forced=forced_result
+ self.data, self.p_continue, forced=forced_result
)

if should_continue:
@@ -442,3 +443,40 @@ def reject(self):
self.data.mark_invalid()
else:
self.force_stop = True


@lru_cache()
def _calc_p_continue(desired_avg, max_size):
"""Return the p_continue which will generate the desired average size."""
if desired_avg == max_size:
return 1.0
p_continue = 1 - 1.0 / (1 + desired_avg)
if p_continue == 0 or max_size == float("inf"):
assert 0 <= p_continue < 1, p_continue
return p_continue
# For small max_size, the infinite-series p_continue is a poor approximation,
# and while we can't solve the polynomial a few rounds of iteration quickly
# gets us a good approximate solution in almost all cases (sometimes exact!).
while _p_continue_to_avg(p_continue, max_size) > desired_avg:
# This is impossible over the reals, but *can* happen with floats.
p_continue -= 0.0001
# Let's binary-search our way to a better estimate! We tried fancier options
# like gradient descent, but this is numerically stable and works better.
hi = 1.0
while desired_avg - _p_continue_to_avg(p_continue, max_size) > 0.01:
assert p_continue < hi
mid = (p_continue + hi) / 2
if _p_continue_to_avg(mid, max_size) <= desired_avg:
p_continue = mid
else:
hi = mid
assert 0 < p_continue < 1
assert _p_continue_to_avg(p_continue, max_size) <= desired_avg
return p_continue


def _p_continue_to_avg(p_continue, max_size):
"""Return the average_size generated by this p_continue and max_size."""
if p_continue >= 1:
return max_size
return (1.0 / (1 - p_continue) - 1) * (1 - p_continue ** max_size)
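The closed form in _p_continue_to_avg is the mean of a geometric process truncated at max_size: since P(size >= k) = p^k for k = 1..max_size, E[size] = sum of p^k over that range = (p / (1 - p)) * (1 - p ** max_size), which is exactly the expression returned. A quick empirical sanity check (a sketch, not part of the commit) against a simulation of the process that `many` models:

    import random

    def p_continue_to_avg(p_continue, max_size):
        # Same closed form as cu._p_continue_to_avg above.
        if p_continue >= 1:
            return max_size
        return (1.0 / (1 - p_continue) - 1) * (1 - p_continue ** max_size)

    def simulate_size(p_continue, max_size, rng):
        # One run of biased coins, truncated at max_size (min_size=0 case).
        size = 0
        while size < max_size and rng.random() < p_continue:
            size += 1
        return size

    rng = random.Random(0)
    p, max_size = 0.9, 10
    empirical = sum(simulate_size(p, max_size, rng) for _ in range(100_000)) / 100_000
    print(empirical, p_continue_to_avg(p, max_size))  # both ~= 5.86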
2 changes: 1 addition & 1 deletion hypothesis-python/src/hypothesis/provisional.py
@@ -119,7 +119,7 @@ def do_draw(self, data):
# with a max of 255, that leaves 3 characters for a TLD.
# Allowing any more subdomains would not leave enough
# characters for even the shortest possible TLDs.
- elements = cu.many(data, min_size=1, average_size=1, max_size=126)
+ elements = cu.many(data, min_size=1, average_size=3, max_size=126)
while elements.more():
# Generate a new valid subdomain using the regex strategy.
sub_domain = data.draw(st.from_regex(self.label_regex, fullmatch=True))
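The bump from average_size=1 to 3 follows from the new __init__ above: p_continue is now computed from average_size - min_size, so with min_size=1 the old value left zero extra budget and every domain would have drawn exactly one label. A simplified reading (my own sketch, assuming the unbounded-case formula dominates here since max_size=126 is large):

    def calc_p_continue(desired_avg):
        # Simplified from _calc_p_continue: the large-max_size case.
        return 1 - 1.0 / (1 + desired_avg)

    print(calc_p_continue(1 - 1))  # 0.0   -> would never draw a second label
    print(calc_p_continue(3 - 1))  # ~0.67 -> a couple of extra labels on average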
4 changes: 4 additions & 0 deletions hypothesis-python/tests/conjecture/test_utils.py
@@ -320,3 +320,7 @@ def test_can_draw_arbitrary_fractions(p, b):
def test_samples_from_a_range_directly():
s = cu.check_sample(range(10 ** 1000), "")
assert isinstance(s, range)


def test_p_continue_to_average_saturates():
assert cu._p_continue_to_avg(1.1, 100) == 100
20 changes: 20 additions & 0 deletions hypothesis-python/tests/nocover/test_conjecture_utils.py
@@ -15,9 +15,11 @@

from fractions import Fraction

from hypothesis import assume, example, given, strategies as st, target
from hypothesis.internal.compat import int_to_bytes
from hypothesis.internal.conjecture import utils as cu
from hypothesis.internal.conjecture.data import ConjectureData, StopTest
from hypothesis.internal.conjecture.engine import BUFFER_SIZE


def test_gives_the_correct_probabilities():
@@ -44,3 +46,21 @@ def test_gives_the_correct_probabilities():
i += 256
else:
i += 1


# BUFFER_SIZE divided by (2-byte coin + 1-byte element) gives the
# maximum number of elements that we would ever be able to generate.
@given(st.floats(0, BUFFER_SIZE // 3), st.integers(0, BUFFER_SIZE // 3))
def test_p_continue(average_size, max_size):
assume(average_size <= max_size)
p = cu._calc_p_continue(average_size, max_size)
assert 0 <= target(p, label="p") <= 1
abs_err = abs(average_size - cu._p_continue_to_avg(p, max_size))
assert target(abs_err, label="abs_err") < 0.01


@example(1.1, 10)
@given(st.floats(0, 1), st.integers(0, BUFFER_SIZE // 3))
def test_p_continue_to_average(p_continue, max_size):
average = cu._p_continue_to_avg(p_continue, max_size)
assert 0 <= average <= max_size
