diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..7b28e79746 --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,6 @@ +RELEASE_TYPE: patch + +This patch changes the backing datastructures of :func:`~hypothesis.register_random` +and a few internal caches to use :class:`weakref.WeakKeyDictionary`. This reduces +memory usage and may improve performance when registered :class:`~random.Random` +instances are only used for a subset of your tests (:issue:`3131`). diff --git a/hypothesis-python/src/hypothesis/internal/entropy.py b/hypothesis-python/src/hypothesis/internal/entropy.py index 78b425720a..39c53ef578 100644 --- a/hypothesis-python/src/hypothesis/internal/entropy.py +++ b/hypothesis-python/src/hypothesis/internal/entropy.py @@ -16,10 +16,16 @@ import contextlib import random import sys +from itertools import count +from weakref import WeakValueDictionary from hypothesis.errors import InvalidArgument -RANDOMS_TO_MANAGE: list = [random] +# This is effectively a WeakSet, which allows us to associate the saved states +# with their respective Random instances even as new ones are registered and old +# ones go out of scope and get garbage collected. Keys are ascending integers. +_RKEY = count() +RANDOMS_TO_MANAGE: WeakValueDictionary = WeakValueDictionary({next(_RKEY): random}) class NumpyRandomWrapper: @@ -34,6 +40,9 @@ def __init__(self): self.setstate = numpy.random.set_state +NP_RANDOM = None + + def register_random(r: random.Random) -> None: """Register the given Random instance for management by Hypothesis. @@ -49,8 +58,8 @@ def register_random(r: random.Random) -> None: """ if not (hasattr(r, "seed") and hasattr(r, "getstate") and hasattr(r, "setstate")): raise InvalidArgument(f"r={r!r} does not have all the required methods") - if r not in RANDOMS_TO_MANAGE: - RANDOMS_TO_MANAGE.append(r) + if r not in RANDOMS_TO_MANAGE.values(): + RANDOMS_TO_MANAGE[next(_RKEY)] = r def get_seeder_and_restorer(seed=0): @@ -64,24 +73,26 @@ def get_seeder_and_restorer(seed=0): using the global random state. See e.g. #1709. """ assert isinstance(seed, int) and 0 <= seed < 2 ** 32 - states: list = [] + states: dict = {} - if "numpy" in sys.modules and not any( - isinstance(x, NumpyRandomWrapper) for x in RANDOMS_TO_MANAGE - ): - RANDOMS_TO_MANAGE.append(NumpyRandomWrapper()) + if "numpy" in sys.modules: + global NP_RANDOM + if NP_RANDOM is None: + # Protect this from garbage-collection by adding it to global scope + NP_RANDOM = RANDOMS_TO_MANAGE[next(_RKEY)] = NumpyRandomWrapper() def seed_all(): assert not states - for r in RANDOMS_TO_MANAGE: - states.append(r.getstate()) + for k, r in RANDOMS_TO_MANAGE.items(): + states[k] = r.getstate() r.seed(seed) def restore_all(): - assert len(states) == len(RANDOMS_TO_MANAGE) - for r, state in zip(RANDOMS_TO_MANAGE, states): - r.setstate(state) - del states[:] + for k, state in states.items(): + r = RANDOMS_TO_MANAGE.get(k) + if r is not None: # i.e., hasn't been garbage-collected + r.setstate(state) + states.clear() return seed_all, restore_all diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/lazy.py b/hypothesis-python/src/hypothesis/strategies/_internal/lazy.py index aff1bce0df..92847a1cfe 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/lazy.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/lazy.py @@ -14,7 +14,8 @@ # END HEADER from inspect import getfullargspec -from typing import Dict +from typing import MutableMapping +from weakref import WeakKeyDictionary from hypothesis.internal.reflection import ( arg_string, @@ -24,7 +25,7 @@ ) from hypothesis.strategies._internal.strategies import SearchStrategy -unwrap_cache: Dict[SearchStrategy, SearchStrategy] = {} +unwrap_cache: MutableMapping[SearchStrategy, SearchStrategy] = WeakKeyDictionary() unwrap_depth = 0 diff --git a/hypothesis-python/tests/cover/test_random_module.py b/hypothesis-python/tests/cover/test_random_module.py index 14d7d25b61..4bccd93ca2 100644 --- a/hypothesis-python/tests/cover/test_random_module.py +++ b/hypothesis-python/tests/cover/test_random_module.py @@ -13,6 +13,7 @@ # # END HEADER +import gc import random import pytest @@ -20,11 +21,21 @@ from hypothesis import core, find, given, register_random, reporting, strategies as st from hypothesis.errors import InvalidArgument from hypothesis.internal import entropy +from hypothesis.internal.compat import PYPY from hypothesis.internal.entropy import deterministic_PRNG from tests.common.utils import capture_out +def gc_on_pypy(): + # CPython uses reference counting, so objects (without circular refs) + # are collected immediately on `del`, breaking weak references. + # PyPy doesn't, so we use this function in tests before counting the + # surviving references to ensure that they're deterministic. + if PYPY: + gc.collect() + + def test_can_seed_random(): with capture_out() as out: with reporting.with_reporter(reporting.default): @@ -54,11 +65,15 @@ def test_cannot_register_non_Random(): def test_registering_a_Random_is_idempotent(): + gc_on_pypy() + n_registered = len(entropy.RANDOMS_TO_MANAGE) r = random.Random() register_random(r) register_random(r) - assert entropy.RANDOMS_TO_MANAGE.pop() is r - assert r not in entropy.RANDOMS_TO_MANAGE + assert len(entropy.RANDOMS_TO_MANAGE) == n_registered + 1 + del r + gc_on_pypy() + assert len(entropy.RANDOMS_TO_MANAGE) == n_registered def test_manages_registered_Random_instance(): @@ -78,9 +93,6 @@ def inner(x): inner() assert state == r.getstate() - entropy.RANDOMS_TO_MANAGE.remove(r) - assert r not in entropy.RANDOMS_TO_MANAGE - def test_registered_Random_is_seeded_by_random_module_strategy(): r = random.Random() @@ -98,9 +110,6 @@ def inner(x): assert count[0] > len(results) * 0.9, "too few unique random numbers" assert state == r.getstate() - entropy.RANDOMS_TO_MANAGE.remove(r) - assert r not in entropy.RANDOMS_TO_MANAGE - @given(st.random_module()) def test_will_actually_use_the_random_seed(rnd): @@ -143,3 +152,33 @@ def test_find_does_not_pollute_state(): assert state_a == state_b assert state_a2 != state_b2 + + +def test_evil_prng_registration_nonsense(): + gc_on_pypy() + n_registered = len(entropy.RANDOMS_TO_MANAGE) + r1, r2, r3 = random.Random(1), random.Random(2), random.Random(3) + s2 = r2.getstate() + + # We're going to be totally evil here: register two randoms, then + # drop one and add another, and finally check that we reset only + # the states that we collected before we started + register_random(r1) + k = max(entropy.RANDOMS_TO_MANAGE) # get a handle to check if r1 still exists + register_random(r2) + assert len(entropy.RANDOMS_TO_MANAGE) == n_registered + 2 + + with deterministic_PRNG(0): + del r1 + gc_on_pypy() + assert k not in entropy.RANDOMS_TO_MANAGE, "r1 has been garbage-collected" + assert len(entropy.RANDOMS_TO_MANAGE) == n_registered + 1 + + r2.seed(4) + register_random(r3) + r3.seed(4) + s4 = r3.getstate() + + # Implicit check, no exception was raised in __exit__ + assert r2.getstate() == s2, "reset previously registered random state" + assert r3.getstate() == s4, "retained state when registered within the context"