Skip to content

Commit

Permalink
Use weak references
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD committed Nov 15, 2021
1 parent b343d21 commit 2c4002e
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 24 deletions.
6 changes: 6 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,6 @@
RELEASE_TYPE: patch

This patch changes the backing datastructures of :func:`~hypothesis.register_random`
and a few internal caches to use :class:`weakref.WeakKeyDictionary`. This reduces
memory usage and may improve performance when registered :class:`~random.Random`
instances are only used for a subset of your tests (:issue:`3131`).
39 changes: 25 additions & 14 deletions hypothesis-python/src/hypothesis/internal/entropy.py
Expand Up @@ -16,10 +16,16 @@
import contextlib
import random
import sys
from itertools import count
from weakref import WeakValueDictionary

from hypothesis.errors import InvalidArgument

RANDOMS_TO_MANAGE: list = [random]
# This is effectively a WeakSet, which allows us to associate the saved states
# with their respective Random instances even as new ones are registered and old
# ones go out of scope and get garbage collected. Keys are ascending integers.
_RKEY = count()
RANDOMS_TO_MANAGE: WeakValueDictionary = WeakValueDictionary({next(_RKEY): random})


class NumpyRandomWrapper:
Expand All @@ -34,6 +40,9 @@ def __init__(self):
self.setstate = numpy.random.set_state


NP_RANDOM = None


def register_random(r: random.Random) -> None:
"""Register the given Random instance for management by Hypothesis.
Expand All @@ -49,8 +58,8 @@ def register_random(r: random.Random) -> None:
"""
if not (hasattr(r, "seed") and hasattr(r, "getstate") and hasattr(r, "setstate")):
raise InvalidArgument(f"r={r!r} does not have all the required methods")
if r not in RANDOMS_TO_MANAGE:
RANDOMS_TO_MANAGE.append(r)
if r not in RANDOMS_TO_MANAGE.values():
RANDOMS_TO_MANAGE[next(_RKEY)] = r


def get_seeder_and_restorer(seed=0):
Expand All @@ -64,24 +73,26 @@ def get_seeder_and_restorer(seed=0):
using the global random state. See e.g. #1709.
"""
assert isinstance(seed, int) and 0 <= seed < 2 ** 32
states: list = []
states: dict = {}

if "numpy" in sys.modules and not any(
isinstance(x, NumpyRandomWrapper) for x in RANDOMS_TO_MANAGE
):
RANDOMS_TO_MANAGE.append(NumpyRandomWrapper())
if "numpy" in sys.modules:
global NP_RANDOM
if NP_RANDOM is None:
# Protect this from garbage-collection by adding it to global scope
NP_RANDOM = RANDOMS_TO_MANAGE[next(_RKEY)] = NumpyRandomWrapper()

def seed_all():
assert not states
for r in RANDOMS_TO_MANAGE:
states.append(r.getstate())
for k, r in RANDOMS_TO_MANAGE.items():
states[k] = r.getstate()
r.seed(seed)

def restore_all():
assert len(states) == len(RANDOMS_TO_MANAGE)
for r, state in zip(RANDOMS_TO_MANAGE, states):
r.setstate(state)
del states[:]
for k, state in states.items():
r = RANDOMS_TO_MANAGE.get(k)
if r is not None: # i.e., hasn't been garbage-collected
r.setstate(state)
states.clear()

return seed_all, restore_all

Expand Down
5 changes: 3 additions & 2 deletions hypothesis-python/src/hypothesis/strategies/_internal/lazy.py
Expand Up @@ -14,7 +14,8 @@
# END HEADER

from inspect import getfullargspec
from typing import Dict
from typing import MutableMapping
from weakref import WeakKeyDictionary

from hypothesis.internal.reflection import (
arg_string,
Expand All @@ -24,7 +25,7 @@
)
from hypothesis.strategies._internal.strategies import SearchStrategy

unwrap_cache: Dict[SearchStrategy, SearchStrategy] = {}
unwrap_cache: MutableMapping[SearchStrategy, SearchStrategy] = WeakKeyDictionary()
unwrap_depth = 0


Expand Down
55 changes: 47 additions & 8 deletions hypothesis-python/tests/cover/test_random_module.py
Expand Up @@ -13,18 +13,29 @@
#
# END HEADER

import gc
import random

import pytest

from hypothesis import core, find, given, register_random, reporting, strategies as st
from hypothesis.errors import InvalidArgument
from hypothesis.internal import entropy
from hypothesis.internal.compat import PYPY
from hypothesis.internal.entropy import deterministic_PRNG

from tests.common.utils import capture_out


def gc_on_pypy():
# CPython uses reference counting, so objects (without circular refs)
# are collected immediately on `del`, breaking weak references.
# PyPy doesn't, so we use this function in tests before counting the
# surviving references to ensure that they're deterministic.
if PYPY:
gc.collect()


def test_can_seed_random():
with capture_out() as out:
with reporting.with_reporter(reporting.default):
Expand Down Expand Up @@ -54,11 +65,15 @@ def test_cannot_register_non_Random():


def test_registering_a_Random_is_idempotent():
gc_on_pypy()
n_registered = len(entropy.RANDOMS_TO_MANAGE)
r = random.Random()
register_random(r)
register_random(r)
assert entropy.RANDOMS_TO_MANAGE.pop() is r
assert r not in entropy.RANDOMS_TO_MANAGE
assert len(entropy.RANDOMS_TO_MANAGE) == n_registered + 1
del r
gc_on_pypy()
assert len(entropy.RANDOMS_TO_MANAGE) == n_registered


def test_manages_registered_Random_instance():
Expand All @@ -78,9 +93,6 @@ def inner(x):
inner()
assert state == r.getstate()

entropy.RANDOMS_TO_MANAGE.remove(r)
assert r not in entropy.RANDOMS_TO_MANAGE


def test_registered_Random_is_seeded_by_random_module_strategy():
r = random.Random()
Expand All @@ -98,9 +110,6 @@ def inner(x):
assert count[0] > len(results) * 0.9, "too few unique random numbers"
assert state == r.getstate()

entropy.RANDOMS_TO_MANAGE.remove(r)
assert r not in entropy.RANDOMS_TO_MANAGE


@given(st.random_module())
def test_will_actually_use_the_random_seed(rnd):
Expand Down Expand Up @@ -143,3 +152,33 @@ def test_find_does_not_pollute_state():

assert state_a == state_b
assert state_a2 != state_b2


def test_evil_prng_registration_nonsense():
gc_on_pypy()
n_registered = len(entropy.RANDOMS_TO_MANAGE)
r1, r2, r3 = random.Random(1), random.Random(2), random.Random(3)
s2 = r2.getstate()

# We're going to be totally evil here: register two randoms, then
# drop one and add another, and finally check that we reset only
# the states that we collected before we started
register_random(r1)
k = max(entropy.RANDOMS_TO_MANAGE) # get a handle to check if r1 still exists
register_random(r2)
assert len(entropy.RANDOMS_TO_MANAGE) == n_registered + 2

with deterministic_PRNG(0):
del r1
gc_on_pypy()
assert k not in entropy.RANDOMS_TO_MANAGE, "r1 has been garbage-collected"
assert len(entropy.RANDOMS_TO_MANAGE) == n_registered + 1

r2.seed(4)
register_random(r3)
r3.seed(4)
s4 = r3.getstate()

# Implicit check, no exception was raised in __exit__
assert r2.getstate() == s2, "reset previously registered random state"
assert r3.getstate() == s4, "retained state when registered within the context"

0 comments on commit 2c4002e

Please sign in to comment.