Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add strategy for numpy.random.Generator #3510

Closed
wants to merge 12 commits into from
10 changes: 10 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,10 @@
RELEASE_TYPE: minor

This release increases Hypothesis' minimum supported version of NumPy to 1.19.0.

This release adds the strategy :func:`~hypothesis.extra.numpy.rand_generators`, which
draws instances of :obj:`numpy.random.Generator <numpy:numpy.random.Generator>` backed
by a bit-generator initialized with a Hypothesis-controlled seed. Failing examples display
the initial seed that was used to create the generator, enabling reproducibility.
Accordingly, Hypothesis can now infer a strategy for the ``numpy.random.Generator``
type.
2 changes: 1 addition & 1 deletion hypothesis-python/setup.py
Expand Up @@ -58,7 +58,7 @@ def local_file(name):
"pytz": ["pytz>=2014.1"],
"dateutil": ["python-dateutil>=1.4"],
"lark": ["lark-parser>=0.6.5"],
"numpy": ["numpy>=1.9.0"],
"numpy": ["numpy>=1.19.0"],
"pandas": ["pandas>=1.0"],
"pytest": ["pytest>=4.6"],
"dpcontracts": ["dpcontracts>=0.4"],
Expand Down
86 changes: 85 additions & 1 deletion hypothesis-python/src/hypothesis/extra/numpy.py
Expand Up @@ -9,9 +9,10 @@
# obtain one at https://mozilla.org/MPL/2.0/.

import math
from typing import Any, Mapping, Optional, Sequence, Tuple, Union
from typing import Any, List, Mapping, Optional, Sequence, Tuple, Type, Union

import numpy as np
from numpy.random import PCG64

from hypothesis import strategies as st
from hypothesis._settings import note_deprecation
Expand Down Expand Up @@ -59,6 +60,7 @@
"mutually_broadcastable_shapes",
"basic_indices",
"integer_array_indices",
"rand_generators",
]

TIME_RESOLUTIONS = tuple("Y M D h m s ms us ns ps fs as".split())
Expand Down Expand Up @@ -955,3 +957,85 @@ def array_for(index_shape, size):
return result_shape.flatmap(
lambda index_shape: st.tuples(*(array_for(index_shape, size) for size in shape))
)


# Every concrete bit-generator class exported via ``numpy.random.__all__``.
# The abstract ``BitGenerator`` base class itself is excluded, as are exported
# names that are not classes at all.
_ALL_BIT_GENERATORS: List[Type[np.random.BitGenerator]] = [
    member
    for member in [getattr(np.random, attr) for attr in np.random.__all__]
    if member is not np.random.BitGenerator
    and isinstance(member, type)
    and issubclass(member, np.random.BitGenerator)
]


class NumpyGeneratorStrategy(st.SearchStrategy):
    """Strategy that draws seeded ``numpy.random.Generator`` instances.

    Each draw selects one of the configured bit-generator types, draws a
    64-bit seed, and returns a generator backed by that bit-generator. The
    returned object's repr shows the seed that was used.
    """

    class _GeneratorWithSeedRepr(np.random.Generator):
        """``Generator`` subclass whose repr exposes its initial seed."""

        # must be attached to the instance externally
        _hypothesis_initial_seed: int

        def __repr__(self) -> str:
            return f"Generator({type(self.bit_generator).__name__}({self._hypothesis_initial_seed!r}))"

    def __init__(self, bit_gen_types: Tuple[Type[np.random.BitGenerator], ...]):
        """Store a strategy that picks among ``bit_gen_types``.

        ``bit_gen_types`` must be non-empty; the public ``rand_generators``
        entry point is expected to have validated its contents already.
        """
        # Let the base class perform whatever initialisation it defines.
        super().__init__()
        assert bit_gen_types
        # A single candidate needs no sampling decision, so use ``just``.
        self._bit_gen_types = (
            st.just(bit_gen_types[0])
            if len(bit_gen_types) == 1
            else st.sampled_from(bit_gen_types)
        )

    def do_draw(self, data):
        """Draw a bit-generator type and a 64-bit seed, and build a generator."""
        CurrentBitGenerator = data.draw(self._bit_gen_types)
        seed = data.draw_bits(64)
        gen = self._GeneratorWithSeedRepr(CurrentBitGenerator(seed))
        # Record the seed on the instance so that ``__repr__`` can report it.
        gen._hypothesis_initial_seed = seed
        return gen


@defines_strategy()
def rand_generators(
    *bit_generator_types: Type[np.random.BitGenerator],
) -> st.SearchStrategy[np.random.Generator]:
    """Generates instances of
    :obj:`numpy.random.Generator <numpy:numpy.random.Generator>` backed by a
    bit-generator initialized with a Hypothesis-drawn 64 bit seed.

    Accepts one or more
    :obj:`numpy.random.BitGenerator <numpy:numpy.random.BitGenerator>` types,
    defaulting to ``PCG64``, that will be sampled from during example generation.
    Examples from this strategy shrink towards a generator backed by the
    first-specified bit-generator type, seeded with 0.

    Every argument must be a subclass of ``BitGenerator`` other than
    ``BitGenerator`` itself; anything else is rejected with ``InvalidArgument``.

    This is the recommended way for utilizing diverse, reproducible sources of
    NumPy-based random number generation in Hypothesis tests. The resulting generators
    are of a special Hypothesis subclass whose repr displays the initial seed for the
    bit generator.

    .. code-block:: pycon

      >>> [rand_generators().example() for _ in range(3)]
      [Generator(PCG64(17731618377865219412)),
       Generator(PCG64(16938332804403789103)),
       Generator(PCG64(9641801721570554589))]

      >>> from numpy.random import MT19937, PCG64
      >>> # specifying multiple bit-generator types
      >>> [rand_generators(MT19937, PCG64).example() for _ in range(3)]
      [Generator(PCG64(1138900339423482065)),
       Generator(MT19937(13796052070681794055)),
       Generator(MT19937(16637614687104877655))]
    """
    if not bit_generator_types:
        bit_generator_types = (PCG64,)

    for g in bit_generator_types:
        if (
            not isinstance(g, type)
            or not issubclass(g, np.random.BitGenerator)
            or g is np.random.BitGenerator
        ):
            # The f-prefix must sit on the fragment that holds the placeholder;
            # otherwise the message would contain a literal "{g}".
            raise InvalidArgument(
                "`rand_generators` must be passed BitGenerator subclasses "
                f"(BitGenerator itself is not a valid implementation). Got {g!r}"
            )
    return NumpyGeneratorStrategy(bit_generator_types)
11 changes: 10 additions & 1 deletion hypothesis-python/src/hypothesis/strategies/_internal/types.py
Expand Up @@ -590,10 +590,19 @@ def _networks(bits):
try: # pragma: no cover
import numpy as np

from hypothesis.extra.numpy import array_dtypes, array_shapes, arrays, scalar_dtypes
from hypothesis.extra.numpy import (
_ALL_BIT_GENERATORS,
array_dtypes,
array_shapes,
arrays,
rand_generators,
scalar_dtypes,
)

_global_type_lookup[np.dtype] = array_dtypes()
_global_type_lookup[np.ndarray] = arrays(scalar_dtypes(), array_shapes(max_dims=2))
# Is it too intense to sample from all available bit-generators?
_global_type_lookup[np.random.Generator] = rand_generators(*_ALL_BIT_GENERATORS)
except ImportError:
pass

Expand Down
6 changes: 5 additions & 1 deletion hypothesis-python/tests/common/debug.py
Expand Up @@ -8,6 +8,8 @@
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.

from typing import Any, Callable

from hypothesis import (
HealthCheck,
Verbosity,
Expand Down Expand Up @@ -74,7 +76,9 @@ def inner(x):
)


def find_any(definition, condition=lambda _: True, settings=None):
def find_any(
definition, condition: Callable[[Any], bool] = lambda _: True, settings=None
):
settings = settings or Settings.default
return minimal(
definition,
Expand Down
18 changes: 18 additions & 0 deletions hypothesis-python/tests/numpy/test_argument_validation.py
Expand Up @@ -23,6 +23,10 @@ def e(a, **kwargs):
return pytest.param(a, kwargs, id=f"{a.__name__}({kw})")


def p(a, *args):
    """Build a ``pytest.param`` for calling ``a`` with positional ``args``.

    The test id is rendered as ``name(arg1, arg2, ...)``.
    """
    rendered_args = ", ".join(map(str, args))
    return pytest.param(a, args, id=f"{a.__name__}({rendered_args})")


@pytest.mark.parametrize(
("function", "kwargs"),
[
Expand Down Expand Up @@ -288,3 +292,17 @@ def test_raise_invalid_argument(function, kwargs):
def test_raise_invalid_argument_deprecated(function, kwargs):
with pytest.raises(InvalidArgument):
function(**kwargs).example()


@pytest.mark.parametrize(
    ("function", "args"),
    [
        p(nps.rand_generators, True),
        p(nps.rand_generators, numpy.random.Generator),
        p(nps.rand_generators, numpy.random.BitGenerator),
        p(nps.rand_generators, numpy.random.PCG64, numpy.random.BitGenerator),
    ],
)
def test_raise_invalid_pos_args(function, args):
    """Invalid positional arguments must be rejected with InvalidArgument."""
    with pytest.raises(InvalidArgument):
        # Both building the strategy and drawing from it stay inside the
        # context manager, so the error may surface at either step.
        strategy = function(*args)
        strategy.example()
53 changes: 53 additions & 0 deletions hypothesis-python/tests/numpy/test_gen_data.py
Expand Up @@ -11,9 +11,11 @@
import sys
from functools import reduce
from itertools import zip_longest
from typing import Type

import numpy as np
import pytest
from numpy.random import BitGenerator, Generator

from hypothesis import (
HealthCheck,
Expand Down Expand Up @@ -1212,3 +1214,54 @@ def test_basic_indices_generate_valid_indexers(
def test_array_owns_memory(x: np.ndarray):
assert x.base is None
assert x[...].base is x


@pytest.mark.parametrize("BitGen", nps._ALL_BIT_GENERATORS)
@given(data=st.data())
def test_fuzz_all_bit_generators(BitGen: Type[BitGenerator], data: st.DrawFn):
    """The seeding mechanism must work for every known bit-generator type."""
    strategy = nps.rand_generators(BitGen)
    drawn = data.draw(strategy)
    assert isinstance(drawn.bit_generator, BitGen)
    # Smoke test: the drawn generator must actually be able to produce a value.
    drawn.normal()


@pytest.mark.parametrize("BitGen", nps._ALL_BIT_GENERATORS)
def test_random_generators_have_accurate_reprs(BitGen: Type[BitGenerator]):
    """A generator rebuilt from the information in its repr must match the original."""
    bit_gen_by_name = {cls.__name__: cls for cls in nps._ALL_BIT_GENERATORS}
    drawn_generators = []

    @settings(max_examples=10, database=None)
    @given(nps.rand_generators(BitGen))
    def collect(generator):
        drawn_generators.append(generator)

    collect()

    for original in drawn_generators:
        # The repr has the form "Generator(<BitGenName>(<seed>))", so splitting
        # on "(" yields the bit-generator name and the seed followed by "))".
        _, bit_gen_name, seed_tail = repr(original).split("(")
        seed = int(seed_tail[:-2])
        rebuilt = Generator(bit_gen_by_name[bit_gen_name](seed))
        # It would be preferable to compare bit-generator states, but
        # different bit generators implement state differently, and some
        # require traversing pytrees with array leaves -- not easy to compare.
        #
        # Instead, check that both generators agree on 10 consecutive draws.
        for _ in range(10):
            assert original.uniform() == rebuilt.uniform()


@pytest.mark.parametrize("BitGen", nps._ALL_BIT_GENERATORS)
def test_samples_from_bit_generators(BitGen: Type[BitGenerator]):
    """Sampling across all bit-generator types can produce each individual type."""
    every_bit_generator = nps.rand_generators(*nps._ALL_BIT_GENERATORS)

    def is_backed_by_target(gen):
        return type(gen.bit_generator) is BitGen

    find_any(every_bit_generator, is_backed_by_target)


@settings(max_examples=10)
@given(nps.rand_generators())
def test_default_bit_generator_matches_default_rng_backing(gen: Generator):
    """With no arguments, the strategy uses the same bit-generator type as ``default_rng``."""
    expected_type = type(np.random.default_rng().bit_generator)
    assert type(gen.bit_generator) is expected_type
14 changes: 9 additions & 5 deletions hypothesis-python/tests/numpy/test_randomness.py
Expand Up @@ -15,15 +15,19 @@


def test_numpy_prng_is_seeded():
first = []
prng_state = np.random.get_state()

@given(none())
def inner(_):
val = np.random.bytes(10)
if not first:
first.append(val)
assert val == first[0], "Numpy random module should be reproducible"
# Hypothesis sets seed to 0 by default
implicitly_seeded_val = np.random.bytes(10)

np.random.seed(0)
explicitly_seeded_val = np.random.bytes(10)

assert (
implicitly_seeded_val == explicitly_seeded_val
), "Numpy random module should be reproducible"

inner()

Expand Down