From 20e5c43039fc51c966f80d9bebdafc6fba4bea7f Mon Sep 17 00:00:00 2001 From: Ryan Soklaski Date: Mon, 24 Jun 2019 11:02:33 -0400 Subject: [PATCH] add advanced integer indexing strategy and tests --- hypothesis-python/RELEASE.rst | 6 ++ .../src/hypothesis/_strategies.py | 4 +- .../src/hypothesis/extra/numpy.py | 75 +++++++++++++- .../tests/numpy/test_argument_validation.py | 5 + .../tests/numpy/test_gen_data.py | 98 +++++++++++++++++++ .../projects/hypothesispython.py | 2 +- 6 files changed, 183 insertions(+), 7 deletions(-) create mode 100644 hypothesis-python/RELEASE.rst diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..2e22e64c75 --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,6 @@ +RELEASE_TYPE: patch + +This release adds the strategy :func:`~hypothesis.extra.numpy.integer_array_indices`, +which generates tuples of Numpy arrays that can be used for +`advanced indexing `_ +to select an array of a specified shape. diff --git a/hypothesis-python/src/hypothesis/_strategies.py b/hypothesis-python/src/hypothesis/_strategies.py index 97d0c7c461..eae3ba7588 100644 --- a/hypothesis-python/src/hypothesis/_strategies.py +++ b/hypothesis-python/src/hypothesis/_strategies.py @@ -1318,8 +1318,8 @@ def everything_except(excluded_types): For example, ``everything_except(int)`` returns a strategy that can generate anything that ``from_type()`` can ever generate, except for - instances of :class:python:int, and excluding instances of types - added via :func:~hypothesis.strategies.register_type_strategy. + instances of :class:`python:int`, and excluding instances of types + added via :func:`~hypothesis.strategies.register_type_strategy`. This is useful when writing tests which check that invalid input is rejected in a certain way. diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py index 583d924f89..3c31321f59 100644 --- a/hypothesis-python/src/hypothesis/extra/numpy.py +++ b/hypothesis-python/src/hypothesis/extra/numpy.py @@ -37,6 +37,8 @@ from typing import Any, Union, Sequence, Tuple, Optional # noqa from hypothesis.searchstrategy.strategies import T # noqa + Shape = Tuple[int, ...] # noqa + TIME_RESOLUTIONS = tuple("Y M D h m s ms us ns ps fs as".split()) @@ -295,7 +297,7 @@ def fill_for(elements, unique, fill, name=""): @st.defines_strategy def arrays( dtype, # type: Any - shape, # type: Union[int, Sequence[int], st.SearchStrategy[Sequence[int]]] + shape, # type: Union[int, Shape, st.SearchStrategy[Shape]] elements=None, # type: st.SearchStrategy[Any] fill=None, # type: st.SearchStrategy[Any] unique=False, # type: bool @@ -401,7 +403,7 @@ def arrays( @st.defines_strategy def array_shapes(min_dims=1, max_dims=None, min_side=1, max_side=None): - # type: (int, int, int, int) -> st.SearchStrategy[Tuple[int, ...]] + # type: (int, int, int, int) -> st.SearchStrategy[Shape] """Return a strategy for array shapes (tuples of int >= 1).""" check_type(integer_types, min_dims, "min_dims") check_type(integer_types, min_side, "min_side") @@ -672,7 +674,7 @@ def nested_dtypes( @st.defines_strategy def valid_tuple_axes(ndim, min_size=0, max_size=None): - # type: (int, int, int) -> st.SearchStrategy[Tuple[int, ...]] + # type: (int, int, int) -> st.SearchStrategy[Shape] """Return a strategy for generating permissible tuple-values for the ``axis`` argument for a numpy sequential function (e.g. :func:`numpy:numpy.sum`), given an array of the specified @@ -763,7 +765,7 @@ def do_draw(self, data): @st.defines_strategy def broadcastable_shapes(shape, min_dims=0, max_dims=None, min_side=1, max_side=None): - # type: (Sequence[int], int, Optional[int], int, Optional[int]) -> st.SearchStrategy[Tuple[int, ...]] + # type: (Shape, int, int, int, int) -> st.SearchStrategy[Shape] """Return a strategy for generating shapes that are broadcast-compatible with the provided shape. @@ -846,3 +848,68 @@ def broadcastable_shapes(shape, min_dims=0, max_dims=None, min_side=1, max_side= min_side=min_side, max_side=max_side, ) + + +@st.defines_strategy +def integer_array_indices(shape, result_shape=array_shapes(), dtype="int"): + # type: (Shape, SearchStrategy[Shape], np.dtype) -> st.SearchStrategy[Tuple[np.ndarray, ...]] + """Return a search strategy for tuples of integer-arrays that, when used + to index into an array of shape ``shape``, given an array whose shape + was drawn from ``result_shape``. + + Examples from this strategy shrink towards the tuple of index-arrays:: + + len(shape) * (np.zeros(drawn_result_shape, dtype), ) + + * ``shape`` a tuple of integers that indicates the shape of the array, + whose indices are being generated. + * ``result_shape`` a strategy for generating tuples of integers, which + describe the shape of the resulting index arrays. The default is + :func:`~hypothesis.extra.numpy.array_shapes`. The shape drawn from + this strategy determines the shape of the array that will be produced + when the corresponding example from ``integer_array_indices`` is used + as an index. + * ``dtype`` the integer data type of the generated index-arrays. Negative + integer indices can be generated if a signed integer type is specified. + + Recall that an array can be indexed using a tuple of integer-arrays to + access its members in an arbitrary order, producing an array with an + arbitrary shape. For example: + + .. code-block:: pycon + + >>> from numpy import array + >>> x = array([-0, -1, -2, -3, -4]) + >>> ind = (array([[4, 0], [0, 1]]),) # a tuple containing a 2D integer-array + >>> x[ind] # the resulting array is commensurate with the indexing array(s) + array([[-4, 0], + [0, -1]]) + + Note that this strategy does not accommodate all variations of so-called + 'advanced indexing', as prescribed by NumPy's nomenclature. Combinations + of basic and advanced indexes are too complex to usefully define in a + standard strategy; we leave application-specific strategies to the user. + Advanced-boolean indexing can be defined as ``arrays(shape=..., dtype=bool)``, + and is similarly left to the user. + """ + check_type(tuple, shape, "shape") + check_argument( + shape and all(isinstance(x, integer_types) and x > 0 for x in shape), + "shape=%r must be a non-empty tuple of integers > 0" % (shape,), + ) + check_type(SearchStrategy, result_shape, "result_shape") + check_argument( + np.issubdtype(dtype, np.integer), "dtype=%r must be an integer dtype" % (dtype,) + ) + signed = np.issubdtype(dtype, np.signedinteger) + + def array_for(index_shape, size): + return arrays( + dtype=dtype, + shape=index_shape, + elements=st.integers(-size if signed else 0, size - 1), + ) + + return result_shape.flatmap( + lambda index_shape: st.tuples(*[array_for(index_shape, size) for size in shape]) + ) diff --git a/hypothesis-python/tests/numpy/test_argument_validation.py b/hypothesis-python/tests/numpy/test_argument_validation.py index f358f4a991..2136122ae4 100644 --- a/hypothesis-python/tests/numpy/test_argument_validation.py +++ b/hypothesis-python/tests/numpy/test_argument_validation.py @@ -116,6 +116,11 @@ def e(a, **kwargs): min_side=2, max_side=3, ), + e(nps.integer_array_indices, shape=()), + e(nps.integer_array_indices, shape=(2, 0)), + e(nps.integer_array_indices, shape="a"), + e(nps.integer_array_indices, shape=(2,), result_shape=(2, 2)), + e(nps.integer_array_indices, shape=(2,), dtype=float), ], ) def test_raise_invalid_argument(function, kwargs): diff --git a/hypothesis-python/tests/numpy/test_gen_data.py b/hypothesis-python/tests/numpy/test_gen_data.py index ad36a3be91..eed7186629 100644 --- a/hypothesis-python/tests/numpy/test_gen_data.py +++ b/hypothesis-python/tests/numpy/test_gen_data.py @@ -702,3 +702,101 @@ def test_broadcastable_shape_can_generate_arbitrary_ndims(shape, max_dims, data) lambda x: len(x) == desired_ndim, settings(max_examples=10 ** 6), ) + + +@settings(deadline=None) +@given( + shape=nps.array_shapes(min_dims=1, min_side=1), + dtype=st.one_of(nps.unsigned_integer_dtypes(), nps.integer_dtypes()), + data=st.data(), +) +def test_advanced_integer_index_is_valid_with_default_result_shape(shape, dtype, data): + index = data.draw(nps.integer_array_indices(shape, dtype=dtype)) + x = np.zeros(shape) + out = x[index] # raises if the index is invalid + assert not np.shares_memory(x, out) # advanced indexing should not return a view + assert all(dtype == x.dtype for x in index) + + +@settings(deadline=None) +@given( + shape=nps.array_shapes(min_dims=1, min_side=1), + min_dims=st.integers(0, 3), + min_side=st.integers(0, 3), + dtype=st.one_of(nps.unsigned_integer_dtypes(), nps.integer_dtypes()), + data=st.data(), +) +def test_advanced_integer_index_is_valid_and_satisfies_bounds( + shape, min_dims, min_side, dtype, data +): + max_side = data.draw(st.integers(min_side, min_side + 2), label="max_side") + max_dims = data.draw(st.integers(min_dims, min_dims + 2), label="max_dims") + index = data.draw( + nps.integer_array_indices( + shape, + result_shape=nps.array_shapes( + min_dims=min_dims, + max_dims=max_dims, + min_side=min_side, + max_side=max_side, + ), + dtype=dtype, + ) + ) + x = np.zeros(shape) + out = x[index] # raises if the index is invalid + assert all(min_side <= s <= max_side for s in out.shape) + assert min_dims <= out.ndim <= max_dims + assert not np.shares_memory(x, out) # advanced indexing should not return a view + assert all(dtype == x.dtype for x in index) + + +@settings(deadline=None) +@given( + shape=nps.array_shapes(min_dims=1, min_side=1), + min_dims=st.integers(0, 3), + min_side=st.integers(0, 3), + dtype=st.sampled_from(["uint8", "int8"]), + data=st.data(), +) +def test_advanced_integer_index_minimizes_as_documented( + shape, min_dims, min_side, dtype, data +): + max_side = data.draw(st.integers(min_side, min_side + 2), label="max_side") + max_dims = data.draw(st.integers(min_dims, min_dims + 2), label="max_dims") + result_shape = nps.array_shapes( + min_dims=min_dims, max_dims=max_dims, min_side=min_side, max_side=max_side + ) + smallest = minimal( + nps.integer_array_indices(shape, result_shape=result_shape, dtype=dtype) + ) + desired = len(shape) * (np.zeros(min_dims * [min_side]),) + assert len(smallest) == len(desired) + for s, d in zip(smallest, desired): + np.testing.assert_array_equal(s, d) + + +@settings(deadline=None, max_examples=10) +@given( + shape=nps.array_shapes(min_dims=1, max_dims=2, min_side=1, max_side=3), + data=st.data(), +) +def test_advanced_integer_index_can_generate_any_pattern(shape, data): + # ensures that generated index-arrays can be used to yield any pattern of elements from an array + x = np.arange(np.product(shape)).reshape(shape) + + target = data.draw( + nps.arrays( + shape=nps.array_shapes(min_dims=1, max_dims=2, min_side=1, max_side=2), + elements=st.sampled_from(x.flatten()), + dtype=x.dtype, + ), + label="target", + ) + find_any( + nps.integer_array_indices( + shape, result_shape=st.just(target.shape), dtype=np.dtype("int8") + ), + lambda index: np.all(target == x[index]), + settings(max_examples=10 ** 6), + ) diff --git a/tooling/src/hypothesistooling/projects/hypothesispython.py b/tooling/src/hypothesistooling/projects/hypothesispython.py index f2c508c07d..1061c8add8 100644 --- a/tooling/src/hypothesistooling/projects/hypothesispython.py +++ b/tooling/src/hypothesistooling/projects/hypothesispython.py @@ -193,7 +193,7 @@ def upload_distribution(): entries = [i for i, l in enumerate(lines) if CHANGELOG_HEADER.match(l)] changelog_body = "".join(lines[entries[0] + 2 : entries[1]]).strip() + ( "\n\n*[The canonical version of these notes (with links) is on readthedocs.]" - "(https://hypothesis.readthedocs.io/en/latest/changes.html#v%s).*" + "(https://hypothesis.readthedocs.io/en/latest/changes.html#v%s)*" % (current_version().replace(".", "-"),) )