diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 970d0eb64c..ec818623e2 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -271,6 +271,7 @@ their individual contributions. * `Tessa Bradbury `_ * `Thomas Grainge `_ * `Tim Martin `_ (tim@asymptotic.co.uk) +* `Thomas Kluyver `_ (thomas@kluyver.me.uk) * `Tom McDermott `_ (sponster@gmail.com) * `Tyler Gibbons `_ (tyler.gibbons@flexport.com) * `Tyler Nickerson `_ diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..d8ff269c9d --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,11 @@ +RELEASE_TYPE: minor + +This release deprecates ``min_len`` or ``max_len`` of 0 in +:func:`~hypothesis.extra.numpy.byte_string_dtypes` and +:func:`~hypothesis.extra.numpy.unicode_string_dtypes`. +The lower limit is now 1. + +Numpy uses a length of 0 in these dtypes to indicate an undetermined size, +chosen from the data at array creation. +However, as the :func:`~hypothesis.extra.numpy.arrays` strategy creates arrays +before filling them, strings were truncated to 1 byte. diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py index ed10eb0218..809133c4e9 100644 --- a/hypothesis-python/src/hypothesis/extra/numpy.py +++ b/hypothesis-python/src/hypothesis/extra/numpy.py @@ -587,20 +587,60 @@ def timedelta64_dtypes(max_period="Y", min_period="ns", endianness="?"): @defines_dtype_strategy -def byte_string_dtypes(endianness="?", min_len=0, max_len=16): +def byte_string_dtypes(endianness="?", min_len=1, max_len=16): # type: (str, int, int) -> st.SearchStrategy[np.dtype] """Return a strategy for generating bytestring dtypes, of various lengths - and byteorder.""" - order_check("len", 0, min_len, max_len) + and byteorder. + + While Hypothesis' string strategies can generate empty strings, string + dtypes with length 0 indicate that size is still to be determined, so + the minimum length for string dtypes is 1. 
+ """ + if min_len == 0: + note_deprecation( + "generating byte string dtypes for unspecified length ('S0') " + "is deprecated. min_len will be 1 instead.", + since="RELEASEDAY", + ) + min_len = 1 + if max_len == 0: + note_deprecation( + "generating byte string dtypes for unspecified length ('S0') " + "is deprecated. max_len will be 1 instead.", + since="RELEASEDAY", + ) + max_len = 1 + + order_check("len", 1, min_len, max_len) return dtype_factory("S", list(range(min_len, max_len + 1)), None, endianness) @defines_dtype_strategy -def unicode_string_dtypes(endianness="?", min_len=0, max_len=16): +def unicode_string_dtypes(endianness="?", min_len=1, max_len=16): # type: (str, int, int) -> st.SearchStrategy[np.dtype] """Return a strategy for generating unicode string dtypes, of various - lengths and byteorder.""" - order_check("len", 0, min_len, max_len) + lengths and byteorder. + + While Hypothesis' string strategies can generate empty strings, string + dtypes with length 0 indicate that size is still to be determined, so + the minimum length for string dtypes is 1. + """ + if min_len == 0: + note_deprecation( + "generating unicode string dtypes for unspecified length ('U0') " + "is deprecated. min_len will be 1 instead.", + since="RELEASEDAY", + ) + min_len = 1 + if max_len == 0: + note_deprecation( + "generating unicode string dtypes for unspecified length ('U0') " + "is deprecated. 
max_len will be 1 instead.", + since="RELEASEDAY", + ) + max_len = 1 + + order_check("len", 1, min_len, max_len) return dtype_factory("U", list(range(min_len, max_len + 1)), None, endianness) diff --git a/hypothesis-python/tests/numpy/test_argument_validation.py b/hypothesis-python/tests/numpy/test_argument_validation.py index 2136122ae4..439aba5224 100644 --- a/hypothesis-python/tests/numpy/test_argument_validation.py +++ b/hypothesis-python/tests/numpy/test_argument_validation.py @@ -24,6 +24,7 @@ import hypothesis.strategies as st from hypothesis import given from hypothesis.errors import InvalidArgument +from tests.common.utils import checks_deprecated_behaviour def e(a, **kwargs): @@ -142,3 +143,17 @@ def test_bad_dtype_strategy(capsys, data): assert capsys.readouterr().out.startswith( "Got invalid dtype value=%r from strategy=just(%r), function=" % (val, val) ) + + +@checks_deprecated_behaviour +@given(st.data()) +def test_byte_string_dtype_len_0(data): + s = nps.byte_string_dtypes(min_len=0, max_len=0) + assert data.draw(s).itemsize == 1 + + +@checks_deprecated_behaviour +@given(st.data()) +def test_unicode_string_dtype_len_0(data): + s = nps.unicode_string_dtypes(min_len=0, max_len=0) + assert data.draw(s).itemsize == 4 diff --git a/hypothesis-python/tests/numpy/test_gen_data.py b/hypothesis-python/tests/numpy/test_gen_data.py index 712029efee..dc369fd6e5 100644 --- a/hypothesis-python/tests/numpy/test_gen_data.py +++ b/hypothesis-python/tests/numpy/test_gen_data.py @@ -194,11 +194,29 @@ def test_can_generate_scalar_dtypes(dtype): assert isinstance(dtype, np.dtype) -@given(nps.nested_dtypes()) +@given( + nps.nested_dtypes( + subtype_strategy=st.one_of( + nps.scalar_dtypes(), nps.byte_string_dtypes(), nps.unicode_string_dtypes() + ) + ) +) def test_can_generate_compound_dtypes(dtype): assert isinstance(dtype, np.dtype) +@given( + nps.nested_dtypes( + subtype_strategy=st.one_of( + nps.scalar_dtypes(), nps.byte_string_dtypes(), nps.unicode_string_dtypes() + ) 
+ ).flatmap(lambda dt: nps.arrays(dtype=dt, shape=1)) +) +def test_can_generate_data_compound_dtypes(arr): + # This is meant to catch the class of errors which prompted PR #2085 + assert isinstance(arr, np.ndarray) + + @given(nps.nested_dtypes(max_itemsize=400), st.data()) def test_infer_strategy_from_dtype(dtype, data): # Given a dtype