Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate generating string dtypes with unspecified length #2085

Merged
1 change: 1 addition & 0 deletions CONTRIBUTING.rst
Expand Up @@ -270,6 +270,7 @@ their individual contributions.
* `Tariq Khokhar <https://www.github.com/tkb>`_ (tariq@khokhar.net)
* `Tessa Bradbury <https://www.github.com/tessereth>`_
* `Tim Martin <https://www.github.com/timmartin>`_ (tim@asymptotic.co.uk)
* `Thomas Kluyver <https://www.github.com/takluyver>`_ (thomas@kluyver.me.uk)
* `Tom McDermott <https://www.github.com/sponster-au>`_ (sponster@gmail.com)
* `Tyler Gibbons <https://www.github.com/kavec>`_ (tyler.gibbons@flexport.com)
* `Tyler Nickerson <https://www.github.com/nmbrgts>`_
Expand Down
11 changes: 11 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,11 @@
RELEASE_TYPE: minor

This release deprecates a ``min_len`` or ``max_len`` of 0 in
:func:`~hypothesis.extra.numpy.byte_string_dtypes` and
:func:`~hypothesis.extra.numpy.unicode_string_dtypes`.
The lower limit is now 1.

Numpy uses a length of 0 in these dtypes to indicate an undetermined size,
chosen from the data at array creation.
However, as the :func:`~hypothesis.extra.numpy.arrays` strategy creates arrays
before filling them, strings were truncated to 1 byte.
52 changes: 46 additions & 6 deletions hypothesis-python/src/hypothesis/extra/numpy.py
Expand Up @@ -587,20 +587,60 @@ def timedelta64_dtypes(max_period="Y", min_period="ns", endianness="?"):


@defines_dtype_strategy
def byte_string_dtypes(endianness="?", min_len=1, max_len=16):
    # type: (str, int, int) -> st.SearchStrategy[np.dtype]
    """Return a strategy for generating bytestring dtypes, of various lengths
    and byteorder.

    While Hypothesis' string strategies can generate empty strings, string
    dtypes with length 0 indicate that size is still to be determined, so
    the minimum length for string dtypes is 1.

    Passing ``min_len=0`` or ``max_len=0`` is deprecated: a deprecation is
    noted and the affected bound is replaced by 1 before validation.
    """
    # Rewrite a zero bound to 1 *before* the range check below, so callers
    # that still pass 0 are validated against the new lower limit of 1.
    if min_len == 0:
        note_deprecation(
            "generating byte string dtypes for unspecified length ('S0') "
            "is deprecated. min_len will be 1 instead.",
            since="RELEASEDAY",
        )
        min_len = 1
    if max_len == 0:
        note_deprecation(
            "generating byte string dtypes for unspecified length ('S0') "
            "is deprecated. max_len will be 1 instead.",
            since="RELEASEDAY",
        )
        max_len = 1

    order_check("len", 1, min_len, max_len)
    return dtype_factory("S", list(range(min_len, max_len + 1)), None, endianness)


@defines_dtype_strategy
def unicode_string_dtypes(endianness="?", min_len=1, max_len=16):
    # type: (str, int, int) -> st.SearchStrategy[np.dtype]
    """Return a strategy for generating unicode string dtypes, of various
    lengths and byteorder.

    While Hypothesis' string strategies can generate empty strings, string
    dtypes with length 0 indicate that size is still to be determined, so
    the minimum length for string dtypes is 1.

    Passing ``min_len=0`` or ``max_len=0`` is deprecated: a deprecation is
    noted and the affected bound is replaced by 1 before validation.
    """
    # Rewrite a zero bound to 1 *before* the range check below, so callers
    # that still pass 0 are validated against the new lower limit of 1.
    if min_len == 0:
        note_deprecation(
            "generating unicode string dtypes for unspecified length ('U0') "
            "is deprecated. min_len will be 1 instead.",
            since="RELEASEDAY",
        )
        min_len = 1
    if max_len == 0:
        note_deprecation(
            "generating unicode string dtypes for unspecified length ('U0') "
            "is deprecated. max_len will be 1 instead.",
            since="RELEASEDAY",
        )
        max_len = 1

    order_check("len", 1, min_len, max_len)
    return dtype_factory("U", list(range(min_len, max_len + 1)), None, endianness)


Expand Down
15 changes: 15 additions & 0 deletions hypothesis-python/tests/numpy/test_argument_validation.py
Expand Up @@ -24,6 +24,7 @@
import hypothesis.strategies as st
from hypothesis import given
from hypothesis.errors import InvalidArgument
from tests.common.utils import checks_deprecated_behaviour


def e(a, **kwargs):
Expand Down Expand Up @@ -142,3 +143,17 @@ def test_bad_dtype_strategy(capsys, data):
assert capsys.readouterr().out.startswith(
"Got invalid dtype value=%r from strategy=just(%r), function=" % (val, val)
)


@checks_deprecated_behaviour
@given(st.data())
def test_byte_string_dtype_len_0(data):
    """Deprecated zero-length bounds still yield a one-byte 'S' dtype."""
    strategy = nps.byte_string_dtypes(min_len=0, max_len=0)
    drawn_dtype = data.draw(strategy)
    assert drawn_dtype.itemsize == 1


@checks_deprecated_behaviour
@given(st.data())
def test_unicode_string_dtype_len_0(data):
    """Deprecated zero-length bounds still yield a one-character 'U' dtype."""
    strategy = nps.unicode_string_dtypes(min_len=0, max_len=0)
    drawn_dtype = data.draw(strategy)
    # numpy stores each unicode character in 4 bytes, so length 1 means 4.
    assert drawn_dtype.itemsize == 4
20 changes: 19 additions & 1 deletion hypothesis-python/tests/numpy/test_gen_data.py
Expand Up @@ -194,11 +194,29 @@ def test_can_generate_scalar_dtypes(dtype):
assert isinstance(dtype, np.dtype)


@given(
    nps.nested_dtypes(
        subtype_strategy=st.one_of(
            nps.scalar_dtypes(), nps.byte_string_dtypes(), nps.unicode_string_dtypes()
        )
    )
)
def test_can_generate_compound_dtypes(dtype):
    """Nested dtypes built from scalar and string subtypes are np.dtype objects."""
    assert isinstance(dtype, np.dtype)


@given(
    nps.nested_dtypes(
        subtype_strategy=st.one_of(
            nps.scalar_dtypes(), nps.byte_string_dtypes(), nps.unicode_string_dtypes()
        )
    ).flatmap(lambda compound_dtype: nps.arrays(dtype=compound_dtype, shape=1))
)
def test_can_generate_data_compound_dtypes(arr):
    """Arrays can be generated for nested dtypes that include string subtypes."""
    # This is meant to catch the class of errors which prompted PR #2085
    assert isinstance(arr, np.ndarray)


@given(nps.nested_dtypes(max_itemsize=400), st.data())
def test_infer_strategy_from_dtype(dtype, data):
# Given a dtype
Expand Down