Skip to content

Commit

Permalink
Merge pull request #2085 from takluyver/deprecate-empty-string-dtypes
Browse files Browse the repository at this point in the history
Deprecate generating string dtypes with unspecified length
  • Loading branch information
Zac-HD committed Sep 9, 2019
2 parents 6c651fe + d11f2e8 commit a1bd4b2
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 7 deletions.
1 change: 1 addition & 0 deletions CONTRIBUTING.rst
Expand Up @@ -271,6 +271,7 @@ their individual contributions.
* `Tessa Bradbury <https://www.github.com/tessereth>`_
* `Thomas Grainge <https://www.github.com/tgrainge>`_
* `Tim Martin <https://www.github.com/timmartin>`_ (tim@asymptotic.co.uk)
* `Thomas Kluyver <https://www.github.com/takluyver>`_ (thomas@kluyver.me.uk)
* `Tom McDermott <https://www.github.com/sponster-au>`_ (sponster@gmail.com)
* `Tyler Gibbons <https://www.github.com/kavec>`_ (tyler.gibbons@flexport.com)
* `Tyler Nickerson <https://www.github.com/nmbrgts>`_
Expand Down
11 changes: 11 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,11 @@
RELEASE_TYPE: minor

This release deprecates ``min_len`` or ``max_len`` of 0 in
:func:`~hypothesis.extra.numpy.byte_string_dtypes` and
:func:`~hypothesis.extra.numpy.unicode_string_dtypes`.
The lower limit is now 1.

Numpy uses a length of 0 in these dtypes to indicate an undetermined size,
chosen from the data at array creation.
However, as the :func:`~hypothesis.extra.numpy.arrays` strategy creates arrays
before filling them, strings were truncated to a length of 1 (a single byte,
or a single character for unicode dtypes).
52 changes: 46 additions & 6 deletions hypothesis-python/src/hypothesis/extra/numpy.py
Expand Up @@ -587,20 +587,60 @@ def timedelta64_dtypes(max_period="Y", min_period="ns", endianness="?"):


@defines_dtype_strategy
def byte_string_dtypes(endianness="?", min_len=0, max_len=16):
def byte_string_dtypes(endianness="?", min_len=1, max_len=16):
# type: (str, int, int) -> st.SearchStrategy[np.dtype]
"""Return a strategy for generating bytestring dtypes, of various lengths
and byteorder."""
order_check("len", 0, min_len, max_len)
and byteorder.
While Hypothesis' string strategies can generate empty strings, string
dtypes with length 0 indicate that size is still to be determined, so
the minimum length for string dtypes is 1.
"""
if min_len == 0:
note_deprecation(
"generating byte string dtypes for unspecified length ('S0') "
"is deprecated. min_len will be 1 instead.",
since="RELEASEDAY",
)
min_len = 1
if max_len == 0:
note_deprecation(
"generating byte string dtypes for unspecified length ('S0') "
"is deprecated. max_len will be 1 instead.",
since="RELEASEDAY",
)
max_len = 1

order_check("len", 1, min_len, max_len)
return dtype_factory("S", list(range(min_len, max_len + 1)), None, endianness)


@defines_dtype_strategy
def unicode_string_dtypes(endianness="?", min_len=0, max_len=16):
def unicode_string_dtypes(endianness="?", min_len=1, max_len=16):
# type: (str, int, int) -> st.SearchStrategy[np.dtype]
"""Return a strategy for generating unicode string dtypes, of various
lengths and byteorder."""
order_check("len", 0, min_len, max_len)
lengths and byteorder.
While Hypothesis' string strategies can generate empty strings, string
dtypes with length 0 indicate that size is still to be determined, so
the minimum length for string dtypes is 1.
"""
if min_len == 0:
note_deprecation(
"generating unicode string dtypes for unspecified length ('U0') "
"is deprecated. min_len will be 1 instead.",
since="RELEASEDAY",
)
min_len = 1
if max_len == 0:
note_deprecation(
"generating unicode string dtypes for unspecified length ('U0') "
"is deprecated. max_len will be 1 instead.",
since="RELEASEDAY",
)
max_len = 1

order_check("len", 1, min_len, max_len)
return dtype_factory("U", list(range(min_len, max_len + 1)), None, endianness)


Expand Down
15 changes: 15 additions & 0 deletions hypothesis-python/tests/numpy/test_argument_validation.py
Expand Up @@ -24,6 +24,7 @@
import hypothesis.strategies as st
from hypothesis import given
from hypothesis.errors import InvalidArgument
from tests.common.utils import checks_deprecated_behaviour


def e(a, **kwargs):
Expand Down Expand Up @@ -142,3 +143,17 @@ def test_bad_dtype_strategy(capsys, data):
assert capsys.readouterr().out.startswith(
"Got invalid dtype value=%r from strategy=just(%r), function=" % (val, val)
)


@checks_deprecated_behaviour
@given(st.data())
def test_byte_string_dtype_len_0(data):
    """Requesting length-0 byte strings is deprecated and yields 1-byte dtypes."""
    # Both bounds are passed as 0, the deprecated "unspecified length" value.
    strategy = nps.byte_string_dtypes(min_len=0, max_len=0)
    drawn_dtype = data.draw(strategy)
    assert drawn_dtype.itemsize == 1


@checks_deprecated_behaviour
@given(st.data())
def test_unicode_string_dtype_len_0(data):
    """Requesting length-0 unicode strings is deprecated; drawn dtypes take 4 bytes."""
    # Both bounds are passed as 0, the deprecated "unspecified length" value.
    strategy = nps.unicode_string_dtypes(min_len=0, max_len=0)
    drawn_dtype = data.draw(strategy)
    assert drawn_dtype.itemsize == 4
20 changes: 19 additions & 1 deletion hypothesis-python/tests/numpy/test_gen_data.py
Expand Up @@ -194,11 +194,29 @@ def test_can_generate_scalar_dtypes(dtype):
assert isinstance(dtype, np.dtype)


@given(nps.nested_dtypes())
@given(
    nps.nested_dtypes(
        # Leaves of the nested dtype may be scalar, byte-string or
        # unicode-string dtypes.
        subtype_strategy=st.one_of(
            nps.scalar_dtypes(), nps.byte_string_dtypes(), nps.unicode_string_dtypes()
        )
    )
)
def test_can_generate_compound_dtypes(dtype):
    # Whatever combination of leaves is chosen, the drawn value must be a
    # numpy dtype object.
    assert isinstance(dtype, np.dtype)


@given(
    nps.nested_dtypes(
        # Leaves of the nested dtype may be scalar, byte-string or
        # unicode-string dtypes.
        subtype_strategy=st.one_of(
            nps.scalar_dtypes(), nps.byte_string_dtypes(), nps.unicode_string_dtypes()
        )
    # Feed each drawn dtype into nps.arrays so the test draws an array of
    # that dtype (shape=1) rather than only the dtype itself.
    ).flatmap(lambda dt: nps.arrays(dtype=dt, shape=1))
)
def test_can_generate_data_compound_dtypes(arr):
    # This is meant to catch the class of errors which prompted PR #2085
    # (the change that deprecated string dtypes of unspecified length).
    assert isinstance(arr, np.ndarray)


@given(nps.nested_dtypes(max_itemsize=400), st.data())
def test_infer_strategy_from_dtype(dtype, data):
# Given a dtype
Expand Down

0 comments on commit a1bd4b2

Please sign in to comment.