From fd55325397753cbb0b567379f1ee89d071aa5470 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 17:10:36 +0200 Subject: [PATCH 1/8] Deprecate generating string dtypes with unspecified length --- .../src/hypothesis/extra/numpy.py | 32 +++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py index e2e4c93d00..09d0cae22f 100644 --- a/hypothesis-python/src/hypothesis/extra/numpy.py +++ b/hypothesis-python/src/hypothesis/extra/numpy.py @@ -587,20 +587,48 @@ def timedelta64_dtypes(max_period="Y", min_period="ns", endianness="?"): @defines_dtype_strategy -def byte_string_dtypes(endianness="?", min_len=0, max_len=16): +def byte_string_dtypes(endianness="?", min_len=1, max_len=16): # type: (str, int, int) -> st.SearchStrategy[np.dtype] """Return a strategy for generating bytestring dtypes, of various lengths and byteorder.""" order_check("len", 0, min_len, max_len) + if min_len == 0: + note_deprecation( + "generating byte string dtypes for unspecified length ('S0') " + "is deprecated. min_len will be 1 instead.", + since="RELEASEDAY" + ) + min_len = 1 + if max_len == 0: + note_deprecation( + "generating byte string dtypes for unspecified length ('S0') " + "is deprecated. max_len will be 1 instead.", + since="RELEASEDAY" + ) + max_len = 1 return dtype_factory("S", list(range(min_len, max_len + 1)), None, endianness) @defines_dtype_strategy -def unicode_string_dtypes(endianness="?", min_len=0, max_len=16): +def unicode_string_dtypes(endianness="?", min_len=1, max_len=16): # type: (str, int, int) -> st.SearchStrategy[np.dtype] """Return a strategy for generating unicode string dtypes, of various lengths and byteorder.""" order_check("len", 0, min_len, max_len) + if min_len == 0: + note_deprecation( + "generating unicode string dtypes for unspecified length ('U0') " + "is deprecated. 
min_len will be 1 instead.", + since="RELEASEDAY" + ) + min_len = 1 + if max_len == 0: + note_deprecation( + "generating unicode string dtypes for unspecified length ('U0') " + "is deprecated. max_len will be 1 instead.", + since="RELEASEDAY" + ) + max_len = 1 return dtype_factory("U", list(range(min_len, max_len + 1)), None, endianness) From a8094dd26b6d971522586b3b230e112f27a6dcad Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 17:39:28 +0200 Subject: [PATCH 2/8] Add RELEASE.rst --- hypothesis-python/RELEASE.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 hypothesis-python/RELEASE.rst diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..d8ff269c9d --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,11 @@ +RELEASE_TYPE: minor + +This release deprecates ``min_len`` or ``max_len`` of 0 in +:func:`~hypothesis.extra.numpy.byte_string_dtypes` and +:func:`~hypothesis.extra.numpy.unicode_string_dtypes`. +The lower limit is now 1. + +NumPy uses a length of 0 in these dtypes to indicate an undetermined size, +chosen from the data at array creation. +However, as the :func:`~hypothesis.extra.numpy.arrays` strategy creates arrays +before filling them, strings were truncated to a single character.
From 9c1d794918ab7321146cc878099347e367b9b467 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 17:42:40 +0200 Subject: [PATCH 3/8] Move order_check after new deprecation checks, enforce new limit --- hypothesis-python/src/hypothesis/extra/numpy.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py index 09d0cae22f..1da78cb32f 100644 --- a/hypothesis-python/src/hypothesis/extra/numpy.py +++ b/hypothesis-python/src/hypothesis/extra/numpy.py @@ -591,7 +591,6 @@ def byte_string_dtypes(endianness="?", min_len=1, max_len=16): # type: (str, int, int) -> st.SearchStrategy[np.dtype] """Return a strategy for generating bytestring dtypes, of various lengths and byteorder.""" - order_check("len", 0, min_len, max_len) if min_len == 0: note_deprecation( "generating byte string dtypes for unspecified length ('S0') " @@ -606,6 +605,8 @@ def byte_string_dtypes(endianness="?", min_len=1, max_len=16): since="RELEASEDAY" ) max_len = 1 + + order_check("len", 1, min_len, max_len) return dtype_factory("S", list(range(min_len, max_len + 1)), None, endianness) @@ -614,7 +615,6 @@ def unicode_string_dtypes(endianness="?", min_len=1, max_len=16): # type: (str, int, int) -> st.SearchStrategy[np.dtype] """Return a strategy for generating unicode string dtypes, of various lengths and byteorder.""" - order_check("len", 0, min_len, max_len) if min_len == 0: note_deprecation( "generating unicode string dtypes for unspecified length ('U0') " @@ -629,6 +629,8 @@ def unicode_string_dtypes(endianness="?", min_len=1, max_len=16): since="RELEASEDAY" ) max_len = 1 + + order_check("len", 1, min_len, max_len) return dtype_factory("U", list(range(min_len, max_len + 1)), None, endianness) From 495d1b81c4888f38d8e312d5abe54af32bb06a53 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 17:43:35 +0200 Subject: [PATCH 4/8] Add myself to contributor list --- 
CONTRIBUTING.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 09af95128d..11f2bbdcd7 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -270,6 +270,7 @@ their individual contributions. * `Tariq Khokhar `_ (tariq@khokhar.net) * `Tessa Bradbury `_ * `Tim Martin `_ (tim@asymptotic.co.uk) +* `Thomas Kluyver `_ (thomas@kluyver.me.uk) * `Tom McDermott `_ (sponster@gmail.com) * `Tyler Gibbons `_ (tyler.gibbons@flexport.com) * `Tyler Nickerson `_ From 30f4a8a9da1085a7861e3bc55f6ae03e2f0adfc7 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 17:46:46 +0200 Subject: [PATCH 5/8] Explain minimum length in docstrings --- hypothesis-python/src/hypothesis/extra/numpy.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py index 1da78cb32f..bbda12bbd5 100644 --- a/hypothesis-python/src/hypothesis/extra/numpy.py +++ b/hypothesis-python/src/hypothesis/extra/numpy.py @@ -590,7 +590,12 @@ def timedelta64_dtypes(max_period="Y", min_period="ns", endianness="?"): def byte_string_dtypes(endianness="?", min_len=1, max_len=16): # type: (str, int, int) -> st.SearchStrategy[np.dtype] """Return a strategy for generating bytestring dtypes, of various lengths - and byteorder.""" + and byteorder. + + While Hypothesis' string strategies can generate empty strings, string + dtypes with length 0 indicate that size is still to be determined, so + the minimum length for string dtypes is 1. 
+ """ if min_len == 0: note_deprecation( "generating byte string dtypes for unspecified length ('S0') " @@ -614,7 +619,12 @@ def byte_string_dtypes(endianness="?", min_len=1, max_len=16): def unicode_string_dtypes(endianness="?", min_len=1, max_len=16): # type: (str, int, int) -> st.SearchStrategy[np.dtype] """Return a strategy for generating unicode string dtypes, of various - lengths and byteorder.""" + lengths and byteorder. + + While Hypothesis' string strategies can generate empty strings, string + dtypes with length 0 indicate that size is still to be determined, so + the minimum length for string dtypes is 1. + """ if min_len == 0: note_deprecation( "generating unicode string dtypes for unspecified length ('U0') " From 94e7fa5b556e36d3ce7c6668103154576c6c2056 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 18:02:38 +0200 Subject: [PATCH 6/8] Add tests for deprecated min_len=0 and max_len=0 for string dtypes --- .../tests/numpy/test_argument_validation.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/hypothesis-python/tests/numpy/test_argument_validation.py b/hypothesis-python/tests/numpy/test_argument_validation.py index 2136122ae4..439aba5224 100644 --- a/hypothesis-python/tests/numpy/test_argument_validation.py +++ b/hypothesis-python/tests/numpy/test_argument_validation.py @@ -24,6 +24,7 @@ import hypothesis.strategies as st from hypothesis import given from hypothesis.errors import InvalidArgument +from tests.common.utils import checks_deprecated_behaviour def e(a, **kwargs): @@ -142,3 +143,17 @@ def test_bad_dtype_strategy(capsys, data): assert capsys.readouterr().out.startswith( "Got invalid dtype value=%r from strategy=just(%r), function=" % (val, val) ) + + +@checks_deprecated_behaviour +@given(st.data()) +def test_byte_string_dtype_len_0(data): + s = nps.byte_string_dtypes(min_len=0, max_len=0) + assert data.draw(s).itemsize == 1 + + +@checks_deprecated_behaviour +@given(st.data()) +def 
test_unicode_string_dtype_len_0(data): + s = nps.unicode_string_dtypes(min_len=0, max_len=0) + assert data.draw(s).itemsize == 4 From 801f9a1d2ff798eb41673a4b776733df0f0bda2b Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 18:03:26 +0200 Subject: [PATCH 7/8] Fix formatting --- hypothesis-python/src/hypothesis/extra/numpy.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hypothesis-python/src/hypothesis/extra/numpy.py b/hypothesis-python/src/hypothesis/extra/numpy.py index bbda12bbd5..de4ba9a3b8 100644 --- a/hypothesis-python/src/hypothesis/extra/numpy.py +++ b/hypothesis-python/src/hypothesis/extra/numpy.py @@ -600,14 +600,14 @@ def byte_string_dtypes(endianness="?", min_len=1, max_len=16): note_deprecation( "generating byte string dtypes for unspecified length ('S0') " "is deprecated. min_len will be 1 instead.", - since="RELEASEDAY" + since="RELEASEDAY", ) min_len = 1 if max_len == 0: note_deprecation( "generating byte string dtypes for unspecified length ('S0') " "is deprecated. max_len will be 1 instead.", - since="RELEASEDAY" + since="RELEASEDAY", ) max_len = 1 @@ -629,14 +629,14 @@ def unicode_string_dtypes(endianness="?", min_len=1, max_len=16): note_deprecation( "generating unicode string dtypes for unspecified length ('U0') " "is deprecated. min_len will be 1 instead.", - since="RELEASEDAY" + since="RELEASEDAY", ) min_len = 1 if max_len == 0: note_deprecation( "generating unicode string dtypes for unspecified length ('U0') " "is deprecated. 
max_len will be 1 instead.", - since="RELEASEDAY" + since="RELEASEDAY", ) max_len = 1 From d11f2e80708829c4042683290b8c9f72e9cbec45 Mon Sep 17 00:00:00 2001 From: Thomas Kluyver Date: Fri, 6 Sep 2019 19:03:48 +0200 Subject: [PATCH 8/8] Test generating compound dtypes & data with bytes & unicode strings in --- .../tests/numpy/test_gen_data.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/hypothesis-python/tests/numpy/test_gen_data.py b/hypothesis-python/tests/numpy/test_gen_data.py index 712029efee..dc369fd6e5 100644 --- a/hypothesis-python/tests/numpy/test_gen_data.py +++ b/hypothesis-python/tests/numpy/test_gen_data.py @@ -194,11 +194,29 @@ def test_can_generate_scalar_dtypes(dtype): assert isinstance(dtype, np.dtype) -@given(nps.nested_dtypes()) +@given( + nps.nested_dtypes( + subtype_strategy=st.one_of( + nps.scalar_dtypes(), nps.byte_string_dtypes(), nps.unicode_string_dtypes() + ) + ) +) def test_can_generate_compound_dtypes(dtype): assert isinstance(dtype, np.dtype) +@given( + nps.nested_dtypes( + subtype_strategy=st.one_of( + nps.scalar_dtypes(), nps.byte_string_dtypes(), nps.unicode_string_dtypes() + ) + ).flatmap(lambda dt: nps.arrays(dtype=dt, shape=1)) +) +def test_can_generate_data_compound_dtypes(arr): + # This is meant to catch the class of errors which prompted PR #2085 + assert isinstance(arr, np.ndarray) + + @given(nps.nested_dtypes(max_itemsize=400), st.data()) def test_infer_strategy_from_dtype(dtype, data): # Given a dtype