diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..6bac08488f --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,5 @@ +RELEASE_TYPE: minor + +This release adds special filtering logic to make a few special cases +like ``s.map(lambda x: x)`` and ``lists().filter(len)`` more efficient +(:issue:`2701`). diff --git a/hypothesis-python/src/hypothesis/internal/reflection.py b/hypothesis-python/src/hypothesis/internal/reflection.py index 8d54fd7111..f087e5e599 100644 --- a/hypothesis-python/src/hypothesis/internal/reflection.py +++ b/hypothesis-python/src/hypothesis/internal/reflection.py @@ -581,3 +581,8 @@ def accept(proxy): return impersonate(target)(wraps(target)(replace_sig(proxy))) return accept + + +def is_identity_function(f): + # TODO: pattern-match the AST to handle `def ...` identity functions too + return bool(re.fullmatch(r"lambda (\w+): \1", get_pretty_function_description(f))) diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/collections.py b/hypothesis-python/src/hypothesis/strategies/_internal/collections.py index efd506ce6b..56c55e01e7 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/collections.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/collections.py @@ -13,12 +13,14 @@ # # END HEADER +import copy from typing import Any, Tuple, overload from hypothesis.errors import InvalidArgument from hypothesis.internal.conjecture import utils as cu from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy from hypothesis.internal.conjecture.utils import combine_labels +from hypothesis.internal.reflection import is_identity_function from hypothesis.strategies._internal.strategies import ( T3, T4, @@ -135,6 +137,8 @@ class ListStrategy(SearchStrategy): """A strategy for lists which takes a strategy for its elements and the allowed lengths, and generates lists with the correct size and contents.""" + _nonempty_filters: tuple = 
(bool, len, tuple, list) + def __init__(self, elements, min_size=0, max_size=float("inf")): super().__init__() self.min_size = min_size or 0 @@ -190,6 +194,16 @@ def __repr__(self): self.__class__.__name__, self.element_strategy, self.min_size, self.max_size ) + def filter(self, condition): + if condition in self._nonempty_filters or is_identity_function(condition): + assert self.max_size >= 1, "Always-empty is special cased in st.lists()" + if self.min_size >= 1: + return self + new = copy.copy(self) + new.min_size = 1 + return new + return super().filter(condition) + class UniqueListStrategy(ListStrategy): def __init__(self, elements, min_size, max_size, keys, tuple_suffixes): diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/core.py b/hypothesis-python/src/hypothesis/strategies/_internal/core.py index 8946cb8318..4f64ff75fe 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/core.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/core.py @@ -112,6 +112,7 @@ from hypothesis.strategies._internal.strings import ( FixedSizeBytes, OneCharStringStrategy, + TextStrategy, ) from hypothesis.strategies._internal.utils import cacheable, defines_strategy from hypothesis.utils.conventions import InferType, infer, not_set @@ -643,7 +644,7 @@ def text( ) if (max_size == 0 or char_strategy.is_empty) and not min_size: return just("") - return lists(char_strategy, min_size=min_size, max_size=max_size).map("".join) + return TextStrategy(char_strategy, min_size=min_size, max_size=max_size) @cacheable diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py b/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py index 3f322bddad..89731fff1e 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py @@ -46,7 +46,10 @@ ) from hypothesis.internal.coverage import check_function from 
hypothesis.internal.lazyformat import lazyformat -from hypothesis.internal.reflection import get_pretty_function_description +from hypothesis.internal.reflection import ( + get_pretty_function_description, + is_identity_function, +) from hypothesis.strategies._internal.utils import defines_strategy from hypothesis.utils.conventions import UniqueIdentifier @@ -338,6 +341,8 @@ def map(self, pack: Callable[[Ex], T]) -> "SearchStrategy[T]": This method is part of the public API. """ + if is_identity_function(pack): + return self # type: ignore # Mypy has no way to know that `Ex == T` return MappedSearchStrategy(pack=pack, strategy=self) def flatmap( diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/strings.py b/hypothesis-python/src/hypothesis/strategies/_internal/strings.py index 291d9da026..460b5b0bf7 100644 --- a/hypothesis-python/src/hypothesis/strategies/_internal/strings.py +++ b/hypothesis-python/src/hypothesis/strategies/_internal/strings.py @@ -13,10 +13,14 @@ # # END HEADER -from hypothesis.errors import InvalidArgument +import copy +import warnings + +from hypothesis.errors import HypothesisWarning, InvalidArgument from hypothesis.internal import charmap from hypothesis.internal.conjecture.utils import biased_coin, integer_range from hypothesis.internal.intervalsets import IntervalSet +from hypothesis.strategies._internal.collections import ListStrategy from hypothesis.strategies._internal.strategies import SearchStrategy @@ -42,8 +46,9 @@ def __init__( include_characters=whitelist_characters, exclude_characters=blacklist_characters, ) - if not intervals: - arguments = [ + self._arg_repr = ", ".join( + f"{k}={v!r}" + for k, v in [ ("whitelist_categories", whitelist_categories), ("blacklist_categories", blacklist_categories), ("whitelist_characters", whitelist_characters), @@ -51,10 +56,12 @@ def __init__( ("min_codepoint", min_codepoint), ("max_codepoint", max_codepoint), ] + if not (v in (None, "") or (k == "blacklist_categories" and v == 
("Cs",))) + ) + if not intervals: raise InvalidArgument( "No characters are allowed to be generated by this " - "combination of arguments: " - + ", ".join("%s=%r" % arg for arg in arguments if arg[1] is not None) + f"combination of arguments: {self._arg_repr}" ) self.intervals = IntervalSet(intervals) self.zero_point = self.intervals.index_above(ord("0")) @@ -62,6 +69,9 @@ def __init__( self.intervals.index_above(ord("Z")), len(self.intervals) - 1 ) + def __repr__(self): + return f"characters({self._arg_repr})" + def do_draw(self, data): if len(self.intervals) > 256: if biased_coin(data, 0.2): @@ -99,6 +109,70 @@ def rewrite_integer(self, i): return i +class TextStrategy(ListStrategy): + def do_draw(self, data): + return "".join(super().do_draw(data)) + + def __repr__(self): + args = [] + if repr(self.element_strategy) != "characters()": + args.append(repr(self.element_strategy)) + if self.min_size: + args.append(f"min_size={self.min_size}") + if self.max_size < float("inf"): + args.append(f"max_size={self.max_size}") + return f"text({', '.join(args)})" + + # See https://docs.python.org/3/library/stdtypes.html#string-methods + # These methods always return Truthy values for any nonempty string. + _nonempty_filters = ListStrategy._nonempty_filters + ( + str.capitalize, + str.casefold, + str.expandtabs, + str.join, + str.lower, + str.split, + str.splitlines, + str.swapcase, + str.title, + str.upper, + ) + _nonempty_and_content_filters = ( + str.isidentifier, + str.islower, + str.isupper, + str.isalnum, + str.isalpha, + # str.isascii, # new in Python 3.7 + str.isdecimal, + str.isdigit, + str.isnumeric, + str.isspace, + str.istitle, + ) + + def filter(self, condition): + if condition in (str.lower, str.title, str.upper): + warnings.warn( + f"You applied str.{condition.__name__} as a filter, but this allows " + f"all nonempty strings! 
Did you mean str.is{condition.__name__}?", + HypothesisWarning, + ) + # We use ListStrategy filter logic for the conditions that *only* imply + # the string is nonempty. Here, we increment the min_size but still apply + # the filter for conditions that imply nonempty *and specific contents*. + # + # TODO: we may eventually rewrite the element_strategy for some of these, + # avoiding rejection sampling and making them much more efficient. + if condition in self._nonempty_and_content_filters: + assert self.max_size >= 1, "Always-empty is special cased in st.text()" + self = copy.copy(self) + self.min_size = max(1, self.min_size) + return ListStrategy.filter(self, condition) + + return super().filter(condition) + + class FixedSizeBytes(SearchStrategy): def __init__(self, size): self.size = size diff --git a/hypothesis-python/tests/conjecture/test_test_data.py b/hypothesis-python/tests/conjecture/test_test_data.py index b8ab34254e..97279afe59 100644 --- a/hypothesis-python/tests/conjecture/test_test_data.py +++ b/hypothesis-python/tests/conjecture/test_test_data.py @@ -338,7 +338,7 @@ def test_will_mark_too_deep_examples_as_invalid(): s = st.none() for _ in range(MAX_DEPTH + 1): - s = s.map(lambda x: x) + s = s.map(lambda x: None) with pytest.raises(StopTest): d.draw(s) diff --git a/hypothesis-python/tests/cover/test_filter_rewriting.py b/hypothesis-python/tests/cover/test_filter_rewriting.py index 4ca67a55ed..f0f7f833f0 100644 --- a/hypothesis-python/tests/cover/test_filter_rewriting.py +++ b/hypothesis-python/tests/cover/test_filter_rewriting.py @@ -21,9 +21,9 @@ import pytest from hypothesis import given, strategies as st -from hypothesis.errors import Unsatisfiable +from hypothesis.errors import HypothesisWarning, Unsatisfiable from hypothesis.internal.reflection import get_pretty_function_description -from hypothesis.strategies._internal.lazy import LazyStrategy +from hypothesis.strategies._internal.lazy import LazyStrategy, unwrap_strategies from 
hypothesis.strategies._internal.numbers import IntegersStrategy from hypothesis.strategies._internal.strategies import FilteredStrategy @@ -179,6 +179,7 @@ def __call__(self, bar): lambda_without_source = eval("lambda x: x > 2", {}, {}) +assert get_pretty_function_description(lambda_without_source) == "lambda x: <unknown>" @pytest.mark.parametrize( @@ -208,3 +209,45 @@ def test_rewriting_partially_understood_filters(data, start, end, predicate): value = data.draw(s) assert predicate(value) + + +@pytest.mark.parametrize( + "strategy", + [ + st.text(), + st.text(min_size=2), + st.lists(st.none()), + st.lists(st.none(), min_size=2), + ], +) +@pytest.mark.parametrize( + "predicate", + [bool, len, tuple, list, lambda x: x], + ids=get_pretty_function_description, +) +def test_sequence_filter_rewriting(strategy, predicate): + s = unwrap_strategies(strategy) + fs = s.filter(predicate) + assert not isinstance(fs, FilteredStrategy) + if s.min_size > 0: + assert fs is s + else: + assert fs.min_size == 1 + + +@pytest.mark.parametrize("method", [str.lower, str.title, str.upper]) +def test_warns_on_suspicious_string_methods(method): + s = unwrap_strategies(st.text()) + with pytest.warns( + HypothesisWarning, match="this allows all nonempty strings! 
Did you mean" + ): + fs = s.filter(method) + assert fs.min_size == 1 + + +@pytest.mark.parametrize("method", [str.isidentifier, str.isalnum]) +def test_bumps_min_size_and_filters_for_content_str_methods(method): + s = unwrap_strategies(st.text()) + fs = s.filter(method) + assert fs.filtered_strategy.min_size == 1 + assert fs.flat_conditions == (method,) diff --git a/hypothesis-python/tests/cover/test_map.py b/hypothesis-python/tests/cover/test_map.py index 56a22455ee..947104d1ea 100644 --- a/hypothesis-python/tests/cover/test_map.py +++ b/hypothesis-python/tests/cover/test_map.py @@ -14,6 +14,7 @@ # END HEADER from hypothesis import assume, given, strategies as st +from hypothesis.strategies._internal.lazy import unwrap_strategies from tests.common.debug import assert_no_examples @@ -25,3 +26,8 @@ def test_can_assume_in_map(x): def test_assume_in_just_raises_immediately(): assert_no_examples(st.just(1).map(lambda x: assume(x == 2))) + + +def test_identity_map_is_noop(): + s = unwrap_strategies(st.integers()) + assert s.map(lambda x: x) is s diff --git a/hypothesis-python/tests/ghostwriter/recorded/magic_builtins.txt b/hypothesis-python/tests/ghostwriter/recorded/magic_builtins.txt index 99e18aa18f..77022924c4 100644 --- a/hypothesis-python/tests/ghostwriter/recorded/magic_builtins.txt +++ b/hypothesis-python/tests/ghostwriter/recorded/magic_builtins.txt @@ -43,7 +43,7 @@ def test_fuzz_chr(i): @given( source=st.nothing(), - filename=st.nothing(), + filename=st.text(), mode=st.nothing(), flags=st.just(0), dont_inherit=st.booleans(), @@ -69,7 +69,7 @@ def test_fuzz_complex(real, imag): complex(real=real, imag=imag) -@given(obj=st.nothing(), name=st.nothing()) +@given(obj=st.nothing(), name=st.text()) def test_fuzz_delattr(obj, name): delattr(obj, name) @@ -112,12 +112,12 @@ def test_fuzz_format(value, format_spec): format(value, format_spec) -@given(object=st.builds(object), name=st.nothing(), default=st.nothing()) +@given(object=st.builds(object), name=st.text(), 
default=st.nothing()) def test_fuzz_getattr(object, name, default): getattr(object, name, default) -@given(obj=st.nothing(), name=st.nothing()) +@given(obj=st.nothing(), name=st.text()) def test_fuzz_hasattr(obj, name): hasattr(obj, name) @@ -253,7 +253,7 @@ def test_fuzz_round(number, ndigits): round(number=number, ndigits=ndigits) -@given(obj=st.nothing(), name=st.nothing(), value=st.nothing()) +@given(obj=st.nothing(), name=st.text(), value=st.nothing()) def test_fuzz_setattr(obj, name, value): setattr(obj, name, value)