HypothesisWorks · Zac-HD · Nov 19, 2021 · Nov 16, 2021 · Nov 16, 2021 · Nov 16, 2021
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,5 @@
+RELEASE_TYPE: minor
+
+This release adds rewriting logic that makes a few special cases, such
+as ``s.map(lambda x: x)`` and ``lists().filter(len)``, more efficient
+(:issue:`2701`).
diff --git a/hypothesis-python/src/hypothesis/internal/reflection.py b/hypothesis-python/src/hypothesis/internal/reflection.py
@@ -581,3 +581,15 @@ def accept(proxy):
         return impersonate(target)(wraps(target)(replace_sig(proxy)))
 
     return accept
+
+
+def is_identity_function(f):
+    """Return True if ``f`` is described as ``lambda <name>: <name>``.
+
+    The check is purely textual: it matches the string returned by
+    ``get_pretty_function_description(f)``, and is False for anything whose
+    description is not exactly a one-argument lambda returning its argument.
+    """
+    # TODO: pattern-match the AST to handle `def ...` identity functions too
+    description = get_pretty_function_description(f)
+    return re.fullmatch(r"lambda (\w+): \1", description) is not None
diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/collections.py b/hypothesis-python/src/hypothesis/strategies/_internal/collections.py
@@ -13,12 +13,14 @@
 #
 # END HEADER
 
+import copy
 from typing import Any, Tuple, overload
 
 from hypothesis.errors import InvalidArgument
 from hypothesis.internal.conjecture import utils as cu
 from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy
 from hypothesis.internal.conjecture.utils import combine_labels
+from hypothesis.internal.reflection import is_identity_function
 from hypothesis.strategies._internal.strategies import (
     T3,
     T4,
@@ -135,6 +137,8 @@ class ListStrategy(SearchStrategy):
     """A strategy for lists which takes a strategy for its elements and the
     allowed lengths, and generates lists with the correct size and contents."""
 
+    _nonempty_filters: tuple = (bool, len, tuple, list)
+
     def __init__(self, elements, min_size=0, max_size=float("inf")):
         super().__init__()
         self.min_size = min_size or 0
@@ -190,6 +194,28 @@ def __repr__(self):
             self.__class__.__name__, self.element_strategy, self.min_size, self.max_size
         )
 
+    def filter(self, condition):
+        """Filter this strategy, rewriting "is non-empty" checks into a size bound.
+
+        If ``condition`` is one of ``_nonempty_filters`` or an identity lambda,
+        no filter is applied: this strategy is returned unchanged when
+        ``min_size`` is already at least 1, otherwise a shallow copy with
+        ``min_size = 1`` is returned.  Any other condition is passed on to the
+        superclass ``filter``.
+        """
+        only_requires_nonempty = (
+            condition in self._nonempty_filters or is_identity_function(condition)
+        )
+        if not only_requires_nonempty:
+            return super().filter(condition)
+        assert self.max_size >= 1, "Always-empty is special cased in st.lists()"
+        if self.min_size >= 1:
+            return self
+        # Copy rather than mutate, so the strategy we were called on is unchanged.
+        bounded = copy.copy(self)
+        bounded.min_size = 1
+        return bounded
+
 
 class UniqueListStrategy(ListStrategy):
     def __init__(self, elements, min_size, max_size, keys, tuple_suffixes):

diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/core.py b/hypothesis-python/src/hypothesis/strategies/_internal/core.py
@@ -112,6 +112,7 @@
 from hypothesis.strategies._internal.strings import (
     FixedSizeBytes,
     OneCharStringStrategy,
+    TextStrategy,
 )
 from hypothesis.strategies._internal.utils import cacheable, defines_strategy
 from hypothesis.utils.conventions import InferType, infer, not_set
@@ -643,7 +644,7 @@ def text(
         )
     if (max_size == 0 or char_strategy.is_empty) and not min_size:
         return just("")
-    return lists(char_strategy, min_size=min_size, max_size=max_size).map("".join)
+    return TextStrategy(char_strategy, min_size=min_size, max_size=max_size)
 
 
 @cacheable

diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py b/hypothesis-python/src/hypothesis/strategies/_internal/strategies.py
@@ -46,7 +46,10 @@
 )
 from hypothesis.internal.coverage import check_function
 from hypothesis.internal.lazyformat import lazyformat
-from hypothesis.internal.reflection import get_pretty_function_description
+from hypothesis.internal.reflection import (
+    get_pretty_function_description,
+    is_identity_function,
+)
 from hypothesis.strategies._internal.utils import defines_strategy
 from hypothesis.utils.conventions import UniqueIdentifier
 
@@ -338,6 +341,8 @@ def map(self, pack: Callable[[Ex], T]) -> "SearchStrategy[T]":
 
         This method is part of the public API.
         """
+        if is_identity_function(pack):
+            return self  # type: ignore  # Mypy has no way to know that `Ex == T`
         return MappedSearchStrategy(pack=pack, strategy=self)
 
     def flatmap(

diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/strings.py b/hypothesis-python/src/hypothesis/strategies/_internal/strings.py
@@ -13,10 +13,14 @@
 #
 # END HEADER
 
-from hypothesis.errors import InvalidArgument
+import copy
+import warnings
+
+from hypothesis.errors import HypothesisWarning, InvalidArgument
 from hypothesis.internal import charmap
 from hypothesis.internal.conjecture.utils import biased_coin, integer_range
 from hypothesis.internal.intervalsets import IntervalSet
+from hypothesis.strategies._internal.collections import ListStrategy
 from hypothesis.strategies._internal.strategies import SearchStrategy
 
 
@@ -42,26 +46,32 @@ def __init__(
             include_characters=whitelist_characters,
             exclude_characters=blacklist_characters,
         )
-        if not intervals:
-            arguments = [
+        self._arg_repr = ", ".join(
+            f"{k}={v!r}"
+            for k, v in [
                 ("whitelist_categories", whitelist_categories),
                 ("blacklist_categories", blacklist_categories),
                 ("whitelist_characters", whitelist_characters),
                 ("blacklist_characters", blacklist_characters),
                 ("min_codepoint", min_codepoint),
                 ("max_codepoint", max_codepoint),
             ]
+            if not (v in (None, "") or (k == "blacklist_categories" and v == ("Cs",)))
+        )
+        if not intervals:
             raise InvalidArgument(
                 "No characters are allowed to be generated by this "
-                "combination of arguments: "
-                + ", ".join("%s=%r" % arg for arg in arguments if arg[1] is not None)
+                f"combination of arguments: {self._arg_repr}"
             )
         self.intervals = IntervalSet(intervals)
         self.zero_point = self.intervals.index_above(ord("0"))
         self.Z_point = min(
             self.intervals.index_above(ord("Z")), len(self.intervals) - 1
         )
 
+    def __repr__(self):
+        return f"characters({self._arg_repr})"
+
     def do_draw(self, data):
         if len(self.intervals) > 256:
             if biased_coin(data, 0.2):
@@ -99,6 +109,92 @@ def rewrite_integer(self, i):
         return i
 
 
+class TextStrategy(ListStrategy):
+    """A strategy for strings, drawn as a list of characters and then joined.
+
+    Sizes and elements are handled by ``ListStrategy``; this subclass joins the
+    drawn list into a ``str``, gives a ``text(...)`` repr, and extends
+    ``filter`` to recognise ``str`` methods that imply a nonempty string.
+    """
+
+    def do_draw(self, data):
+        return "".join(super().do_draw(data))
+
+    def __repr__(self):
+        args = []
+        if repr(self.element_strategy) != "characters()":
+            args.append(repr(self.element_strategy))
+        if self.min_size:
+            args.append(f"min_size={self.min_size}")
+        if self.max_size < float("inf"):
+            args.append(f"max_size={self.max_size}")
+        return f"text({', '.join(args)})"
+
+    # See https://docs.python.org/3/library/stdtypes.html#string-methods
+    # Called with the string as their only argument, these methods return a
+    # truthy value for every nonempty string and a falsy value for "".
+    #
+    # str.join is deliberately absent: it requires a second (iterable) argument,
+    # so as a one-argument predicate it raises TypeError, and rewriting it to a
+    # size bound would hide that error.
+    _nonempty_filters = ListStrategy._nonempty_filters + (
+        str.capitalize,
+        str.casefold,
+        str.expandtabs,
+        str.lower,
+        str.splitlines,
+        str.swapcase,
+        str.title,
+        str.upper,
+    )
+    # These methods are falsy for "" but also depend on the contents of the
+    # string, so min_size can be raised to 1 but the filter must still be applied.
+    _nonempty_and_content_filters = (
+        str.isidentifier,
+        str.islower,
+        str.isupper,
+        str.isalnum,
+        str.isalpha,
+        # str.isascii must NOT be added here: "".isascii() is True.
+        str.isdecimal,
+        str.isdigit,
+        str.isnumeric,
+        str.isspace,
+        str.istitle,
+        # " ".split() == [], so whitespace-only strings are rejected by str.split;
+        # it implies nonempty but is not equivalent to it.
+        str.split,
+    )
+
+    def filter(self, condition):
+        """Filter generated strings, raising ``min_size`` where that is implied.
+
+        Conditions in ``_nonempty_and_content_filters`` are applied to a copy of
+        this strategy whose ``min_size`` is at least 1.  Using ``str.lower``,
+        ``str.title`` or ``str.upper`` additionally emits a ``HypothesisWarning``.
+        Everything else is passed on to the superclass ``filter``.
+        """
+        if condition in (str.lower, str.title, str.upper):
+            warnings.warn(
+                f"You applied str.{condition.__name__} as a filter, but this allows "
+                f"all nonempty strings!  Did you mean str.is{condition.__name__}?",
+                HypothesisWarning,
+            )
+        # We use ListStrategy filter logic for the conditions that *only* imply
+        # the string is nonempty.  Here, we increment the min_size but still apply
+        # the filter for conditions that imply nonempty *and specific contents*.
+        #
+        # TODO: we may eventually rewrite the elements_strategy for some of these,
+        #       avoiding rejection sampling and making them much more efficient.
+        if condition in self._nonempty_and_content_filters:
+            assert self.max_size >= 1, "Always-empty is special cased in st.text()"
+            nonempty = copy.copy(self)
+            nonempty.min_size = max(1, self.min_size)
+            return ListStrategy.filter(nonempty, condition)
+
+        return super().filter(condition)
+
+
 class FixedSizeBytes(SearchStrategy):
     def __init__(self, size):
         self.size = size

diff --git a/hypothesis-python/tests/conjecture/test_test_data.py b/hypothesis-python/tests/conjecture/test_test_data.py
@@ -338,7 +338,7 @@ def test_will_mark_too_deep_examples_as_invalid():
 
     s = st.none()
     for _ in range(MAX_DEPTH + 1):
-        s = s.map(lambda x: x)
+        s = s.map(lambda x: None)
 
     with pytest.raises(StopTest):
         d.draw(s)

diff --git a/hypothesis-python/tests/cover/test_filter_rewriting.py b/hypothesis-python/tests/cover/test_filter_rewriting.py
@@ -21,9 +21,9 @@
 import pytest
 
 from hypothesis import given, strategies as st
-from hypothesis.errors import Unsatisfiable
+from hypothesis.errors import HypothesisWarning, Unsatisfiable
 from hypothesis.internal.reflection import get_pretty_function_description
-from hypothesis.strategies._internal.lazy import LazyStrategy
+from hypothesis.strategies._internal.lazy import LazyStrategy, unwrap_strategies
 from hypothesis.strategies._internal.numbers import IntegersStrategy
 from hypothesis.strategies._internal.strategies import FilteredStrategy
 
@@ -179,6 +179,7 @@ def __call__(self, bar):
 
 
 lambda_without_source = eval("lambda x: x > 2", {}, {})
+assert get_pretty_function_description(lambda_without_source) == "lambda x: <unknown>"
 
 
 @pytest.mark.parametrize(
@@ -208,3 +209,48 @@ def test_rewriting_partially_understood_filters(data, start, end, predicate):
 
     value = data.draw(s)
     assert predicate(value)
+
+
+@pytest.mark.parametrize(
+    "strategy",
+    [
+        st.text(),
+        st.text(min_size=2),
+        st.lists(st.none()),
+        st.lists(st.none(), min_size=2),
+    ],
+)
+@pytest.mark.parametrize(
+    "predicate",
+    [bool, len, tuple, list, lambda x: x],
+    ids=get_pretty_function_description,
+)
+def test_sequence_filter_rewriting(strategy, predicate):
+    """Nonempty-only predicates are rewritten to a size bound, not a filter."""
+    base = unwrap_strategies(strategy)
+    rewritten = base.filter(predicate)
+    assert not isinstance(rewritten, FilteredStrategy)
+    if base.min_size > 0:
+        # Already nonempty, so the very same strategy object comes back.
+        assert rewritten is base
+    else:
+        assert rewritten.min_size == 1
+
+
+@pytest.mark.parametrize("method", [str.lower, str.title, str.upper])
+def test_warns_on_suspicious_string_methods(method):
+    """str.lower/title/upper warn, and the result has min_size raised to 1."""
+    base = unwrap_strategies(st.text())
+    expected_message = "this allows all nonempty strings!  Did you mean"
+    with pytest.warns(HypothesisWarning, match=expected_message):
+        rewritten = base.filter(method)
+    assert rewritten.min_size == 1
+
+
+@pytest.mark.parametrize("method", [str.isidentifier, str.isalnum])
+def test_bumps_min_size_and_filters_for_content_str_methods(method):
+    """Content predicates raise min_size to 1 and are still applied as filters."""
+    base = unwrap_strategies(st.text())
+    filtered = base.filter(method)
+    assert filtered.filtered_strategy.min_size == 1
+    assert filtered.flat_conditions == (method,)
diff --git a/hypothesis-python/tests/cover/test_map.py b/hypothesis-python/tests/cover/test_map.py
@@ -14,6 +14,7 @@
 # END HEADER
 
 from hypothesis import assume, given, strategies as st
+from hypothesis.strategies._internal.lazy import unwrap_strategies
 
 from tests.common.debug import assert_no_examples
 
@@ -25,3 +26,9 @@ def test_can_assume_in_map(x):
 
 def test_assume_in_just_raises_immediately():
     assert_no_examples(st.just(1).map(lambda x: assume(x == 2)))
+
+
+def test_identity_map_is_noop():
+    """Mapping with an identity lambda returns the same strategy object."""
+    base = unwrap_strategies(st.integers())
+    assert base.map(lambda x: x) is base
diff --git a/hypothesis-python/tests/ghostwriter/recorded/magic_builtins.txt b/hypothesis-python/tests/ghostwriter/recorded/magic_builtins.txt
@@ -43,7 +43,7 @@ def test_fuzz_chr(i):
 
 @given(
     source=st.nothing(),
-    filename=st.nothing(),
+    filename=st.text(),
     mode=st.nothing(),
     flags=st.just(0),
     dont_inherit=st.booleans(),
@@ -69,7 +69,7 @@ def test_fuzz_complex(real, imag):
     complex(real=real, imag=imag)
 
 
-@given(obj=st.nothing(), name=st.nothing())
+@given(obj=st.nothing(), name=st.text())
 def test_fuzz_delattr(obj, name):
     delattr(obj, name)
 
@@ -112,12 +112,12 @@ def test_fuzz_format(value, format_spec):
     format(value, format_spec)
 
 
-@given(object=st.builds(object), name=st.nothing(), default=st.nothing())
+@given(object=st.builds(object), name=st.text(), default=st.nothing())
 def test_fuzz_getattr(object, name, default):
     getattr(object, name, default)
 
 
-@given(obj=st.nothing(), name=st.nothing())
+@given(obj=st.nothing(), name=st.text())
 def test_fuzz_hasattr(obj, name):
     hasattr(obj, name)
 
@@ -253,7 +253,7 @@ def test_fuzz_round(number, ndigits):
     round(number=number, ndigits=ndigits)
 
 
-@given(obj=st.nothing(), name=st.nothing(), value=st.nothing())
+@given(obj=st.nothing(), name=st.text(), value=st.nothing())
 def test_fuzz_setattr(obj, name, value):
     setattr(obj, name, value)