Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Predicate rewriting for nonempty collections #3134

Merged
merged 6 commits into from Nov 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 5 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,5 @@
RELEASE_TYPE: minor

This release adds rewriting logic for ``.map()`` and ``.filter()`` so that a
few common cases, such as ``s.map(lambda x: x)`` and ``lists().filter(len)``,
are handled more efficiently (:issue:`2701`).
5 changes: 5 additions & 0 deletions hypothesis-python/src/hypothesis/internal/reflection.py
Expand Up @@ -581,3 +581,8 @@ def accept(proxy):
return impersonate(target)(wraps(target)(replace_sig(proxy)))

return accept


def is_identity_function(f):
    """Report whether ``f`` is recognisably an identity lambda.

    The check is purely textual: the pretty-printed description of ``f`` must
    be exactly ``lambda <name>: <name>``.
    """
    # TODO: pattern-match the AST to handle `def ...` identity functions too
    description = get_pretty_function_description(f)
    return re.fullmatch(r"lambda (\w+): \1", description) is not None
Expand Up @@ -13,12 +13,14 @@
#
# END HEADER

import copy
from typing import Any, Tuple, overload

from hypothesis.errors import InvalidArgument
from hypothesis.internal.conjecture import utils as cu
from hypothesis.internal.conjecture.junkdrawer import LazySequenceCopy
from hypothesis.internal.conjecture.utils import combine_labels
from hypothesis.internal.reflection import is_identity_function
from hypothesis.strategies._internal.strategies import (
T3,
T4,
Expand Down Expand Up @@ -135,6 +137,8 @@ class ListStrategy(SearchStrategy):
"""A strategy for lists which takes a strategy for its elements and the
allowed lengths, and generates lists with the correct size and contents."""

_nonempty_filters: tuple = (bool, len, tuple, list)

def __init__(self, elements, min_size=0, max_size=float("inf")):
super().__init__()
self.min_size = min_size or 0
Expand Down Expand Up @@ -190,6 +194,16 @@ def __repr__(self):
self.__class__.__name__, self.element_strategy, self.min_size, self.max_size
)

def filter(self, condition):
    """Filter this strategy, turning "nonempty" predicates into a size bound.

    When ``condition`` is one of ``self._nonempty_filters`` or an identity
    lambda, no filtered strategy is built: the strategy itself is returned if
    ``min_size`` is already at least one, otherwise a shallow copy with
    ``min_size`` set to 1. Any other condition is passed to the parent
    class's ``filter``.
    """
    implies_nonempty = condition in self._nonempty_filters or is_identity_function(
        condition
    )
    if not implies_nonempty:
        return super().filter(condition)
    assert self.max_size >= 1, "Always-empty is special cased in st.lists()"
    if self.min_size >= 1:
        # Already guaranteed nonempty, so the predicate adds nothing.
        return self
    bumped = copy.copy(self)
    bumped.min_size = 1
    return bumped


class UniqueListStrategy(ListStrategy):
def __init__(self, elements, min_size, max_size, keys, tuple_suffixes):
Expand Down
Expand Up @@ -112,6 +112,7 @@
from hypothesis.strategies._internal.strings import (
FixedSizeBytes,
OneCharStringStrategy,
TextStrategy,
)
from hypothesis.strategies._internal.utils import cacheable, defines_strategy
from hypothesis.utils.conventions import InferType, infer, not_set
Expand Down Expand Up @@ -643,7 +644,7 @@ def text(
)
if (max_size == 0 or char_strategy.is_empty) and not min_size:
return just("")
return lists(char_strategy, min_size=min_size, max_size=max_size).map("".join)
return TextStrategy(char_strategy, min_size=min_size, max_size=max_size)


@cacheable
Expand Down
Expand Up @@ -46,7 +46,10 @@
)
from hypothesis.internal.coverage import check_function
from hypothesis.internal.lazyformat import lazyformat
from hypothesis.internal.reflection import get_pretty_function_description
from hypothesis.internal.reflection import (
get_pretty_function_description,
is_identity_function,
)
from hypothesis.strategies._internal.utils import defines_strategy
from hypothesis.utils.conventions import UniqueIdentifier

Expand Down Expand Up @@ -338,6 +341,8 @@ def map(self, pack: Callable[[Ex], T]) -> "SearchStrategy[T]":

This method is part of the public API.
"""
if is_identity_function(pack):
return self # type: ignore # Mypy has no way to know that `Ex == T`
return MappedSearchStrategy(pack=pack, strategy=self)

def flatmap(
Expand Down
84 changes: 79 additions & 5 deletions hypothesis-python/src/hypothesis/strategies/_internal/strings.py
Expand Up @@ -13,10 +13,14 @@
#
# END HEADER

from hypothesis.errors import InvalidArgument
import copy
import warnings

from hypothesis.errors import HypothesisWarning, InvalidArgument
from hypothesis.internal import charmap
from hypothesis.internal.conjecture.utils import biased_coin, integer_range
from hypothesis.internal.intervalsets import IntervalSet
from hypothesis.strategies._internal.collections import ListStrategy
from hypothesis.strategies._internal.strategies import SearchStrategy


Expand All @@ -42,26 +46,32 @@ def __init__(
include_characters=whitelist_characters,
exclude_characters=blacklist_characters,
)
if not intervals:
arguments = [
self._arg_repr = ", ".join(
f"{k}={v!r}"
for k, v in [
("whitelist_categories", whitelist_categories),
("blacklist_categories", blacklist_categories),
("whitelist_characters", whitelist_characters),
("blacklist_characters", blacklist_characters),
("min_codepoint", min_codepoint),
("max_codepoint", max_codepoint),
]
if not (v in (None, "") or (k == "blacklist_categories" and v == ("Cs",)))
)
if not intervals:
raise InvalidArgument(
"No characters are allowed to be generated by this "
"combination of arguments: "
+ ", ".join("%s=%r" % arg for arg in arguments if arg[1] is not None)
f"combination of arguments: {self._arg_repr}"
)
self.intervals = IntervalSet(intervals)
self.zero_point = self.intervals.index_above(ord("0"))
self.Z_point = min(
self.intervals.index_above(ord("Z")), len(self.intervals) - 1
)

def __repr__(self):
    """Render as ``characters(...)`` using the argument text built in ``__init__``."""
    return "characters({})".format(self._arg_repr)

def do_draw(self, data):
if len(self.intervals) > 256:
if biased_coin(data, 0.2):
Expand Down Expand Up @@ -99,6 +109,70 @@ def rewrite_integer(self, i):
return i


class TextStrategy(ListStrategy):
    """A list-of-characters strategy whose drawn value is joined into a ``str``.

    ``filter`` recognises some ``str`` methods used as predicates and turns
    the "string is nonempty" part of them into ``min_size=1``.
    """

    def do_draw(self, data):
        """Draw a list of characters via ``ListStrategy`` and join it."""
        return "".join(super().do_draw(data))

    def __repr__(self):
        """Render as ``text(...)``, omitting arguments left at their defaults."""
        args = []
        if repr(self.element_strategy) != "characters()":
            args.append(repr(self.element_strategy))
        if self.min_size:
            args.append(f"min_size={self.min_size}")
        if self.max_size < float("inf"):
            args.append(f"max_size={self.max_size}")
        return f"text({', '.join(args)})"

    # See https://docs.python.org/3/library/stdtypes.html#string-methods
    # These methods always return Truthy values for any nonempty string.
    _nonempty_filters = ListStrategy._nonempty_filters + (
        str.capitalize,
        str.casefold,
        str.expandtabs,
        str.join,
        str.lower,
        str.splitlines,
        str.swapcase,
        str.title,
        str.upper,
    )
    # These are falsy for the empty string but also reject some nonempty
    # strings, so the predicate must still be applied after bumping min_size.
    _nonempty_and_content_filters = (
        str.isidentifier,
        str.islower,
        str.isupper,
        str.isalnum,
        str.isalpha,
        # str.isascii,  # new in Python 3.7
        str.isdecimal,
        str.isdigit,
        str.isnumeric,
        str.isspace,
        str.istitle,
        # With no separator, str.split returns [] for whitespace-only strings
        # such as " ", so it is NOT truthy for every nonempty string.
        str.split,
    )

    def filter(self, condition):
        """Filter this strategy, rewriting known ``str`` predicates where possible.

        * ``str.lower`` / ``str.title`` / ``str.upper`` emit a
          ``HypothesisWarning``, since they accept every nonempty string and
          the ``str.is*`` variant was probably intended.
        * Predicates in ``_nonempty_and_content_filters`` get a copy of this
          strategy with ``min_size`` of at least 1, with the predicate still
          applied on top.
        * Everything else goes through ``ListStrategy.filter``.
        """
        if condition in (str.lower, str.title, str.upper):
            warnings.warn(
                f"You applied str.{condition.__name__} as a filter, but this allows "
                f"all nonempty strings! Did you mean str.is{condition.__name__}?",
                HypothesisWarning,
                # Attribute the warning to the code that called .filter().
                stacklevel=2,
            )
        # We use ListStrategy filter logic for the conditions that *only* imply
        # the string is nonempty. Here, we increment the min_size but still apply
        # the filter for conditions that imply nonempty *and specific contents*.
        #
        # TODO: we may eventually rewrite the elements_strategy for some of these,
        # avoiding rejection sampling and making them much more efficient.
        if condition in self._nonempty_and_content_filters:
            assert self.max_size >= 1, "Always-empty is special cased in st.text()"
            bumped = copy.copy(self)
            bumped.min_size = max(1, bumped.min_size)
            return ListStrategy.filter(bumped, condition)

        return super().filter(condition)


class FixedSizeBytes(SearchStrategy):
def __init__(self, size):
self.size = size
Expand Down
2 changes: 1 addition & 1 deletion hypothesis-python/tests/conjecture/test_test_data.py
Expand Up @@ -338,7 +338,7 @@ def test_will_mark_too_deep_examples_as_invalid():

s = st.none()
for _ in range(MAX_DEPTH + 1):
s = s.map(lambda x: x)
s = s.map(lambda x: None)

with pytest.raises(StopTest):
d.draw(s)
Expand Down
47 changes: 45 additions & 2 deletions hypothesis-python/tests/cover/test_filter_rewriting.py
Expand Up @@ -21,9 +21,9 @@
import pytest

from hypothesis import given, strategies as st
from hypothesis.errors import Unsatisfiable
from hypothesis.errors import HypothesisWarning, Unsatisfiable
from hypothesis.internal.reflection import get_pretty_function_description
from hypothesis.strategies._internal.lazy import LazyStrategy
from hypothesis.strategies._internal.lazy import LazyStrategy, unwrap_strategies
from hypothesis.strategies._internal.numbers import IntegersStrategy
from hypothesis.strategies._internal.strategies import FilteredStrategy

Expand Down Expand Up @@ -179,6 +179,7 @@ def __call__(self, bar):


lambda_without_source = eval("lambda x: x > 2", {}, {})
assert get_pretty_function_description(lambda_without_source) == "lambda x: <unknown>"


@pytest.mark.parametrize(
Expand Down Expand Up @@ -208,3 +209,45 @@ def test_rewriting_partially_understood_filters(data, start, end, predicate):

value = data.draw(s)
assert predicate(value)


@pytest.mark.parametrize(
    "strategy",
    [
        st.text(),
        st.text(min_size=2),
        st.lists(st.none()),
        st.lists(st.none(), min_size=2),
    ],
)
@pytest.mark.parametrize(
    "predicate",
    [bool, len, tuple, list, lambda x: x],
    ids=get_pretty_function_description,
)
def test_sequence_filter_rewriting(strategy, predicate):
    """Nonempty-implying predicates become a size bound, not a filter."""
    base = unwrap_strategies(strategy)
    filtered = base.filter(predicate)
    assert not isinstance(filtered, FilteredStrategy)
    if base.min_size <= 0:
        # Previously allowed empty values: only the lower bound may change.
        assert filtered.min_size == 1
    else:
        # Already nonempty: the very same strategy object is returned.
        assert filtered is base


@pytest.mark.parametrize("method", [str.lower, str.title, str.upper])
def test_warns_on_suspicious_string_methods(method):
    """Filtering text by an always-truthy ``str`` method warns, then bumps min_size."""
    text_strategy = unwrap_strategies(st.text())
    expected_message = "this allows all nonempty strings! Did you mean"
    with pytest.warns(HypothesisWarning, match=expected_message):
        filtered = text_strategy.filter(method)
    assert filtered.min_size == 1


@pytest.mark.parametrize("method", [str.isidentifier, str.isalnum])
def test_bumps_min_size_and_filters_for_content_str_methods(method):
    """Content-checking ``str`` predicates keep the filter and raise min_size to 1."""
    filtered = unwrap_strategies(st.text()).filter(method)
    assert filtered.filtered_strategy.min_size == 1
    assert filtered.flat_conditions == (method,)
6 changes: 6 additions & 0 deletions hypothesis-python/tests/cover/test_map.py
Expand Up @@ -14,6 +14,7 @@
# END HEADER

from hypothesis import assume, given, strategies as st
from hypothesis.strategies._internal.lazy import unwrap_strategies

from tests.common.debug import assert_no_examples

Expand All @@ -25,3 +26,8 @@ def test_can_assume_in_map(x):

def test_assume_in_just_raises_immediately():
assert_no_examples(st.just(1).map(lambda x: assume(x == 2)))


def test_identity_map_is_noop():
    """Mapping with an identity lambda returns the very same strategy object."""
    base = unwrap_strategies(st.integers())
    assert base.map(lambda x: x) is base
10 changes: 5 additions & 5 deletions hypothesis-python/tests/ghostwriter/recorded/magic_builtins.txt
Expand Up @@ -43,7 +43,7 @@ def test_fuzz_chr(i):

@given(
source=st.nothing(),
filename=st.nothing(),
filename=st.text(),
mode=st.nothing(),
flags=st.just(0),
dont_inherit=st.booleans(),
Expand All @@ -69,7 +69,7 @@ def test_fuzz_complex(real, imag):
complex(real=real, imag=imag)


@given(obj=st.nothing(), name=st.nothing())
@given(obj=st.nothing(), name=st.text())
def test_fuzz_delattr(obj, name):
delattr(obj, name)

Expand Down Expand Up @@ -112,12 +112,12 @@ def test_fuzz_format(value, format_spec):
format(value, format_spec)


@given(object=st.builds(object), name=st.nothing(), default=st.nothing())
@given(object=st.builds(object), name=st.text(), default=st.nothing())
def test_fuzz_getattr(object, name, default):
getattr(object, name, default)


@given(obj=st.nothing(), name=st.nothing())
@given(obj=st.nothing(), name=st.text())
def test_fuzz_hasattr(obj, name):
hasattr(obj, name)

Expand Down Expand Up @@ -253,7 +253,7 @@ def test_fuzz_round(number, ndigits):
round(number=number, ndigits=ndigits)


@given(obj=st.nothing(), name=st.nothing(), value=st.nothing())
@given(obj=st.nothing(), name=st.text(), value=st.nothing())
def test_fuzz_setattr(obj, name, value):
setattr(obj, name, value)

Expand Down