Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make dictionaries(keys=sampled_from(...), ...) more efficient #2687

Merged
merged 1 commit into from
Jan 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
RELEASE_TYPE: patch

This patch makes some strategies for collections with a uniqueness constraint
much more efficient, including ``dictionaries(keys=sampled_from(...), values=...)``
and ``lists(tuples(sampled_from(...), ...), unique_by=lambda x: x[0])``.
(related to :issue:`2036`)
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,10 @@ def __repr__(self):


class UniqueListStrategy(ListStrategy):
def __init__(self, elements, min_size, max_size, keys):
def __init__(self, elements, min_size, max_size, keys, tuple_suffixes):
super().__init__(elements, min_size, max_size)
self.keys = keys
self.tuple_suffixes = tuple_suffixes

def do_draw(self, data):
if self.element_strategy.is_empty:
Expand Down Expand Up @@ -161,6 +162,8 @@ def not_yet_in_unique_list(val):
else:
for key, seen in zip(self.keys, seen_sets):
seen.add(key(value))
if self.tuple_suffixes is not None:
value = (value,) + data.draw(self.tuple_suffixes)
result.append(value)
assert self.max_size >= len(result) >= self.min_size
return result
Expand Down Expand Up @@ -191,6 +194,8 @@ def do_draw(self, data):
):
for key, seen in zip(self.keys, seen_sets):
seen.add(key(value))
if self.tuple_suffixes is not None:
value = (value,) + data.draw(self.tuple_suffixes)
result.append(value)
else:
should_draw.reject()
Expand Down
47 changes: 41 additions & 6 deletions hypothesis-python/src/hypothesis/strategies/_internal/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import math
import operator
import random
import re
import string
import sys
import threading
Expand All @@ -25,6 +26,7 @@
from fractions import Fraction
from functools import reduce
from inspect import Parameter, getfullargspec, isabstract, isclass, signature
from types import FunctionType
from typing import (
Any,
AnyStr,
Expand Down Expand Up @@ -696,6 +698,10 @@ def sampled_from(elements):
return SampledFromStrategy(values, repr_)


def identity(x):
    """Return ``x`` unchanged.

    A named, module-level no-op key function, usable wherever a callable
    is required but no transformation of the value is wanted.
    """
    return x


@cacheable
@defines_strategy()
@deprecated_posargs
Expand Down Expand Up @@ -743,9 +749,7 @@ def lists(
"(you probably only want to set unique_by)"
)
else:

def unique_by(x):
return x
unique_by = identity

if max_size == 0:
return builds(list)
Expand All @@ -763,6 +767,29 @@ def unique_by(x):
raise InvalidArgument("unique_by[%i]=%r is not a callable" % (i, f))
# Note that lazy strategies automatically unwrap when passed to a defines_strategy
# function.
# Strategy for "the rest of each tuple"; stays None unless the fast path
# below applies.
tuple_suffixes = None
if (
    # We're generating a list of tuples unique by the first element, perhaps
    # via st.dictionaries(), and this will be more efficient if we rearrange
    # our strategy somewhat: draw the (unique) first element on its own, then
    # draw and append the rest of the tuple.
    isinstance(elements, TupleStrategy)
    and len(elements.element_strategies) >= 1
    and len(unique_by) == 1
    and (
        # Introspection for either `itemgetter(0)`, or `lambda x: x[0]`
        (
            isinstance(unique_by[0], operator.itemgetter)  # type: ignore
            and repr(unique_by[0]) == "operator.itemgetter(0)"
        )
        or (
            isinstance(unique_by[0], FunctionType)
            # re.fullmatch takes (pattern, string): the regex goes first and
            # the function's source description is the text being matched.
            and re.fullmatch(
                r"lambda ([a-z]+): \1\[0\]",
                get_pretty_function_description(unique_by[0]),
            )
        )
    )
):
    # Uniqueness is now checked directly on the first element, so the key
    # function becomes the identity and the remaining tuple positions are
    # drawn separately as a suffix.
    unique_by = (identity,)
    tuple_suffixes = TupleStrategy(elements.element_strategies[1:])
    elements = elements.element_strategies[0]

if isinstance(elements, SampledFromStrategy):
element_count = len(elements.elements)
if min_size > element_count:
Expand All @@ -778,11 +805,19 @@ def unique_by(x):
max_size = element_count

return UniqueSampledListStrategy(
elements=elements, max_size=max_size, min_size=min_size, keys=unique_by
elements=elements,
max_size=max_size,
min_size=min_size,
keys=unique_by,
tuple_suffixes=tuple_suffixes,
)

return UniqueListStrategy(
elements=elements, max_size=max_size, min_size=min_size, keys=unique_by
elements=elements,
max_size=max_size,
min_size=min_size,
keys=unique_by,
tuple_suffixes=tuple_suffixes,
)
return ListStrategy(elements, min_size=min_size, max_size=max_size)

Expand Down Expand Up @@ -940,7 +975,7 @@ def dictionaries(
tuples(keys, values),
min_size=min_size,
max_size=max_size,
unique_by=lambda x: x[0],
unique_by=operator.itemgetter(0),
).map(dict_class)


Expand Down
16 changes: 16 additions & 0 deletions hypothesis-python/tests/cover/test_sampled_from.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,22 @@ def test_efficient_sets_of_samples(x):
assert x == set(range(50))


@given(st.dictionaries(keys=st.sampled_from(range(50)), values=st.none(), min_size=50))
def test_efficient_dicts_with_sampled_keys(x):
    """A 50-entry dict keyed from 50 candidates must contain every candidate."""
    every_key = set(range(50))
    assert every_key == set(x)


@given(
    st.lists(
        st.tuples(st.sampled_from(range(20)), st.builds(list)),
        min_size=20,
        unique_by=lambda asdf: asdf[0],
    )
)
def test_efficient_lists_of_tuples_first_element_sampled_from(x):
    """Twenty tuples unique by first element must cover all twenty candidates."""
    leading_values = set()
    for first, *_ in x:
        leading_values.add(first)
    assert leading_values == set(range(20))


@given(st.lists(st.sampled_from([0] * 100), unique=True))
def test_does_not_include_duplicates_even_when_duplicated_in_collection(ls):
    """Sampling from 100 copies of one value may yield at most one element."""
    size = len(ls)
    assert size <= 1
Expand Down