Skip to content

Commit

Permalink
Efficient dictionaries()
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD committed Jan 2, 2021
1 parent ea8fc3c commit a0ce679
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 7 deletions.
6 changes: 6 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
RELEASE_TYPE: patch

This patch makes some strategies for collections with a uniqueness constraint
much more efficient, including ``dictionaries(keys=sampled_from(...), values=...)``
and ``lists(tuples(sampled_from(...), ...), unique_by=lambda x: x[0])``.
(related to :issue:`2036`)
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,10 @@ def __repr__(self):


class UniqueListStrategy(ListStrategy):
def __init__(self, elements, min_size, max_size, keys):
def __init__(self, elements, min_size, max_size, keys, tuple_suffixes):
super().__init__(elements, min_size, max_size)
self.keys = keys
self.tuple_suffixes = tuple_suffixes

def do_draw(self, data):
if self.element_strategy.is_empty:
Expand Down Expand Up @@ -161,6 +162,8 @@ def not_yet_in_unique_list(val):
else:
for key, seen in zip(self.keys, seen_sets):
seen.add(key(value))
if self.tuple_suffixes is not None:
value = (value,) + data.draw(self.tuple_suffixes)
result.append(value)
assert self.max_size >= len(result) >= self.min_size
return result
Expand Down Expand Up @@ -191,6 +194,8 @@ def do_draw(self, data):
):
for key, seen in zip(self.keys, seen_sets):
seen.add(key(value))
if self.tuple_suffixes is not None:
value = (value,) + data.draw(self.tuple_suffixes)
result.append(value)
else:
should_draw.reject()
Expand Down
47 changes: 41 additions & 6 deletions hypothesis-python/src/hypothesis/strategies/_internal/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import math
import operator
import random
import re
import string
import sys
import threading
Expand All @@ -25,6 +26,7 @@
from fractions import Fraction
from functools import reduce
from inspect import Parameter, getfullargspec, isabstract, isclass, signature
from types import FunctionType
from typing import (
Any,
AnyStr,
Expand Down Expand Up @@ -696,6 +698,10 @@ def sampled_from(elements):
return SampledFromStrategy(values, repr_)


def identity(x):
    """Return the argument unchanged."""
    # A named module-level function rather than an inline def or lambda.
    return x


@cacheable
@defines_strategy()
@deprecated_posargs
Expand Down Expand Up @@ -743,9 +749,7 @@ def lists(
"(you probably only want to set unique_by)"
)
else:

def unique_by(x):
return x
unique_by = identity

if max_size == 0:
return builds(list)
Expand All @@ -763,6 +767,29 @@ def unique_by(x):
raise InvalidArgument("unique_by[%i]=%r is not a callable" % (i, f))
# Note that lazy strategies automatically unwrap when passed to a defines_strategy
# function.
# Strategy for the trailing tuple elements; stays None unless the rewrite
# below applies.
tuple_suffixes = None
if (
    # We're generating a list of tuples unique by the first element, perhaps
    # via st.dictionaries(), and this will be more efficient if we rearrange
    # our strategy somewhat to draw the first element, then draw and append
    # the rest.
    isinstance(elements, TupleStrategy)
    and len(elements.element_strategies) >= 1
    and len(unique_by) == 1
    and (
        # Introspection for either `itemgetter(0)`, or `lambda x: x[0]`
        (
            isinstance(unique_by[0], operator.itemgetter)  # type: ignore
            and repr(unique_by[0]) == "operator.itemgetter(0)"
        )
        or (
            isinstance(unique_by[0], FunctionType)
            # re.fullmatch takes (pattern, string): the regex must come
            # first and the function's description is the text matched
            # against it. With the arguments swapped the lambda form is
            # never recognised.
            and re.fullmatch(
                r"lambda ([a-z]+): \1\[0\]",
                get_pretty_function_description(unique_by[0]),
            )
        )
    )
):
    # Uniqueness is now checked directly on the first element, so the key
    # function becomes the identity and the remaining tuple members are
    # drawn separately via `tuple_suffixes`.
    unique_by = (identity,)
    tuple_suffixes = TupleStrategy(elements.element_strategies[1:])
    elements = elements.element_strategies[0]

if isinstance(elements, SampledFromStrategy):
element_count = len(elements.elements)
if min_size > element_count:
Expand All @@ -778,11 +805,19 @@ def unique_by(x):
max_size = element_count

return UniqueSampledListStrategy(
elements=elements, max_size=max_size, min_size=min_size, keys=unique_by
elements=elements,
max_size=max_size,
min_size=min_size,
keys=unique_by,
tuple_suffixes=tuple_suffixes,
)

return UniqueListStrategy(
elements=elements, max_size=max_size, min_size=min_size, keys=unique_by
elements=elements,
max_size=max_size,
min_size=min_size,
keys=unique_by,
tuple_suffixes=tuple_suffixes,
)
return ListStrategy(elements, min_size=min_size, max_size=max_size)

Expand Down Expand Up @@ -940,7 +975,7 @@ def dictionaries(
tuples(keys, values),
min_size=min_size,
max_size=max_size,
unique_by=lambda x: x[0],
unique_by=operator.itemgetter(0),
).map(dict_class)


Expand Down
16 changes: 16 additions & 0 deletions hypothesis-python/tests/cover/test_sampled_from.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,22 @@ def test_efficient_sets_of_samples(x):
assert x == set(range(50))


@given(
    st.dictionaries(
        keys=st.sampled_from(range(50)),
        values=st.none(),
        min_size=50,
    )
)
def test_efficient_dicts_with_sampled_keys(x):
    # min_size equals the number of candidate keys, so every key must appear.
    every_key = set(range(50))
    assert set(x) == every_key


@given(
    st.lists(
        st.tuples(st.sampled_from(range(20)), st.builds(list)),
        min_size=20,
        unique_by=lambda asdf: asdf[0],
    )
)
def test_efficient_lists_of_tuples_first_element_sampled_from(x):
    # Twenty entries, unique by first element and drawn from twenty
    # candidates, must use every candidate exactly once.
    leading = {entry[0] for entry in x}
    assert leading == set(range(20))


@given(st.lists(st.sampled_from([0] * 100), unique=True))
def test_does_not_include_duplicates_even_when_duplicated_in_collection(ls):
    # Every candidate is the same value, so a unique list holds at most one.
    assert len(ls) < 2
Expand Down

0 comments on commit a0ce679

Please sign in to comment.