HypothesisWorks · Zac-HD · Jan 2, 2021 · Jan 2, 2021
diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,6 @@
+RELEASE_TYPE: patch
+
+This patch makes some strategies for collections with a uniqueness constraint
+much more efficient, including ``dictionaries(keys=sampled_from(...), values=...)``
+and ``lists(tuples(sampled_from(...), ...), unique_by=lambda x: x[0])``.
+(related to :issue:`2036`)
diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/collections.py b/hypothesis-python/src/hypothesis/strategies/_internal/collections.py
@@ -127,9 +127,10 @@ def __repr__(self):
 
 
 class UniqueListStrategy(ListStrategy):
-    def __init__(self, elements, min_size, max_size, keys):
+    def __init__(self, elements, min_size, max_size, keys, tuple_suffixes):
         super().__init__(elements, min_size, max_size)
         self.keys = keys
+        self.tuple_suffixes = tuple_suffixes
 
     def do_draw(self, data):
         if self.element_strategy.is_empty:
@@ -161,6 +162,8 @@ def not_yet_in_unique_list(val):
             else:
                 for key, seen in zip(self.keys, seen_sets):
                     seen.add(key(value))
+                if self.tuple_suffixes is not None:
+                    value = (value,) + data.draw(self.tuple_suffixes)
                 result.append(value)
         assert self.max_size >= len(result) >= self.min_size
         return result
@@ -191,6 +194,8 @@ def do_draw(self, data):
             ):
                 for key, seen in zip(self.keys, seen_sets):
                     seen.add(key(value))
+                if self.tuple_suffixes is not None:
+                    value = (value,) + data.draw(self.tuple_suffixes)
                 result.append(value)
             else:
                 should_draw.reject()

diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/core.py b/hypothesis-python/src/hypothesis/strategies/_internal/core.py
@@ -17,6 +17,7 @@
 import math
 import operator
 import random
+import re
 import string
 import sys
 import threading
@@ -25,6 +26,7 @@
 from fractions import Fraction
 from functools import reduce
 from inspect import Parameter, getfullargspec, isabstract, isclass, signature
+from types import FunctionType
 from typing import (
     Any,
     AnyStr,
@@ -696,6 +698,10 @@ def sampled_from(elements):
     return SampledFromStrategy(values, repr_)
 
 
+def identity(x):
+    return x  # no-op key function; lists() uses it as the default `unique_by`
+
+
 @cacheable
 @defines_strategy()
 @deprecated_posargs
@@ -743,9 +749,7 @@ def lists(
                 "(you probably only want to set unique_by)"
             )
         else:
-
-            def unique_by(x):
-                return x
+            unique_by = identity
 
     if max_size == 0:
         return builds(list)
@@ -763,6 +767,29 @@ def unique_by(x):
                 raise InvalidArgument("unique_by[%i]=%r is not a callable" % (i, f))
         # Note that lazy strategies automatically unwrap when passed to a defines_strategy
         # function.
+        tuple_suffixes = None  # stays None unless the rewrite below applies
+        if (
+            # We're generating a list of tuples unique by the first element, perhaps
+            # via st.dictionaries(), and this will be more efficient if we rearrange
+            # our strategy somewhat to draw the first element then add the rest.
+            isinstance(elements, TupleStrategy)
+            and len(elements.element_strategies) >= 1
+            and len(unique_by) == 1
+            and (
+                # Introspection for either `itemgetter(0)`, or `lambda x: x[0]`
+                isinstance(unique_by[0], operator.itemgetter)  # type: ignore
+                and repr(unique_by[0]) == "operator.itemgetter(0)"
+                or isinstance(unique_by[0], FunctionType)
+                and re.fullmatch(  # signature is (pattern, string): regex goes first
+                    r"lambda ([a-z]+): \1\[0\]",
+                    get_pretty_function_description(unique_by[0]),
+                )
+            )
+        ):
+            unique_by = (identity,)  # elements are now bare keys, so compare as-is
+            tuple_suffixes = TupleStrategy(elements.element_strategies[1:])
+            elements = elements.element_strategies[0]
+
         if isinstance(elements, SampledFromStrategy):
             element_count = len(elements.elements)
             if min_size > element_count:
@@ -778,11 +805,19 @@ def unique_by(x):
                 max_size = element_count
 
             return UniqueSampledListStrategy(
-                elements=elements, max_size=max_size, min_size=min_size, keys=unique_by
+                elements=elements,
+                max_size=max_size,
+                min_size=min_size,
+                keys=unique_by,
+                tuple_suffixes=tuple_suffixes,
             )
 
         return UniqueListStrategy(
-            elements=elements, max_size=max_size, min_size=min_size, keys=unique_by
+            elements=elements,
+            max_size=max_size,
+            min_size=min_size,
+            keys=unique_by,
+            tuple_suffixes=tuple_suffixes,
         )
     return ListStrategy(elements, min_size=min_size, max_size=max_size)
 
@@ -940,7 +975,7 @@ def dictionaries(
         tuples(keys, values),
         min_size=min_size,
         max_size=max_size,
-        unique_by=lambda x: x[0],
+        unique_by=operator.itemgetter(0),
     ).map(dict_class)
 
 

diff --git a/hypothesis-python/tests/cover/test_sampled_from.py b/hypothesis-python/tests/cover/test_sampled_from.py
@@ -86,6 +86,22 @@ def test_efficient_sets_of_samples(x):
     assert x == set(range(50))
 
 
+@given(st.dictionaries(keys=st.sampled_from(range(50)), values=st.none(), min_size=50))
+def test_efficient_dicts_with_sampled_keys(x):
+    assert set(x) == set(range(50))  # min_size=50 with 50 possible keys: all appear
+
+
+@given(
+    st.lists(
+        st.tuples(st.sampled_from(range(20)), st.builds(list)),
+        min_size=20,  # same as the number of distinct first elements available
+        unique_by=lambda asdf: asdf[0],  # non-`x` name; presumably hits lambda check
+    )
+)
+def test_efficient_lists_of_tuples_first_element_sampled_from(x):
+    assert {first for first, *_ in x} == set(range(20))  # every first element used
+
+
 @given(st.lists(st.sampled_from([0] * 100), unique=True))
 def test_does_not_include_duplicates_even_when_duplicated_in_collection(ls):
     assert len(ls) <= 1