Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add unique items validation to constrained lists #2618

Merged
merged 5 commits into from
Dec 10, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/2618-nuno-andre.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `unique_items` option to `ConstrainedList` and `conlist` (emitted as `uniqueItems` in the JSON Schema).
2 changes: 2 additions & 0 deletions docs/usage/schema.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ It has the following arguments:
JSON Schema
* `max_items`: for list values, this adds a corresponding validation and an annotation of `maxItems` to the
JSON Schema
* `unique_items`: for list values, this adds a corresponding validation and an annotation of `uniqueItems` to the
JSON Schema
* `min_length`: for string values, this adds a corresponding validation and an annotation of `minLength` to the
JSON Schema
* `max_length`: for string values, this adds a corresponding validation and an annotation of `maxLength` to the
Expand Down
1 change: 1 addition & 0 deletions docs/usage/types.md
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,7 @@ The following arguments are available when using the `conlist` type function
- `item_type: Type[T]`: type of the list items
- `min_items: int = None`: minimum number of items in the list
- `max_items: int = None`: maximum number of items in the list
- `unique_items: bool = None`: enforces list elements to be unique

### Arguments to `conset`
The following arguments are available when using the `conset` type function
Expand Down
8 changes: 8 additions & 0 deletions pydantic/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,14 @@ def __init__(self, *, limit_value: int) -> None:
super().__init__(limit_value=limit_value)


class ListUniqueItemsError(PydanticValueError):
    """Error raised when a list that must hold unique items contains duplicates.

    ``not_unique`` is the count reported in the message; it is forwarded to the
    base class, which presumably interpolates it into ``msg_template`` and
    exposes it as error context (base class not visible here -- confirm).
    """

    code = 'list.unique_items'
    msg_template = 'the list has {not_unique} not unique items'

    def __init__(self, *, not_unique: int) -> None:
        # Keyword-only so the argument name always matches the
        # ``{not_unique}`` placeholder used in ``msg_template``.
        super().__init__(not_unique=not_unique)


class SetMinLengthError(PydanticValueError):
code = 'set.min_items'
msg_template = 'ensure this value has at least {limit_value} items'
Expand Down
11 changes: 11 additions & 0 deletions pydantic/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ class FieldInfo(Representation):
'multiple_of',
'min_items',
'max_items',
'unique_items',
'min_length',
'max_length',
'allow_mutation',
Expand All @@ -118,6 +119,7 @@ class FieldInfo(Representation):
'multiple_of': None,
'min_items': None,
'max_items': None,
'unique_items': None,
'allow_mutation': True,
}

Expand All @@ -136,6 +138,7 @@ def __init__(self, default: Any = Undefined, **kwargs: Any) -> None:
self.multiple_of = kwargs.pop('multiple_of', None)
self.min_items = kwargs.pop('min_items', None)
self.max_items = kwargs.pop('max_items', None)
self.unique_items = kwargs.pop('unique_items', None)
self.min_length = kwargs.pop('min_length', None)
self.max_length = kwargs.pop('max_length', None)
self.allow_mutation = kwargs.pop('allow_mutation', True)
Expand Down Expand Up @@ -188,6 +191,7 @@ def Field(
multiple_of: float = None,
min_items: int = None,
max_items: int = None,
unique_items: bool = None,
min_length: int = None,
max_length: int = None,
allow_mutation: bool = True,
Expand Down Expand Up @@ -216,6 +220,12 @@ def Field(
schema will have a ``maximum`` validation keyword
:param multiple_of: only applies to numbers, requires the field to be "a multiple of". The
schema will have a ``multipleOf`` validation keyword
:param min_items: only applies to lists, requires the field to have a minimum number of
elements. The schema will have a ``minItems`` validation keyword
:param max_items: only applies to lists, requires the field to have a maximum number of
elements. The schema will have a ``maxItems`` validation keyword
:param unique_items: only applies to lists, requires the field not to have duplicated
elements. The schema will have a ``uniqueItems`` validation keyword
:param min_length: only applies to strings, requires the field to have a minimum length. The
schema will have a ``minLength`` validation keyword
:param max_length: only applies to strings, requires the field to have a maximum length. The
Expand All @@ -240,6 +250,7 @@ def Field(
multiple_of=multiple_of,
min_items=min_items,
max_items=max_items,
unique_items=unique_items,
min_length=min_length,
max_length=max_length,
allow_mutation=allow_mutation,
Expand Down
17 changes: 12 additions & 5 deletions pydantic/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,11 +913,9 @@ def get_annotation_from_field_info(
:return: the same ``annotation`` if unmodified or a new annotation with validation in place
"""
constraints = field_info.get_constraints()

used_constraints: Set[str] = set()
if constraints:
annotation, used_constraints = get_annotation_with_constraints(annotation, field_info)

if validate_assignment:
used_constraints.add('allow_mutation')

Expand Down Expand Up @@ -961,9 +959,18 @@ def go(type_: Any) -> Type[Any]:
if origin is Union:
return Union[tuple(go(a) for a in args)] # type: ignore

if issubclass(origin, List) and (field_info.min_items is not None or field_info.max_items is not None):
used_constraints.update({'min_items', 'max_items'})
return conlist(go(args[0]), min_items=field_info.min_items, max_items=field_info.max_items)
if issubclass(origin, List) and (
field_info.min_items is not None
or field_info.max_items is not None
or field_info.unique_items is not None
):
used_constraints.update({'min_items', 'max_items', 'unique_items'})
return conlist(
go(args[0]),
min_items=field_info.min_items,
max_items=field_info.max_items,
unique_items=field_info.unique_items,
)

if issubclass(origin, Set) and (field_info.min_items is not None or field_info.max_items is not None):
used_constraints.update({'min_items', 'max_items'})
Expand Down
27 changes: 24 additions & 3 deletions pydantic/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -479,15 +479,18 @@ class ConstrainedList(list): # type: ignore

min_items: Optional[int] = None
max_items: Optional[int] = None
unique_items: Optional[bool] = None
item_type: Type[T] # type: ignore

@classmethod
def __get_validators__(cls) -> 'CallableGenerator':
yield cls.list_length_validator
if cls.unique_items:
yield cls.unique_items_validator

@classmethod
def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
update_not_none(field_schema, minItems=cls.min_items, maxItems=cls.max_items)
update_not_none(field_schema, minItems=cls.min_items, maxItems=cls.max_items, uniqueItems=cls.unique_items)

@classmethod
def list_length_validator(cls, v: 'Optional[List[T]]') -> 'Optional[List[T]]':
Expand All @@ -505,10 +508,28 @@ def list_length_validator(cls, v: 'Optional[List[T]]') -> 'Optional[List[T]]':

return v

@classmethod
def unique_items_validator(cls, v: 'Optional[List[T]]') -> 'Optional[List[T]]':
try:
nuno-andre marked this conversation as resolved.
Show resolved Hide resolved
if v and len(set(v)) != len(v):
raise errors.ListUniqueItemsError(not_unique=len(v) - len(set(v)))
except TypeError:
# failover for unhashable types
unique = list()

def conlist(item_type: Type[T], *, min_items: int = None, max_items: int = None) -> Type[List[T]]:
if v and len([unique.append(i) for i in v if i not in unique]) != len(v): # type: ignore
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Modifying `unique` inside a comprehension is somewhat unorthodox. Also, if we don't care about how many items are not unique (which I think will mostly be the case), we can do this faster by raising the error as soon as we find a duplicate.

Something like

for i, value in enumerate(v, start=1):
    if value in v[i:]:
        raise errors.ListUniqueItemsError()

I haven't done any profiling but I feel something like this should be quicker.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've just profiled it and you're totally right about performance. Also, the difference between the set and the loop checks is almost negligible (even when the duplicate is at the end of the iterable).

I also agree that most of the time we don't care about how many items aren't unique, and furthermore, this calculation reduces performance. So I'm going to refactor this PR and push it again.

raise errors.ListUniqueItemsError(not_unique=len(v) - len(unique)) from None

return v


def conlist(
item_type: Type[T], *, min_items: int = None, max_items: int = None, unique_items: bool = None
) -> Type[List[T]]:
# __args__ is needed to conform to typing generics api
namespace = {'min_items': min_items, 'max_items': max_items, 'item_type': item_type, '__args__': (item_type,)}
namespace = dict(
min_items=min_items, max_items=max_items, unique_items=unique_items, item_type=item_type, __args__=(item_type,)
)
# We use new_class to be able to deal with Generic types
return new_class('ConstrainedListValue', (ConstrainedList,), {}, lambda ns: ns.update(namespace))

Expand Down
60 changes: 56 additions & 4 deletions tests/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,41 @@ class ConListModelMin(BaseModel):
]


def test_constrained_list_not_unique_hashable_items():
    """A ``unique_items`` conlist of hashable items rejects duplicated values."""

    class ConListModelUnique(BaseModel):
        v: conlist(int, unique_items=True)

    # Six items but only three distinct values, so three are reported as not unique.
    duplicated_items = [1, 1, 2, 2, 2, 3]
    expected_error = {
        'loc': ('v',),
        'msg': 'the list has 3 not unique items',
        'type': 'value_error.list.unique_items',
        'ctx': {'not_unique': 3},
    }

    with pytest.raises(ValidationError) as exc_info:
        ConListModelUnique(v=duplicated_items)

    assert exc_info.value.errors() == [expected_error]


def test_constrained_list_not_unique_unhashable_items():
    """A ``unique_items`` conlist also works when the items are unhashable sets."""

    class ConListModelUnique(BaseModel):
        v: conlist(Set[int], unique_items=True)

    # Distinct sets are accepted and returned unchanged.
    model = ConListModelUnique(v=[{1}, {2}, {3}])
    assert model.v == [{1}, {2}, {3}]

    # Six items but only three distinct sets, so three are reported as not unique.
    duplicated_items = [{1}, {1}, {2}, {2}, {2}, {3}]
    expected_error = {
        'loc': ('v',),
        'msg': 'the list has 3 not unique items',
        'type': 'value_error.list.unique_items',
        'ctx': {'not_unique': 3},
    }

    with pytest.raises(ValidationError) as exc_info:
        ConListModelUnique(v=duplicated_items)

    assert exc_info.value.errors() == [expected_error]


def test_constrained_list_optional():
class Model(BaseModel):
req: Optional[conlist(str, min_items=1)] = ...
Expand Down Expand Up @@ -253,8 +288,8 @@ class ConListModel(BaseModel):

def test_conlist():
class Model(BaseModel):
foo: List[int] = Field(..., min_items=2, max_items=4)
bar: conlist(str, min_items=1, max_items=4) = None
foo: List[int] = Field(..., min_items=2, max_items=4, unique_items=True)
bar: conlist(str, min_items=1, max_items=4, unique_items=False) = None

assert Model(foo=[1, 2], bar=['spoon']).dict() == {'foo': [1, 2], 'bar': ['spoon']}

Expand All @@ -264,12 +299,29 @@ class Model(BaseModel):
with pytest.raises(ValidationError, match='ensure this value has at most 4 items'):
Model(foo=list(range(5)))

with pytest.raises(ValidationError, match='the list has 2 not unique items'):
Model(foo=[1, 1, 2, 2])

assert Model.schema() == {
'title': 'Model',
'type': 'object',
'properties': {
'foo': {'title': 'Foo', 'type': 'array', 'items': {'type': 'integer'}, 'minItems': 2, 'maxItems': 4},
'bar': {'title': 'Bar', 'type': 'array', 'items': {'type': 'string'}, 'minItems': 1, 'maxItems': 4},
'foo': {
'title': 'Foo',
'type': 'array',
'items': {'type': 'integer'},
'minItems': 2,
'maxItems': 4,
'uniqueItems': True,
},
'bar': {
'title': 'Bar',
'type': 'array',
'items': {'type': 'string'},
'minItems': 1,
'maxItems': 4,
'uniqueItems': False,
},
},
'required': ['foo'],
}
Expand Down