pydantic/_hypothesis_plugin.py

"""
Register Hypothesis strategies for Pydantic custom types.

This enables fully-automatic generation of test data for most Pydantic classes.

Note that this module has *no* runtime impact on Pydantic itself; instead it
is registered as a setuptools entry point and Hypothesis will import it if
Pydantic is installed.  See also:

https://hypothesis.readthedocs.io/en/latest/strategies.html#registering-strategies-via-setuptools-entry-points
https://hypothesis.readthedocs.io/en/latest/data.html#hypothesis.strategies.register_type_strategy
https://hypothesis.readthedocs.io/en/latest/strategies.html#interaction-with-pytest-cov
https://pydantic-docs.helpmanual.io/usage/types/#pydantic-types

Note that because our motivation is to *improve user experience*, the strategies
are always sound (never generate invalid data) but sacrifice completeness for
maintainability (ie may be unable to generate some tricky but valid data).

Finally, this module makes liberal use of `# type: ignore[<code>]` pragmas.
This is because Hypothesis annotates `register_type_strategy()` with
`(T, SearchStrategy[T])`, but in most cases we register e.g. `ConstrainedInt`
to generate instances of the builtin `int` type which match the constraints.
"""

import contextlib
import ipaddress
import json
import math
from fractions import Fraction
from typing import Any, Callable, Dict, Type, Union, cast, overload

import hypothesis.strategies as st

import pydantic
import pydantic.color
import pydantic.types

# FilePath and DirectoryPath are explicitly unsupported, as we'd have to create
# them on-disk, and that's unsafe in general without being told *where* to do so.
#
# URLs are unsupported because it's easy for users to define their own strategy for
# "normal" URLs, and hard for us to define a general strategy which includes "weird"
# URLs but doesn't also have unpredictable performance problems.
#
# conlist() and conset() are unsupported for now, because the workarounds for
# Cython and Hypothesis to handle parametrized generic types are incompatible.
# Once Cython can support 'normal' generics we'll revisit this.

# Emails
try:
    import email_validator
except ImportError:  # pragma: no cover
    pass
else:

    def is_valid_email(s: str) -> bool:
        # Hypothesis' st.emails() occasionally generates emails like 0@A0--0.ac
        # that are invalid according to email-validator, so we filter those out.
        try:
            email_validator.validate_email(s, check_deliverability=False)
            return True
        except email_validator.EmailNotValidError:  # pragma: no cover
            return False

    # Note that these strategies deliberately stay away from any tricky Unicode
    # or other encoding issues; we're just trying to generate *something* valid.
    st.register_type_strategy(pydantic.EmailStr, st.emails().filter(is_valid_email))  # type: ignore[arg-type]
    st.register_type_strategy(
        pydantic.NameEmail,
        st.builds(
            '{} <{}>'.format,  # type: ignore[arg-type]
            st.from_regex('[A-Za-z0-9_]+( [A-Za-z0-9_]+){0,5}', fullmatch=True),
            st.emails().filter(is_valid_email),
        ),
    )

# PyObject - dotted names, in this case taken from the math module.
st.register_type_strategy(
    pydantic.PyObject,  # type: ignore[arg-type]
    st.sampled_from(
        [cast(pydantic.PyObject, f'math.{name}') for name in sorted(vars(math)) if not name.startswith('_')]
    ),
)

# CSS3 Colors; as name, hex, rgb(a) tuples or strings, or hsl strings
_color_regexes = (
    '|'.join(
        (
            pydantic.color.r_hex_short,
            pydantic.color.r_hex_long,
            pydantic.color.r_rgb,
            pydantic.color.r_rgba,
            pydantic.color.r_hsl,
            pydantic.color.r_hsla,
        )
    )
    # Use more precise regex patterns to avoid value-out-of-range errors
    .replace(pydantic.color._r_sl, r'(?:(\d\d?(?:\.\d+)?|100(?:\.0+)?)%)')
    .replace(pydantic.color._r_alpha, r'(?:(0(?:\.\d+)?|1(?:\.0+)?|\.\d+|\d{1,2}%))')
    .replace(pydantic.color._r_255, r'(?:((?:\d|\d\d|[01]\d\d|2[0-4]\d|25[0-4])(?:\.\d+)?|255(?:\.0+)?))')
)
st.register_type_strategy(
    pydantic.color.Color,
    st.one_of(
        st.sampled_from(sorted(pydantic.color.COLORS_BY_NAME)),
        st.tuples(
            st.integers(0, 255),
            st.integers(0, 255),
            st.integers(0, 255),
            st.none() | st.floats(0, 1) | st.floats(0, 100).map('{}%'.format),
        ),
        st.from_regex(_color_regexes, fullmatch=True),
    ),
)


# Card numbers, valid according to the Luhn algorithm


def add_luhn_digit(card_number: str) -> str:
    # See https://en.wikipedia.org/wiki/Luhn_algorithm
    for digit in '0123456789':
        with contextlib.suppress(Exception):
            pydantic.PaymentCardNumber.validate_luhn_check_digit(card_number + digit)
            return card_number + digit
    raise AssertionError('Unreachable')  # pragma: no cover


card_patterns = (
    # Note that these patterns omit the Luhn check digit; that's added by the function above
    '4[0-9]{14}',  # Visa
    '5[12345][0-9]{13}',  # Mastercard
    '3[47][0-9]{12}',  # American Express
    '[0-26-9][0-9]{10,17}',  # other (incomplete to avoid overlap)
)
st.register_type_strategy(
    pydantic.PaymentCardNumber,
    st.from_regex('|'.join(card_patterns), fullmatch=True).map(add_luhn_digit),  # type: ignore[arg-type]
)

# UUIDs
st.register_type_strategy(pydantic.UUID1, st.uuids(version=1))
st.register_type_strategy(pydantic.UUID3, st.uuids(version=3))
st.register_type_strategy(pydantic.UUID4, st.uuids(version=4))
st.register_type_strategy(pydantic.UUID5, st.uuids(version=5))

# Secrets
st.register_type_strategy(pydantic.SecretBytes, st.binary().map(pydantic.SecretBytes))
st.register_type_strategy(pydantic.SecretStr, st.text().map(pydantic.SecretStr))

# IP addresses, networks, and interfaces
st.register_type_strategy(pydantic.IPvAnyAddress, st.ip_addresses())  # type: ignore[arg-type]
st.register_type_strategy(
    pydantic.IPvAnyInterface,
    st.from_type(ipaddress.IPv4Interface) | st.from_type(ipaddress.IPv6Interface),  # type: ignore[arg-type]
)
st.register_type_strategy(
    pydantic.IPvAnyNetwork,
    st.from_type(ipaddress.IPv4Network) | st.from_type(ipaddress.IPv6Network),  # type: ignore[arg-type]
)

# We hook into the con***() functions and the ConstrainedNumberMeta metaclass,
# so here we only have to register subclasses for other constrained types which
# don't go via those mechanisms.  Then there are the registration hooks below.
st.register_type_strategy(pydantic.StrictBool, st.booleans())
st.register_type_strategy(pydantic.StrictStr, st.text())


# Constrained-type resolver functions
#
# For these ones, we actually want to inspect the type in order to work out a
# satisfying strategy.  First up, the machinery for tracking resolver functions:

RESOLVERS: Dict[type, Callable[[type], st.SearchStrategy]] = {}  # type: ignore[type-arg]


@overload
def _registered(typ: Type[pydantic.types.T]) -> Type[pydantic.types.T]:
    pass


@overload
def _registered(typ: pydantic.types.ConstrainedNumberMeta) -> pydantic.types.ConstrainedNumberMeta:
    pass


def _registered(
    typ: Union[Type[pydantic.types.T], pydantic.types.ConstrainedNumberMeta]
) -> Union[Type[pydantic.types.T], pydantic.types.ConstrainedNumberMeta]:
    # This function replaces the version in `pydantic.types`, in order to
    # effect the registration of new constrained types so that Hypothesis
    # can generate valid examples.
    pydantic.types._DEFINED_TYPES.add(typ)
    for supertype, resolver in RESOLVERS.items():
        if issubclass(typ, supertype):
            st.register_type_strategy(typ, resolver(typ))  # type: ignore
            return typ
    raise NotImplementedError(f'Unknown type {typ!r} has no resolver to register')  # pragma: no cover


def resolves(
    typ: Union[type, pydantic.types.ConstrainedNumberMeta]
) -> Callable[[Callable[..., st.SearchStrategy]], Callable[..., st.SearchStrategy]]:  # type: ignore[type-arg]
    def inner(f):  # type: ignore
        assert f not in RESOLVERS
        RESOLVERS[typ] = f
        return f

    return inner


# Type-to-strategy resolver functions


def is_base_model(cls: Any) -> bool:
    """Returns True if the given class is a subclass of the pydantic BaseModel."""

    try:
        return issubclass(cls, pydantic.BaseModel)
    except TypeError:
        return False


@resolves(pydantic.JsonWrapper)
def resolve_json(cls):  # type: ignore[no-untyped-def]
    try:
        inner = st.none() if cls.inner_type is None else st.from_type(cls.inner_type)
    except Exception:  # pragma: no cover
        finite = st.floats(allow_infinity=False, allow_nan=False)
        inner = st.recursive(
            base=st.one_of(st.none(), st.booleans(), st.integers(), finite, st.text()),
            extend=lambda x: st.lists(x) | st.dictionaries(st.text(), x),  # type: ignore
        )
    return st.builds(
        cls.inner_type.json if is_base_model(getattr(cls, 'inner_type', None)) else json.dumps,
        inner,
        ensure_ascii=st.booleans(),
        indent=st.none() | st.integers(0, 16),
        sort_keys=st.booleans(),
    )


@resolves(pydantic.ConstrainedBytes)
def resolve_conbytes(cls):  # type: ignore[no-untyped-def]  # pragma: no cover
    min_size = cls.min_length or 0
    max_size = cls.max_length
    if not cls.strip_whitespace:
        return st.binary(min_size=min_size, max_size=max_size)
    # Fun with regex to ensure we neither start nor end with whitespace
    repeats = '{{{},{}}}'.format(
        min_size - 2 if min_size > 2 else 0,
        max_size - 2 if (max_size or 0) > 2 else '',
    )
    if min_size >= 2:
        pattern = rf'\W.{repeats}\W'
    elif min_size == 1:
        pattern = rf'\W(.{repeats}\W)?'
    else:
        assert min_size == 0
        pattern = rf'(\W(.{repeats}\W)?)?'
    return st.from_regex(pattern.encode(), fullmatch=True)


@resolves(pydantic.ConstrainedDecimal)
def resolve_condecimal(cls):  # type: ignore[no-untyped-def]
    min_value = cls.ge
    max_value = cls.le
    if cls.gt is not None:
        assert min_value is None, 'Set `gt` or `ge`, but not both'
        min_value = cls.gt
    if cls.lt is not None:
        assert max_value is None, 'Set `lt` or `le`, but not both'
        max_value = cls.lt
    s = st.decimals(min_value, max_value, allow_nan=False, places=cls.decimal_places)
    if cls.lt is not None:
        s = s.filter(lambda d: d < cls.lt)
    if cls.gt is not None:
        s = s.filter(lambda d: cls.gt < d)
    return s


@resolves(pydantic.ConstrainedFloat)
def resolve_confloat(cls):  # type: ignore[no-untyped-def]
    min_value = cls.ge
    max_value = cls.le
    exclude_min = False
    exclude_max = False

    if cls.gt is not None:
        assert min_value is None, 'Set `gt` or `ge`, but not both'
        min_value = cls.gt
        exclude_min = True
    if cls.lt is not None:
        assert max_value is None, 'Set `lt` or `le`, but not both'
        max_value = cls.lt
        exclude_max = True

    if cls.multiple_of is None:
        return st.floats(min_value, max_value, exclude_min=exclude_min, exclude_max=exclude_max, allow_nan=False)

    if min_value is not None:
        min_value = math.ceil(min_value / cls.multiple_of)
        if exclude_min:
            min_value = min_value + 1
    if max_value is not None:
        assert max_value >= cls.multiple_of, 'Cannot build model with max value smaller than multiple of'
        max_value = math.floor(max_value / cls.multiple_of)
        if exclude_max:
            max_value = max_value - 1

    return st.integers(min_value, max_value).map(lambda x: x * cls.multiple_of)


@resolves(pydantic.ConstrainedInt)
def resolve_conint(cls):  # type: ignore[no-untyped-def]
    min_value = cls.ge
    max_value = cls.le
    if cls.gt is not None:
        assert min_value is None, 'Set `gt` or `ge`, but not both'
        min_value = cls.gt + 1
    if cls.lt is not None:
        assert max_value is None, 'Set `lt` or `le`, but not both'
        max_value = cls.lt - 1

    if cls.multiple_of is None or cls.multiple_of == 1:
        return st.integers(min_value, max_value)

    # These adjustments and the .map handle integer-valued multiples, while the
    # .filter handles trickier cases as for confloat.
    if min_value is not None:
        min_value = math.ceil(Fraction(min_value) / Fraction(cls.multiple_of))
    if max_value is not None:
        max_value = math.floor(Fraction(max_value) / Fraction(cls.multiple_of))
    return st.integers(min_value, max_value).map(lambda x: x * cls.multiple_of)


@resolves(pydantic.ConstrainedStr)
def resolve_constr(cls):  # type: ignore[no-untyped-def]  # pragma: no cover
    min_size = cls.min_length or 0
    max_size = cls.max_length

    if cls.regex is None and not cls.strip_whitespace:
        return st.text(min_size=min_size, max_size=max_size)

    if cls.regex is not None:
        strategy = st.from_regex(cls.regex)
        if cls.strip_whitespace:
            strategy = strategy.filter(lambda s: s == s.strip())
    elif cls.strip_whitespace:
        repeats = '{{{},{}}}'.format(
            min_size - 2 if min_size > 2 else 0,
            max_size - 2 if (max_size or 0) > 2 else '',
        )
        if min_size >= 2:
            strategy = st.from_regex(rf'\W.{repeats}\W')
        elif min_size == 1:
            strategy = st.from_regex(rf'\W(.{repeats}\W)?')
        else:
            assert min_size == 0
            strategy = st.from_regex(rf'(\W(.{repeats}\W)?)?')

    if min_size == 0 and max_size is None:
        return strategy
    elif max_size is None:
        return strategy.filter(lambda s: min_size <= len(s))
    return strategy.filter(lambda s: min_size <= len(s) <= max_size)


# Finally, register all previously-defined types, and patch in our new function
for typ in list(pydantic.types._DEFINED_TYPES):
    _registered(typ)
pydantic.types._registered = _registered
st.register_type_strategy(pydantic.Json, resolve_json)