Add Hypothesis plugin

pydantic · Dec 1, 2020 · b835b9d · b835b9d
1 parent 11c8ea7
commit b835b9d
Show file tree

Hide file tree

Showing 4 changed files with 260 additions and 0 deletions.
diff --git a/pydantic/_hypothesis_plugin.py b/pydantic/_hypothesis_plugin.py
@@ -0,0 +1,175 @@
+"""
+Register Hypothesis strategies for Pydantic custom types.
+
+This enables fully-automatic generation of test data for most Pydantic classes.
+
+Note that this module has *no* runtime impact on Pydantic itself; instead it
+is registered as a setuptools entry point and Hypothesis will import it if
+Pydantic is installed.  See also:
+
+https://hypothesis.readthedocs.io/en/latest/strategies.html#registering-strategies-via-setuptools-entry-points
+https://hypothesis.readthedocs.io/en/latest/data.html#hypothesis.strategies.register_type_strategy
+https://hypothesis.readthedocs.io/en/latest/strategies.html#interaction-with-pytest-cov
+https://pydantic-docs.helpmanual.io/usage/types/#pydantic-types
+
+Note that because our motivation is to *improve user experience*, the strategies
+are always sound (never generate invalid data) but sacrifice completeness for
+maintainability (i.e. may be unable to generate some tricky but valid data).
+
+Finally, this module makes liberal use of `# type: ignore[<code>]` pragmas.
+This is because Hypothesis annotates `register_type_strategy()` with
+`(T, SearchStrategy[T])`, but in most cases we register e.g. `ConstrainedInt`
+to generate instances of the builtin `int` type which match the constraints.
+"""
+
+import contextlib
+import ipaddress
+import json
+import math
+from typing import cast
+
+import hypothesis.strategies as st
+
+import pydantic
+import pydantic.color
+from pydantic.networks import import_email_validator
+
+# FilePath and DirectoryPath are explicitly unsupported, as we'd have to create
+# them on-disk, and that's unsafe in general without being told *where* to do so.
+
+# Emails
+# These strategies are only registered when `import_email_validator()` succeeds.
+try:
+    import_email_validator()
+except ImportError:  # pragma: no cover
+    # Nothing is registered for the email types in this case.
+    pass
+else:
+    # Note that these strategies deliberately stay away from any tricky Unicode
+    # or other encoding issues; we're just trying to generate *something* valid.
+    st.register_type_strategy(pydantic.EmailStr, st.emails())  # type: ignore[arg-type]
+    # NameEmail values are formatted as 'name <address>': the name is one to six
+    # space-separated words of ASCII letters, digits and underscores.
+    st.register_type_strategy(
+        pydantic.NameEmail,
+        st.builds(
+            '{} <{}>'.format,  # type: ignore[arg-type]
+            st.from_regex('[A-Za-z0-9_]+( [A-Za-z0-9_]+){0,5}', fullmatch=True),
+            st.emails(),
+        ),
+    )
+
+# PyObject - dotted names, in this case taken from the math module.
+# Names starting with an underscore are skipped, and the rest are sorted so the
+# list of candidates has a fixed order.  `cast` is for the type checker only;
+# the sampled values are plain 'math.<name>' strings.
+st.register_type_strategy(
+    pydantic.PyObject,
+    st.sampled_from(
+        [cast(pydantic.PyObject, f'math.{name}') for name in sorted(vars(math)) if not name.startswith('_')]
+    ),
+)
+
+# CSS3 Colors; as name, hex, rgb(a) tuples or strings, or hsl strings
+# The hex, rgb(a) and hsl(a) patterns from `pydantic.color` are joined into a
+# single alternation, which is then used below as one string strategy.
+_color_regexes = (
+    '|'.join(
+        (
+            pydantic.color.r_hex_short,
+            pydantic.color.r_hex_long,
+            pydantic.color.r_rgb,
+            pydantic.color.r_rgba,
+            pydantic.color.r_hsl,
+            pydantic.color.r_hsla,
+        )
+    )
+    # Use more precise regex patterns to avoid value-out-of-range errors
+    # The three replacement patterns match, in order:
+    #   - a number from 0 to 254 with an optional fractional part, or 255
+    #     followed only by zeros after the decimal point;
+    #   - an alpha written as 0-1 (decimal) or as a one- or two-digit percentage;
+    #   - a percentage from 0 to 99 with an optional fractional part, or 100.
+    .replace(pydantic.color._r_255, r'(?:((?:\d|\d\d|[01]\d\d|2[0-4]\d|25[0-4])(?:\.\d+)?|255(?:\.0+)?))')
+    .replace(pydantic.color._r_alpha, r'(?:(0(?:\.\d+)?|1(?:\.0+)?|\.\d+|\d{1,2}%))')
+    .replace(pydantic.color._r_sl, r'(?:(\d\d?(?:\.\d+)?|100(?:\.0+)?)%)')
+)
+st.register_type_strategy(
+    pydantic.color.Color,
+    st.one_of(
+        # A colour name known to pydantic (sorted for a fixed sampling order).
+        st.sampled_from(sorted(pydantic.color.COLORS_BY_NAME)),
+        # A 4-tuple of three 0-255 integers plus an alpha that is None, a
+        # float in [0, 1], or a '<float>%' string with the float in [0, 100].
+        st.tuples(
+            st.integers(0, 255),
+            st.integers(0, 255),
+            st.integers(0, 255),
+            st.none() | st.floats(0, 1) | st.floats(0, 100).map('{}%'.format),
+        ),
+        # A string fully matching one of the combined colour patterns.
+        st.from_regex(_color_regexes, fullmatch=True),
+    ),
+)
+
+# JSON strings, optionally constrained to a particular type.  We have to register
+# separate strategies for these cases because they're distinct types at runtime.
+# The value to serialise is built recursively: scalars at the leaves, nested
+# inside lists and string-keyed dictionaries.  It is then passed to
+# `json.dumps` together with randomly drawn formatting options.
+st.register_type_strategy(
+    pydantic.Json,
+    st.builds(
+        json.dumps,  # type: ignore[arg-type]
+        st.recursive(
+            base=st.one_of(
+                st.none(),
+                st.booleans(),
+                st.integers(),
+                # Infinities and NaN are excluded from the generated floats.
+                st.floats(allow_infinity=False, allow_nan=False),
+                st.text(),
+            ),
+            extend=lambda x: st.lists(x) | st.dictionaries(st.text(), x),
+        ),
+        ensure_ascii=st.booleans(),
+        indent=st.none() | st.integers(0, 16),
+        sort_keys=st.booleans(),
+    ),
+)
+
+
+# Card numbers, valid according to the Luhn algorithm
+
+
+def add_luhn_digit(card_number: str) -> str:
+    """Return `card_number` extended by one digit that passes the Luhn check.
+
+    Each digit from 0 to 9 is appended in turn and the result is handed to
+    `pydantic.PaymentCardNumber.validate_luhn_check_digit`; the first candidate
+    that does not raise is returned.  Any exception raised for a candidate is
+    suppressed and the next digit is tried.
+    """
+    # See https://en.wikipedia.org/wiki/Luhn_algorithm
+    for check_digit in map(str, range(10)):
+        with contextlib.suppress(Exception):
+            candidate = card_number + check_digit
+            pydantic.PaymentCardNumber.validate_luhn_check_digit(candidate)
+            return candidate
+    # Reached only if every one of the ten candidates was rejected.
+    raise AssertionError('Unreachable')  # pragma: no cover
+
+
+# Each pattern describes the digits of a card number *before* its final check
+# digit; the patterns are combined into one alternation when registering below.
+card_patterns = (
+    # Note that these patterns omit the Luhn check digit; that's added by the function above
+    '4[0-9]{14}',  # Visa
+    '5[12345][0-9]{13}',  # Mastercard
+    '3[47][0-9]{12}',  # American Express
+    '[0-26-9][0-9]{10,17}',  # other (incomplete to avoid overlap)
+)
+# Generate a string fully matching one of the patterns, then append the check
+# digit with `add_luhn_digit`.
+st.register_type_strategy(
+    pydantic.PaymentCardNumber,
+    st.from_regex('|'.join(card_patterns), fullmatch=True).map(add_luhn_digit),  # type: ignore[arg-type]
+)
+
+# UUIDs
+# Each versioned UUID type is paired with `st.uuids()` for the matching version.
+st.register_type_strategy(pydantic.UUID1, st.uuids(version=1))  # type: ignore[arg-type]
+st.register_type_strategy(pydantic.UUID3, st.uuids(version=3))  # type: ignore[arg-type]
+st.register_type_strategy(pydantic.UUID4, st.uuids(version=4))  # type: ignore[arg-type]
+st.register_type_strategy(pydantic.UUID5, st.uuids(version=5))  # type: ignore[arg-type]
+
+# Secrets
+# Generated bytes / text are wrapped in the corresponding secret type.
+st.register_type_strategy(pydantic.SecretBytes, st.binary().map(pydantic.SecretBytes))
+st.register_type_strategy(pydantic.SecretStr, st.text().map(pydantic.SecretStr))
+
+# IP addresses, networks, and interfaces
+# For interfaces and networks the IPv4 and IPv6 strategies are combined, so a
+# value from either one may be drawn.
+st.register_type_strategy(pydantic.IPvAnyAddress, st.ip_addresses())
+st.register_type_strategy(
+    pydantic.IPvAnyInterface,
+    st.from_type(ipaddress.IPv4Interface) | st.from_type(ipaddress.IPv6Interface),
+)
+st.register_type_strategy(
+    pydantic.IPvAnyNetwork,
+    st.from_type(ipaddress.IPv4Network) | st.from_type(ipaddress.IPv6Network),
+)
+
+# Constrained types
+# Because a new type is created at runtime for each new set of constraints,
+# we can only register the predefined constrained types.
+# The strict types use the unconstrained strategy for their builtin type.
+st.register_type_strategy(pydantic.StrictBool, st.booleans())
+st.register_type_strategy(pydantic.StrictInt, st.integers())  # type: ignore[arg-type]
+st.register_type_strategy(pydantic.StrictFloat, st.floats())  # type: ignore[arg-type]
+st.register_type_strategy(pydantic.StrictStr, st.text())  # type: ignore[arg-type]
+# Positive / negative integers start at 1 / end at -1, so zero is never drawn.
+st.register_type_strategy(pydantic.PositiveInt, st.integers(min_value=1))  # type: ignore[arg-type]
+st.register_type_strategy(pydantic.NegativeInt, st.integers(max_value=-1))  # type: ignore[arg-type]
+# For floats the bound is zero itself, excluded via `exclude_min` / `exclude_max`.
+st.register_type_strategy(pydantic.PositiveFloat, st.floats(min_value=0, exclude_min=True))  # type: ignore[arg-type]
+st.register_type_strategy(pydantic.NegativeFloat, st.floats(max_value=-0.0, exclude_max=True))  # type: ignore[arg-type]
diff --git a/pydantic/types.py b/pydantic/types.py
@@ -726,6 +726,8 @@ def get_secret_value(self) -> bytes:
 
 
 class PaymentCardBrand(str, Enum):
+    # If you add another card type, please also add it to the
+    # Hypothesis strategy in `pydantic._hypothesis_plugin`.
     amex = 'American Express'
     mastercard = 'Mastercard'
     visa = 'Visa'

diff --git a/setup.py b/setup.py
@@ -114,6 +114,7 @@ def extra(self):
         'Operating System :: POSIX :: Linux',
         'Environment :: Console',
         'Environment :: MacOS X',
+        'Framework :: Hypothesis',
         'Topic :: Software Development :: Libraries :: Python Modules',
         'Topic :: Internet',
     ],
@@ -134,4 +135,5 @@ def extra(self):
         'dotenv': ['python-dotenv>=0.10.4'],
     },
     ext_modules=ext_modules,
+    entry_points={'hypothesis': ['_ = pydantic._hypothesis_plugin']},
 )
diff --git a/tests/test_hypothesis_plugin.py b/tests/test_hypothesis_plugin.py
@@ -0,0 +1,81 @@
+import pytest
+from hypothesis import given, settings, strategies as st
+
+import pydantic
+from pydantic.networks import import_email_validator
+
+
+def gen_models():
+    """Yield the Pydantic model classes used to exercise the Hypothesis plugin.
+
+    Each model contains a few related fields; the idea is that if there's a
+    bug we have neither too many fields to dig through nor too many models to
+    read.  The email model is yielded last, and only when
+    `import_email_validator()` succeeds.
+    """
+
+    class MiscModel(pydantic.BaseModel):
+        obj: pydantic.PyObject
+        color: pydantic.color.Color
+        json_any: pydantic.Json
+
+    class StringsModel(pydantic.BaseModel):
+        card: pydantic.PaymentCardNumber
+        secbytes: pydantic.SecretBytes
+        secstr: pydantic.SecretStr
+
+    class UUIDsModel(pydantic.BaseModel):
+        uuid1: pydantic.UUID1
+        uuid3: pydantic.UUID3
+        uuid4: pydantic.UUID4
+        uuid5: pydantic.UUID5
+
+    class IPvAnyAddress(pydantic.BaseModel):
+        address: pydantic.IPvAnyAddress
+
+    class IPvAnyInterface(pydantic.BaseModel):
+        interface: pydantic.IPvAnyInterface
+
+    class IPvAnyNetwork(pydantic.BaseModel):
+        network: pydantic.IPvAnyNetwork
+
+    class StrictNumbersModel(pydantic.BaseModel):
+        strictbool: pydantic.StrictBool
+        strictint: pydantic.StrictInt
+        strictfloat: pydantic.StrictFloat
+        strictstr: pydantic.StrictStr
+
+    class NumbersModel(pydantic.BaseModel):
+        posint: pydantic.PositiveInt
+        negint: pydantic.NegativeInt
+        posfloat: pydantic.PositiveFloat
+        negfloat: pydantic.NegativeFloat
+
+    # Models that need no optional dependency, in the order they are yielded.
+    always_available = [
+        MiscModel,
+        StringsModel,
+        UUIDsModel,
+        IPvAnyAddress,
+        IPvAnyInterface,
+        IPvAnyNetwork,
+        StrictNumbersModel,
+        NumbersModel,
+    ]
+    for model in always_available:
+        yield model
+
+    try:
+        import_email_validator()
+    except ImportError:
+        # Without the email validator there is no email model to yield.
+        return
+
+    class EmailsModel(pydantic.BaseModel):
+        email: pydantic.EmailStr
+        name_email: pydantic.NameEmail
+
+    yield EmailsModel
+
+
+@pytest.mark.parametrize('model', gen_models())
+@settings(max_examples=20)
+@given(data=st.data())
+def test_can_construct_models_with_all_fields(data, model):
+    """Check that Hypothesis can build an instance of each generated model."""
+    # Drawing an instance successfully demonstrates that Hypothesis knows how
+    # to provide valid values for each field, so no further assertions are
+    # needed beyond the type check.
+    model_strategy = st.from_type(model)
+    drawn = data.draw(model_strategy)
+    assert isinstance(drawn, model)