diff --git a/changes/1477-samuelcolvin.md b/changes/1477-samuelcolvin.md new file mode 100644 index 0000000000..816460f8b2 --- /dev/null +++ b/changes/1477-samuelcolvin.md @@ -0,0 +1,3 @@ +Prevent long (length > `4_300`) strings/bytes as input to int fields, see +[python/cpython#95778](https://github.com/python/cpython/issues/95778) and +[CVE-2020-10735](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2020-10735) diff --git a/pydantic/validators.py b/pydantic/validators.py index 1c19fc9239..fb6d041883 100644 --- a/pydantic/validators.py +++ b/pydantic/validators.py @@ -120,10 +120,23 @@ def bool_validator(v: Any) -> bool: raise errors.BoolError() +# matches the default limit in cpython, see https://github.com/python/cpython/pull/96500 +max_str_int = 4_300 + + def int_validator(v: Any) -> int: if isinstance(v, int) and not (v is True or v is False): return v + # see https://github.com/pydantic/pydantic/issues/1477 and in turn, https://github.com/python/cpython/issues/95778 + # this check should be unnecessary once patch releases are out for 3.7, 3.8, 3.9 and 3.10 + # but better to check here until then. + # NOTICE: this does not fully protect users from the DoS risk since the standard library JSON implementation + # (and other std lib modules like xml) use `int()` and are likely called before this, the best workaround is to + # 1. update to the latest patch release of python once released, 2. 
use a different JSON library like ujson + if isinstance(v, (str, bytes, bytearray)) and len(v) > max_str_int: + raise errors.IntegerError() + try: return int(v) except (TypeError, ValueError, OverflowError): diff --git a/tests/test_edge_cases.py b/tests/test_edge_cases.py index 002abd9dcb..799dd0bff4 100644 --- a/tests/test_edge_cases.py +++ b/tests/test_edge_cases.py @@ -2038,3 +2038,38 @@ class Custom: __fields__ = True assert not issubclass(Custom, BaseModel) + + +def test_long_int(): + """ + see https://github.com/pydantic/pydantic/issues/1477 and in turn, https://github.com/python/cpython/issues/95778 + """ + + class Model(BaseModel): + x: int + + assert Model(x='1' * 4_300).x == int('1' * 4_300) + assert Model(x=b'1' * 4_300).x == int('1' * 4_300) + assert Model(x=bytearray(b'1' * 4_300)).x == int('1' * 4_300) + + too_long = '1' * 4_301 + with pytest.raises(ValidationError) as exc_info: + Model(x=too_long) + + assert exc_info.value.errors() == [ + { + 'loc': ('x',), + 'msg': 'value is not a valid integer', + 'type': 'type_error.integer', + }, + ] + + too_long_b = too_long.encode('utf-8') + with pytest.raises(ValidationError): + Model(x=too_long_b) + with pytest.raises(ValidationError): + Model(x=bytearray(too_long_b)) + + # this used to hang indefinitely + with pytest.raises(ValidationError): + Model(x='1' * (10**7))