diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..3ed885d9ee --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,7 @@ +RELEASE_TYPE: minor + +This release improves our treatment of database keys, which are based on (among other things) +the source code of your test function. We now post-process this source to ignore +decorators, comments, trailing whitespace, and blank lines - so that you can add +:obj:`@example() <hypothesis.example>`\ s or make some small no-op edits to your code +without preventing replay of any known failing or covering examples. diff --git a/hypothesis-python/src/hypothesis/internal/reflection.py b/hypothesis-python/src/hypothesis/internal/reflection.py index 1c1419c609..cf8a025886 100644 --- a/hypothesis-python/src/hypothesis/internal/reflection.py +++ b/hypothesis-python/src/hypothesis/internal/reflection.py @@ -20,10 +20,11 @@ import textwrap import types from functools import wraps +from io import StringIO from keyword import iskeyword -from tokenize import detect_encoding +from tokenize import COMMENT, detect_encoding, generate_tokens, untokenize from types import ModuleType -from typing import TYPE_CHECKING, Callable +from typing import TYPE_CHECKING, Any, Callable from unittest.mock import _patch as PatchType from hypothesis.internal.compat import is_typed_named_tuple, update_code_location @@ -48,6 +49,33 @@ def is_mock(obj): return hasattr(obj, "hypothesis_internal_is_this_a_mock_check") +def _clean_source(src: str) -> bytes: + """Return the source code as bytes, without decorators or comments. + + Because this is part of our database key, we reduce the cache invalidation + rate by ignoring decorators, comments, trailing whitespace, and empty lines. + We can't just use the (dumped) AST directly because it changes between Python + versions (e.g. ast.Constant) + """ + # Get the (one-indexed) line number of the function definition, and drop preceding + # lines - i.e. 
any decorators, so that adding `@example()`s keeps the same key. + try: + func_lineno = ast.parse(src).body[0].lineno - 1 + src = "".join(src.splitlines(keepends=True)[func_lineno:]) + except Exception: + pass + # Remove blank lines and use the tokenize module to strip out comments, + # so that those can be changed without changing the database key. + try: + src = untokenize( + t for t in generate_tokens(StringIO(src).readline) if t.type != COMMENT + ) + except Exception: + pass + # Finally, remove any trailing whitespace and empty lines as a last cleanup. + return "\n".join(x.rstrip() for x in src.splitlines() if x.rstrip()).encode() + + def function_digest(function): """Returns a string that is stable across multiple invocations across multiple processes and is prone to changing significantly in response to @@ -57,24 +85,23 @@ def function_digest(function): """ hasher = hashlib.sha384() try: - hasher.update(inspect.getsource(function).encode()) + src = inspect.getsource(function) except (OSError, TypeError): - pass - try: - hasher.update(function.__name__.encode()) - except AttributeError: - pass + # If we can't actually get the source code, try for the name as a fallback. + try: + hasher.update(function.__name__.encode()) + except AttributeError: + pass + else: + hasher.update(_clean_source(src)) try: - # We prefer to use the modern signature API, but left this for compatibility. - # While we don't promise stability of the database, there's no advantage to - # using signature here, so we might as well keep the existing keys for now. - spec = inspect.getfullargspec(function) - if inspect.ismethod(function): - del spec.args[0] - hasher.update(repr(spec).encode()) - except TypeError: + # This is additional to the source code because it can include the effects + # of decorators, or of post-hoc assignment to the .__signature__ attribute. + hasher.update(repr(get_signature(function)).encode()) + except Exception: pass try: + # We set this in order to distinguish e.g. 
@pytest.mark.parametrize cases. hasher.update(function._hypothesis_internal_add_digest) except AttributeError: pass diff --git a/hypothesis-python/tests/cover/test_reflection.py b/hypothesis-python/tests/cover/test_reflection.py index b4639494d2..dbe579a0d0 100644 --- a/hypothesis-python/tests/cover/test_reflection.py +++ b/hypothesis-python/tests/cover/test_reflection.py @@ -13,6 +13,7 @@ from datetime import time from functools import partial, wraps from inspect import Parameter, Signature, signature +from textwrap import dedent from unittest.mock import MagicMock, Mock, NonCallableMagicMock, NonCallableMock import pytest @@ -651,3 +652,49 @@ def test_param_called_within_defaults_on_error(): # Create a function object for which we cannot retrieve the source. f = compile("lambda: ...", "_.py", "eval") assert is_first_param_referenced_in_function(f) + + +def _prep_source(*pairs): + return [ + pytest.param(dedent(x).strip(), dedent(y).strip().encode(), id=f"case-{i}") + for i, (x, y) in enumerate(pairs) + ] + + +@pytest.mark.parametrize( + "src, clean", + _prep_source( + ("", ""), + ("def test(): pass", "def test(): pass"), + ("def invalid syntax", "def invalid syntax"), + ( + """ + @example(1) + @given(st.integers()) + def test(x): + # line comment + assert x # end-of-line comment + + + "Had some blank lines above" + """, + """ + def test(x): + assert x + "Had some blank lines above" + """, + ), + ( + """ + def \\ + f(): pass + """, + """ + def\\ + f(): pass + """, + ), + ), +) +def test_clean_source(src, clean): + assert reflection._clean_source(src).splitlines() == clean.splitlines()