From b9de5cf0fc11b4478f7d3969069a393e70031589 Mon Sep 17 00:00:00 2001 From: Zac Hatfield-Dodds Date: Sun, 4 Dec 2022 14:14:25 -0800 Subject: [PATCH] Improve database keys --- hypothesis-python/RELEASE.rst | 7 +++ .../src/hypothesis/internal/reflection.py | 60 +++++++++++++------ 2 files changed, 50 insertions(+), 17 deletions(-) create mode 100644 hypothesis-python/RELEASE.rst diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst new file mode 100644 index 0000000000..3ed885d9ee --- /dev/null +++ b/hypothesis-python/RELEASE.rst @@ -0,0 +1,7 @@ +RELEASE_TYPE: minor + +This release improves our treatment of database keys, which are based on (among other things) +the source code of your test function. We now post-process this source to ignore +decorators, comments, trailing whitespace, and blank lines - so that you can add +:obj:`@example() <hypothesis.example>`\ s or make some small no-op edits to your code +without preventing replay of any known failing or covering examples. diff --git a/hypothesis-python/src/hypothesis/internal/reflection.py b/hypothesis-python/src/hypothesis/internal/reflection.py index 1c1419c609..243dce6767 100644 --- a/hypothesis-python/src/hypothesis/internal/reflection.py +++ b/hypothesis-python/src/hypothesis/internal/reflection.py @@ -20,8 +20,9 @@ import textwrap import types from functools import wraps +from io import StringIO from keyword import iskeyword -from tokenize import detect_encoding +from tokenize import COMMENT, detect_encoding, generate_tokens, untokenize from types import ModuleType from typing import TYPE_CHECKING, Callable from unittest.mock import _patch as PatchType @@ -48,6 +49,41 @@ def is_mock(obj): return hasattr(obj, "hypothesis_internal_is_this_a_mock_check") +def _get_clean_source(function) -> bytes: + """Return the function's source code as bytes, without decorators or comments. 
+ + Because this is part of our database key, we reduce the cache invalidation + rate by ignoring decorators, comments, trailing whitespace, and empty lines. + We can't just use the (dumped) AST directly because it changes between Python + versions (e.g. ast.Constant) + """ + try: + src = inspect.getsource(function) + except (OSError, TypeError): + # If we can't actually get the source code, try for the name as a fallback. + try: + return function.__name__.encode() + except AttributeError: + return b"" + # Get the (one-indexed) line number of the function definition, and drop preceding + # lines - i.e. any decorators, so that adding `@example()`s keeps the same key. + try: + func_lineno = ast.parse(src).body[0].lineno - 1 + src = "".join(src.splitlines(keepends=True)[func_lineno:]) + except Exception: + pass + # Remove blank lines and use the tokenize module to strip out comments, + # so that those can be changed without changing the database key. + try: + src = untokenize( + t for t in generate_tokens(StringIO(src).readline) if t.type != COMMENT + ) + except Exception: + pass + # Finally, remove any trailing whitespace and empty lines as a last cleanup. + return "\n".join(x.rstrip() for x in src.splitlines() if x.rstrip()).encode() + + def function_digest(function): """Returns a string that is stable across multiple invocations across multiple processes and is prone to changing significantly in response to @@ -56,25 +92,15 @@ def function_digest(function): No guarantee of uniqueness though it usually will be. """ hasher = hashlib.sha384() + hasher.update(_get_clean_source(function)) try: - hasher.update(inspect.getsource(function).encode()) - except (OSError, TypeError): - pass - try: - hasher.update(function.__name__.encode()) - except AttributeError: - pass - try: - # We prefer to use the modern signature API, but left this for compatibility. 
- # While we don't promise stability of the database, there's no advantage to - # using signature here, so we might as well keep the existing keys for now. - spec = inspect.getfullargspec(function) - if inspect.ismethod(function): - del spec.args[0] - hasher.update(repr(spec).encode()) - except TypeError: + # This is additional to the source code because it can include the effects + # of decorators, or of post-hoc assignment to the .__signature__ attribute. + hasher.update(repr(get_signature(function)).encode()) + except Exception: pass try: + # We set this in order to distinguish e.g. @pytest.mark.parametrize cases. hasher.update(function._hypothesis_internal_add_digest) except AttributeError: pass