Skip to content

Commit

Permalink
Improve database keys
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD committed Dec 4, 2022
1 parent 7e34f94 commit b9de5cf
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 17 deletions.
7 changes: 7 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,7 @@
RELEASE_TYPE: minor

This release improves our treatment of database keys, which are based on (among other things)
the source code of your test function. We now post-process this source to ignore
decorators, comments, trailing whitespace, and blank lines - so that you can add
:obj:`@example() <hypothesis.example>`\ s or make some small no-op edits to your code
without preventing replay of any known failing or covering examples.
60 changes: 43 additions & 17 deletions hypothesis-python/src/hypothesis/internal/reflection.py
Expand Up @@ -20,8 +20,9 @@
import textwrap
import types
from functools import wraps
from io import StringIO
from keyword import iskeyword
from tokenize import detect_encoding
from tokenize import COMMENT, detect_encoding, generate_tokens, untokenize
from types import ModuleType
from typing import TYPE_CHECKING, Callable
from unittest.mock import _patch as PatchType
Expand All @@ -48,6 +49,41 @@ def is_mock(obj):
return hasattr(obj, "hypothesis_internal_is_this_a_mock_check")


def _get_clean_source(function) -> bytes:
    """Return the function's source code as bytes, without decorators or comments.

    Because this is part of our database key, we reduce the cache invalidation
    rate by ignoring decorators, comments, trailing whitespace, and empty lines.
    We can't just use the (dumped) AST directly because it changes between Python
    versions (e.g. ast.Constant)

    If the source cannot be retrieved, the encoded ``__name__`` is returned
    instead, or ``b""`` when the object has no ``__name__`` either.
    """
    try:
        src = inspect.getsource(function)
    except (OSError, TypeError):
        # If we can't actually get the source code, try for the name as a fallback.
        try:
            return function.__name__.encode()
        except AttributeError:
            return b""
    # Get the (one-indexed) line number of the function definition, and drop preceding
    # lines - i.e. any decorators, so that adding `@example()`s keeps the same key.
    # Methods and nested functions come back from getsource() still indented, which
    # ast.parse() rejects; we therefore parse a dedented copy purely to locate the
    # `def` line (dedent never adds or removes lines) and slice the original text.
    try:
        func_lineno = ast.parse(textwrap.dedent(src)).body[0].lineno - 1
        src = "".join(src.splitlines(keepends=True)[func_lineno:])
    except Exception:
        # Deliberately best-effort: if the source can't be parsed we simply keep
        # whatever text we have rather than failing to compute a key at all.
        pass
    # Remove blank lines and use the tokenize module to strip out comments,
    # so that those can be changed without changing the database key.
    try:
        src = untokenize(
            t for t in generate_tokens(StringIO(src).readline) if t.type != COMMENT
        )
    except Exception:
        # Same best-effort policy as above: fall back to the un-tokenized text.
        pass
    # Finally, remove any trailing whitespace and empty lines as a last cleanup.
    return "\n".join(x.rstrip() for x in src.splitlines() if x.rstrip()).encode()


def function_digest(function):
"""Returns a string that is stable across multiple invocations across
multiple processes and is prone to changing significantly in response to
Expand All @@ -56,25 +92,15 @@ def function_digest(function):
No guarantee of uniqueness though it usually will be.
"""
hasher = hashlib.sha384()
hasher.update(_get_clean_source(function))
try:
hasher.update(inspect.getsource(function).encode())
except (OSError, TypeError):
pass
try:
hasher.update(function.__name__.encode())
except AttributeError:
pass
try:
# We prefer to use the modern signature API, but left this for compatibility.
# While we don't promise stability of the database, there's no advantage to
# using signature here, so we might as well keep the existing keys for now.
spec = inspect.getfullargspec(function)
if inspect.ismethod(function):
del spec.args[0]
hasher.update(repr(spec).encode())
except TypeError:
# This is additional to the source code because it can include the effects
# of decorators, or of post-hoc assignment to the .__signature__ attribute.
hasher.update(repr(get_signature(function)).encode())
except Exception:
pass
try:
# We set this in order to distinguish e.g. @pytest.mark.parametrize cases.
hasher.update(function._hypothesis_internal_add_digest)
except AttributeError:
pass
Expand Down

0 comments on commit b9de5cf

Please sign in to comment.