Skip to content

Commit

Permalink
Improve database keys
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD committed Dec 4, 2022
1 parent c3101d6 commit 1059489
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 16 deletions.
7 changes: 7 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,7 @@
RELEASE_TYPE: minor

This release improves our treatment of database keys, which are based on (among other things)
the source code of your test function. We now post-process this source to ignore
decorators, comments, trailing whitespace, and blank lines - so that you can add
:obj:`@example() <hypothesis.example>`\ s or make some small no-op edits to your code
without preventing replay of any known failing or covering examples.
59 changes: 43 additions & 16 deletions hypothesis-python/src/hypothesis/internal/reflection.py
Expand Up @@ -20,10 +20,11 @@
import textwrap
import types
from functools import wraps
from io import StringIO
from keyword import iskeyword
from tokenize import detect_encoding
from tokenize import COMMENT, detect_encoding, generate_tokens, untokenize
from types import ModuleType
from typing import TYPE_CHECKING, Callable
from typing import TYPE_CHECKING, Any, Callable
from unittest.mock import _patch as PatchType

from hypothesis.internal.compat import is_typed_named_tuple, update_code_location
Expand All @@ -48,6 +49,33 @@ def is_mock(obj):
return hasattr(obj, "hypothesis_internal_is_this_a_mock_check")


def _clean_source(src: str) -> bytes:
"""Return the source code as bytes, without decorators or comments.
Because this is part of our database key, we reduce the cache invalidation
rate by ignoring decorators, comments, trailing whitespace, and empty lines.
We can't just use the (dumped) AST directly because it changes between Python
versions (e.g. ast.Constant)
"""
# Get the (one-indexed) line number of the function definition, and drop preceding
# lines - i.e. any decorators, so that adding `@example()`s keeps the same key.
try:
func_lineno = ast.parse(src).body[0].lineno - 1
src = "".join(src.splitlines(keepends=True)[func_lineno:])
except Exception:
pass
# Remove blank lines and use the tokenize module to strip out comments,
# so that those can be changed without changing the database key.
try:
src = untokenize(
t for t in generate_tokens(StringIO(src).readline) if t.type != COMMENT
)
except Exception:
pass
# Finally, remove any trailing whitespace and empty lines as a last cleanup.
return "\n".join(x.rstrip() for x in src.splitlines() if x.rstrip()).encode()


def function_digest(function):
"""Returns a string that is stable across multiple invocations across
multiple processes and is prone to changing significantly in response to
Expand All @@ -57,24 +85,23 @@ def function_digest(function):
"""
hasher = hashlib.sha384()
try:
hasher.update(inspect.getsource(function).encode())
src = inspect.getsource(function)
except (OSError, TypeError):
pass
try:
hasher.update(function.__name__.encode())
except AttributeError:
pass
# If we can't actually get the source code, try for the name as a fallback.
try:
hasher.update(function.__name__.encode())
except AttributeError:
pass
else:
hasher.update(_clean_source(src))
try:
# We prefer to use the modern signature API, but left this for compatibility.
# While we don't promise stability of the database, there's no advantage to
# using signature here, so we might as well keep the existing keys for now.
spec = inspect.getfullargspec(function)
if inspect.ismethod(function):
del spec.args[0]
hasher.update(repr(spec).encode())
except TypeError:
# This is additional to the source code because it can include the effects
# of decorators, or of post-hoc assignment to the .__signature__ attribute.
hasher.update(repr(get_signature(function)).encode())
except Exception:
pass
try:
# We set this in order to distinguish e.g. @pytest.mark.parametrize cases.
hasher.update(function._hypothesis_internal_add_digest)
except AttributeError:
pass
Expand Down
47 changes: 47 additions & 0 deletions hypothesis-python/tests/cover/test_reflection.py
Expand Up @@ -13,6 +13,7 @@
from datetime import time
from functools import partial, wraps
from inspect import Parameter, Signature, signature
from textwrap import dedent
from unittest.mock import MagicMock, Mock, NonCallableMagicMock, NonCallableMock

import pytest
Expand Down Expand Up @@ -651,3 +652,49 @@ def test_param_called_within_defaults_on_error():
# Create a function object for which we cannot retrieve the source.
f = compile("lambda: ...", "_.py", "eval")
assert is_first_param_referenced_in_function(f)


def _prep_source(*pairs):
    """Build the parametrize cases for ``test_clean_source``.

    Each ``(source, expected)`` pair is dedented and stripped; the expected text
    is additionally encoded to bytes. Cases are given ids ``case-0``, ``case-1``,
    ... in the order they were passed.
    """
    cases = []
    for index, (raw_src, raw_expected) in enumerate(pairs):
        source = dedent(raw_src).strip()
        expected = dedent(raw_expected).strip().encode()
        cases.append(pytest.param(source, expected, id=f"case-{index}"))
    return cases


# Each case is a (source, expected) pair of strings; _prep_source turns them
# into pytest params before they are handed to the test as (src, clean).
@pytest.mark.parametrize(
"src, clean",
_prep_source(
# Empty source is expected to stay empty.
("", ""),
# A plain one-line function is expected to come back unchanged.
("def test(): pass", "def test(): pass"),
# Text that is not valid Python is expected to come back unchanged.
("def invalid syntax", "def invalid syntax"),
# Decorators and comments are expected to be removed.
# NOTE(review): the string mentions blank lines, but none are visible in
# this view of the file - confirm against the real source.
(
"""
@example(1)
@given(st.integers())
def test(x):
# line comment
assert x # end-of-line comment
"Had some blank lines above"
""",
"""
def test(x):
assert x
"Had some blank lines above"
""",
),
# Backslash line continuation: the expected text differs from the input
# only in the space before the backslash.
(
"""
def \\
f(): pass
""",
"""
def\\
f(): pass
""",
),
),
)
def test_clean_source(src, clean):
# Compare line by line, so the check does not depend on which line
# separator is used in either value.
assert reflection._clean_source(src).splitlines() == clean.splitlines()

0 comments on commit 1059489

Please sign in to comment.