Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve database keys by ignoring decorators, comments, etc. #3523

Merged
merged 5 commits into from Dec 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion hypothesis-python/.coveragerc
Expand Up @@ -23,5 +23,5 @@ exclude_lines =
except ModuleNotFoundError:
if PYPY:
if TYPE_CHECKING:
if sys\.version_info
if "\w+" in sys\.modules:
assert all\(.+\)
7 changes: 7 additions & 0 deletions hypothesis-python/RELEASE.rst
@@ -0,0 +1,7 @@
RELEASE_TYPE: minor

This release improves our treatment of database keys, which are based on (among other things)
the source code of your test function. We now post-process this source to ignore
decorators, comments, trailing whitespace, and blank lines - so that you can add
:obj:`@example() <hypothesis.example>`\ s or make some small no-op edits to your code
without preventing replay of any known failing or covering examples.
4 changes: 2 additions & 2 deletions hypothesis-python/src/hypothesis/extra/ghostwriter.py
Expand Up @@ -52,7 +52,7 @@
.. tip::

Using a light theme? Hypothesis respects `NO_COLOR <https://no-color.org/>`__
and :envvar:`DJANGO_COLORS=light <django:DJANGO_COLORS>`.
and ``DJANGO_COLORS=light``.

.. note::

Expand Down Expand Up @@ -117,7 +117,7 @@
)
from hypothesis.strategies._internal.types import _global_type_lookup, is_generic_type

if sys.version_info >= (3, 10): # pragma: no cover
if sys.version_info >= (3, 10):
from types import EllipsisType as EllipsisType
elif TYPE_CHECKING:
from builtins import ellipsis as EllipsisType
Expand Down
68 changes: 51 additions & 17 deletions hypothesis-python/src/hypothesis/internal/reflection.py
Expand Up @@ -20,13 +20,14 @@
import textwrap
import types
from functools import wraps
from io import StringIO
from keyword import iskeyword
from tokenize import detect_encoding
from tokenize import COMMENT, detect_encoding, generate_tokens, untokenize
from types import ModuleType
from typing import TYPE_CHECKING, Callable
from unittest.mock import _patch as PatchType

from hypothesis.internal.compat import is_typed_named_tuple, update_code_location
from hypothesis.internal.compat import PYPY, is_typed_named_tuple, update_code_location
from hypothesis.utils.conventions import not_set
from hypothesis.vendor.pretty import pretty

Expand All @@ -48,6 +49,40 @@ def is_mock(obj):
return hasattr(obj, "hypothesis_internal_is_this_a_mock_check")


def _clean_source(src: str) -> bytes:
"""Return the source code as bytes, without decorators or comments.

Because this is part of our database key, we reduce the cache invalidation
rate by ignoring decorators, comments, trailing whitespace, and empty lines.
We can't just use the (dumped) AST directly because it changes between Python
versions (e.g. ast.Constant)
"""
# Get the (one-indexed) line number of the function definition, and drop preceding
# lines - i.e. any decorators, so that adding `@example()`s keeps the same key.
try:
funcdef = ast.parse(src).body[0]
if sys.version_info[:2] == (3, 7) or (sys.version_info[:2] == (3, 8) and PYPY):
# We can't get a line number of the (async) def here, so as a best-effort
# approximation we'll use str.split instead and hope for the best.
tag = "async def " if isinstance(funcdef, ast.AsyncFunctionDef) else "def "
if tag in src:
src = tag + src.split(tag, maxsplit=1)[1]
else:
src = "".join(src.splitlines(keepends=True)[funcdef.lineno - 1 :])
except Exception:
pass
# Remove blank lines and use the tokenize module to strip out comments,
# so that those can be changed without changing the database key.
try:
src = untokenize(
t for t in generate_tokens(StringIO(src).readline) if t.type != COMMENT
)
except Exception:
pass
# Finally, remove any trailing whitespace and empty lines as a last cleanup.
return "\n".join(x.rstrip() for x in src.splitlines() if x.rstrip()).encode()


def function_digest(function):
"""Returns a string that is stable across multiple invocations across
multiple processes and is prone to changing significantly in response to
Expand All @@ -57,24 +92,23 @@ def function_digest(function):
"""
hasher = hashlib.sha384()
try:
hasher.update(inspect.getsource(function).encode())
src = inspect.getsource(function)
except (OSError, TypeError):
pass
try:
hasher.update(function.__name__.encode())
except AttributeError:
pass
# If we can't actually get the source code, try for the name as a fallback.
try:
hasher.update(function.__name__.encode())
except AttributeError:
pass
else:
hasher.update(_clean_source(src))
try:
# We prefer to use the modern signature API, but left this for compatibility.
# While we don't promise stability of the database, there's no advantage to
# using signature here, so we might as well keep the existing keys for now.
spec = inspect.getfullargspec(function)
if inspect.ismethod(function):
del spec.args[0]
hasher.update(repr(spec).encode())
except TypeError:
# This is additional to the source code because it can include the effects
# of decorators, or of post-hoc assignment to the .__signature__ attribute.
hasher.update(repr(get_signature(function)).encode())
except Exception:
pass
try:
# We set this in order to distinguish e.g. @pytest.mark.parametrize cases.
hasher.update(function._hypothesis_internal_add_digest)
except AttributeError:
pass
Expand Down Expand Up @@ -120,7 +154,7 @@ def get_signature(target, *, follow_wrapped=True):
parameters=[v for k, v in sig.parameters.items() if k != "self"]
)
return sig
if sys.version_info[:2] <= (3, 8) and inspect.isclass(target): # pragma: no cover
if sys.version_info[:2] <= (3, 8) and inspect.isclass(target):
# Workaround for subclasses of typing.Generic on Python <= 3.8
from hypothesis.strategies._internal.types import is_generic_type

Expand Down
7 changes: 4 additions & 3 deletions hypothesis-python/src/hypothesis/utils/terminal.py
Expand Up @@ -18,9 +18,10 @@ def guess_background_color():
See also https://stackoverflow.com/questions/2507337/ and
https://unix.stackexchange.com/questions/245378/
"""
django_colors = os.getenv("DJANGO_COLORS")
if django_colors in ("light", "dark"):
return django_colors
django_colors = os.getenv("DJANGO_COLORS", "")
for theme in ("light", "dark"):
if theme in django_colors.split(";"):
return theme
# Guessing based on the $COLORFGBG environment variable
try:
fg, *_, bg = os.getenv("COLORFGBG").split(";")
Expand Down
59 changes: 59 additions & 0 deletions hypothesis-python/tests/cover/test_reflection.py
Expand Up @@ -13,6 +13,7 @@
from datetime import time
from functools import partial, wraps
from inspect import Parameter, Signature, signature
from textwrap import dedent
from unittest.mock import MagicMock, Mock, NonCallableMagicMock, NonCallableMock

import pytest
Expand Down Expand Up @@ -651,3 +652,61 @@ def test_param_called_within_defaults_on_error():
# Create a function object for which we cannot retrieve the source.
f = compile("lambda: ...", "_.py", "eval")
assert is_first_param_referenced_in_function(f)


def _prep_source(*pairs):
    """Turn ``(source, expected)`` string pairs into pytest parameters.

    Both strings are dedented and stripped; the expected value is also encoded
    to bytes.  Each case gets the id ``case-<index>`` from its position.
    """
    cases = []
    for index, (raw, expected) in enumerate(pairs):
        cases.append(
            pytest.param(
                dedent(raw).strip(),
                dedent(expected).strip().encode(),
                id=f"case-{index}",
            )
        )
    return cases


# Each pair below is (input source, expected cleaned source).  The cases cover
# empty input, a trivial function, two syntactically invalid inputs (expected to
# come back unchanged), decorators plus comments and blank lines, a
# backslash line-continuation, and a decorated async function.
@pytest.mark.parametrize(
"src, clean",
_prep_source(
("", ""),
("def test(): pass", "def test(): pass"),
("def invalid syntax", "def invalid syntax"),
("def also invalid(", "def also invalid("),
(
"""
@example(1)
@given(st.integers())
def test(x):
# line comment
assert x # end-of-line comment


"Had some blank lines above"
""",
"""
def test(x):
assert x
"Had some blank lines above"
""",
),
(
"""
def \\
f(): pass
""",
"""
def\\
f(): pass
""",
),
(
"""
@dec
async def f():
pass
""",
"""
async def f():
pass
""",
),
),
)
def test_clean_source(src, clean):
# Compare as lists of lines rather than as raw bytes.
assert reflection._clean_source(src).splitlines() == clean.splitlines()
9 changes: 5 additions & 4 deletions hypothesis-python/tests/quality/test_float_shrinking.py
Expand Up @@ -13,7 +13,6 @@
from hypothesis import (
HealthCheck,
Verbosity,
assume,
example,
given,
settings,
Expand Down Expand Up @@ -42,7 +41,10 @@ def test_can_shrink_in_variable_sized_context(n):
@given(st.floats(min_value=0, allow_infinity=False, allow_nan=False))
@settings(deadline=None, suppress_health_check=HealthCheck.all())
def test_shrinks_downwards_to_integers(f):
g = minimal(st.floats(), lambda x: x >= f, settings(verbosity=Verbosity.quiet))
g = minimal(
st.floats().filter(lambda x: x >= f),
settings=settings(verbosity=Verbosity.quiet, max_examples=10**6),
)
assert g == ceil(f)


Expand All @@ -51,8 +53,7 @@ def test_shrinks_downwards_to_integers(f):
@settings(deadline=None, suppress_health_check=HealthCheck.all(), max_examples=10)
def test_shrinks_downwards_to_integers_when_fractional(b):
g = minimal(
st.floats(),
lambda x: assume((0 < x < (2**53)) and int(x) != x) and x >= b,
st.floats().filter(lambda x: b < x < 2**53 and int(x) != x),
settings=settings(verbosity=Verbosity.quiet, max_examples=10**6),
)
assert g == b + 0.5
6 changes: 3 additions & 3 deletions hypothesis-python/tox.ini
Expand Up @@ -18,9 +18,9 @@ setenv=
brief: HYPOTHESIS_PROFILE=speedy
commands =
full: bash scripts/basic-test.sh
brief: python -bb -X dev -m pytest tests/cover/test_testdecorators.py {posargs}
cover: python -bb -X dev -m pytest tests/cover/ tests/pytest/ {posargs}
nocover: python -bb -X dev -m pytest tests/nocover/ {posargs}
brief: python -bb -X dev -m pytest -n auto tests/cover/test_testdecorators.py {posargs}
cover: python -bb -X dev -m pytest -n auto tests/cover/ tests/pytest/ {posargs}
nocover: python -bb -X dev -m pytest -n auto tests/nocover/ {posargs}
niche: bash scripts/other-tests.sh
custom: python -bb -X dev -m pytest {posargs}

Expand Down