-
Notifications
You must be signed in to change notification settings - Fork 575
/
ghostwriter.py
1573 lines (1361 loc) · 59.6 KB
/
ghostwriter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# This file is part of Hypothesis, which may be found at
# https://github.com/HypothesisWorks/hypothesis/
#
# Copyright the Hypothesis Authors.
# Individual contributors are listed in AUTHORS.rst and the git log.
#
# This Source Code Form is subject to the terms of the Mozilla Public License,
# v. 2.0. If a copy of the MPL was not distributed with this file, You can
# obtain one at https://mozilla.org/MPL/2.0/.
"""
Writing tests with Hypothesis frees you from the tedium of deciding on and
writing out specific inputs to test. Now, the ``hypothesis.extra.ghostwriter``
module can write your test functions for you too!
The idea is to provide **an easy way to start** property-based testing,
**and a seamless transition** to more complex test code - because ghostwritten
tests are source code that you could have written for yourself.
So just pick a function you'd like tested, and feed it to one of the functions
below. They follow imports, use but do not require type annotations, and
generally do their best to write you a useful test. You can also use
:ref:`our command-line interface <hypothesis-cli>`::
$ hypothesis write --help
Usage: hypothesis write [OPTIONS] FUNC...
`hypothesis write` writes property-based tests for you!
Type annotations are helpful but not required for our advanced
introspection and templating logic. Try running the examples below to see
how it works:
hypothesis write gzip
hypothesis write numpy.matmul
hypothesis write re.compile --except re.error
hypothesis write --equivalent ast.literal_eval eval
hypothesis write --roundtrip json.dumps json.loads
hypothesis write --style=unittest --idempotent sorted
hypothesis write --binary-op operator.add
Options:
--roundtrip start by testing write/read or encode/decode!
--equivalent very useful when optimising or refactoring code
--errors-equivalent --equivalent, but also allows consistent errors
--idempotent check that f(x) == f(f(x))
--binary-op associativity, commutativity, identity element
--style [pytest|unittest] pytest-style function, or unittest-style method?
-e, --except OBJ_NAME dotted name of exception(s) to ignore
-h, --help Show this message and exit.
.. tip::
Using a light theme? Hypothesis respects `NO_COLOR <https://no-color.org/>`__
and ``DJANGO_COLORS=light``.
.. note::
The ghostwriter requires :pypi:`black`, but the generated code only
requires Hypothesis itself.
.. note::
Legal questions? While the ghostwriter fragments and logic are under the
MPL-2.0 license like the rest of Hypothesis, the *output* from the ghostwriter
is made available under the `Creative Commons Zero (CC0)
<https://creativecommons.org/share-your-work/public-domain/cc0/>`__
public domain dedication, so you can use it without any restrictions.
"""
import ast
import builtins
import contextlib
import enum
import inspect
import os
import re
import sys
import types
from collections import OrderedDict, defaultdict
from itertools import permutations, zip_longest
from keyword import iskeyword
from string import ascii_lowercase
from textwrap import dedent, indent
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Mapping,
Optional,
Set,
Tuple,
Type,
TypeVar,
Union,
)
import black
from hypothesis import Verbosity, find, settings, strategies as st
from hypothesis.errors import InvalidArgument
from hypothesis.internal.compat import get_type_hints
from hypothesis.internal.reflection import get_signature, is_mock
from hypothesis.internal.validation import check_type
from hypothesis.provisional import domains
from hypothesis.strategies._internal.collections import ListStrategy
from hypothesis.strategies._internal.core import BuildsStrategy
from hypothesis.strategies._internal.flatmapped import FlatMapStrategy
from hypothesis.strategies._internal.lazy import LazyStrategy, unwrap_strategies
from hypothesis.strategies._internal.strategies import (
FilteredStrategy,
MappedSearchStrategy,
OneOfStrategy,
SampledFromStrategy,
)
from hypothesis.strategies._internal.types import _global_type_lookup, is_generic_type
# `EllipsisType` names the type of the `...` singleton.  It is imported from
# `types` on Python 3.10+, taken from the `builtins` stubs when only type
# checking, and otherwise computed at runtime from `Ellipsis` itself.
if sys.version_info >= (3, 10):  # pragma: no cover
    from types import EllipsisType as EllipsisType
elif TYPE_CHECKING:
    from builtins import ellipsis as EllipsisType
else:
    EllipsisType = type(Ellipsis)
# Header of a ghostwritten file; `{imports}` is replaced with the import lines.
IMPORT_SECTION = """
# This test code was written by the `hypothesis.extra.ghostwriter` module
# and is provided under the Creative Commons Zero public domain dedication.
{imports}
"""

# Skeleton of one test function.  `{test_body}` must already be indented by
# the caller, since it is inserted verbatim as the function body.
TEMPLATE = """
@given({given_args})
def test_{test_kind}_{func_name}({arg_names}):
{test_body}
"""

# Wraps an (already indented) `{test_body}` in a try/except which rejects the
# example when one of `{exceptions}` is raised.  The `reject()` call has to be
# indented relative to `except`, otherwise the rendered block is not valid
# Python and cannot be formatted.
SUPPRESS_BLOCK = """
try:
{test_body}
except {exceptions}:
    reject()
""".strip()
# Accepted shapes of an `except_` argument: a single exception class, or a
# tuple of exception classes.
Except = Union[Type[Exception], Tuple[Type[Exception], ...]]
# Imports to emit: a plain string is written as `import <name>`, and a
# `(module, name)` pair as `from <module> import <name>`.
ImportSet = Set[Union[str, Tuple[str, str]]]
# The concrete types of compiled patterns and of match objects.
RE_TYPES = (type(re.compile(".")), type(re.match(".", "abc")))
# Settings profile with no example database, no deadline, derandomized
# execution and quiet verbosity.
_quietly_settings = settings(
    database=None,
    deadline=None,
    derandomize=True,
    verbosity=Verbosity.quiet,
)
def _dedupe_exceptions(exc: Tuple[Type[Exception], ...]) -> Tuple[Type[Exception], ...]:
    """Return the smallest tuple of exception types which covers all of `exc`.

    A type is dropped when another, distinct type in `exc` is one of its base
    classes, which lets us print e.g. `Exception` instead of
    `(Exception, OSError)`.  Repeated entries are collapsed into one.  The
    result is sorted by class name so that the output is deterministic.
    """
    # This is reminiscent of de-duplication logic I wrote for flake8-bugbear,
    # but with access to the actual objects we can just check for subclasses.
    #
    # Collapse repeats first.  Comparing a class against a second copy of
    # itself would otherwise remove *every* copy (issubclass(a, a) is True),
    # turning e.g. `(ValueError, ValueError)` into `()`.
    distinct = list(dict.fromkeys(exc))
    uniques = [
        a
        for a in distinct
        if not any(a is not b and issubclass(a, b) for b in distinct)
    ]
    return tuple(sorted(uniques, key=lambda e: e.__name__))
def _check_except(except_: Except) -> Tuple[Type[Exception], ...]:
    """Validate `except_` and normalise it to a tuple of exception classes.

    A tuple is returned unchanged once every member has been checked; a single
    exception class is wrapped in a one-element tuple.  Anything else raises
    InvalidArgument.
    """

    def is_exception_class(candidate) -> bool:
        return isinstance(candidate, type) and issubclass(candidate, Exception)

    if not isinstance(except_, tuple):
        if is_exception_class(except_):
            return (except_,)
        raise InvalidArgument(
            "Expected an Exception or tuple of exceptions, but got except_="
            f"{except_!r} (type={_get_qualname(type(except_))})"
        )

    for position, member in enumerate(except_):
        if is_exception_class(member):
            continue
        raise InvalidArgument(
            f"Expected an Exception but got except_[{position}]={member!r}"
            f" (type={_get_qualname(type(member))})"
        )
    return except_
def _exception_string(except_: Tuple[Type[Exception], ...]) -> Tuple[ImportSet, str]:
    """Return `(imports, source)` for naming `except_` in an except clause.

    Builtin exceptions are written by their bare name.  Any other exception is
    written with its module prefix, and its `__module__` is added to the
    imports.  Several names are joined into a parenthesised tuple.
    """
    if not except_:
        return set(), ""

    needed_imports: ImportSet = set()
    rendered = []
    for exc_type in _dedupe_exceptions(except_):
        if exc_type.__qualname__ in dir(builtins):
            name = exc_type.__qualname__
        else:
            needed_imports.add(exc_type.__module__)
            name = _get_qualname(exc_type, include_module=True)
        rendered.append(name)

    if len(rendered) > 1:
        return needed_imports, "(" + ", ".join(rendered) + ")"
    return needed_imports, rendered[0]
def _check_style(style: str) -> None:
    """Raise InvalidArgument unless `style` is 'pytest' or 'unittest'."""
    if style in ("pytest", "unittest"):
        return
    raise InvalidArgument(f"Valid styles are 'pytest' or 'unittest', got {style!r}")
def _exceptions_from_docstring(doc: str) -> Tuple[Type[Exception], ...]:
    """Collect the builtin exceptions named by `:raises Name:` fields in `doc`.

    Names which are not builtin exception classes are skipped for simplicity:
    the result is used directly in _write_call(), and passing import sets
    around would be really annoying.
    """
    # TODO: it would be great to handle Google- and Numpy-style docstrings
    #       (e.g. by using the Napoleon Sphinx extension)
    assert isinstance(doc, str), doc
    mentioned = re.findall(r"\:raises\s+(\w+)\:", doc, flags=re.MULTILINE)
    candidates = (getattr(builtins, name, None) for name in mentioned)
    found = tuple(
        candidate
        for candidate in candidates
        if isinstance(candidate, type) and issubclass(candidate, Exception)
    )
    return tuple(_dedupe_exceptions(found))
def _type_from_doc_fragment(token: str) -> Optional[type]:
    """Translate one type description from a docstring into an object.

    Handles the word "integer", numpy's "array-like" and "dtype" (only when
    numpy has already been imported), phrases such as "sequence of integers",
    names of builtins, and dotted names whose module is already imported.
    Returns None when nothing matches.
    """
    if token == "integer":
        return int

    if "numpy" in sys.modules:
        numpy_module = sys.modules["numpy"]
        if re.fullmatch(r"[Aa]rray[-_ ]?like", token):
            return numpy_module.ndarray
        if token == "dtype":
            return numpy_module.dtype

    # Natural-language syntax, e.g. "sequence of integers"
    phrase = re.fullmatch(r"(\w+) of (\w+)", token)
    if phrase is None:
        # Plain names are looked up in builtins; dotted names in their module.
        if "." in token:
            module_name, _, attribute = token.rpartition(".")
            return getattr(sys.modules.get(module_name, None), attribute, None)
        return getattr(builtins, token, None)

    container, element = phrase.group(1), phrase.group(2)
    element_type = _type_from_doc_fragment(element)
    if element_type is None and element.endswith("s"):
        # Retry with a trailing "s" removed, e.g. "integers" -> "integer".
        element_type = _type_from_doc_fragment(element[:-1])
    if element_type is not None and container in ("list", "sequence", "collection"):
        return List[element_type]  # type: ignore
    # This might be e.g. "array-like of float"; arrays is better than nothing
    # even if we can't conveniently pass a generic type around.
    return _type_from_doc_fragment(container)
def _strategy_for(param: inspect.Parameter, docstring: str) -> st.SearchStrategy:
    """Choose a strategy for `param`, which has no usable type annotation.

    We first look for a type description of the parameter in `docstring`,
    then fall back to the parameter's default value, and finally guess from
    the (lower-cased) parameter name.
    """
    # Example types in docstrings:
    # - `:type a: sequence of integers`
    # - `b (list, tuple, or None): ...`
    # - `c : {"foo", "bar", or None}`
    for pattern in (
        rf"^\s*\:type\s+{param.name}\:\s+(.+)",  # RST-style
        rf"^\s*{param.name} \((.+)\):",  # Google-style
        rf"^\s*{param.name} \: (.+)",  # Numpy-style
    ):
        match = re.search(pattern, docstring, flags=re.MULTILINE)
        if match is None:
            continue
        doc_type = match.group(1)
        if doc_type.endswith(", optional"):
            # Convention to describe "argument may be omitted"
            doc_type = doc_type[: -len(", optional")]
        doc_type = doc_type.strip("}{")
        # `elements` collects literal values and `types` collects type objects.
        elements = []
        types = []
        # Split the description on commas and on the word "or".
        for token in re.split(r",? +or +| *, *", doc_type):
            for prefix in ("default ", "python "):
                # `str or None, default "auto"`; `python int or numpy.int64`
                if token.startswith(prefix):
                    token = token[len(prefix) :]
            if not token:
                continue
            try:
                # Elements of `{"inner", "outer"}` etc.
                elements.append(ast.literal_eval(token))
                continue
            except (ValueError, SyntaxError):
                # Not a literal, so try to interpret the token as a type.
                t = _type_from_doc_fragment(token)
                if isinstance(t, type) or is_generic_type(t):
                    assert t is not None
                    types.append(t)
        # Also offer the default value, unless it is already covered by the
        # documented elements or is an instance of a documented type.
        if (
            param.default is not inspect.Parameter.empty
            and param.default not in elements
            and not isinstance(
                param.default, tuple(t for t in types if isinstance(t, type))
            )
        ):
            # Only add the default if its repr compiles; on SyntaxError the
            # insert below is skipped.
            with contextlib.suppress(SyntaxError):
                compile(repr(st.just(param.default)), "<string>", "eval")
                elements.insert(0, param.default)
        if elements or types:
            return (st.sampled_from(elements) if elements else st.nothing()) | (
                st.one_of(*map(st.from_type, types)) if types else st.nothing()
            )

    # If our default value is an Enum or a boolean, we assume that any value
    # of that type is acceptable.  Otherwise, we only generate the default.
    if isinstance(param.default, bool):
        return st.booleans()
    if isinstance(param.default, enum.Enum):
        return st.sampled_from(type(param.default))
    if param.default is not inspect.Parameter.empty:
        # Using `st.from_type(type(param.default))` would introduce spurious
        # failures in cases like the `flags` argument to regex functions.
        # Better to keep it simple, and let the user elaborate if desired.
        return st.just(param.default)
    return _guess_strategy_by_argname(name=param.name.lower())
# Lookup tables of argument names, consulted by `_guess_strategy_by_argname`
# when it has nothing better than the name to go on.
# fmt: off
# Names treated as boolean flags.
BOOL_NAMES = (
    "keepdims", "verbose", "debug", "force", "train", "training", "trainable", "bias",
    "shuffle", "show", "load", "pretrained", "save", "overwrite", "normalize",
    "reverse", "success", "enabled", "strict", "copy", "quiet", "required", "inplace",
    "recursive", "enable", "active", "create", "validate", "refresh", "use_bias",
)
# Names treated as integers with a minimum value of zero.
POSITIVE_INTEGER_NAMES = (
    "width", "size", "length", "limit", "idx", "stride", "epoch", "epochs", "depth",
    "pid", "steps", "iteration", "iterations", "vocab_size", "ttl", "count",
)
# Names treated as unconstrained floats.
FLOAT_NAMES = (
    "real", "imag", "alpha", "theta", "beta", "sigma", "gamma", "angle", "reward",
    "tau", "temperature",
)
# Names treated as text.
STRING_NAMES = (
    "text", "txt", "password", "label", "prefix", "suffix", "desc", "description",
    "str", "pattern", "subject", "reason", "comment", "prompt", "sentence", "sep",
)
# fmt: on
def _guess_strategy_by_argname(name: str) -> st.SearchStrategy:
    """
    If all else fails, we try guessing a strategy based on common argument names.

    We wouldn't do this in builds() where strict correctness is required, but for
    the ghostwriter we accept "good guesses" since the user would otherwise have
    to change the strategy anyway - from `nothing()` - if we refused to guess.

    A "good guess" is _usually correct_, and _a reasonable mistake_ if not.
    The logic below is therefore based on a manual reading of the builtins and
    some standard-library docs, plus the analysis of about three hundred million
    arguments in https://github.com/HypothesisWorks/hypothesis/issues/3311

    The checks run in order and the first match wins; `nothing()` is returned
    when no rule applies.
    """
    # Special-cased names
    if name in ("function", "func", "f"):
        return st.functions()
    if name in ("pred", "predicate"):
        return st.functions(returns=st.booleans(), pure=True)
    if name in ("iterable",):
        return st.iterables(st.integers()) | st.iterables(st.text())
    if name in ("list", "lst", "ls"):
        return st.lists(st.nothing())
    if name in ("object",):
        return st.builds(object)
    if "uuid" in name:
        return st.uuids().map(str)

    # Names which imply the value is a boolean
    if name.startswith("is_") or name in BOOL_NAMES:
        return st.booleans()

    # Names which imply that the value is a number, perhaps in a particular range
    if name in ("amount", "threshold", "number", "num"):
        return st.integers() | st.floats()
    if name in ("port",):
        return st.integers(0, 2**16 - 1)
    if (
        name.endswith("_size")
        or (name.endswith("size") and "_" not in name)
        or re.fullmatch(r"n(um)?_[a-z_]*s", name)
        or name in POSITIVE_INTEGER_NAMES
    ):
        return st.integers(min_value=0)
    if name in ("offset", "seed", "dim", "total", "priority"):
        return st.integers()
    if name in ("learning_rate", "dropout", "dropout_rate", "epsilon", "eps", "prob"):
        return st.floats(0, 1)
    if name in ("lat", "latitude"):
        return st.floats(-90, 90)
    if name in ("lon", "longitude"):
        return st.floats(-180, 180)
    if name in ("radius", "tol", "tolerance", "rate"):
        return st.floats(min_value=0)
    if name in FLOAT_NAMES:
        return st.floats()

    # Names which imply that the value is a string
    if name in ("host", "hostname"):
        return domains()
    if name in ("email",):
        return st.emails()
    if name in ("word", "slug", "api_key"):
        return st.from_regex(r"\w+", fullmatch=True)
    if name in ("char", "character"):
        return st.characters()

    if (
        "file" in name
        or "path" in name
        or name.endswith("_dir")
        or name in ("fname", "dir", "dirname", "directory", "folder")
    ):
        # Common names for filesystem paths: these are usually strings, but we
        # don't want to make strings more convenient than pathlib.Path.
        return st.nothing()

    if (
        name.endswith("_name")
        or (name.endswith("name") and "_" not in name)
        or ("string" in name and "as" not in name)
        or name.endswith("label")
        or name in STRING_NAMES
    ):
        return st.text()

    # Last clever idea: maybe we're looking at a plural, and know the singular:
    if re.fullmatch(r"\w*[^s]s", name):
        # Guess for the name without its final "s", and wrap that in lists().
        elems = _guess_strategy_by_argname(name[:-1])
        if not elems.is_empty:
            return st.lists(elems)

    # And if all that failed, we'll return nothing() - the user will have to
    # fill this in by hand, and we'll leave a comment to that effect later.
    return st.nothing()
def _get_params(func: Callable) -> Dict[str, inspect.Parameter]:
    """Get non-vararg parameters of `func` as an ordered dict.

    The signature normally comes from `get_signature(func)`.  If that fails we
    try two fallbacks: parsing the first line of the docstring of a builtin
    function or method, and synthesising positional-only parameters for
    objects which look like numpy ufuncs.  If neither applies, the original
    exception is re-raised.
    """
    var_param_kinds = (inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD)
    try:
        params = list(get_signature(func).parameters.values())
    except Exception:
        if (
            isinstance(func, (types.BuiltinFunctionType, types.BuiltinMethodType))
            and hasattr(func, "__doc__")
            and isinstance(func.__doc__, str)
        ):
            # inspect.signature doesn't work on all builtin functions or methods.
            # In such cases, we can try to reconstruct simple signatures from the docstring.
            match = re.match(rf"^{func.__name__}\((.+?)\)", func.__doc__)
            if match is None:
                raise
            # Square brackets mark optional arguments in such docstrings; drop them.
            args = match.group(1).replace("[", "").replace("]", "")
            params = []
            # Even if the signature doesn't contain a /, we assume that arguments
            # are positional-only until shown otherwise - the / is often omitted.
            kind: inspect._ParameterKind = inspect.Parameter.POSITIONAL_ONLY
            for arg in args.split(", "):
                # Discard any `=default` part, keeping only the name.
                arg, *_ = arg.partition("=")
                arg = arg.strip()
                if arg == "/":
                    kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
                    continue
                if arg.startswith("*") or arg == "...":
                    kind = inspect.Parameter.KEYWORD_ONLY
                    continue  # we omit *varargs, if there are any
                # `arg` cannot start with "*" here, so it can be checked as-is.
                if iskeyword(arg) or not arg.isidentifier():
                    break  # skip all subsequent params if this name is invalid
                params.append(inspect.Parameter(name=arg, kind=kind))

        elif _is_probably_ufunc(func):
            # `inspect.signature` doesn't work on ufunc objects, but we can work out
            # what the required parameters would look like if it did.
            # Note that we use args named a, b, c... to match the `operator` module,
            # rather than x1, x2, x3... like the Numpy docs.  Because they're pos-only
            # this doesn't make a runtime difference, and it's much nicer for use-cases
            # like `equivalent(numpy.add, operator.add)`.
            params = [
                inspect.Parameter(name=name, kind=inspect.Parameter.POSITIONAL_ONLY)
                for name in ascii_lowercase[: func.nin]  # type: ignore
            ]
        else:
            # If we haven't managed to recover a signature through the tricks above,
            # we're out of ideas and should just re-raise the exception.
            raise
    return OrderedDict((p.name, p) for p in params if p.kind not in var_param_kinds)
@contextlib.contextmanager
def _with_any_registered():
    """Context manager which ensures `Any` has an entry in the type lookup.

    If `Any` is already in `_global_type_lookup` nothing is changed.
    Otherwise `Any` is mapped to `st.builds(object)` for the duration of the
    block, and on exit the entry is removed and the `from_type` cache cleared.
    """
    # If the user has registered their own strategy for Any, leave it alone
    if Any in _global_type_lookup:
        yield
    # We usually want to force from_type(Any) to raise an error because we don't
    # have enough information to accurately resolve user intent, but in this case
    # we can treat it as a synonym for object - this is probably wrong, but you'll
    # get at least _some_ output to edit later.  We then reset everything in order
    # to avoid polluting the resolution logic in case you run tests later.
    else:
        try:
            _global_type_lookup[Any] = st.builds(object)
            yield
        finally:
            # Runs even if the body raised, so the temporary entry never leaks.
            del _global_type_lookup[Any]
            st.from_type.__clear_cache()
def _get_strategies(
    *funcs: Callable, pass_result_to_next_func: bool = False
) -> Dict[str, st.SearchStrategy]:
    """Return a dict of strategies for the union of arguments to `funcs`.

    If `pass_result_to_next_func` is True, assume that the result of each function
    is passed to the next, and therefore skip the first argument of all but the
    first function.

    This dict is used to construct our call to the `@given(...)` decorator.
    """
    assert funcs, "Must pass at least one function"
    given_strategies: Dict[str, st.SearchStrategy] = {}
    for i, f in enumerate(funcs):
        params = _get_params(f)
        if pass_result_to_next_func and i >= 1:
            # Drop the first parameter: it receives the previous function's result.
            del params[next(iter(params))]
        hints = get_type_hints(f)
        docstring = getattr(f, "__doc__", None) or ""
        # Annotated parameters are passed as `...`; for the others we pick a
        # strategy from the docstring, default value or argument name.
        builder_args = {
            k: ... if k in hints else _strategy_for(v, docstring)
            for k, v in params.items()
        }
        with _with_any_registered():
            strat = st.builds(f, **builder_args).wrapped_strategy  # type: ignore

        if strat.args:
            raise NotImplementedError("Expected to pass everything as kwargs")

        for k, v in strat.kwargs.items():
            if _valid_syntax_repr(v)[1] == "nothing()" and k in hints:
                # e.g. from_type(Hashable) is OK but the unwrapped repr is not
                v = LazyStrategy(st.from_type, (hints[k],), {})
            # An argument name shared by several functions gets the union of
            # their strategies.
            if k in given_strategies:
                given_strategies[k] |= v
            else:
                given_strategies[k] = v

    # If there is only one function, we pass arguments to @given in the order of
    # that function's signature.  Otherwise, we use alphabetical order.
    if len(funcs) == 1:
        # `f` is still bound to the only function from the loop above.
        return {name: given_strategies[name] for name in _get_params(f)}
    return dict(sorted(given_strategies.items()))
def _assert_eq(style: str, a: str, b: str) -> str:
    """Return source code asserting that expressions `a` and `b` are equal.

    For "unittest" this is a `self.assertEqual(...)` call.  For "pytest" it is
    a bare `assert`, which also shows both values on failure when `a` and `b`
    are plain identifiers.
    """
    if style != "unittest":
        assert style == "pytest"
        both_are_names = a.isidentifier() and b.isidentifier()
        if not both_are_names:
            return f"assert {a} == {b}"
        return f"assert {a} == {b}, ({a}, {b})"
    return f"self.assertEqual({a}, {b})"
def _imports_for_object(obj):
    """Return the imports for `obj`, which may be empty for e.g. lambdas"""
    # Compiled patterns and match objects only need the `re` module.
    if isinstance(obj, RE_TYPES):
        return {"re"}
    try:
        if is_generic_type(obj):
            if isinstance(obj, TypeVar):
                return {(obj.__module__, obj.__name__)}
            # Union of the imports for each type argument.  If this raises,
            # the error is suppressed and we fall through to the checks below.
            with contextlib.suppress(Exception):
                return set().union(*map(_imports_for_object, obj.__args__))
        if (not callable(obj)) or obj.__name__ == "<lambda>":
            return set()
        # Import the outermost name, e.g. `Outer` for `Outer.method`.
        name = _get_qualname(obj).split(".")[0]
        return {(_get_module(obj), name)}
    except Exception:
        # Best effort: any failure simply means no imports are reported.
        return set()
def _imports_for_strategy(strategy):
    """Return the set of imports needed to evaluate the repr of `strategy`.

    Lazy `from_type(...)` strategies and lazy strategies from
    `hypothesis.extra.*` modules are handled directly.  Every other strategy
    is unwrapped and inspected recursively for the callables and element
    strategies it contains.
    """
    # If we have a lazy from_type strategy, because unwrapping it gives us an
    # error or invalid syntax, import that type and we're done.
    if isinstance(strategy, LazyStrategy):
        if strategy.function is st.from_type:
            return _imports_for_object(strategy._LazyStrategy__args[0])
        elif _get_module(strategy.function).startswith("hypothesis.extra."):
            # Report functions located in an `._array_helpers` module under
            # the corresponding `.numpy` module instead.  This rewrite used to
            # sit after an unconditional `return`, so it never ran.
            # NOTE(review): presumably `_array_helpers` is the private module
            # behind `hypothesis.extra.numpy` - confirm.
            module = _get_module(strategy.function).replace("._array_helpers", ".numpy")
            return {(module, strategy.function.__name__)}

    imports = set()
    strategy = unwrap_strategies(strategy)

    # Get imports for s.map(f), s.filter(f), s.flatmap(f), including both s and f
    if isinstance(strategy, MappedSearchStrategy):
        imports |= _imports_for_strategy(strategy.mapped_strategy)
        imports |= _imports_for_object(strategy.pack)
    if isinstance(strategy, FilteredStrategy):
        imports |= _imports_for_strategy(strategy.filtered_strategy)
        for f in strategy.flat_conditions:
            imports |= _imports_for_object(f)
    if isinstance(strategy, FlatMapStrategy):
        imports |= _imports_for_strategy(strategy.flatmapped_strategy)
        imports |= _imports_for_object(strategy.expand)

    # recurse through one_of to handle e.g. from_type(Optional[Foo])
    if isinstance(strategy, OneOfStrategy):
        for s in strategy.element_strategies:
            imports |= _imports_for_strategy(s)

    # get imports for the target of builds(), and recurse into the argument strategies
    if isinstance(strategy, BuildsStrategy):
        imports |= _imports_for_object(strategy.target)
        for s in strategy.args:
            imports |= _imports_for_strategy(s)
        for s in strategy.kwargs.values():
            imports |= _imports_for_strategy(s)

    # sampled_from() elements and lists() element strategies may need imports too
    if isinstance(strategy, SampledFromStrategy):
        for obj in strategy.elements:
            imports |= _imports_for_object(obj)

    if isinstance(strategy, ListStrategy):
        imports |= _imports_for_strategy(strategy.element_strategy)

    return imports
def _valid_syntax_repr(strategy):
    """Return `(imports, source)` where `source` is a compilable strategy repr.

    Strings are passed straight through with no imports.  If the repr cannot
    be compiled, or building it raises RecursionError or InvalidArgument, we
    return an empty import set and the placeholder `"nothing()"`.
    """
    # For binary_op, we pass a variable name - so pass it right back again.
    if isinstance(strategy, str):
        return set(), strategy
    # Flatten and de-duplicate any one_of strategies, whether that's from resolving
    # a Union type or combining inputs to multiple functions.
    try:
        if isinstance(strategy, OneOfStrategy):
            seen = set()
            elems = []
            for s in strategy.element_strategies:
                # Skip branches which only sample `os.environ`.
                if isinstance(s, SampledFromStrategy) and s.elements == (os.environ,):
                    continue
                # Branches are de-duplicated by their repr.
                if repr(s) not in seen:
                    elems.append(s)
                    seen.add(repr(s))
            strategy = st.one_of(elems or st.nothing())
        # Trivial special case because the wrapped repr for text() is terrible.
        if strategy == st.text().wrapped_strategy:
            return set(), "text()"
        # Return a syntactically-valid strategy repr, including fixing some
        # strategy reprs and replacing invalid syntax reprs with `"nothing()"`.
        # String-replace to hide the special case in from_type() for Decimal('snan')
        r = repr(strategy).replace(".filter(_can_hash)", "")
        # Replace <unknown> with ... in confusing lambdas
        r = re.sub(r"(lambda.*?: )(<unknown>)([,)])", r"\1...\3", r)
        # Raises SyntaxError, handled below, if the repr is not valid Python.
        compile(r, "<string>", "eval")
        # Finally, try to work out the imports we need for builds(), .map(),
        # .filter(), and .flatmap() to work without NameError
        imports = {i for i in _imports_for_strategy(strategy) if i[1] in r}
        return imports, r
    except (SyntaxError, RecursionError, InvalidArgument):
        return set(), "nothing()"
# When we ghostwrite for a module, we want to treat that as the __module__ for
# each function, rather than whichever internal file it was actually defined in.
# Maps an object to the name of the module it should be imported from; entries
# are added by `_get_module_helper` and `_get_module` and reused on later lookups.
KNOWN_FUNCTION_LOCATIONS: Dict[object, str] = {}
def _get_module_helper(obj):
    """Return the shortest ancestor of `obj.__module__` which exposes `obj`.

    Each prefix of the dotted module name is tried in turn, shortest first,
    looking for an already-imported module whose attribute `obj.__name__` is
    `obj` itself.  A hit is recorded in KNOWN_FUNCTION_LOCATIONS.  If no
    prefix matches, the literal `__module__` is returned.

    The goal is to show the location from which obj should usually be
    accessed, rather than what we assume is an internal submodule which
    defined it.
    """
    dotted = obj.__module__
    # Slice ends for every ancestor package, then None for the full name.
    cut_points = [position for position, char in enumerate(dotted) if char == "."]
    cut_points.append(None)
    for cut in cut_points:
        ancestor = dotted[:cut]
        exported = getattr(sys.modules.get(ancestor), obj.__name__, None)
        if exported is obj:
            KNOWN_FUNCTION_LOCATIONS[obj] = ancestor
            return ancestor
    return dotted
def _get_module(obj):
    """Return the name of the module from which `obj` should be imported.

    Known locations are served from KNOWN_FUNCTION_LOCATIONS.  Otherwise we
    defer to `_get_module_helper`.  If that raises AttributeError and `obj`
    looks like a ufunc, we search every imported module for one which exposes
    `obj` under its own name.

    Raises AttributeError for non-ufunc objects missing the attributes the
    helper needs, and RuntimeError if no imported module exposes the ufunc.
    """
    if obj in KNOWN_FUNCTION_LOCATIONS:
        return KNOWN_FUNCTION_LOCATIONS[obj]
    try:
        return _get_module_helper(obj)
    except AttributeError:
        if not _is_probably_ufunc(obj):
            raise
    # Sort by dotted-name components so that parent packages are tried before
    # their submodules.
    for module_name in sorted(sys.modules, key=lambda n: tuple(n.split("."))):
        if obj is getattr(sys.modules[module_name], obj.__name__, None):
            KNOWN_FUNCTION_LOCATIONS[obj] = module_name
            return module_name
    raise RuntimeError(f"Could not find module for ufunc {obj.__name__} ({obj!r})")
def _get_qualname(obj, include_module=False):
    """Return the qualified name of `obj`, made safe for use in identifiers.

    Angle brackets (as in `.<locals>.`) become underscores and spaces are
    removed.  With `include_module=True` the result is prefixed with the
    module reported by `_get_module(obj)`.
    """
    raw_name = getattr(obj, "__qualname__", obj.__name__)
    # One pass: "<" and ">" map to "_", and " " is deleted.
    cleaned = raw_name.translate(str.maketrans("<>", "__", " "))
    if not include_module:
        return cleaned
    return _get_module(obj) + "." + cleaned
def _write_call(
    func: Callable, *pass_variables: str, except_: Except, assign: str = ""
) -> str:
    """Write a call to `func` with explicit and implicit arguments.

    Each name in `pass_variables` is paired, in order, with a parameter of
    `func`; the remaining parameters are passed as variables named after
    themselves.  `except_` is a required keyword argument.

    >>> _write_call(sorted, "my_seq", "func", except_=())
    "builtins.sorted(my_seq, key=func, reverse=reverse)"
    >>> _write_call(f, assign="var1", except_=())
    "var1 = f()"

    The fancy part is that we'll check the docstring for any known exceptions
    which `func` might raise, and catch-and-reject on them... *unless* they're
    subtypes of `except_`, which will be handled in an outer try-except block.
    """
    # Positional-only parameters are written bare; all others as `name=value`.
    args = ", ".join(
        (v or p.name)
        if p.kind is inspect.Parameter.POSITIONAL_ONLY
        else f"{p.name}={v or p.name}"
        for v, p in zip_longest(pass_variables, _get_params(func).values())
    )
    call = f"{_get_qualname(func, include_module=True)}({args})"
    if assign:
        call = f"{assign} = {call}"
    raises = _exceptions_from_docstring(getattr(func, "__doc__", "") or "")
    # Exceptions already covered by `except_` do not need their own handler.
    exnames = [ex.__name__ for ex in raises if not issubclass(ex, except_)]
    if not exnames:
        return call
    return SUPPRESS_BLOCK.format(
        test_body=indent(call, prefix=" "),
        exceptions="(" + ", ".join(exnames) + ")" if len(exnames) > 1 else exnames[0],
    )
def _st_strategy_names(s: str) -> str:
    """Prefix every strategy call `name(` in `s` with `st.`.

    The alternation lists longer names first and is anchored on a word
    boundary, which avoids problems with frozensets() matching sets() too.
    """
    longest_first = sorted(st.__all__, key=len, reverse=True)
    names = "|".join(longest_first)
    strategy_call = re.compile(rf"\b(?:{names})\(")
    return strategy_call.sub(r"st.\g<0>", s)
def _make_test_body(
    *funcs: Callable,
    ghost: str,
    test_body: str,
    except_: Tuple[Type[Exception], ...],
    assertions: str = "",
    style: str,
    given_strategies: Optional[Mapping[str, Union[str, st.SearchStrategy]]] = None,
    imports: Optional[ImportSet] = None,
) -> Tuple[ImportSet, str]:
    """Return `(imports, source)` for one ghostwritten test of `funcs`.

    `ghost` names the kind of test and becomes part of the test name.
    `test_body` is the code to run, wrapped in a try/except which rejects on
    `except_` if any are given, and followed by `assertions`.  `style` selects
    a pytest-style function or a method on a `unittest.TestCase` subclass.
    If `given_strategies` is not supplied, strategies are worked out from the
    signatures of `funcs`.
    """
    # A set of modules to import - we might add to this later.  The import code
    # is written later, so we can have one import section for multiple magic()
    # test functions.
    imports = (imports or set()) | {_get_module(f) for f in funcs}

    # Get strategies for all the arguments to each function we're testing.
    with _with_any_registered():
        given_strategies = given_strategies or _get_strategies(
            *funcs, pass_result_to_next_func=ghost in ("idempotent", "roundtrip")
        )
        # Each entry is (argument name, imports for its strategy, strategy source).
        reprs = [((k,) + _valid_syntax_repr(v)) for k, v in given_strategies.items()]
        imports = imports.union(*(imp for _, imp, _ in reprs))
        given_args = ", ".join(f"{k}={v}" for k, _, v in reprs)
    given_args = _st_strategy_names(given_args)

    if except_:
        # Convert to strings, either builtin names or qualified names.
        imp, exc_string = _exception_string(except_)
        imports.update(imp)
        # And finally indent the existing test body into a try-except block
        # which catches these exceptions and calls `hypothesis.reject()`.
        test_body = SUPPRESS_BLOCK.format(
            test_body=indent(test_body, prefix=" "),
            exceptions=exc_string,
        )

    if assertions:
        test_body = f"{test_body}\n{assertions}"

    # Indent our test code to form the body of a function or method.
    argnames = (["self"] if style == "unittest" else []) + list(given_strategies)
    body = TEMPLATE.format(
        given_args=given_args,
        test_kind=ghost,
        func_name="_".join(_get_qualname(f).replace(".", "_") for f in funcs),
        arg_names=", ".join(argnames),
        test_body=indent(test_body, prefix=" "),
    )

    # For unittest-style, indent method further into a class body
    if style == "unittest":
        imports.add("unittest")
        body = "class Test{}{}(unittest.TestCase):\n{}".format(
            ghost.title(),
            "".join(_get_qualname(f).replace(".", "").title() for f in funcs),
            indent(body, " "),
        )

    return imports, body
def _make_test(imports: ImportSet, body: str) -> str:
    """Render a complete test module from a set of imports and test code.

    ``imports`` contains either plain module names (``str``), rendered as
    ``import <module>``, or ``(module, name)`` tuples, rendered as
    ``from <module> import <name>``.  ``body`` is the already-generated test
    code.  The import header and the body are concatenated and the result is
    formatted with ``black`` before being returned.

    The ``imports`` argument is not modified; extra imports required by the
    body are added to a private copy.
    """
    # Work on a copy so the caller's set is never mutated as a side effect.
    imports = set(imports)

    # Discarding "builtins." and "__main__" probably isn't particularly useful
    # for user code, but important for making a good impression in demos.
    body = body.replace("builtins.", "").replace("__main__.", "")

    # Add whichever imports the generated body relies on.
    if "st.from_type(typing." in body:
        imports.add("typing")
    imports |= {("hypothesis", "given"), ("hypothesis", "strategies as st")}
    if " reject()\n" in body:
        imports.add(("hypothesis", "reject"))

    do_not_import = {"builtins", "__main__"}
    # Plain strings become ``import x`` lines ...
    direct = {f"import {i}" for i in imports - do_not_import if isinstance(i, str)}
    # ... while (module, name) tuples are grouped by module so that each
    # module gets a single ``from x import a, b`` line.
    from_imports = defaultdict(set)
    for module, name in {i for i in imports if isinstance(i, tuple)}:
        # Names exported in ``st.__all__`` are skipped when they come from a
        # ``hypothesis.strategies`` module, so no ``from`` import is written.
        if not (module.startswith("hypothesis.strategies") and name in st.__all__):
            from_imports[module].add(name)
    from_ = {
        "from {} import {}".format(module, ", ".join(sorted(names)))
        for module, names in from_imports.items()
        if isinstance(module, str) and module not in do_not_import
    }
    header = IMPORT_SECTION.format(imports="\n".join(sorted(direct) + sorted(from_)))

    # Leave a reminder for the user about every placeholder strategy.
    nothings = body.count("st.nothing()")
    if nothings == 1:
        header += "# TODO: replace st.nothing() with an appropriate strategy\n\n"
    elif nothings > 1:
        header += "# TODO: replace st.nothing() with appropriate strategies\n\n"
    return black.format_str(header + body, mode=black.FileMode())
def _is_probably_ufunc(obj):
# See https://numpy.org/doc/stable/reference/ufuncs.html - there doesn't seem
# to be an upstream function to detect this, so we just guess.
has_attributes = "nin nout nargs ntypes types identity signature".split()
return callable(obj) and all(hasattr(obj, name) for name in has_attributes)
# If we have a pair of functions where one name matches the regex and the second
# is the result of formatting the template with matched groups, our magic()
# ghostwriter will write a roundtrip test for them. Additional patterns welcome.
#
# Each entry is ``(regex, template)``.  magic() tries the entries in the order
# listed here: the regex is applied with ``re.fullmatch`` to the last dotted
# component of a function's name, and the template is filled in positionally
# with ``match.groups()`` to produce the expected name of the inverse function.
ROUNDTRIP_PAIRS = (
    # Defined prefix, shared postfix. The easy cases.
    (r"write(.+)", "read{}"),
    (r"save(.+)", "load{}"),
    (r"dump(.+)", "load{}"),
    (r"to(.+)", "from{}"),
    # Known stem, maybe matching prefixes, maybe matching postfixes.
    (r"(.*)en(.+)", "{}de{}"),
    # Shared postfix, prefix only on the "inverse" function.
    (r"(.+)", "de{}"),
    (r"(?!safe)(.+)", "un{}"),  # safe_load / unsafe_load isn't a roundtrip
    # a2b_postfix and b2a_postfix. Not a fan of this pattern, but it's pretty
    # common in code imitating a C API - see e.g. the stdlib binascii module.
    # NOTE(review): when the optional third group does not participate in the
    # match, ``match.groups()`` yields None for it, so the formatted inverse
    # name would end in "None" - confirm whether that is intended.
    (r"(.+)2(.+?)(_.+)?", "{1}2{0}{2}"),
    # Common in e.g. the colorsys module.
    (r"(.+)_to_(.+)", "{1}_to_{0}"),
    # Sockets patterns.
    (r"(inet|if)_(.+)to(.+)", "{0}_{2}to{1}"),
    (r"(\w)to(\w)(.+)", "{1}to{0}{2}"),
    (r"send(.+)", "recv{}"),
    (r"send(.+)", "receive{}"),
)
def magic(
*modules_or_functions: Union[Callable, types.ModuleType],
except_: Except = (),
style: str = "pytest",
) -> str:
"""Guess which ghostwriters to use, for a module or collection of functions.
As for all ghostwriters, the ``except_`` argument should be an
:class:`python:Exception` or tuple of exceptions, and ``style`` may be either
``"pytest"`` to write test functions or ``"unittest"`` to write test methods
and :class:`~python:unittest.TestCase`.
After finding the public functions attached to any modules, the ``magic``
ghostwriter looks for pairs of functions to pass to :func:`~roundtrip`,
then checks for :func:`~binary_operation` and :func:`~ufunc` functions,
and any others are passed to :func:`~fuzz`.
For example, try :command:`hypothesis write gzip` on the command line!
"""
except_ = _check_except(except_)
_check_style(style)
if not modules_or_functions:
raise InvalidArgument("Must pass at least one function or module to test.")
functions = set()
for thing in modules_or_functions:
if callable(thing):
functions.add(thing)
# class need to be added for exploration
if inspect.isclass(thing):
funcs: List[Optional[Any]] = [thing]
else:
funcs = []
elif isinstance(thing, types.ModuleType):
if hasattr(thing, "__all__"):
funcs = [getattr(thing, name, None) for name in thing.__all__]
elif hasattr(thing, "__package__"):
pkg = thing.__package__
funcs = [
v
for k, v in vars(thing).items()
if callable(v)
and not is_mock(v)
and ((not pkg) or getattr(v, "__module__", pkg).startswith(pkg))
and not k.startswith("_")
]
if pkg and any(getattr(f, "__module__", pkg) == pkg for f in funcs):
funcs = [f for f in funcs if getattr(f, "__module__", pkg) == pkg]
else:
raise InvalidArgument(f"Can't test non-module non-callable {thing!r}")
for f in list(funcs):
if inspect.isclass(f):
funcs += [
v.__get__(f)
for k, v in vars(f).items()
if hasattr(v, "__func__")
and not is_mock(v)
and not k.startswith("_")
]
for f in funcs:
try:
if (
(not is_mock(f))
and callable(f)
and _get_params(f)
and not isinstance(f, enum.EnumMeta)
):
functions.add(f)
if getattr(thing, "__name__", None):
if inspect.isclass(thing):
KNOWN_FUNCTION_LOCATIONS[f] = _get_module_helper(thing)
else:
KNOWN_FUNCTION_LOCATIONS[f] = thing.__name__
except (TypeError, ValueError):
pass
imports = set()
parts = []
def make_(how, *args, **kwargs):
imp, body = how(*args, **kwargs, except_=except_, style=style)
imports.update(imp)
parts.append(body)
by_name = {}
for f in functions:
try:
_get_params(f)
by_name[_get_qualname(f, include_module=True)] = f
except Exception:
# usually inspect.signature on C code such as socket.inet_aton, sometimes
# e.g. Pandas 'CallableDynamicDoc' object has no attribute '__name__'
pass
if not by_name:
return (
f"# Found no testable functions in\n"
f"# {functions!r} from {modules_or_functions}\n"
)
# Look for pairs of functions that roundtrip, based on known naming patterns.
for writename, readname in ROUNDTRIP_PAIRS:
for name in sorted(by_name):
match = re.fullmatch(writename, name.split(".")[-1])
if match:
inverse_name = readname.format(*match.groups())
for other in sorted(
n for n in by_name if n.split(".")[-1] == inverse_name
):
make_(_make_roundtrip_body, (by_name.pop(name), by_name.pop(other)))
break
else:
try:
other_func = getattr(
sys.modules[_get_module(by_name[name])],
inverse_name,
)
_get_params(other_func) # we want to skip if this fails
except Exception:
pass
else:
make_(_make_roundtrip_body, (by_name.pop(name), other_func))