From ea3be23ab6fe96f798e4de9171ff33d184414417 Mon Sep 17 00:00:00 2001 From: Matt Schwager Date: Tue, 18 Feb 2020 12:08:18 -0700 Subject: [PATCH] Fix #18, cache namespace results and minimize kwarg checks by grouping similar rules 5x speed up across all rules: ``` --------------------------------------------- benchmark: 2 tests --------------------------------------------- Name (time in s) Mean StdDev Median Outliers Rounds -------------------------------------------------------------------------------------------------------------- test_benchmark_run (0003_0647052) 28.9291 (5.38) 1.1912 (11.90) 29.2030 (5.42) 2;0 5 test_benchmark_run (NOW) 5.3746 (1.0) 0.1001 (1.0) 5.3912 (1.0) 2;0 5 -------------------------------------------------------------------------------------------------------------- ``` 20x+ speed up across kwarg rules: ``` --------------------------------- benchmark "(,)": 16 tests -------------------------------- Name (time in s) Mean StdDev Median Outliers Rounds ----------------------------------------------------------------------------------------------------------------------------------------------------------- test_benchmark_individual[DUO116-BadSubprocessUseLinter] (0002_0647052) 1.8720 (13.57) 0.0795 (22.15) 1.8482 (13.51) 2;0 5 test_benchmark_individual[DUO116-BadSubprocessUseLinter] (NOW) 0.2696 (1.95) 0.0068 (1.91) 0.2676 (1.96) 1;0 5 test_benchmark_individual[DUO123-BadRequestsUseLinter] (0002_0647052) 2.8741 (20.83) 0.0036 (1.0) 2.8756 (21.02) 2;0 5 test_benchmark_individual[DUO123-BadRequestsUseLinter] (NOW) 0.4896 (3.55) 0.0327 (9.10) 0.4977 (3.64) 1;0 5 test_benchmark_individual[DUO124-BadXmlrpcUseLinter] (0002_0647052) 0.5639 (4.09) 0.0356 (9.92) 0.5496 (4.02) 2;0 5 test_benchmark_individual[DUO124-BadXmlrpcUseLinter] (NOW) 0.1380 (1.0) 0.0145 (4.03) 0.1368 (1.0) 2;0 7 test_benchmark_individual[DUO127-BadDuoClientUseLinter] (0002_0647052) 2.9646 (21.49) 0.0370 (10.30) 2.9556 (21.60) 1;1 5 test_benchmark_individual[DUO127-BadDuoClientUseLinter] (NOW) 0.4263 (3.09) 0.0182 (5.06) 0.4167 (3.05) 2;0 5 test_benchmark_individual[DUO128-BadOneLoginKwargUseLinter] (0002_0647052) 4.3841 (31.78) 0.0710 (19.78) 4.3965 (32.13) 2;0 5 test_benchmark_individual[DUO128-BadOneLoginKwargUseLinter] (NOW) 0.3440 (2.49) 0.0061 (1.71) 0.3411 (2.49) 1;0 5 test_benchmark_individual[DUO132-BadUrllib3KwargUseLinter] (0002_0647052) 2.0491 (14.85) 0.3020 (84.18) 1.8836 (13.77) 1;0 5 test_benchmark_individual[DUO132-BadUrllib3KwargUseLinter] (NOW) 0.2857 (2.07) 0.0049 (1.37) 0.2848 (2.08) 3;0 5 test_benchmark_individual[DUO135-BadDefusedxmlUseLinter] (0002_0647052) 10.5807 (76.69) 0.7142 (199.09) 10.2502 (74.91) 1;0 5 test_benchmark_individual[DUO135-BadDefusedxmlUseLinter] (NOW) 0.5084 (3.69) 0.0054 (1.51) 0.5095 (3.72) 2;0 5 test_benchmark_individual[DUO137-BadItsDangerousKwargUseLinter] (0002_0647052) 2.3554 (17.07) 0.1115 (31.09) 2.3026 (16.83) 1;0 5 test_benchmark_individual[DUO137-BadItsDangerousKwargUseLinter] (NOW) 0.3476 (2.52) 0.0045 (1.25) 0.3476 (2.54) 2;0 5 ----------------------------------------------------------------------------------------------------------------------------------------------------------- ``` --- CHANGELOG.md | 3 ++ dlint/linters/helpers/bad_kwarg_use.py | 63 ++++++++++++++++++++++---- dlint/namespace.py | 13 ++++++ tests/test_benchmark/conftest.py | 10 ++++ tests/test_benchmark/test_benchmark.py | 5 +- 5 files changed, 84 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 663cc2a..b1ba02e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### Changed +- Cache namespace results and minimize kwarg checks by grouping similar rules - ~500% speed up ([#18](https://github.com/dlint-py/dlint/issues/18)) + ### Fixed - The `--print-dlint-linters` flag on Windows ([#17](https://github.com/dlint-py/dlint/issues/17)) diff --git a/dlint/linters/helpers/bad_kwarg_use.py b/dlint/linters/helpers/bad_kwarg_use.py index 687ea38..e45b5c7 100644 --- a/dlint/linters/helpers/bad_kwarg_use.py +++ b/dlint/linters/helpers/bad_kwarg_use.py @@ -9,6 +9,7 @@ import abc import ast +import itertools from .. import base from ... import tree @@ -20,6 +21,56 @@ class BadKwargUseLinter(base.BaseLinter, util.ABC): lint rules that block bad kwarg use. """ + def __init__(self, *args, **kwargs): + self.minimized_bad_kwarg_func = None + + module_path_grouped = [ + (k, list(v)) + for k, v in itertools.groupby( + sorted(self.kwargs, key=lambda k: k["module_path"]), + key=lambda k: k["module_path"] + ) + ] + + def minimized_illegal_module_imported(module_path, node): + return any( + self.namespace.illegal_module_imported( + module_path, + kwarg["module_path"] + ) + and kwarg["predicate"](node, kwarg["kwarg_name"]) + for illegal_module_path, kwargs in module_path_grouped + for kwarg in kwargs + ) + + kwarg_predicate_grouped = [ + (k, list(v)) + for k, v in itertools.groupby( + sorted(self.kwargs, key=lambda k: (k["kwarg_name"], id(k["predicate"]))), + key=lambda k: (k["kwarg_name"], id(k["predicate"])) + ) + ] + + def minimized_kwarg_predicate(module_path, node): + return any( + self.namespace.illegal_module_imported( + module_path, + kwarg["module_path"] + ) + and kwarg["predicate"](node, kwarg["kwarg_name"]) + for kwarg_predicate_tuple, kwargs in kwarg_predicate_grouped + for kwarg in kwargs + ) + + # Minimize kwarg checks by grouping similar rules + if (len(kwarg_predicate_grouped) < len(self.kwargs) + and len(module_path_grouped) == len(self.kwargs)): + self.minimized_bad_kwarg_func = minimized_kwarg_predicate + else: + self.minimized_bad_kwarg_func = minimized_illegal_module_imported + + super(BadKwargUseLinter, self).__init__(*args, **kwargs) + @property @abc.abstractmethod def kwargs(self): @@ -45,15 +96,9 @@ def visit_Call(self, node): if not isinstance(node.func, (ast.Attribute, ast.Name)): return - bad_kwarg = any( - ( - self.namespace.illegal_module_imported( - tree.module_path_str(node.func), - kwarg["module_path"] - ) - and kwarg["predicate"](node, kwarg["kwarg_name"]) - ) - for kwarg in self.kwargs + bad_kwarg = self.minimized_bad_kwarg_func( + tree.module_path_str(node.func), + node ) if bad_kwarg: diff --git a/dlint/namespace.py b/dlint/namespace.py index af55ef5..5b77608 100644 --- a/dlint/namespace.py +++ b/dlint/namespace.py @@ -10,6 +10,17 @@ import ast import copy +try: + from functools import lru_cache +except ImportError: + # Sorry Python 2 users, it's time to upgrade + def lru_cache(*args, **kwargs): + def decorator(function): + def noop(*inner_args, **inner_kwargs): + return function(*inner_args, **inner_kwargs) + return noop + return decorator + from . import util @@ -37,6 +48,7 @@ def from_module_node(cls, module_node): return cls(imports, from_imports) + @lru_cache(maxsize=1024) def name_imported(self, name): def alias_includes_name(alias): return ( @@ -58,6 +70,7 @@ def asname_to_name(self, asname): return None + @lru_cache(maxsize=1024) def illegal_module_imported(self, module_path, illegal_module_path): modules = module_path.split('.') illegal_modules = illegal_module_path.split('.') diff --git a/tests/test_benchmark/conftest.py b/tests/test_benchmark/conftest.py index 73bd11a..2166fa1 100644 --- a/tests/test_benchmark/conftest.py +++ b/tests/test_benchmark/conftest.py @@ -20,6 +20,11 @@ def pytest_addoption(parser): type=argparse.FileType("r"), help="Benchmark Dlint against this Python file." ) + parser.addoption( + "--benchmark-group-base-class", + action="store_true", + help="Group Dlint benchmark results by base class." + ) @pytest.fixture @@ -31,3 +36,8 @@ def benchmark_py_file(request): fd.seek(0) return ast.parse(fd.read()) + + +@pytest.fixture +def benchmark_group_base_class(request): + return request.config.getoption("--benchmark-group-base-class") diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py index b3980ed..57334c2 100644 --- a/tests/test_benchmark/test_benchmark.py +++ b/tests/test_benchmark/test_benchmark.py @@ -47,7 +47,10 @@ def test_benchmark_run(benchmark_py_file, benchmark): sorted(extension.dlint.linters.ALL, key=lambda l: l._code), ids=lambda l: "{}-{}".format(l._code, l.__name__) ) -def test_benchmark_individual(benchmark_py_file, benchmark, linter): +def test_benchmark_individual(benchmark_py_file, benchmark_group_base_class, benchmark, linter): + if benchmark_group_base_class: + benchmark.group = str(linter.__bases__) + ext_class = get_single_linter_extension(linter) ext = ext_class(benchmark_py_file, "unused")