From ea3be23ab6fe96f798e4de9171ff33d184414417 Mon Sep 17 00:00:00 2001
From: Matt Schwager <schwag09@gmail.com>
Date: Tue, 18 Feb 2020 12:08:18 -0700
Subject: [PATCH] Fix #18, cache namespace results and minimize kwarg checks by
 grouping similar rules

5x speed up across all rules:

```
--------------------------------------------- benchmark: 2 tests ---------------------------------------------
Name (time in s)                         Mean            StdDev             Median            Outliers  Rounds
--------------------------------------------------------------------------------------------------------------
test_benchmark_run (0003_0647052)     28.9291 (5.38)     1.1912 (11.90)    29.2030 (5.42)          2;0       5
test_benchmark_run (NOW)               5.3746 (1.0)      0.1001 (1.0)       5.3912 (1.0)           2;0       5
--------------------------------------------------------------------------------------------------------------
```

20x+ speed up across kwarg rules:

```
--------------------------------- benchmark "(<class 'dlint.linters.helpers.bad_kwarg_use.BadKwargUseLinter'>,)": 16 tests --------------------------------
Name (time in s)                                                                      Mean            StdDev             Median            Outliers  Rounds
-----------------------------------------------------------------------------------------------------------------------------------------------------------
test_benchmark_individual[DUO116-BadSubprocessUseLinter] (0002_0647052)             1.8720 (13.57)    0.0795 (22.15)     1.8482 (13.51)         2;0       5
test_benchmark_individual[DUO116-BadSubprocessUseLinter] (NOW)                      0.2696 (1.95)     0.0068 (1.91)      0.2676 (1.96)          1;0       5
test_benchmark_individual[DUO123-BadRequestsUseLinter] (0002_0647052)               2.8741 (20.83)    0.0036 (1.0)       2.8756 (21.02)         2;0       5
test_benchmark_individual[DUO123-BadRequestsUseLinter] (NOW)                        0.4896 (3.55)     0.0327 (9.10)      0.4977 (3.64)          1;0       5
test_benchmark_individual[DUO124-BadXmlrpcUseLinter] (0002_0647052)                 0.5639 (4.09)     0.0356 (9.92)      0.5496 (4.02)          2;0       5
test_benchmark_individual[DUO124-BadXmlrpcUseLinter] (NOW)                          0.1380 (1.0)      0.0145 (4.03)      0.1368 (1.0)           2;0       7
test_benchmark_individual[DUO127-BadDuoClientUseLinter] (0002_0647052)              2.9646 (21.49)    0.0370 (10.30)     2.9556 (21.60)         1;1       5
test_benchmark_individual[DUO127-BadDuoClientUseLinter] (NOW)                       0.4263 (3.09)     0.0182 (5.06)      0.4167 (3.05)          2;0       5
test_benchmark_individual[DUO128-BadOneLoginKwargUseLinter] (0002_0647052)          4.3841 (31.78)    0.0710 (19.78)     4.3965 (32.13)         2;0       5
test_benchmark_individual[DUO128-BadOneLoginKwargUseLinter] (NOW)                   0.3440 (2.49)     0.0061 (1.71)      0.3411 (2.49)          1;0       5
test_benchmark_individual[DUO132-BadUrllib3KwargUseLinter] (0002_0647052)           2.0491 (14.85)    0.3020 (84.18)     1.8836 (13.77)         1;0       5
test_benchmark_individual[DUO132-BadUrllib3KwargUseLinter] (NOW)                    0.2857 (2.07)     0.0049 (1.37)      0.2848 (2.08)          3;0       5
test_benchmark_individual[DUO135-BadDefusedxmlUseLinter] (0002_0647052)            10.5807 (76.69)    0.7142 (199.09)   10.2502 (74.91)         1;0       5
test_benchmark_individual[DUO135-BadDefusedxmlUseLinter] (NOW)                      0.5084 (3.69)     0.0054 (1.51)      0.5095 (3.72)          2;0       5
test_benchmark_individual[DUO137-BadItsDangerousKwargUseLinter] (0002_0647052)      2.3554 (17.07)    0.1115 (31.09)     2.3026 (16.83)         1;0       5
test_benchmark_individual[DUO137-BadItsDangerousKwargUseLinter] (NOW)               0.3476 (2.52)     0.0045 (1.25)      0.3476 (2.54)          2;0       5
-----------------------------------------------------------------------------------------------------------------------------------------------------------
```
---
 CHANGELOG.md                           |  3 ++
 dlint/linters/helpers/bad_kwarg_use.py | 63 ++++++++++++++++++++++----
 dlint/namespace.py                     | 13 ++++++
 tests/test_benchmark/conftest.py       | 10 ++++
 tests/test_benchmark/test_benchmark.py |  5 +-
 5 files changed, 84 insertions(+), 10 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 663cc2a..b1ba02e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+### Changed
+- Cache namespace results and minimize kwarg checks by grouping similar rules - ~500% speed up ([#18](https://github.com/dlint-py/dlint/issues/18))
+
 ### Fixed
 - The `--print-dlint-linters` flag on Windows ([#17](https://github.com/dlint-py/dlint/issues/17))
 
diff --git a/dlint/linters/helpers/bad_kwarg_use.py b/dlint/linters/helpers/bad_kwarg_use.py
index 687ea38..e45b5c7 100644
--- a/dlint/linters/helpers/bad_kwarg_use.py
+++ b/dlint/linters/helpers/bad_kwarg_use.py
@@ -9,6 +9,7 @@
 
 import abc
 import ast
+import itertools
 
 from .. import base
 from ... import tree
@@ -20,6 +21,56 @@ class BadKwargUseLinter(base.BaseLinter, util.ABC):
     lint rules that block bad kwarg use.
     """
 
+    def __init__(self, *args, **kwargs):
+        self.minimized_bad_kwarg_func = None
+
+        module_path_grouped = [
+            (k, list(v))
+            for k, v in itertools.groupby(
+                sorted(self.kwargs, key=lambda k: k["module_path"]),
+                key=lambda k: k["module_path"]
+            )
+        ]
+
+        def minimized_illegal_module_imported(module_path, node):
+            return any(
+                self.namespace.illegal_module_imported(
+                    module_path,
+                    kwarg["module_path"]
+                )
+                and kwarg["predicate"](node, kwarg["kwarg_name"])
+                for illegal_module_path, kwargs in module_path_grouped
+                for kwarg in kwargs
+            )
+
+        kwarg_predicate_grouped = [
+            (k, list(v))
+            for k, v in itertools.groupby(
+                sorted(self.kwargs, key=lambda k: (k["kwarg_name"], id(k["predicate"]))),
+                key=lambda k: (k["kwarg_name"], id(k["predicate"]))
+            )
+        ]
+
+        def minimized_kwarg_predicate(module_path, node):
+            return any(
+                self.namespace.illegal_module_imported(
+                    module_path,
+                    kwarg["module_path"]
+                )
+                and kwarg["predicate"](node, kwarg["kwarg_name"])
+                for kwarg_predicate_tuple, kwargs in kwarg_predicate_grouped
+                for kwarg in kwargs
+            )
+
+        # Minimize kwarg checks by grouping similar rules
+        if (len(kwarg_predicate_grouped) < len(self.kwargs)
+                and len(module_path_grouped) == len(self.kwargs)):
+            self.minimized_bad_kwarg_func = minimized_kwarg_predicate
+        else:
+            self.minimized_bad_kwarg_func = minimized_illegal_module_imported
+
+        super(BadKwargUseLinter, self).__init__(*args, **kwargs)
+
     @property
     @abc.abstractmethod
     def kwargs(self):
@@ -45,15 +96,9 @@ def visit_Call(self, node):
         if not isinstance(node.func, (ast.Attribute, ast.Name)):
             return
 
-        bad_kwarg = any(
-            (
-                self.namespace.illegal_module_imported(
-                    tree.module_path_str(node.func),
-                    kwarg["module_path"]
-                )
-                and kwarg["predicate"](node, kwarg["kwarg_name"])
-            )
-            for kwarg in self.kwargs
+        bad_kwarg = self.minimized_bad_kwarg_func(
+            tree.module_path_str(node.func),
+            node
         )
 
         if bad_kwarg:
diff --git a/dlint/namespace.py b/dlint/namespace.py
index af55ef5..5b77608 100644
--- a/dlint/namespace.py
+++ b/dlint/namespace.py
@@ -10,6 +10,17 @@
 import ast
 import copy
 
+try:
+    from functools import lru_cache
+except ImportError:
+    # Sorry Python 2 users, it's time to upgrade
+    def lru_cache(*args, **kwargs):
+        def decorator(function):
+            def noop(*inner_args, **inner_kwargs):
+                return function(*inner_args, **inner_kwargs)
+            return noop
+        return decorator
+
 from . import util
 
 
@@ -37,6 +48,7 @@ def from_module_node(cls, module_node):
 
         return cls(imports, from_imports)
 
+    @lru_cache(maxsize=1024)
     def name_imported(self, name):
         def alias_includes_name(alias):
             return (
@@ -58,6 +70,7 @@ def asname_to_name(self, asname):
 
         return None
 
+    @lru_cache(maxsize=1024)
     def illegal_module_imported(self, module_path, illegal_module_path):
         modules = module_path.split('.')
         illegal_modules = illegal_module_path.split('.')
diff --git a/tests/test_benchmark/conftest.py b/tests/test_benchmark/conftest.py
index 73bd11a..2166fa1 100644
--- a/tests/test_benchmark/conftest.py
+++ b/tests/test_benchmark/conftest.py
@@ -20,6 +20,11 @@ def pytest_addoption(parser):
         type=argparse.FileType("r"),
         help="Benchmark Dlint against this Python file."
     )
+    parser.addoption(
+        "--benchmark-group-base-class",
+        action="store_true",
+        help="Group Dlint benchmark results by base class."
+    )
 
 
 @pytest.fixture
@@ -31,3 +36,8 @@ def benchmark_py_file(request):
         fd.seek(0)
 
     return ast.parse(fd.read())
+
+
+@pytest.fixture
+def benchmark_group_base_class(request):
+    return request.config.getoption("--benchmark-group-base-class")
diff --git a/tests/test_benchmark/test_benchmark.py b/tests/test_benchmark/test_benchmark.py
index b3980ed..57334c2 100644
--- a/tests/test_benchmark/test_benchmark.py
+++ b/tests/test_benchmark/test_benchmark.py
@@ -47,7 +47,10 @@ def test_benchmark_run(benchmark_py_file, benchmark):
     sorted(extension.dlint.linters.ALL, key=lambda l: l._code),
     ids=lambda l: "{}-{}".format(l._code, l.__name__)
 )
-def test_benchmark_individual(benchmark_py_file, benchmark, linter):
+def test_benchmark_individual(benchmark_py_file, benchmark_group_base_class, benchmark, linter):
+    if benchmark_group_base_class:
+        benchmark.group = str(linter.__bases__)
+
     ext_class = get_single_linter_extension(linter)
     ext = ext_class(benchmark_py_file, "unused")