Skip to content

Commit

Permalink
Deduplicate module file paths to prevent redundant scans. (#7747)
Browse files Browse the repository at this point in the history
* Use dict for 'expand_modules' result rather than list.
With 'path' as the key, we get deduplication for free
and do not need to reprocess the list for deduplication later.
* Fix deduplication to account for CLI args marker.
* Fix corner case with CLI arg flag handling during deduplication.
* Add 'deduplication' to custom Pyenchant dict.

Closes #6242
Closes #4053

Co-authored-by: Eric McDonald <emcd@users.noreply.github.com>
Co-authored-by: Pierre Sassoulas <pierre.sassoulas@gmail.com>
  • Loading branch information
3 people committed Nov 23, 2022
1 parent dff0adb commit 1baf4be
Show file tree
Hide file tree
Showing 6 changed files with 107 additions and 32 deletions.
1 change: 1 addition & 0 deletions .pyenchant_pylint_custom_dict.txt
Expand Up @@ -73,6 +73,7 @@ cyclomatic
dataclass
datetime
debian
deduplication
deepcopy
defaultdicts
defframe
Expand Down
4 changes: 4 additions & 0 deletions doc/whatsnew/fragments/6242.bugfix
@@ -0,0 +1,4 @@
Pylint will now filter duplicates given to it before linting. The output should
be the same whether a file is given/discovered multiple times or not.

Closes #6242, #4053
30 changes: 16 additions & 14 deletions pylint/lint/expand_modules.py
Expand Up @@ -66,11 +66,11 @@ def expand_modules(
ignore_list: list[str],
ignore_list_re: list[Pattern[str]],
ignore_list_paths_re: list[Pattern[str]],
) -> tuple[list[ModuleDescriptionDict], list[ErrorDescriptionDict]]:
) -> tuple[dict[str, ModuleDescriptionDict], list[ErrorDescriptionDict]]:
"""Take a list of files/modules/packages and return the list of tuple
(file, module name) which have to be actually checked.
"""
result: list[ModuleDescriptionDict] = []
result: dict[str, ModuleDescriptionDict] = {}
errors: list[ErrorDescriptionDict] = []
path = sys.path.copy()

Expand Down Expand Up @@ -120,15 +120,17 @@ def expand_modules(
is_namespace = modutils.is_namespace(spec)
is_directory = modutils.is_directory(spec)
if not is_namespace:
result.append(
{
if filepath in result:
# Always set arg flag if module explicitly given.
result[filepath]["isarg"] = True
else:
result[filepath] = {
"path": filepath,
"name": modname,
"isarg": True,
"basepath": filepath,
"basename": modname,
}
)
has_init = (
not (modname.endswith(".__init__") or modname == "__init__")
and os.path.basename(filepath) == "__init__.py"
Expand All @@ -148,13 +150,13 @@ def expand_modules(
subfilepath, is_namespace, path=additional_search_path
)
submodname = ".".join(modpath)
result.append(
{
"path": subfilepath,
"name": submodname,
"isarg": False,
"basepath": filepath,
"basename": modname,
}
)
# Preserve arg flag if module is also explicitly given.
isarg = subfilepath in result and result[subfilepath]["isarg"]
result[subfilepath] = {
"path": subfilepath,
"name": submodname,
"isarg": isarg,
"basepath": filepath,
"basename": modname,
}
return result, errors
4 changes: 2 additions & 2 deletions pylint/lint/pylinter.py
Expand Up @@ -871,12 +871,12 @@ def _iterate_file_descrs(
The returned generator yield one item for each Python module that should be linted.
"""
for descr in self._expand_files(files_or_modules):
for descr in self._expand_files(files_or_modules).values():
name, filepath, is_arg = descr["name"], descr["path"], descr["isarg"]
if self.should_analyze_file(name, filepath, is_argument=is_arg):
yield FileItem(name, filepath, descr["basename"])

def _expand_files(self, modules: Sequence[str]) -> list[ModuleDescriptionDict]:
def _expand_files(self, modules: Sequence[str]) -> dict[str, ModuleDescriptionDict]:
"""Get modules and errors from a list of modules and handle errors."""
result, errors = expand_modules(
modules,
Expand Down
77 changes: 61 additions & 16 deletions tests/lint/unittest_expand_modules.py
Expand Up @@ -45,6 +45,14 @@ def test__is_in_ignore_list_re_match() -> None:
"path": EXPAND_MODULES,
}

this_file_from_init_deduplicated = {
"basename": "lint",
"basepath": INIT_PATH,
"isarg": True,
"name": "lint.unittest_expand_modules",
"path": EXPAND_MODULES,
}

unittest_lint = {
"basename": "lint",
"basepath": INIT_PATH,
Expand Down Expand Up @@ -77,7 +85,6 @@ def test__is_in_ignore_list_re_match() -> None:
"path": str(TEST_DIRECTORY / "lint/test_caching.py"),
}


init_of_package = {
"basename": "lint",
"basepath": INIT_PATH,
Expand All @@ -87,6 +94,20 @@ def test__is_in_ignore_list_re_match() -> None:
}


def _list_expected_package_modules(
deduplicating: bool = False,
) -> tuple[dict[str, object], ...]:
"""Generates reusable list of modules for our package."""
return (
init_of_package,
test_caching,
test_pylinter,
test_utils,
this_file_from_init_deduplicated if deduplicating else this_file_from_init,
unittest_lint,
)


class TestExpandModules(CheckerTestCase):
"""Test the expand_modules function while allowing options to be set."""

Expand All @@ -102,17 +123,13 @@ class Checker(BaseChecker):
@pytest.mark.parametrize(
"files_or_modules,expected",
[
([__file__], [this_file]),
([__file__], {this_file["path"]: this_file}),
(
[str(Path(__file__).parent)],
[
init_of_package,
test_caching,
test_pylinter,
test_utils,
this_file_from_init,
unittest_lint,
],
{
module["path"]: module # pylint: disable=unsubscriptable-object
for module in _list_expected_package_modules()
},
),
],
)
Expand All @@ -126,19 +143,48 @@ def test_expand_modules(self, files_or_modules, expected):
ignore_list_re,
self.linter.config.ignore_paths,
)
modules.sort(key=lambda d: d["name"])
assert modules == expected
assert not errors

@pytest.mark.parametrize(
"files_or_modules,expected",
[
([__file__], []),
([__file__, __file__], {this_file["path"]: this_file}),
(
[EXPAND_MODULES, str(Path(__file__).parent), EXPAND_MODULES],
{
module["path"]: module # pylint: disable=unsubscriptable-object
for module in _list_expected_package_modules(deduplicating=True)
},
),
],
)
@set_config(ignore_paths="")
def test_expand_modules_deduplication(
self, files_or_modules: list[str], expected
) -> None:
"""Test expand_modules deduplication."""
ignore_list: list[str] = []
ignore_list_re: list[re.Pattern[str]] = []
modules, errors = expand_modules(
files_or_modules,
ignore_list,
ignore_list_re,
self.linter.config.ignore_paths,
)
assert modules == expected
assert not errors

@pytest.mark.parametrize(
"files_or_modules,expected",
[
([__file__], {}),
(
[str(Path(__file__).parent)],
[
init_of_package,
],
{
module["path"]: module # pylint: disable=unsubscriptable-object
for module in (init_of_package,)
},
),
],
)
Expand All @@ -152,6 +198,5 @@ def test_expand_modules_with_ignore(self, files_or_modules, expected):
ignore_list_re,
self.linter.config.ignore_paths,
)
modules.sort(key=lambda d: d["name"])
assert modules == expected
assert not errors
23 changes: 23 additions & 0 deletions tests/test_pylint_runners.py
Expand Up @@ -5,8 +5,10 @@

from __future__ import annotations

import contextlib
import os
import pathlib
import shlex
import sys
from collections.abc import Callable
from io import BufferedReader
Expand Down Expand Up @@ -47,6 +49,27 @@ def test_runner_with_arguments(runner: Callable, tmpdir: LocalPath) -> None:
assert err.value.code == 0


def test_pylint_argument_deduplication(
tmpdir: LocalPath, tests_directory: pathlib.Path
) -> None:
"""Check that the Pylint runner does not over-report on duplicate
arguments.
See https://github.com/PyCQA/pylint/issues/6242 and
https://github.com/PyCQA/pylint/issues/4053
"""
filepath = str(tests_directory / "functional/t/too/too_many_branches.py")
testargs = shlex.split("--report n --score n --max-branches 13")
testargs.extend([filepath] * 4)
exit_stack = contextlib.ExitStack()
exit_stack.enter_context(tmpdir.as_cwd())
exit_stack.enter_context(patch.object(sys, "argv", testargs))
err = exit_stack.enter_context(pytest.raises(SystemExit))
with exit_stack:
run_pylint(testargs)
assert err.value.code == 0


def test_pylint_run_jobs_equal_zero_dont_crash_with_cpu_fraction(
tmpdir: LocalPath,
) -> None:
Expand Down

0 comments on commit 1baf4be

Please sign in to comment.