Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deduplicate module file paths to prevent redundant scans. #7747

Merged
merged 20 commits into from Nov 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
9e345ba
Deduplicate module file paths to prevent redundant scans.
emcd Nov 11, 2022
cb1eaab
Merge branch 'PyCQA:main' into main
emcd Nov 12, 2022
0538ffd
Use dict for 'expand_modules' result rather than list.
emcd Nov 12, 2022
4bcbcc4
Update towncrier description for patch.
emcd Nov 12, 2022
387c240
Fix deduplication to account for CLI args marker. Add unit test for d…
emcd Nov 12, 2022
5a6d70b
Fix corner case with CLI arg flag handling during deduplication. Also…
emcd Nov 12, 2022
887eafb
Merge branch 'PyCQA:main' into main
emcd Nov 12, 2022
e2e326d
Factor out package module list for 'expand_modules' unit tests.
emcd Nov 13, 2022
08b9e1d
Add 'deduplication' to custom Pyenchant dict.
emcd Nov 13, 2022
2165ddf
Merge branch 'PyCQA:main' into main
emcd Nov 15, 2022
c59248b
Appease Mypy. Improve accuracy of comment.
emcd Nov 15, 2022
d0f66ef
Merge branch 'PyCQA:main' into main
emcd Nov 17, 2022
3d147e3
Integration Test: Pylint argument deduplication with highly constrain…
emcd Nov 17, 2022
dffd61d
Appease Pyenchant.
emcd Nov 17, 2022
e45a731
Use 'too-many-branches' functional test file as target for deduplicat…
emcd Nov 18, 2022
d641d69
Merge branch 'PyCQA:main' into main
emcd Nov 18, 2022
82948a0
Appease Pyenchant. Again.
emcd Nov 18, 2022
7a8fe98
Update tests/test_pylint_runners.py
Pierre-Sassoulas Nov 18, 2022
ae05856
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 18, 2022
c67f204
Update tests/test_pylint_runners.py
Pierre-Sassoulas Nov 18, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .pyenchant_pylint_custom_dict.txt
Expand Up @@ -74,6 +74,7 @@ cyclomatic
dataclass
datetime
debian
deduplication
deepcopy
defaultdicts
defframe
Expand Down
4 changes: 4 additions & 0 deletions doc/whatsnew/fragments/6242.bugfix
@@ -0,0 +1,4 @@
Pylint will now filter duplicates given to it before linting. The output should
be the same whether a file is given/discovered multiple times or not.

Closes #6242, #4053
30 changes: 16 additions & 14 deletions pylint/lint/expand_modules.py
Expand Up @@ -66,11 +66,11 @@ def expand_modules(
ignore_list: list[str],
ignore_list_re: list[Pattern[str]],
ignore_list_paths_re: list[Pattern[str]],
) -> tuple[list[ModuleDescriptionDict], list[ErrorDescriptionDict]]:
) -> tuple[dict[str, ModuleDescriptionDict], list[ErrorDescriptionDict]]:
"""Take a list of files/modules/packages and return the list of tuple
(file, module name) which have to be actually checked.
"""
result: list[ModuleDescriptionDict] = []
result: dict[str, ModuleDescriptionDict] = {}
errors: list[ErrorDescriptionDict] = []
path = sys.path.copy()

Expand Down Expand Up @@ -120,15 +120,17 @@ def expand_modules(
is_namespace = modutils.is_namespace(spec)
is_directory = modutils.is_directory(spec)
if not is_namespace:
result.append(
{
if filepath in result:
# Always set arg flag if module explicitly given.
result[filepath]["isarg"] = True
else:
result[filepath] = {
"path": filepath,
"name": modname,
"isarg": True,
"basepath": filepath,
"basename": modname,
}
)
has_init = (
not (modname.endswith(".__init__") or modname == "__init__")
and os.path.basename(filepath) == "__init__.py"
Expand All @@ -148,13 +150,13 @@ def expand_modules(
subfilepath, is_namespace, path=additional_search_path
)
submodname = ".".join(modpath)
result.append(
{
"path": subfilepath,
"name": submodname,
"isarg": False,
"basepath": filepath,
"basename": modname,
}
)
# Preserve arg flag if module is also explicitly given.
isarg = subfilepath in result and result[subfilepath]["isarg"]
result[subfilepath] = {
"path": subfilepath,
"name": submodname,
"isarg": isarg,
"basepath": filepath,
"basename": modname,
}
return result, errors
4 changes: 2 additions & 2 deletions pylint/lint/pylinter.py
Expand Up @@ -879,12 +879,12 @@ def _iterate_file_descrs(

The returned generator yield one item for each Python module that should be linted.
"""
for descr in self._expand_files(files_or_modules):
for descr in self._expand_files(files_or_modules).values():
name, filepath, is_arg = descr["name"], descr["path"], descr["isarg"]
if self.should_analyze_file(name, filepath, is_argument=is_arg):
yield FileItem(name, filepath, descr["basename"])

def _expand_files(self, modules: Sequence[str]) -> list[ModuleDescriptionDict]:
def _expand_files(self, modules: Sequence[str]) -> dict[str, ModuleDescriptionDict]:
"""Get modules and errors from a list of modules and handle errors."""
result, errors = expand_modules(
modules,
Expand Down
81 changes: 63 additions & 18 deletions tests/lint/unittest_expand_modules.py
Expand Up @@ -45,6 +45,14 @@ def test__is_in_ignore_list_re_match() -> None:
"path": EXPAND_MODULES,
}

this_file_from_init_deduplicated = {
"basename": "lint",
"basepath": INIT_PATH,
"isarg": True,
"name": "lint.unittest_expand_modules",
"path": EXPAND_MODULES,
}

unittest_lint = {
"basename": "lint",
"basepath": INIT_PATH,
Expand Down Expand Up @@ -77,7 +85,6 @@ def test__is_in_ignore_list_re_match() -> None:
"path": str(TEST_DIRECTORY / "lint/test_caching.py"),
}


init_of_package = {
"basename": "lint",
"basepath": INIT_PATH,
Expand All @@ -87,6 +94,20 @@ def test__is_in_ignore_list_re_match() -> None:
}


def _list_expected_package_modules(
deduplicating: bool = False,
) -> tuple[dict[str, object], ...]:
"""Generates reusable list of modules for our package."""
return (
init_of_package,
test_caching,
test_pylinter,
test_utils,
this_file_from_init_deduplicated if deduplicating else this_file_from_init,
unittest_lint,
)


class TestExpandModules(CheckerTestCase):
"""Test the expand_modules function while allowing options to be set."""

Expand All @@ -102,23 +123,19 @@ class Checker(BaseChecker):
@pytest.mark.parametrize(
"files_or_modules,expected",
[
([__file__], [this_file]),
([__file__], {this_file["path"]: this_file}),
(
[str(Path(__file__).parent)],
[
init_of_package,
test_caching,
test_pylinter,
test_utils,
this_file_from_init,
unittest_lint,
],
{
module["path"]: module # pylint: disable=unsubscriptable-object
emcd marked this conversation as resolved.
Show resolved Hide resolved
for module in _list_expected_package_modules()
},
),
],
)
@set_config(ignore_paths="")
def test_expand_modules(
self, files_or_modules: list[str], expected: list[ModuleDescriptionDict]
self, files_or_modules: list[str], expected: dict[str, ModuleDescriptionDict]
) -> None:
"""Test expand_modules with the default value of ignore-paths."""
ignore_list: list[str] = []
Expand All @@ -129,25 +146,54 @@ def test_expand_modules(
ignore_list_re,
self.linter.config.ignore_paths,
)
modules.sort(key=lambda d: d["name"])
assert modules == expected
assert not errors

@pytest.mark.parametrize(
"files_or_modules,expected",
[
([__file__], []),
([__file__, __file__], {this_file["path"]: this_file}),
(
[EXPAND_MODULES, str(Path(__file__).parent), EXPAND_MODULES],
{
module["path"]: module # pylint: disable=unsubscriptable-object
for module in _list_expected_package_modules(deduplicating=True)
},
),
],
)
@set_config(ignore_paths="")
def test_expand_modules_deduplication(
self, files_or_modules: list[str], expected: dict[str, ModuleDescriptionDict]
) -> None:
"""Test expand_modules deduplication."""
ignore_list: list[str] = []
ignore_list_re: list[re.Pattern[str]] = []
modules, errors = expand_modules(
files_or_modules,
ignore_list,
ignore_list_re,
self.linter.config.ignore_paths,
)
assert modules == expected
assert not errors

@pytest.mark.parametrize(
"files_or_modules,expected",
[
([__file__], {}),
(
[str(Path(__file__).parent)],
[
init_of_package,
],
{
module["path"]: module # pylint: disable=unsubscriptable-object
for module in (init_of_package,)
},
),
],
)
@set_config(ignore_paths=".*/lint/.*")
def test_expand_modules_with_ignore(
self, files_or_modules: list[str], expected: list[ModuleDescriptionDict]
self, files_or_modules: list[str], expected: dict[str, ModuleDescriptionDict]
) -> None:
"""Test expand_modules with a non-default value of ignore-paths."""
ignore_list: list[str] = []
Expand All @@ -158,6 +204,5 @@ def test_expand_modules_with_ignore(
ignore_list_re,
self.linter.config.ignore_paths,
)
modules.sort(key=lambda d: d["name"])
assert modules == expected
assert not errors
23 changes: 23 additions & 0 deletions tests/test_pylint_runners.py
Expand Up @@ -5,8 +5,10 @@

from __future__ import annotations

import contextlib
import os
import pathlib
import shlex
import sys
from collections.abc import Sequence
from io import BufferedReader
Expand Down Expand Up @@ -57,6 +59,27 @@ def test_runner_with_arguments(runner: _RunCallable, tmpdir: LocalPath) -> None:
assert err.value.code == 0


def test_pylint_argument_deduplication(
tmpdir: LocalPath, tests_directory: pathlib.Path
) -> None:
"""Check that the Pylint runner does not over-report on duplicate
arguments.

See https://github.com/PyCQA/pylint/issues/6242 and
https://github.com/PyCQA/pylint/issues/4053
"""
filepath = str(tests_directory / "functional/t/too/too_many_branches.py")
testargs = shlex.split("--report n --score n --max-branches 13")
testargs.extend([filepath] * 4)
exit_stack = contextlib.ExitStack()
exit_stack.enter_context(tmpdir.as_cwd())
exit_stack.enter_context(patch.object(sys, "argv", testargs))
err = exit_stack.enter_context(pytest.raises(SystemExit))
with exit_stack:
run_pylint(testargs)
assert err.value.code == 0


def test_pylint_run_jobs_equal_zero_dont_crash_with_cpu_fraction(
tmpdir: LocalPath,
) -> None:
Expand Down