From 605afdb720afee682b1d13e128da632e7ea3c316 Mon Sep 17 00:00:00 2001 From: yushao2 <36848472+yushao2@users.noreply.github.com> Date: Sat, 22 May 2021 22:01:52 +0800 Subject: [PATCH] Implemented new check use-maxsplit-arg (#4469) Co-authored-by: Pierre Sassoulas Co-authored-by: Marc Mueller <30130371+cdce8p@users.noreply.github.com> --- ChangeLog | 5 ++ doc/whatsnew/2.9.rst | 4 + .../refactoring/recommendation_checker.py | 53 +++++++++++- pylint/checkers/utils.py | 26 ++++++ pylint/testutils/lint_module_test.py | 5 +- tests/functional/u/use/use_maxsplit_arg.py | 84 +++++++++++++++++++ tests/functional/u/use/use_maxsplit_arg.txt | 21 +++++ 7 files changed, 195 insertions(+), 3 deletions(-) create mode 100644 tests/functional/u/use/use_maxsplit_arg.py create mode 100644 tests/functional/u/use/use_maxsplit_arg.txt diff --git a/ChangeLog b/ChangeLog index a8a7323e94..3b712b88d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -51,6 +51,11 @@ modules are added. Closes #4412 +* New checker ``use-maxsplit-arg``. Emitted either when accessing only the first or last + element of ``str.split()``. + + Closes #4440 + * Add ignore_signatures to duplicate code checker Closes #3619 diff --git a/doc/whatsnew/2.9.rst b/doc/whatsnew/2.9.rst index 14669ca27c..4e6249a9a4 100644 --- a/doc/whatsnew/2.9.rst +++ b/doc/whatsnew/2.9.rst @@ -15,8 +15,12 @@ New checkers * ``consider-using-dict-items``: Emitted when iterating over dictionary keys and then indexing the same dictionary with the key within loop body. +* ``use-maxsplit-arg``: Emitted either when accessing only the first or last + element of ``str.split()``. + * An ``ignore_signatures`` option has been added to the similarity checker. It will permits to reduce false positives when multiple functions have the same parameters. + Other Changes ============= diff --git a/pylint/checkers/refactoring/recommendation_checker.py b/pylint/checkers/refactoring/recommendation_checker.py index 50c5097e1a..2678904888 100644 --- a/pylint/checkers/refactoring/recommendation_checker.py +++ b/pylint/checkers/refactoring/recommendation_checker.py @@ -35,6 +35,14 @@ class RecommendationChecker(checkers.BaseChecker): "Both the key and value can be accessed by iterating using the .items() " "method of the dictionary instead.", ), + "C0207": ( + "Use %s instead", + "use-maxsplit-arg", + "Emitted when accessing only the first or last element of str.split(). " + "The first and last element can be accessed by using " + "str.split(sep, maxsplit=1)[0] or str.rsplit(sep, maxsplit=1)[-1] " + "instead.", + ), } @staticmethod @@ -44,8 +52,12 @@ def _is_builtin(node, function): return False return utils.is_builtin_object(inferred) and inferred.name == function - @utils.check_messages("consider-iterating-dictionary") - def visit_call(self, node): + @utils.check_messages("consider-iterating-dictionary", "use-maxsplit-arg") + def visit_call(self, node: astroid.Call) -> None: + self._check_consider_iterating_dictionary(node) + self._check_use_maxsplit_arg(node) + + def _check_consider_iterating_dictionary(self, node: astroid.Call) -> None: if not isinstance(node.func, astroid.Attribute): return if node.func.attrname != "keys": @@ -62,6 +74,43 @@ def visit_call(self, node): if isinstance(node.parent, (astroid.For, astroid.Comprehension)): self.add_message("consider-iterating-dictionary", node=node) + def _check_use_maxsplit_arg(self, node: astroid.Call) -> None: + """Add message when accessing first or last elements of a str.split() or str.rsplit().""" + + # Check if call is split() or rsplit() + if ( + isinstance(node.func, astroid.Attribute) + and node.func.attrname in ("split", "rsplit") + and isinstance(utils.safe_infer(node.func), astroid.BoundMethod) + ): + try: + utils.get_argument_from_call(node, 0, "sep") + except utils.NoSuchArgumentError: + return + + try: + # Ignore if maxsplit arg has been set + utils.get_argument_from_call(node, 1, "maxsplit") + return + except utils.NoSuchArgumentError: + pass + + if isinstance(node.parent, astroid.Subscript): + try: + subscript_value = utils.get_subscript_const_value(node.parent).value + except utils.InferredTypeError: + return + + if subscript_value in (-1, 0): + fn_name = node.func.attrname + new_fn = "rsplit" if subscript_value == -1 else "split" + new_name = ( + node.func.as_string().rsplit(fn_name, maxsplit=1)[0] + + new_fn + + f"({node.args[0].as_string()}, maxsplit=1)[{subscript_value}]" + ) + self.add_message("use-maxsplit-arg", node=node, args=(new_name,)) + @utils.check_messages("consider-using-enumerate", "consider-using-dict-items") def visit_for(self, node: astroid.For) -> None: self._check_consider_using_enumerate(node) diff --git a/pylint/checkers/utils.py b/pylint/checkers/utils.py index 8cf97d8074..4581c7849d 100644 --- a/pylint/checkers/utils.py +++ b/pylint/checkers/utils.py @@ -272,6 +272,10 @@ class NoSuchArgumentError(Exception): pass +class InferredTypeError(Exception): + pass + + def is_inside_except(node): """Returns true if node is inside the name of an except handler.""" current = node @@ -1522,3 +1526,25 @@ def get_iterating_dictionary_name( return node.iter.as_string() return None + + +def get_subscript_const_value(node: astroid.Subscript) -> astroid.Const: + """ + Returns the value (subscript.slice) of a Subscript node, + also supports python <3.9 windows where node.slice might be an Index + node + + :param node: Subscript Node to extract value from + :returns: Const Node containing subscript value + :raises InferredTypeError: if the subscript node cannot be inferred as a Const + """ + value = node.slice + if isinstance(value, astroid.Index): + value = value.value + inferred = safe_infer(value) + if not isinstance(inferred, astroid.Const): + raise InferredTypeError( + "Subscript.slice cannot be inferred as an astroid.Const" + ) + + return inferred diff --git a/pylint/testutils/lint_module_test.py b/pylint/testutils/lint_module_test.py index e3580946d2..a765381b6b 100644 --- a/pylint/testutils/lint_module_test.py +++ b/pylint/testutils/lint_module_test.py @@ -52,7 +52,10 @@ def __init__(self, test_file: FunctionalTestFile, config: Optional[Config] = Non def setUp(self): if self._should_be_skipped_due_to_version(): - pytest.skip("Test cannot run with Python %s." % sys.version.split(" ")[0]) + pytest.skip( + "Test cannot run with Python %s." + % sys.version.split(" ", maxsplit=1)[0] + ) missing = [] for requirement in self._test_file.options["requires"]: try: diff --git a/tests/functional/u/use/use_maxsplit_arg.py b/tests/functional/u/use/use_maxsplit_arg.py new file mode 100644 index 0000000000..1b998e5e92 --- /dev/null +++ b/tests/functional/u/use/use_maxsplit_arg.py @@ -0,0 +1,84 @@ +"""Emit a message for accessing first/last element of string.split""" +# pylint: disable=line-too-long,missing-docstring,unsubscriptable-object,too-few-public-methods,invalid-name,redefined-builtin + +# Test subscripting .split() +get_first = '1,2,3'.split(',')[0] # [use-maxsplit-arg] +get_last = '1,2,3'[::-1].split(',')[0] # [use-maxsplit-arg] + +SEQ = '1,2,3' +get_first = SEQ.split(',')[0] # [use-maxsplit-arg] +get_last = SEQ.split(',')[-1] # [use-maxsplit-arg] +get_first = SEQ.rsplit(',')[0] # [use-maxsplit-arg] +get_last = SEQ.rsplit(',')[-1] # [use-maxsplit-arg] + +# Don't suggest maxsplit=1 if not accessing the first or last element +get_mid = SEQ.split(',')[1] +get_mid = SEQ.split(',')[-2] + + +# Test varying maxsplit argument -- all these will be okay +# ## str.split() tests +good_split = '1,2,3'.split(sep=',', maxsplit=1)[-1] +good_split = '1,2,3'.split(sep=',', maxsplit=1)[0] +good_split = '1,2,3'.split(sep=',', maxsplit=2)[-1] +good_split = '1,2,3'.split(sep=',', maxsplit=2)[0] +good_split = '1,2,3'.split(sep=',', maxsplit=2)[1] + +# ## str.rsplit() tests +good_split = '1,2,3'.rsplit(sep=',', maxsplit=1)[-1] +good_split = '1,2,3'.rsplit(sep=',', maxsplit=1)[0] +good_split = '1,2,3'.rsplit(sep=',', maxsplit=2)[-1] +good_split = '1,2,3'.rsplit(sep=',', maxsplit=2)[0] +good_split = '1,2,3'.rsplit(sep=',', maxsplit=2)[1] + + +# Tests on class attributes +class Foo(): + class_str = '1,2,3' + def __init__(self): + self.my_str = '1,2,3' + + def get_string(self) -> str: + return self.my_str + +# Class attributes +get_first = Foo.class_str.split(',')[0] # [use-maxsplit-arg] +get_last = Foo.class_str.split(',')[-1] # [use-maxsplit-arg] +get_first = Foo.class_str.rsplit(',')[0] # [use-maxsplit-arg] +get_last = Foo.class_str.rsplit(',')[-1] # [use-maxsplit-arg] + +get_mid = Foo.class_str.split(',')[1] +get_mid = Foo.class_str.split(',')[-2] + + +# Test with accessors +bar = Foo() +get_first = bar.get_string().split(',')[0] # [use-maxsplit-arg] +get_last = bar.get_string().split(',')[-1] # [use-maxsplit-arg] + +get_mid = bar.get_string().split(',')[1] +get_mid = bar.get_string().split(',')[-2] + + +# Test with iterating over strings +list_of_strs = ["a", "b", "c", "d", "e", "f"] +for s in list_of_strs: + print(s.split(" ")[0]) # [use-maxsplit-arg] + print(s.split(" ")[-1]) # [use-maxsplit-arg] + print(s.split(" ")[-2]) + + +# Test warning messages (matching and replacing .split / .rsplit) +class Bar(): + split = '1,2,3' + +# Error message should show Bar.split.split(',', maxsplit=1) or Bar.split.rsplit(',', maxsplit=1) +print(Bar.split.split(",")[0]) # [use-maxsplit-arg] +print(Bar.split.split(",")[-1]) # [use-maxsplit-arg] +print(Bar.split.rsplit(",")[0]) # [use-maxsplit-arg] +print(Bar.split.rsplit(",")[-1]) # [use-maxsplit-arg] + +# Special cases +a = "1,2,3".split('\n')[0] # [use-maxsplit-arg] +a = "1,2,3".split('split')[-1] # [use-maxsplit-arg] +a = "1,2,3".rsplit('rsplit')[0] # [use-maxsplit-arg] diff --git a/tests/functional/u/use/use_maxsplit_arg.txt b/tests/functional/u/use/use_maxsplit_arg.txt new file mode 100644 index 0000000000..8137374aa9 --- /dev/null +++ b/tests/functional/u/use/use_maxsplit_arg.txt @@ -0,0 +1,21 @@ +use-maxsplit-arg:5:12::Use '1,2,3'.split(',', maxsplit=1)[0] instead +use-maxsplit-arg:6:11::"Use '1,2,3'[::-1].split(',', maxsplit=1)[0] instead" +use-maxsplit-arg:9:12::Use SEQ.split(',', maxsplit=1)[0] instead +use-maxsplit-arg:10:11::Use SEQ.rsplit(',', maxsplit=1)[-1] instead +use-maxsplit-arg:11:12::Use SEQ.split(',', maxsplit=1)[0] instead +use-maxsplit-arg:12:11::Use SEQ.rsplit(',', maxsplit=1)[-1] instead +use-maxsplit-arg:45:12::Use Foo.class_str.split(',', maxsplit=1)[0] instead +use-maxsplit-arg:46:11::Use Foo.class_str.rsplit(',', maxsplit=1)[-1] instead +use-maxsplit-arg:47:12::Use Foo.class_str.split(',', maxsplit=1)[0] instead +use-maxsplit-arg:48:11::Use Foo.class_str.rsplit(',', maxsplit=1)[-1] instead +use-maxsplit-arg:56:12::Use bar.get_string().split(',', maxsplit=1)[0] instead +use-maxsplit-arg:57:11::Use bar.get_string().rsplit(',', maxsplit=1)[-1] instead +use-maxsplit-arg:66:10::Use s.split(' ', maxsplit=1)[0] instead +use-maxsplit-arg:67:10::Use s.rsplit(' ', maxsplit=1)[-1] instead +use-maxsplit-arg:76:6::Use Bar.split.split(',', maxsplit=1)[0] instead +use-maxsplit-arg:77:6::Use Bar.split.rsplit(',', maxsplit=1)[-1] instead +use-maxsplit-arg:78:6::Use Bar.split.split(',', maxsplit=1)[0] instead +use-maxsplit-arg:79:6::Use Bar.split.rsplit(',', maxsplit=1)[-1] instead +use-maxsplit-arg:82:4::Use '1,2,3'.split('\n', maxsplit=1)[0] instead +use-maxsplit-arg:83:4::Use '1,2,3'.rsplit('split', maxsplit=1)[-1] instead +use-maxsplit-arg:84:4::Use '1,2,3'.split('rsplit', maxsplit=1)[0] instead