From 5963aac7a45fe2b37e9712cf9139b5c2802b64b3 Mon Sep 17 00:00:00 2001 From: Pang Yu Shao Date: Thu, 13 May 2021 12:39:02 +0800 Subject: [PATCH] Reworked consider-using-str-partition --- .../refactoring/recommendation_checker.py | 236 ++++++++++-------- .../consider/consider_using_str_partition.py | 68 +++-- .../consider/consider_using_str_partition.txt | 54 ++-- 3 files changed, 201 insertions(+), 157 deletions(-) diff --git a/pylint/checkers/refactoring/recommendation_checker.py b/pylint/checkers/refactoring/recommendation_checker.py index a012cb5216..dff0de925e 100644 --- a/pylint/checkers/refactoring/recommendation_checker.py +++ b/pylint/checkers/refactoring/recommendation_checker.py @@ -1,6 +1,5 @@ # Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html # For details: https://github.com/PyCQA/pylint/blob/master/LICENSE -import re from typing import cast import astroid @@ -37,12 +36,14 @@ class RecommendationChecker(checkers.BaseChecker): "method of the dictionary instead.", ), "C0207": ( - "Consider using %s[%d] instead", + "Consider using %s instead", "consider-using-str-partition", "Emitted when accessing only the first or last element of a str.split(sep). " + "Or when a str.split(sep,maxsplit=1) is used. " "The first and last element can be accessed by using str.partition(sep)[0] " "or str.rpartition(sep)[-1] instead, which is less computationally " - "expensive.", + "expensive and works the same as str.split() or str.rsplit() with a maxsplit " + "of 1", ), } @@ -53,8 +54,14 @@ def _is_builtin(node, function): return False return utils.is_builtin_object(inferred) and inferred.name == function - @utils.check_messages("consider-iterating-dictionary") - def visit_call(self, node): + @utils.check_messages( + "consider-iterating-dictionary", "consider-using-str-partition" + ) + def visit_call(self, node: astroid.Call) -> None: + self._check_consider_iterating_dictionary(node) + self._check_consider_using_str_partition(node) + + def _check_consider_iterating_dictionary(self, node: astroid.Call) -> None: if not isinstance(node.func, astroid.Attribute): return if node.func.attrname != "keys": @@ -71,6 +78,127 @@ def visit_call(self, node): if isinstance(node.parent, (astroid.For, astroid.Comprehension)): self.add_message("consider-iterating-dictionary", node=node) + def _check_consider_using_str_partition(self, node: astroid.Call) -> None: + """Add message when accessing first or last elements of a str.split() or str.rsplit(), + or when split/rsplit with max_split=1 is used""" + + # Check if call is split() or rsplit() + if isinstance(node.func, astroid.Attribute) and node.func.attrname in ( + "split", + "rsplit", + ): + inferred_func = utils.safe_infer(node.func) + fn_name = node.func.attrname + node_name = node.as_string() + + if not isinstance(inferred_func, astroid.BoundMethod): + return + + try: + seperator = utils.get_argument_from_call(node, 0, "sep").value + except utils.NoSuchArgumentError: + return + # Check if maxsplit is set, and if it's == 1 (this can be replaced by partition) + try: + maxsplit = utils.get_argument_from_call(node, 1, "maxsplit").value + if maxsplit == 1: + self.add_message( + "consider-using-str-partition", + node=node, + args=( + f"{node.func.expr.as_string()}.{node.func.attrname.replace('split','partition')}('{seperator}')" + ), + ) + return + + except utils.NoSuchArgumentError: + pass + + # Check if it's immediately subscripted + if isinstance(node.parent, astroid.Subscript): + subscript_node = node.parent + # Check if subscripted with -1/0 + if not isinstance( + subscript_node.slice, (astroid.Const, astroid.UnaryOp) + ): + return + subscript_value = utils.safe_infer(subscript_node.slice) + subscript_value = cast(astroid.Const, subscript_value) + subscript_value = subscript_value.value + if subscript_value in (-1, 0): + new_fn = "rpartition" if subscript_value == -1 else "partition" + new_fn = new_fn[::-1] + node_name = node_name[::-1] + fn_name = fn_name[::-1] + new_name = node_name.replace(fn_name, new_fn, 1)[::-1] + self.add_message( + "consider-using-str-partition", node=node, args=(new_name,) + ) + return + # Check where name it's assigned to, then check all usage of name + assign_target = None + if isinstance(node.parent, astroid.Tuple): + assign_node = node.parent.parent + if not isinstance(assign_node, astroid.Assign): + return + idx = node.parent.elts.index(node) + if not isinstance(assign_node.targets[0], astroid.Tuple): + return + assign_target = assign_node.targets[0].elts[idx] + elif isinstance(node.parent, astroid.Assign): + assign_target = node.parent.targets[0] + + if assign_target is None or not isinstance( + assign_target, astroid.AssignName + ): + return + + # Go to outer-most scope (module), then search the child for usage + module_node = node + while not isinstance(module_node, astroid.Module): + module_node = module_node.parent + + subscript_usage = set() + for child in module_node.body: + for search_node in child.nodes_of_class(astroid.Name): + search_node = cast(astroid.Name, search_node) + + last_definition = search_node.lookup(search_node.name)[1][-1] + if last_definition is not assign_target: + continue + if not isinstance(search_node.parent, astroid.Subscript): + continue + subscript_node = search_node.parent + if not isinstance( + subscript_node.slice, (astroid.Const, astroid.UnaryOp) + ): + return + subscript_value = utils.safe_infer(subscript_node.slice) + subscript_value = cast(astroid.Const, subscript_value) + subscript_value = subscript_value.value + if subscript_value not in (-1, 0): + return + subscript_usage.add(subscript_value) + if not subscript_usage: # Not used + return + # Construct help text + help_text = "" + node_name = node_name[::-1] + fn_name = fn_name[::-1] + if 0 in subscript_usage: + new_fn = "partition"[::-1] + new_name = node_name.replace(fn_name, new_fn, 1)[::-1] + help_text += f"{new_name} to extract the first element of a .split()" + + if -1 in subscript_usage: + new_fn = "rpartition"[::-1] + new_name = node_name.replace(fn_name, new_fn, 1)[::-1] + help_text += " and " if help_text != "" else "" + help_text += f"{new_name} to extract the last element of a .split()" + self.add_message( + "consider-using-str-partition", node=node, args=(help_text,) + ) + @utils.check_messages("consider-using-enumerate", "consider-using-dict-items") def visit_for(self, node: astroid.For) -> None: self._check_consider_using_enumerate(node) @@ -219,101 +347,3 @@ def visit_comprehension(self, node: astroid.Comprehension) -> None: self.add_message("consider-using-dict-items", node=node) return - - @utils.check_messages("consider-using-str-partition") - def visit_subscript(self, node: astroid.Subscript) -> None: - """Add message when accessing first or last elements of a str.split().""" - assignment = None - subscripted_object = node.value - if isinstance(subscripted_object, astroid.Name): - # Check assignment of this subscripted object - assignment_lookup_results = node.value.lookup(node.value.name) - if ( - len(assignment_lookup_results) == 0 - or len(assignment_lookup_results[-1]) == 0 - ): - return - assign_name = assignment_lookup_results[-1][-1] - if not isinstance(assign_name, astroid.AssignName): - return - assignment = assign_name.parent - if not isinstance(assignment, astroid.Assign): - return - # Set subscripted_object to the assignment RHS - subscripted_object = assignment.value - - if isinstance(subscripted_object, astroid.Call): - # Check if call is .split() or .rsplit() - if isinstance( - subscripted_object.func, astroid.Attribute - ) and subscripted_object.func.attrname in ["split", "rsplit"]: - inferred = utils.safe_infer(subscripted_object.func) - if not isinstance(inferred, astroid.BoundMethod): - return - - # Check if subscript is 1 or -1 - value = node.slice - if isinstance(value, astroid.Index): - value = value.value - - if isinstance(value, (astroid.UnaryOp, astroid.Const)): - const = utils.safe_infer(value) - const = cast(astroid.Const, const) - p = re.compile( - r"tilpsr*\." - ) # Match and replace in reverse to ensure last occurance replaced - if const.value == -1: - help_text = p.sub( - ".rpartition"[::-1], - subscripted_object.as_string()[::-1], - count=1, - )[::-1] - self.add_message( - "consider-using-str-partition", - node=node, - args=(help_text, -1), - ) - elif const.value == 0: - help_text = p.sub( - ".partition"[::-1], - subscripted_object.as_string()[::-1], - count=1, - )[::-1] - self.add_message( - "consider-using-str-partition", - node=node, - args=(help_text, 0), - ) - # Check if subscript is len(split) - 1 - if ( - isinstance(value, astroid.BinOp) - and value.left.func.name == "len" - and value.op == "-" - and value.right.value == 1 - and assignment is not None - ): - # Ensure that the len() is from builtins, not user re-defined - inferred_fn = utils.safe_infer(value.left.func) - if not ( - isinstance(inferred_fn, astroid.FunctionDef) - and isinstance(inferred_fn.parent, astroid.Module) - and inferred_fn.parent.name == "builtins" - ): - return - - # Further check for argument within len(), and compare that to object being split - arg_within_len = value.left.args[0].name - if arg_within_len == node.value.name: - p = re.compile( - r"tilpsr*\." - ) # Match and replace in reverse to ensure last occurance replaced - help_text = p.sub( - ".rpartition"[::-1], - subscripted_object.as_string()[::-1], - count=1, - )[::-1] - self.add_message( - "consider-using-str-partition", - node=node, - args=(help_text, -1), - ) diff --git a/tests/functional/c/consider/consider_using_str_partition.py b/tests/functional/c/consider/consider_using_str_partition.py index 541cff6443..b4cce175ad 100644 --- a/tests/functional/c/consider/consider_using_str_partition.py +++ b/tests/functional/c/consider/consider_using_str_partition.py @@ -1,8 +1,19 @@ """Emit a message for iteration through dict keys and subscripting dict with key.""" # pylint: disable=line-too-long,missing-docstring,unsubscriptable-object,too-few-public-methods,invalid-name,redefined-builtin -from collections.abc import Iterable -from typing import Any + +# Test varying maxsplit argument +bad_split = '1,2,3'.split(sep=',',maxsplit=1) # [consider-using-str-partition] +bad_split = '1,2,3'[::-1].split(',',1) # [consider-using-str-partition] + +good_split = '1,2,3'[::-1].split(',',2) +good_split = '1,2,3'[::-1].split(',') + +good_split = '1,2,3'[::-1].split(',',2)[0] #This is fine, we ignore cases where maxsplit > 1 +good_split = '1,2,3'[::-1].split(',',2)[-1] #This is fine, we ignore cases where maxsplit > 1 + + +# Test subscripting .split() get_first = '1,2,3'.split(',')[0] # [consider-using-str-partition] get_last = '1,2,3'[::-1].split(',')[0] # [consider-using-str-partition] @@ -15,27 +26,38 @@ get_mid = SEQ.split(',')[1] # This is okay get_mid = SEQ.split(',')[-2] # This is okay + # Test with storing splits as an object -split1 = SEQ.split(',') -get_first = split1[0] # [consider-using-str-partition] -get_last = split1[-1] # [consider-using-str-partition] -get_last = split1[len(split1)-1] # [consider-using-str-partition] +split1 = SEQ.split(',') # [consider-using-str-partition] +get_first = split1[0] +get_last = split1[-1] + +split2 = SEQ.rsplit(',') # [consider-using-str-partition] +get_first = split2[0] +get_last = split2[-1] -split2 = SEQ.rsplit(',') -get_first = split2[0] # [consider-using-str-partition] -get_last = split2[-1] # [consider-using-str-partition] -get_last = split2[len(split2)-1] # [consider-using-str-partition] +split3 = SEQ.rsplit(',') # This is fine, split3 indexed with [1] +get_first = split3[0] +get_middle = split3[1] +get_last = split3[-1] -print(split1[0], split1[-1]) # [consider-using-str-partition, consider-using-str-partition] +split1, split2 = SEQ.split(','), SEQ.rsplit(',') # [consider-using-str-partition, consider-using-str-partition] +get_first = split1[0] +get_last = split1[-1] +get_first = split2[0] +get_last = split2[-1] + +split1, split2 = SEQ.split(','), SEQ.rsplit(',') # This is fine, both splits are utilized +get_first = split1[0] +get_last = split1[-1] +get_first = split2[0] +get_last = split2[-1] +split1[1] = split2[1] -# Test when running len on another iterable -some_list = [] -get_last = split1[len(split2)-1] # Should not throw an error # Tests on class attributes class Foo(): class_str = '1,2,3' - def __init__(self): self.my_str = '1,2,3' @@ -51,10 +73,10 @@ def get_string(self) -> str: get_mid = Foo.class_str.split(',')[1] get_mid = Foo.class_str.split(',')[-2] -split2 = Foo.class_str.split(',') -get_first = split2[0] # [consider-using-str-partition] -get_last = split2[-1] # [consider-using-str-partition] -get_last = split2[len(split2)-1] # [consider-using-str-partition] +split2 = Foo.class_str.split(',') # [consider-using-str-partition] +get_first = split2[0] +get_last = split2[-1] + # Test with accessors bar = Foo() @@ -64,6 +86,7 @@ def get_string(self) -> str: get_mid = bar.get_string().split(',')[1] get_mid = bar.get_string().split(',')[-2] + # Test with iterating over strings list_of_strs = ["a", "b", "c", "d", "e", "f"] for s in list_of_strs: @@ -71,6 +94,7 @@ def get_string(self) -> str: print(s.split(" ")[-1]) # [consider-using-str-partition] print(s.split(" ")[-2]) + # Test warning messages (matching and replacing .split / .rsplit) class Bar(): split = '1,2,3' @@ -79,9 +103,3 @@ class Bar(): print(Bar.split.split(",")[-1]) # [consider-using-str-partition] (Error message should show Bar.split.rpartition) print(Bar.split.rsplit(",")[0]) # [consider-using-str-partition] (Error message should show Bar.split.partition) print(Bar.split.rsplit(",")[-1]) # [consider-using-str-partition] (Error message should show Bar.split.rpartition) - -# Test with user-defined len function -def len(x: Iterable[Any]) -> str: - return f"Hello, world! {x[2]}" - -get_last = split2[len(split2)-1] # This won't throw the warning as the len function has been redefined diff --git a/tests/functional/c/consider/consider_using_str_partition.txt b/tests/functional/c/consider/consider_using_str_partition.txt index 25e400b294..3d176b4ca7 100644 --- a/tests/functional/c/consider/consider_using_str_partition.txt +++ b/tests/functional/c/consider/consider_using_str_partition.txt @@ -1,29 +1,25 @@ -consider-using-str-partition:6:12::Consider using '1,2,3'.partition(',')[0] instead -consider-using-str-partition:7:11::"Consider using '1,2,3'[::-1].partition(',')[0] instead" -consider-using-str-partition:10:12::Consider using SEQ.partition(',')[0] instead -consider-using-str-partition:11:11::Consider using SEQ.rpartition(',')[-1] instead -consider-using-str-partition:12:12::Consider using SEQ.partition(',')[0] instead -consider-using-str-partition:13:11::Consider using SEQ.rpartition(',')[-1] instead -consider-using-str-partition:20:12::Consider using SEQ.partition(',')[0] instead -consider-using-str-partition:21:11::Consider using SEQ.rpartition(',')[-1] instead -consider-using-str-partition:22:11::Consider using SEQ.rpartition(',')[-1] instead -consider-using-str-partition:25:12::Consider using SEQ.partition(',')[0] instead -consider-using-str-partition:26:11::Consider using SEQ.rpartition(',')[-1] instead -consider-using-str-partition:27:11::Consider using SEQ.rpartition(',')[-1] instead -consider-using-str-partition:29:6::Consider using SEQ.partition(',')[0] instead -consider-using-str-partition:29:17::Consider using SEQ.rpartition(',')[-1] instead -consider-using-str-partition:46:12::Consider using Foo.class_str.partition(',')[0] instead -consider-using-str-partition:47:11::Consider using Foo.class_str.rpartition(',')[-1] instead -consider-using-str-partition:48:12::Consider using Foo.class_str.partition(',')[0] instead -consider-using-str-partition:49:11::Consider using Foo.class_str.rpartition(',')[-1] instead -consider-using-str-partition:55:12::Consider using Foo.class_str.partition(',')[0] instead -consider-using-str-partition:56:11::Consider using Foo.class_str.rpartition(',')[-1] instead -consider-using-str-partition:57:11::Consider using Foo.class_str.rpartition(',')[-1] instead -consider-using-str-partition:61:12::Consider using bar.get_string().partition(',')[0] instead -consider-using-str-partition:62:11::Consider using bar.get_string().rpartition(',')[-1] instead -consider-using-str-partition:70:10::Consider using s.partition(' ')[0] instead -consider-using-str-partition:71:10::Consider using s.rpartition(' ')[-1] instead -consider-using-str-partition:78:6::Consider using Bar.split.partition(',')[0] instead -consider-using-str-partition:79:6::Consider using Bar.split.rpartition(',')[-1] instead -consider-using-str-partition:80:6::Consider using Bar.split.partition(',')[0] instead -consider-using-str-partition:81:6::Consider using Bar.split.rpartition(',')[-1] instead +consider-using-str-partition:6:12::Consider using '1,2,3'.partition(',') instead +consider-using-str-partition:7:12::"Consider using '1,2,3'[::-1].partition(',') instead" +consider-using-str-partition:17:12::Consider using '1,2,3'.partition(',') instead +consider-using-str-partition:18:11::"Consider using '1,2,3'[::-1].partition(',') instead" +consider-using-str-partition:21:12::Consider using SEQ.partition(',') instead +consider-using-str-partition:22:11::Consider using SEQ.rpartition(',') instead +consider-using-str-partition:23:12::Consider using SEQ.partition(',') instead +consider-using-str-partition:24:11::Consider using SEQ.rpartition(',') instead +consider-using-str-partition:31:9::Consider using SEQ.partition(',') to extract the first element of a .split() and SEQ.rpartition(',') to extract the last element of a .split() instead +consider-using-str-partition:35:9::Consider using SEQ.partition(',') to extract the first element of a .split() and SEQ.rpartition(',') to extract the last element of a .split() instead +consider-using-str-partition:44:17::Consider using SEQ.partition(',') to extract the first element of a .split() and SEQ.rpartition(',') to extract the last element of a .split() instead +consider-using-str-partition:44:33::Consider using SEQ.partition(',') to extract the first element of a .split() and SEQ.rpartition(',') to extract the last element of a .split() instead +consider-using-str-partition:68:12::Consider using Foo.class_str.partition(',') instead +consider-using-str-partition:69:11::Consider using Foo.class_str.rpartition(',') instead +consider-using-str-partition:70:12::Consider using Foo.class_str.partition(',') instead +consider-using-str-partition:71:11::Consider using Foo.class_str.rpartition(',') instead +consider-using-str-partition:76:9::Consider using Foo.class_str.partition(',') to extract the first element of a .split() and Foo.class_str.rpartition(',') to extract the last element of a .split() instead +consider-using-str-partition:83:12::Consider using bar.get_string().partition(',') instead +consider-using-str-partition:84:11::Consider using bar.get_string().rpartition(',') instead +consider-using-str-partition:93:10::Consider using s.partition(' ') instead +consider-using-str-partition:94:10::Consider using s.rpartition(' ') instead +consider-using-str-partition:102:6::Consider using Bar.split.partition(',') instead +consider-using-str-partition:103:6::Consider using Bar.split.rpartition(',') instead +consider-using-str-partition:104:6::Consider using Bar.split.partition(',') instead +consider-using-str-partition:105:6::Consider using Bar.split.rpartition(',') instead