From 5963aac7a45fe2b37e9712cf9139b5c2802b64b3 Mon Sep 17 00:00:00 2001
From: Pang Yu Shao
Date: Thu, 13 May 2021 12:39:02 +0800
Subject: [PATCH] Reworked consider-using-str-partition
---
.../refactoring/recommendation_checker.py | 236 ++++++++++--------
.../consider/consider_using_str_partition.py | 68 +++--
.../consider/consider_using_str_partition.txt | 54 ++--
3 files changed, 201 insertions(+), 157 deletions(-)
diff --git a/pylint/checkers/refactoring/recommendation_checker.py b/pylint/checkers/refactoring/recommendation_checker.py
index a012cb5216..dff0de925e 100644
--- a/pylint/checkers/refactoring/recommendation_checker.py
+++ b/pylint/checkers/refactoring/recommendation_checker.py
@@ -1,6 +1,5 @@
# Licensed under the GPL: https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
# For details: https://github.com/PyCQA/pylint/blob/master/LICENSE
-import re
from typing import cast
import astroid
@@ -37,12 +36,14 @@ class RecommendationChecker(checkers.BaseChecker):
"method of the dictionary instead.",
),
"C0207": (
- "Consider using %s[%d] instead",
+ "Consider using %s instead",
"consider-using-str-partition",
"Emitted when accessing only the first or last element of a str.split(sep). "
+ "Or when a str.split(sep,maxsplit=1) is used. "
"The first and last element can be accessed by using str.partition(sep)[0] "
"or str.rpartition(sep)[-1] instead, which is less computationally "
- "expensive.",
+ "expensive and works the same as str.split() or str.rsplit() with a maxsplit "
+ "of 1",
),
}
@@ -53,8 +54,14 @@ def _is_builtin(node, function):
return False
return utils.is_builtin_object(inferred) and inferred.name == function
- @utils.check_messages("consider-iterating-dictionary")
- def visit_call(self, node):
+ @utils.check_messages(
+ "consider-iterating-dictionary", "consider-using-str-partition"
+ )
+ def visit_call(self, node: astroid.Call) -> None:
+ self._check_consider_iterating_dictionary(node)
+ self._check_consider_using_str_partition(node)
+
+ def _check_consider_iterating_dictionary(self, node: astroid.Call) -> None:
if not isinstance(node.func, astroid.Attribute):
return
if node.func.attrname != "keys":
@@ -71,6 +78,127 @@ def visit_call(self, node):
if isinstance(node.parent, (astroid.For, astroid.Comprehension)):
self.add_message("consider-iterating-dictionary", node=node)
+ def _check_consider_using_str_partition(self, node: astroid.Call) -> None:
+ """Add message when accessing first or last elements of a str.split() or str.rsplit(),
+ or when split/rsplit with max_split=1 is used"""
+
+ # Check if call is split() or rsplit()
+ if isinstance(node.func, astroid.Attribute) and node.func.attrname in (
+ "split",
+ "rsplit",
+ ):
+ inferred_func = utils.safe_infer(node.func)
+ fn_name = node.func.attrname
+ node_name = node.as_string()
+
+ if not isinstance(inferred_func, astroid.BoundMethod):
+ return
+
+ try:
+ seperator = utils.get_argument_from_call(node, 0, "sep").value
+ except utils.NoSuchArgumentError:
+ return
+ # Check if maxsplit is set, and if it's == 1 (this can be replaced by partition)
+ try:
+ maxsplit = utils.get_argument_from_call(node, 1, "maxsplit").value
+ if maxsplit == 1:
+ self.add_message(
+ "consider-using-str-partition",
+ node=node,
+ args=(
+ f"{node.func.expr.as_string()}.{node.func.attrname.replace('split','partition')}('{seperator}')"
+ ),
+ )
+ return
+
+ except utils.NoSuchArgumentError:
+ pass
+
+ # Check if it's immediately subscripted
+ if isinstance(node.parent, astroid.Subscript):
+ subscript_node = node.parent
+ # Check if subscripted with -1/0
+ if not isinstance(
+ subscript_node.slice, (astroid.Const, astroid.UnaryOp)
+ ):
+ return
+ subscript_value = utils.safe_infer(subscript_node.slice)
+ subscript_value = cast(astroid.Const, subscript_value)
+ subscript_value = subscript_value.value
+ if subscript_value in (-1, 0):
+ new_fn = "rpartition" if subscript_value == -1 else "partition"
+ new_fn = new_fn[::-1]
+ node_name = node_name[::-1]
+ fn_name = fn_name[::-1]
+ new_name = node_name.replace(fn_name, new_fn, 1)[::-1]
+ self.add_message(
+ "consider-using-str-partition", node=node, args=(new_name,)
+ )
+ return
+ # Check where name it's assigned to, then check all usage of name
+ assign_target = None
+ if isinstance(node.parent, astroid.Tuple):
+ assign_node = node.parent.parent
+ if not isinstance(assign_node, astroid.Assign):
+ return
+ idx = node.parent.elts.index(node)
+ if not isinstance(assign_node.targets[0], astroid.Tuple):
+ return
+ assign_target = assign_node.targets[0].elts[idx]
+ elif isinstance(node.parent, astroid.Assign):
+ assign_target = node.parent.targets[0]
+
+ if assign_target is None or not isinstance(
+ assign_target, astroid.AssignName
+ ):
+ return
+
+ # Go to outer-most scope (module), then search the child for usage
+ module_node = node
+ while not isinstance(module_node, astroid.Module):
+ module_node = module_node.parent
+
+ subscript_usage = set()
+ for child in module_node.body:
+ for search_node in child.nodes_of_class(astroid.Name):
+ search_node = cast(astroid.Name, search_node)
+
+ last_definition = search_node.lookup(search_node.name)[1][-1]
+ if last_definition is not assign_target:
+ continue
+ if not isinstance(search_node.parent, astroid.Subscript):
+ continue
+ subscript_node = search_node.parent
+ if not isinstance(
+ subscript_node.slice, (astroid.Const, astroid.UnaryOp)
+ ):
+ return
+ subscript_value = utils.safe_infer(subscript_node.slice)
+ subscript_value = cast(astroid.Const, subscript_value)
+ subscript_value = subscript_value.value
+ if subscript_value not in (-1, 0):
+ return
+ subscript_usage.add(subscript_value)
+ if not subscript_usage: # Not used
+ return
+ # Construct help text
+ help_text = ""
+ node_name = node_name[::-1]
+ fn_name = fn_name[::-1]
+ if 0 in subscript_usage:
+ new_fn = "partition"[::-1]
+ new_name = node_name.replace(fn_name, new_fn, 1)[::-1]
+ help_text += f"{new_name} to extract the first element of a .split()"
+
+ if -1 in subscript_usage:
+ new_fn = "rpartition"[::-1]
+ new_name = node_name.replace(fn_name, new_fn, 1)[::-1]
+ help_text += " and " if help_text != "" else ""
+ help_text += f"{new_name} to extract the last element of a .split()"
+ self.add_message(
+ "consider-using-str-partition", node=node, args=(help_text,)
+ )
+
@utils.check_messages("consider-using-enumerate", "consider-using-dict-items")
def visit_for(self, node: astroid.For) -> None:
self._check_consider_using_enumerate(node)
@@ -219,101 +347,3 @@ def visit_comprehension(self, node: astroid.Comprehension) -> None:
self.add_message("consider-using-dict-items", node=node)
return
-
- @utils.check_messages("consider-using-str-partition")
- def visit_subscript(self, node: astroid.Subscript) -> None:
- """Add message when accessing first or last elements of a str.split()."""
- assignment = None
- subscripted_object = node.value
- if isinstance(subscripted_object, astroid.Name):
- # Check assignment of this subscripted object
- assignment_lookup_results = node.value.lookup(node.value.name)
- if (
- len(assignment_lookup_results) == 0
- or len(assignment_lookup_results[-1]) == 0
- ):
- return
- assign_name = assignment_lookup_results[-1][-1]
- if not isinstance(assign_name, astroid.AssignName):
- return
- assignment = assign_name.parent
- if not isinstance(assignment, astroid.Assign):
- return
- # Set subscripted_object to the assignment RHS
- subscripted_object = assignment.value
-
- if isinstance(subscripted_object, astroid.Call):
- # Check if call is .split() or .rsplit()
- if isinstance(
- subscripted_object.func, astroid.Attribute
- ) and subscripted_object.func.attrname in ["split", "rsplit"]:
- inferred = utils.safe_infer(subscripted_object.func)
- if not isinstance(inferred, astroid.BoundMethod):
- return
-
- # Check if subscript is 1 or -1
- value = node.slice
- if isinstance(value, astroid.Index):
- value = value.value
-
- if isinstance(value, (astroid.UnaryOp, astroid.Const)):
- const = utils.safe_infer(value)
- const = cast(astroid.Const, const)
- p = re.compile(
- r"tilpsr*\."
- ) # Match and replace in reverse to ensure last occurance replaced
- if const.value == -1:
- help_text = p.sub(
- ".rpartition"[::-1],
- subscripted_object.as_string()[::-1],
- count=1,
- )[::-1]
- self.add_message(
- "consider-using-str-partition",
- node=node,
- args=(help_text, -1),
- )
- elif const.value == 0:
- help_text = p.sub(
- ".partition"[::-1],
- subscripted_object.as_string()[::-1],
- count=1,
- )[::-1]
- self.add_message(
- "consider-using-str-partition",
- node=node,
- args=(help_text, 0),
- )
- # Check if subscript is len(split) - 1
- if (
- isinstance(value, astroid.BinOp)
- and value.left.func.name == "len"
- and value.op == "-"
- and value.right.value == 1
- and assignment is not None
- ):
- # Ensure that the len() is from builtins, not user re-defined
- inferred_fn = utils.safe_infer(value.left.func)
- if not (
- isinstance(inferred_fn, astroid.FunctionDef)
- and isinstance(inferred_fn.parent, astroid.Module)
- and inferred_fn.parent.name == "builtins"
- ):
- return
-
- # Further check for argument within len(), and compare that to object being split
- arg_within_len = value.left.args[0].name
- if arg_within_len == node.value.name:
- p = re.compile(
- r"tilpsr*\."
- ) # Match and replace in reverse to ensure last occurance replaced
- help_text = p.sub(
- ".rpartition"[::-1],
- subscripted_object.as_string()[::-1],
- count=1,
- )[::-1]
- self.add_message(
- "consider-using-str-partition",
- node=node,
- args=(help_text, -1),
- )
diff --git a/tests/functional/c/consider/consider_using_str_partition.py b/tests/functional/c/consider/consider_using_str_partition.py
index 541cff6443..b4cce175ad 100644
--- a/tests/functional/c/consider/consider_using_str_partition.py
+++ b/tests/functional/c/consider/consider_using_str_partition.py
@@ -1,8 +1,19 @@
"""Emit a message for iteration through dict keys and subscripting dict with key."""
# pylint: disable=line-too-long,missing-docstring,unsubscriptable-object,too-few-public-methods,invalid-name,redefined-builtin
-from collections.abc import Iterable
-from typing import Any
+
+# Test varying maxsplit argument
+bad_split = '1,2,3'.split(sep=',',maxsplit=1) # [consider-using-str-partition]
+bad_split = '1,2,3'[::-1].split(',',1) # [consider-using-str-partition]
+
+good_split = '1,2,3'[::-1].split(',',2)
+good_split = '1,2,3'[::-1].split(',')
+
+good_split = '1,2,3'[::-1].split(',',2)[0] #This is fine, we ignore cases where maxsplit > 1
+good_split = '1,2,3'[::-1].split(',',2)[-1] #This is fine, we ignore cases where maxsplit > 1
+
+
+# Test subscripting .split()
get_first = '1,2,3'.split(',')[0] # [consider-using-str-partition]
get_last = '1,2,3'[::-1].split(',')[0] # [consider-using-str-partition]
@@ -15,27 +26,38 @@
get_mid = SEQ.split(',')[1] # This is okay
get_mid = SEQ.split(',')[-2] # This is okay
+
# Test with storing splits as an object
-split1 = SEQ.split(',')
-get_first = split1[0] # [consider-using-str-partition]
-get_last = split1[-1] # [consider-using-str-partition]
-get_last = split1[len(split1)-1] # [consider-using-str-partition]
+split1 = SEQ.split(',') # [consider-using-str-partition]
+get_first = split1[0]
+get_last = split1[-1]
+
+split2 = SEQ.rsplit(',') # [consider-using-str-partition]
+get_first = split2[0]
+get_last = split2[-1]
-split2 = SEQ.rsplit(',')
-get_first = split2[0] # [consider-using-str-partition]
-get_last = split2[-1] # [consider-using-str-partition]
-get_last = split2[len(split2)-1] # [consider-using-str-partition]
+split3 = SEQ.rsplit(',') # This is fine, split3 indexed with [1]
+get_first = split3[0]
+get_middle = split3[1]
+get_last = split3[-1]
-print(split1[0], split1[-1]) # [consider-using-str-partition, consider-using-str-partition]
+split1, split2 = SEQ.split(','), SEQ.rsplit(',') # [consider-using-str-partition, consider-using-str-partition]
+get_first = split1[0]
+get_last = split1[-1]
+get_first = split2[0]
+get_last = split2[-1]
+
+split1, split2 = SEQ.split(','), SEQ.rsplit(',') # This is fine, both splits are utilized
+get_first = split1[0]
+get_last = split1[-1]
+get_first = split2[0]
+get_last = split2[-1]
+split1[1] = split2[1]
-# Test when running len on another iterable
-some_list = []
-get_last = split1[len(split2)-1] # Should not throw an error
# Tests on class attributes
class Foo():
class_str = '1,2,3'
-
def __init__(self):
self.my_str = '1,2,3'
@@ -51,10 +73,10 @@ def get_string(self) -> str:
get_mid = Foo.class_str.split(',')[1]
get_mid = Foo.class_str.split(',')[-2]
-split2 = Foo.class_str.split(',')
-get_first = split2[0] # [consider-using-str-partition]
-get_last = split2[-1] # [consider-using-str-partition]
-get_last = split2[len(split2)-1] # [consider-using-str-partition]
+split2 = Foo.class_str.split(',') # [consider-using-str-partition]
+get_first = split2[0]
+get_last = split2[-1]
+
# Test with accessors
bar = Foo()
@@ -64,6 +86,7 @@ def get_string(self) -> str:
get_mid = bar.get_string().split(',')[1]
get_mid = bar.get_string().split(',')[-2]
+
# Test with iterating over strings
list_of_strs = ["a", "b", "c", "d", "e", "f"]
for s in list_of_strs:
@@ -71,6 +94,7 @@ def get_string(self) -> str:
print(s.split(" ")[-1]) # [consider-using-str-partition]
print(s.split(" ")[-2])
+
# Test warning messages (matching and replacing .split / .rsplit)
class Bar():
split = '1,2,3'
@@ -79,9 +103,3 @@ class Bar():
print(Bar.split.split(",")[-1]) # [consider-using-str-partition] (Error message should show Bar.split.rpartition)
print(Bar.split.rsplit(",")[0]) # [consider-using-str-partition] (Error message should show Bar.split.partition)
print(Bar.split.rsplit(",")[-1]) # [consider-using-str-partition] (Error message should show Bar.split.rpartition)
-
-# Test with user-defined len function
-def len(x: Iterable[Any]) -> str:
- return f"Hello, world! {x[2]}"
-
-get_last = split2[len(split2)-1] # This won't throw the warning as the len function has been redefined
diff --git a/tests/functional/c/consider/consider_using_str_partition.txt b/tests/functional/c/consider/consider_using_str_partition.txt
index 25e400b294..3d176b4ca7 100644
--- a/tests/functional/c/consider/consider_using_str_partition.txt
+++ b/tests/functional/c/consider/consider_using_str_partition.txt
@@ -1,29 +1,25 @@
-consider-using-str-partition:6:12::Consider using '1,2,3'.partition(',')[0] instead
-consider-using-str-partition:7:11::"Consider using '1,2,3'[::-1].partition(',')[0] instead"
-consider-using-str-partition:10:12::Consider using SEQ.partition(',')[0] instead
-consider-using-str-partition:11:11::Consider using SEQ.rpartition(',')[-1] instead
-consider-using-str-partition:12:12::Consider using SEQ.partition(',')[0] instead
-consider-using-str-partition:13:11::Consider using SEQ.rpartition(',')[-1] instead
-consider-using-str-partition:20:12::Consider using SEQ.partition(',')[0] instead
-consider-using-str-partition:21:11::Consider using SEQ.rpartition(',')[-1] instead
-consider-using-str-partition:22:11::Consider using SEQ.rpartition(',')[-1] instead
-consider-using-str-partition:25:12::Consider using SEQ.partition(',')[0] instead
-consider-using-str-partition:26:11::Consider using SEQ.rpartition(',')[-1] instead
-consider-using-str-partition:27:11::Consider using SEQ.rpartition(',')[-1] instead
-consider-using-str-partition:29:6::Consider using SEQ.partition(',')[0] instead
-consider-using-str-partition:29:17::Consider using SEQ.rpartition(',')[-1] instead
-consider-using-str-partition:46:12::Consider using Foo.class_str.partition(',')[0] instead
-consider-using-str-partition:47:11::Consider using Foo.class_str.rpartition(',')[-1] instead
-consider-using-str-partition:48:12::Consider using Foo.class_str.partition(',')[0] instead
-consider-using-str-partition:49:11::Consider using Foo.class_str.rpartition(',')[-1] instead
-consider-using-str-partition:55:12::Consider using Foo.class_str.partition(',')[0] instead
-consider-using-str-partition:56:11::Consider using Foo.class_str.rpartition(',')[-1] instead
-consider-using-str-partition:57:11::Consider using Foo.class_str.rpartition(',')[-1] instead
-consider-using-str-partition:61:12::Consider using bar.get_string().partition(',')[0] instead
-consider-using-str-partition:62:11::Consider using bar.get_string().rpartition(',')[-1] instead
-consider-using-str-partition:70:10::Consider using s.partition(' ')[0] instead
-consider-using-str-partition:71:10::Consider using s.rpartition(' ')[-1] instead
-consider-using-str-partition:78:6::Consider using Bar.split.partition(',')[0] instead
-consider-using-str-partition:79:6::Consider using Bar.split.rpartition(',')[-1] instead
-consider-using-str-partition:80:6::Consider using Bar.split.partition(',')[0] instead
-consider-using-str-partition:81:6::Consider using Bar.split.rpartition(',')[-1] instead
+consider-using-str-partition:6:12::Consider using '1,2,3'.partition(',') instead
+consider-using-str-partition:7:12::"Consider using '1,2,3'[::-1].partition(',') instead"
+consider-using-str-partition:17:12::Consider using '1,2,3'.partition(',') instead
+consider-using-str-partition:18:11::"Consider using '1,2,3'[::-1].partition(',') instead"
+consider-using-str-partition:21:12::Consider using SEQ.partition(',') instead
+consider-using-str-partition:22:11::Consider using SEQ.rpartition(',') instead
+consider-using-str-partition:23:12::Consider using SEQ.partition(',') instead
+consider-using-str-partition:24:11::Consider using SEQ.rpartition(',') instead
+consider-using-str-partition:31:9::Consider using SEQ.partition(',') to extract the first element of a .split() and SEQ.rpartition(',') to extract the last element of a .split() instead
+consider-using-str-partition:35:9::Consider using SEQ.partition(',') to extract the first element of a .split() and SEQ.rpartition(',') to extract the last element of a .split() instead
+consider-using-str-partition:44:17::Consider using SEQ.partition(',') to extract the first element of a .split() and SEQ.rpartition(',') to extract the last element of a .split() instead
+consider-using-str-partition:44:33::Consider using SEQ.partition(',') to extract the first element of a .split() and SEQ.rpartition(',') to extract the last element of a .split() instead
+consider-using-str-partition:68:12::Consider using Foo.class_str.partition(',') instead
+consider-using-str-partition:69:11::Consider using Foo.class_str.rpartition(',') instead
+consider-using-str-partition:70:12::Consider using Foo.class_str.partition(',') instead
+consider-using-str-partition:71:11::Consider using Foo.class_str.rpartition(',') instead
+consider-using-str-partition:76:9::Consider using Foo.class_str.partition(',') to extract the first element of a .split() and Foo.class_str.rpartition(',') to extract the last element of a .split() instead
+consider-using-str-partition:83:12::Consider using bar.get_string().partition(',') instead
+consider-using-str-partition:84:11::Consider using bar.get_string().rpartition(',') instead
+consider-using-str-partition:93:10::Consider using s.partition(' ') instead
+consider-using-str-partition:94:10::Consider using s.rpartition(' ') instead
+consider-using-str-partition:102:6::Consider using Bar.split.partition(',') instead
+consider-using-str-partition:103:6::Consider using Bar.split.rpartition(',') instead
+consider-using-str-partition:104:6::Consider using Bar.split.partition(',') instead
+consider-using-str-partition:105:6::Consider using Bar.split.rpartition(',') instead