From 445e79a735cc6e59c78289c3525da29ff99e01ef Mon Sep 17 00:00:00 2001
From: Pang Yu Shao
Date: Wed, 12 May 2021 14:50:53 +0800
Subject: [PATCH] Implemented new check consider-using-str-partition
---
ChangeLog | 5 ++
doc/whatsnew/2.9.rst | 3 +
.../refactoring/recommendation_checker.py | 72 +++++++++++++++++++
.../consider/consider_using_str_partition.py | 68 ++++++++++++++++++
.../consider/consider_using_str_partition.txt | 21 ++++++
5 files changed, 169 insertions(+)
create mode 100644 tests/functional/c/consider/consider_using_str_partition.py
create mode 100644 tests/functional/c/consider/consider_using_str_partition.txt
diff --git a/ChangeLog b/ChangeLog
index 82da849a032..cc0eb9cbed4 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -40,6 +40,11 @@ modules are added.
* Don't emit ``import-error`` if import guarded behind ``if sys.version_info >= (x, x)``
+* New checker ``consider-using-str-partition``. Emitted when only accessing the first or last
+ element of a str.split().
+
+ Closes #4440
+
What's New in Pylint 2.8.2?
===========================
diff --git a/doc/whatsnew/2.9.rst b/doc/whatsnew/2.9.rst
index 8f5bd43f2fa..25056380feb 100644
--- a/doc/whatsnew/2.9.rst
+++ b/doc/whatsnew/2.9.rst
@@ -15,6 +15,9 @@ New checkers
* ``consider-using-dict-items``: Emitted when iterating over dictionary keys and then
indexing the same dictionary with the key within loop body.
+* ``consider-using-str-partition``: Emitted when accessing only the first or last element
+ of a str.split().
+
Other Changes
=============
diff --git a/pylint/checkers/refactoring/recommendation_checker.py b/pylint/checkers/refactoring/recommendation_checker.py
index 6efc4190c4a..5540986f2dd 100644
--- a/pylint/checkers/refactoring/recommendation_checker.py
+++ b/pylint/checkers/refactoring/recommendation_checker.py
@@ -35,6 +35,14 @@ class RecommendationChecker(checkers.BaseChecker):
"Both the key and value can be accessed by iterating using the .items() "
"method of the dictionary instead.",
),
+ "C0207": (
+ "Consider using str.partition()",
+ "consider-using-str-partition",
+ "Emitted when accessing only the first or last element of a str.split(sep). "
+ "The first and last element can be accessed by using str.partition(sep)[0] "
+ "or str.rpartition(sep)[-1] instead, which is less computationally "
+ "expensive.",
+ ),
}
@staticmethod
@@ -210,3 +218,67 @@ def visit_comprehension(self, node: astroid.Comprehension) -> None:
self.add_message("consider-using-dict-items", node=node)
return
+
+ @utils.check_messages("consider-using-str-partition")
+ def visit_subscript(self, node: astroid.Subscript) -> None:
+ """Add message when accessing first or last elements of a str.split()."""
+ assignment = None
+ subscripted_object = node.value
+ if isinstance(subscripted_object, astroid.Name):
+ # Check assignment of this subscripted object
+ assignment_lookup_results = node.value.lookup(node.value.name)
+ if (
+ len(assignment_lookup_results) == 0
+ or len(assignment_lookup_results[-1]) == 0
+ ):
+ return
+ assign_name = assignment_lookup_results[-1][-1]
+ if not isinstance(assign_name, astroid.AssignName):
+ return
+ assignment = assign_name.parent
+ if not isinstance(assignment, astroid.Assign):
+ return
+ # Set subscripted_object to the assignment RHS
+ subscripted_object = assignment.value
+
+ if isinstance(subscripted_object, astroid.Call):
+ # Check if call is .split() or .rsplit()
+ if isinstance(
+ subscripted_object.func, astroid.Attribute
+ ) and subscripted_object.func.attrname in ["split", "rsplit"]:
+ inferred = utils.safe_infer(subscripted_object.func)
+ if not isinstance(inferred, astroid.BoundMethod):
+ return
+
+ # Check if subscript is 1 or -1
+ value = node.slice
+ if isinstance(value, astroid.Index):
+ value = value.value
+
+ if isinstance(value, (astroid.UnaryOp, astroid.Const)):
+ const = utils.safe_infer(value)
+ const = cast(astroid.Const, const)
+ if const.value in [-1, 0]:
+ self.add_message("consider-using-str-partition", node=node)
+
+ # Check if subscript is len(split) - 1
+ if (
+ isinstance(value, astroid.BinOp)
+ and value.left.func.name == "len"
+ and value.op == "-"
+ and value.right.value == 1
+ and assignment is not None
+ ):
+ # Ensure that the len() is from builtins, not user re-defined
+ inferred_fn = utils.safe_infer(value.left.func)
+ if not (
+ isinstance(inferred_fn, astroid.FunctionDef)
+ and isinstance(inferred_fn.parent, astroid.Module)
+ and inferred_fn.parent.name == "builtins"
+ ):
+ return
+
+ # Further check for argument within len(), and compare that to object being split
+ arg_within_len = value.left.args[0].name
+ if arg_within_len == node.value.name:
+ self.add_message("consider-using-str-partition", node=node)
diff --git a/tests/functional/c/consider/consider_using_str_partition.py b/tests/functional/c/consider/consider_using_str_partition.py
new file mode 100644
index 00000000000..935f1a39387
--- /dev/null
+++ b/tests/functional/c/consider/consider_using_str_partition.py
@@ -0,0 +1,68 @@
+"""Emit a message for iteration through dict keys and subscripting dict with key."""
+# pylint: disable=line-too-long,missing-docstring,unsubscriptable-object,too-few-public-methods,invalid-name,redefined-builtin
+from collections.abc import Iterable
+from typing import Any
+
+SEQ = '1,2,3'
+get_first = SEQ.split(',')[0] # [consider-using-str-partition]
+get_last = SEQ.split(',')[-1] # [consider-using-str-partition]
+get_first = SEQ.rsplit(',')[0] # [consider-using-str-partition]
+get_last = SEQ.rsplit(',')[-1] # [consider-using-str-partition]
+
+get_mid = SEQ.split(',')[1] # This is okay
+get_mid = SEQ.split(',')[-2] # This is okay
+
+# Test with storing splits as an object
+split1 = SEQ.split(',')
+get_first = split1[0] # [consider-using-str-partition]
+get_last = split1[-1] # [consider-using-str-partition]
+get_last = split1[len(split1)-1] # [consider-using-str-partition]
+
+split2 = SEQ.rsplit(',')
+get_first = split2[0] # [consider-using-str-partition]
+get_last = split2[-1] # [consider-using-str-partition]
+get_last = split2[len(split2)-1] # [consider-using-str-partition]
+
+print(split1[0], split1[-1]) # [consider-using-str-partition, consider-using-str-partition]
+
+# Test when running len on another iterable
+some_list = []
+get_last = split1[len(split2)-1] # Should not throw an error
+
+# Tests on class attributes
+class Foo():
+ class_str = '1,2,3'
+
+ def __init__(self):
+ self.my_str = '1,2,3'
+
+ def get_string(self) -> str:
+ return self.my_str
+
+# Class attributes
+get_first = Foo.class_str.split(',')[0] # [consider-using-str-partition]
+get_last = Foo.class_str.split(',')[-1] # [consider-using-str-partition]
+get_first = Foo.class_str.rsplit(',')[0] # [consider-using-str-partition]
+get_last = Foo.class_str.rsplit(',')[-1] # [consider-using-str-partition]
+
+get_mid = Foo.class_str.split(',')[1]
+get_mid = Foo.class_str.split(',')[-2]
+
+split2 = Foo.class_str.split(',')
+get_first = split2[0] # [consider-using-str-partition]
+get_last = split2[-1] # [consider-using-str-partition]
+get_last = split2[len(split2)-1] # [consider-using-str-partition]
+
+# Test with accessors
+bar = Foo()
+get_first = bar.get_string().split(',')[0] # [consider-using-str-partition]
+get_last = bar.get_string().split(',')[-1] # [consider-using-str-partition]
+
+get_mid = bar.get_string().split(',')[1]
+get_mid = bar.get_string().split(',')[-2]
+
+# Test with user-defined len function
+def len(x: Iterable[Any]) -> str:
+ return f"Hello, world! {x[2]}"
+
+get_last = split2[len(split2)-1] # This won't throw the warning as the len function has been redefined
diff --git a/tests/functional/c/consider/consider_using_str_partition.txt b/tests/functional/c/consider/consider_using_str_partition.txt
new file mode 100644
index 00000000000..ebc34c9f356
--- /dev/null
+++ b/tests/functional/c/consider/consider_using_str_partition.txt
@@ -0,0 +1,21 @@
+consider-using-str-partition:7:12::Consider using str.partition()
+consider-using-str-partition:8:11::Consider using str.partition()
+consider-using-str-partition:9:12::Consider using str.partition()
+consider-using-str-partition:10:11::Consider using str.partition()
+consider-using-str-partition:17:12::Consider using str.partition()
+consider-using-str-partition:18:11::Consider using str.partition()
+consider-using-str-partition:19:11::Consider using str.partition()
+consider-using-str-partition:22:12::Consider using str.partition()
+consider-using-str-partition:23:11::Consider using str.partition()
+consider-using-str-partition:24:11::Consider using str.partition()
+consider-using-str-partition:26:6::Consider using str.partition()
+consider-using-str-partition:26:17::Consider using str.partition()
+consider-using-str-partition:43:12::Consider using str.partition()
+consider-using-str-partition:44:11::Consider using str.partition()
+consider-using-str-partition:45:12::Consider using str.partition()
+consider-using-str-partition:46:11::Consider using str.partition()
+consider-using-str-partition:52:12::Consider using str.partition()
+consider-using-str-partition:53:11::Consider using str.partition()
+consider-using-str-partition:54:11::Consider using str.partition()
+consider-using-str-partition:58:12::Consider using str.partition()
+consider-using-str-partition:59:11::Consider using str.partition()