From 605afdb720afee682b1d13e128da632e7ea3c316 Mon Sep 17 00:00:00 2001
From: yushao2 <36848472+yushao2@users.noreply.github.com>
Date: Sat, 22 May 2021 22:01:52 +0800
Subject: [PATCH] Implemented new check use-maxsplit-arg (#4469)

Co-authored-by: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Co-authored-by: Marc Mueller <30130371+cdce8p@users.noreply.github.com>
---
 ChangeLog                                     |  5 ++
 doc/whatsnew/2.9.rst                          |  4 +
 .../refactoring/recommendation_checker.py     | 53 +++++++++++-
 pylint/checkers/utils.py                      | 26 ++++++
 pylint/testutils/lint_module_test.py          |  5 +-
 tests/functional/u/use/use_maxsplit_arg.py    | 84 +++++++++++++++++++
 tests/functional/u/use/use_maxsplit_arg.txt   | 21 +++++
 7 files changed, 195 insertions(+), 3 deletions(-)
 create mode 100644 tests/functional/u/use/use_maxsplit_arg.py
 create mode 100644 tests/functional/u/use/use_maxsplit_arg.txt

diff --git a/ChangeLog b/ChangeLog
index a8a7323e94..3b712b88d5 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -51,6 +51,11 @@ modules are added.
 
   Closes #4412
 
+* New checker ``use-maxsplit-arg``. Emitted either when accessing only the first or last
+  element of ``str.split()``.
+
+  Closes #4440
+
 * Add ignore_signatures to duplicate code checker
 
   Closes #3619
diff --git a/doc/whatsnew/2.9.rst b/doc/whatsnew/2.9.rst
index 14669ca27c..4e6249a9a4 100644
--- a/doc/whatsnew/2.9.rst
+++ b/doc/whatsnew/2.9.rst
@@ -15,8 +15,12 @@ New checkers
 * ``consider-using-dict-items``: Emitted when iterating over dictionary keys and then
   indexing the same dictionary with the key within loop body.
 
+* ``use-maxsplit-arg``: Emitted either when accessing only the first or last
+  element of ``str.split()``.
+
 * An ``ignore_signatures`` option has been added to the similarity checker. It will permits  to reduce false positives when multiple functions have the same parameters.
 
+
 Other Changes
 =============
 
diff --git a/pylint/checkers/refactoring/recommendation_checker.py b/pylint/checkers/refactoring/recommendation_checker.py
index 50c5097e1a..2678904888 100644
--- a/pylint/checkers/refactoring/recommendation_checker.py
+++ b/pylint/checkers/refactoring/recommendation_checker.py
@@ -35,6 +35,14 @@ class RecommendationChecker(checkers.BaseChecker):
             "Both the key and value can be accessed by iterating using the .items() "
             "method of the dictionary instead.",
         ),
+        "C0207": (
+            "Use %s instead",
+            "use-maxsplit-arg",
+            "Emitted when accessing only the first or last element of str.split(). "
+            "The first and last element can be accessed by using "
+            "str.split(sep, maxsplit=1)[0] or str.rsplit(sep, maxsplit=1)[-1] "
+            "instead.",
+        ),
     }
 
     @staticmethod
@@ -44,8 +52,12 @@ def _is_builtin(node, function):
             return False
         return utils.is_builtin_object(inferred) and inferred.name == function
 
-    @utils.check_messages("consider-iterating-dictionary")
-    def visit_call(self, node):
+    @utils.check_messages("consider-iterating-dictionary", "use-maxsplit-arg")
+    def visit_call(self, node: astroid.Call) -> None:
+        self._check_consider_iterating_dictionary(node)
+        self._check_use_maxsplit_arg(node)
+
+    def _check_consider_iterating_dictionary(self, node: astroid.Call) -> None:
         if not isinstance(node.func, astroid.Attribute):
             return
         if node.func.attrname != "keys":
@@ -62,6 +74,43 @@ def visit_call(self, node):
         if isinstance(node.parent, (astroid.For, astroid.Comprehension)):
             self.add_message("consider-iterating-dictionary", node=node)
 
+    def _check_use_maxsplit_arg(self, node: astroid.Call) -> None:
+        """Add message when accessing first or last elements of a str.split() or str.rsplit()."""
+
+        # Check if call is split() or rsplit()
+        if (
+            isinstance(node.func, astroid.Attribute)
+            and node.func.attrname in ("split", "rsplit")
+            and isinstance(utils.safe_infer(node.func), astroid.BoundMethod)
+        ):
+            try:
+                utils.get_argument_from_call(node, 0, "sep")
+            except utils.NoSuchArgumentError:
+                return
+
+            try:
+                # Ignore if maxsplit arg has been set
+                utils.get_argument_from_call(node, 1, "maxsplit")
+                return
+            except utils.NoSuchArgumentError:
+                pass
+
+            if isinstance(node.parent, astroid.Subscript):
+                try:
+                    subscript_value = utils.get_subscript_const_value(node.parent).value
+                except utils.InferredTypeError:
+                    return
+
+                if subscript_value in (-1, 0):
+                    fn_name = node.func.attrname
+                    new_fn = "rsplit" if subscript_value == -1 else "split"
+                    new_name = (
+                        node.func.as_string().rsplit(fn_name, maxsplit=1)[0]
+                        + new_fn
+                        + f"({node.args[0].as_string()}, maxsplit=1)[{subscript_value}]"
+                    )
+                    self.add_message("use-maxsplit-arg", node=node, args=(new_name,))
+
     @utils.check_messages("consider-using-enumerate", "consider-using-dict-items")
     def visit_for(self, node: astroid.For) -> None:
         self._check_consider_using_enumerate(node)
diff --git a/pylint/checkers/utils.py b/pylint/checkers/utils.py
index 8cf97d8074..4581c7849d 100644
--- a/pylint/checkers/utils.py
+++ b/pylint/checkers/utils.py
@@ -272,6 +272,10 @@ class NoSuchArgumentError(Exception):
     pass
 
 
+class InferredTypeError(Exception):
+    pass
+
+
 def is_inside_except(node):
     """Returns true if node is inside the name of an except handler."""
     current = node
@@ -1522,3 +1526,25 @@ def get_iterating_dictionary_name(
         return node.iter.as_string()
 
     return None
+
+
+def get_subscript_const_value(node: astroid.Subscript) -> astroid.Const:
+    """
+    Returns the value (subscript.slice) of a Subscript node,
+    also supports python <3.9 windows where node.slice might be an Index
+    node
+
+    :param node: Subscript Node to extract value from
+    :returns: Const Node containing subscript value
+    :raises InferredTypeError: if the subscript node cannot be inferred as a Const
+    """
+    value = node.slice
+    if isinstance(value, astroid.Index):
+        value = value.value
+    inferred = safe_infer(value)
+    if not isinstance(inferred, astroid.Const):
+        raise InferredTypeError(
+            "Subscript.slice cannot be inferred as an astroid.Const"
+        )
+
+    return inferred
diff --git a/pylint/testutils/lint_module_test.py b/pylint/testutils/lint_module_test.py
index e3580946d2..a765381b6b 100644
--- a/pylint/testutils/lint_module_test.py
+++ b/pylint/testutils/lint_module_test.py
@@ -52,7 +52,10 @@ def __init__(self, test_file: FunctionalTestFile, config: Optional[Config] = Non
 
     def setUp(self):
         if self._should_be_skipped_due_to_version():
-            pytest.skip("Test cannot run with Python %s." % sys.version.split(" ")[0])
+            pytest.skip(
+                "Test cannot run with Python %s."
+                % sys.version.split(" ", maxsplit=1)[0]
+            )
         missing = []
         for requirement in self._test_file.options["requires"]:
             try:
diff --git a/tests/functional/u/use/use_maxsplit_arg.py b/tests/functional/u/use/use_maxsplit_arg.py
new file mode 100644
index 0000000000..1b998e5e92
--- /dev/null
+++ b/tests/functional/u/use/use_maxsplit_arg.py
@@ -0,0 +1,84 @@
+"""Emit a message for accessing first/last element of string.split"""
+# pylint: disable=line-too-long,missing-docstring,unsubscriptable-object,too-few-public-methods,invalid-name,redefined-builtin
+
+# Test subscripting .split()
+get_first = '1,2,3'.split(',')[0]  # [use-maxsplit-arg]
+get_last = '1,2,3'[::-1].split(',')[0]  # [use-maxsplit-arg]
+
+SEQ = '1,2,3'
+get_first = SEQ.split(',')[0]  # [use-maxsplit-arg]
+get_last = SEQ.split(',')[-1]  # [use-maxsplit-arg]
+get_first = SEQ.rsplit(',')[0]  # [use-maxsplit-arg]
+get_last = SEQ.rsplit(',')[-1]  # [use-maxsplit-arg]
+
+# Don't suggest maxsplit=1 if not accessing the first or last element
+get_mid = SEQ.split(',')[1]
+get_mid = SEQ.split(',')[-2]
+
+
+# Test varying maxsplit argument -- all these will be okay
+# ## str.split() tests
+good_split = '1,2,3'.split(sep=',', maxsplit=1)[-1]
+good_split = '1,2,3'.split(sep=',', maxsplit=1)[0]
+good_split = '1,2,3'.split(sep=',', maxsplit=2)[-1]
+good_split = '1,2,3'.split(sep=',', maxsplit=2)[0]
+good_split = '1,2,3'.split(sep=',', maxsplit=2)[1]
+
+# ## str.rsplit() tests
+good_split = '1,2,3'.rsplit(sep=',', maxsplit=1)[-1]
+good_split = '1,2,3'.rsplit(sep=',', maxsplit=1)[0]
+good_split = '1,2,3'.rsplit(sep=',', maxsplit=2)[-1]
+good_split = '1,2,3'.rsplit(sep=',', maxsplit=2)[0]
+good_split = '1,2,3'.rsplit(sep=',', maxsplit=2)[1]
+
+
+# Tests on class attributes
+class Foo():
+    class_str = '1,2,3'
+    def __init__(self):
+        self.my_str = '1,2,3'
+
+    def get_string(self) -> str:
+        return self.my_str
+
+# Class attributes
+get_first = Foo.class_str.split(',')[0]  # [use-maxsplit-arg]
+get_last = Foo.class_str.split(',')[-1]  # [use-maxsplit-arg]
+get_first = Foo.class_str.rsplit(',')[0]  # [use-maxsplit-arg]
+get_last = Foo.class_str.rsplit(',')[-1]  # [use-maxsplit-arg]
+
+get_mid = Foo.class_str.split(',')[1]
+get_mid = Foo.class_str.split(',')[-2]
+
+
+# Test with accessors
+bar = Foo()
+get_first = bar.get_string().split(',')[0]  # [use-maxsplit-arg]
+get_last = bar.get_string().split(',')[-1]  # [use-maxsplit-arg]
+
+get_mid = bar.get_string().split(',')[1]
+get_mid = bar.get_string().split(',')[-2]
+
+
+# Test with iterating over strings
+list_of_strs = ["a", "b", "c", "d", "e", "f"]
+for s in list_of_strs:
+    print(s.split(" ")[0])  # [use-maxsplit-arg]
+    print(s.split(" ")[-1])  # [use-maxsplit-arg]
+    print(s.split(" ")[-2])
+
+
+# Test warning messages (matching and replacing .split / .rsplit)
+class Bar():
+    split = '1,2,3'
+
+# Error message should show Bar.split.split(',', maxsplit=1) or Bar.split.rsplit(',', maxsplit=1)
+print(Bar.split.split(",")[0])  # [use-maxsplit-arg]
+print(Bar.split.split(",")[-1])  # [use-maxsplit-arg]
+print(Bar.split.rsplit(",")[0])  # [use-maxsplit-arg]
+print(Bar.split.rsplit(",")[-1])  # [use-maxsplit-arg]
+
+# Special cases
+a = "1,2,3".split('\n')[0]  # [use-maxsplit-arg]
+a = "1,2,3".split('split')[-1]  # [use-maxsplit-arg]
+a = "1,2,3".rsplit('rsplit')[0]  # [use-maxsplit-arg]
diff --git a/tests/functional/u/use/use_maxsplit_arg.txt b/tests/functional/u/use/use_maxsplit_arg.txt
new file mode 100644
index 0000000000..8137374aa9
--- /dev/null
+++ b/tests/functional/u/use/use_maxsplit_arg.txt
@@ -0,0 +1,21 @@
+use-maxsplit-arg:5:12::Use '1,2,3'.split(',', maxsplit=1)[0] instead
+use-maxsplit-arg:6:11::"Use '1,2,3'[::-1].split(',', maxsplit=1)[0] instead"
+use-maxsplit-arg:9:12::Use SEQ.split(',', maxsplit=1)[0] instead
+use-maxsplit-arg:10:11::Use SEQ.rsplit(',', maxsplit=1)[-1] instead
+use-maxsplit-arg:11:12::Use SEQ.split(',', maxsplit=1)[0] instead
+use-maxsplit-arg:12:11::Use SEQ.rsplit(',', maxsplit=1)[-1] instead
+use-maxsplit-arg:45:12::Use Foo.class_str.split(',', maxsplit=1)[0] instead
+use-maxsplit-arg:46:11::Use Foo.class_str.rsplit(',', maxsplit=1)[-1] instead
+use-maxsplit-arg:47:12::Use Foo.class_str.split(',', maxsplit=1)[0] instead
+use-maxsplit-arg:48:11::Use Foo.class_str.rsplit(',', maxsplit=1)[-1] instead
+use-maxsplit-arg:56:12::Use bar.get_string().split(',', maxsplit=1)[0] instead
+use-maxsplit-arg:57:11::Use bar.get_string().rsplit(',', maxsplit=1)[-1] instead
+use-maxsplit-arg:66:10::Use s.split(' ', maxsplit=1)[0] instead
+use-maxsplit-arg:67:10::Use s.rsplit(' ', maxsplit=1)[-1] instead
+use-maxsplit-arg:76:6::Use Bar.split.split(',', maxsplit=1)[0] instead
+use-maxsplit-arg:77:6::Use Bar.split.rsplit(',', maxsplit=1)[-1] instead
+use-maxsplit-arg:78:6::Use Bar.split.split(',', maxsplit=1)[0] instead
+use-maxsplit-arg:79:6::Use Bar.split.rsplit(',', maxsplit=1)[-1] instead
+use-maxsplit-arg:82:4::Use '1,2,3'.split('\n', maxsplit=1)[0] instead
+use-maxsplit-arg:83:4::Use '1,2,3'.rsplit('split', maxsplit=1)[-1] instead
+use-maxsplit-arg:84:4::Use '1,2,3'.split('rsplit', maxsplit=1)[0] instead