Skip to content

Commit

Permalink
Implemented new check use-maxsplit-arg (#4469)
Browse files Browse the repository at this point in the history
Co-authored-by: Pierre Sassoulas <pierre.sassoulas@gmail.com>
Co-authored-by: Marc Mueller <30130371+cdce8p@users.noreply.github.com>
  • Loading branch information
3 people committed May 22, 2021
1 parent 0978e09 commit 605afdb
Show file tree
Hide file tree
Showing 7 changed files with 195 additions and 3 deletions.
5 changes: 5 additions & 0 deletions ChangeLog
Expand Up @@ -51,6 +51,11 @@ modules are added.

Closes #4412

* New checker ``use-maxsplit-arg``. Emitted either when accessing only the first or last
element of ``str.split()``.

Closes #4440

* Add ignore_signatures to duplicate code checker

Closes #3619
Expand Down
4 changes: 4 additions & 0 deletions doc/whatsnew/2.9.rst
Expand Up @@ -15,8 +15,12 @@ New checkers
* ``consider-using-dict-items``: Emitted when iterating over dictionary keys and then
indexing the same dictionary with the key within loop body.

* ``use-maxsplit-arg``: Emitted either when accessing only the first or last
element of ``str.split()``.

* An ``ignore_signatures`` option has been added to the similarity checker. It will permits to reduce false positives when multiple functions have the same parameters.


Other Changes
=============

Expand Down
53 changes: 51 additions & 2 deletions pylint/checkers/refactoring/recommendation_checker.py
Expand Up @@ -35,6 +35,14 @@ class RecommendationChecker(checkers.BaseChecker):
"Both the key and value can be accessed by iterating using the .items() "
"method of the dictionary instead.",
),
"C0207": (
"Use %s instead",
"use-maxsplit-arg",
"Emitted when accessing only the first or last element of str.split(). "
"The first and last element can be accessed by using "
"str.split(sep, maxsplit=1)[0] or str.rsplit(sep, maxsplit=1)[-1] "
"instead.",
),
}

@staticmethod
Expand All @@ -44,8 +52,12 @@ def _is_builtin(node, function):
return False
return utils.is_builtin_object(inferred) and inferred.name == function

@utils.check_messages("consider-iterating-dictionary")
def visit_call(self, node):
@utils.check_messages("consider-iterating-dictionary", "use-maxsplit-arg")
def visit_call(self, node: astroid.Call) -> None:
self._check_consider_iterating_dictionary(node)
self._check_use_maxsplit_arg(node)

def _check_consider_iterating_dictionary(self, node: astroid.Call) -> None:
if not isinstance(node.func, astroid.Attribute):
return
if node.func.attrname != "keys":
Expand All @@ -62,6 +74,43 @@ def visit_call(self, node):
if isinstance(node.parent, (astroid.For, astroid.Comprehension)):
self.add_message("consider-iterating-dictionary", node=node)

def _check_use_maxsplit_arg(self, node: astroid.Call) -> None:
"""Add message when accessing first or last elements of a str.split() or str.rsplit()."""

# Check if call is split() or rsplit()
if (
isinstance(node.func, astroid.Attribute)
and node.func.attrname in ("split", "rsplit")
and isinstance(utils.safe_infer(node.func), astroid.BoundMethod)
):
try:
utils.get_argument_from_call(node, 0, "sep")
except utils.NoSuchArgumentError:
return

try:
# Ignore if maxsplit arg has been set
utils.get_argument_from_call(node, 1, "maxsplit")
return
except utils.NoSuchArgumentError:
pass

if isinstance(node.parent, astroid.Subscript):
try:
subscript_value = utils.get_subscript_const_value(node.parent).value
except utils.InferredTypeError:
return

if subscript_value in (-1, 0):
fn_name = node.func.attrname
new_fn = "rsplit" if subscript_value == -1 else "split"
new_name = (
node.func.as_string().rsplit(fn_name, maxsplit=1)[0]
+ new_fn
+ f"({node.args[0].as_string()}, maxsplit=1)[{subscript_value}]"
)
self.add_message("use-maxsplit-arg", node=node, args=(new_name,))

@utils.check_messages("consider-using-enumerate", "consider-using-dict-items")
def visit_for(self, node: astroid.For) -> None:
self._check_consider_using_enumerate(node)
Expand Down
26 changes: 26 additions & 0 deletions pylint/checkers/utils.py
Expand Up @@ -272,6 +272,10 @@ class NoSuchArgumentError(Exception):
pass


class InferredTypeError(Exception):
pass


def is_inside_except(node):
"""Returns true if node is inside the name of an except handler."""
current = node
Expand Down Expand Up @@ -1522,3 +1526,25 @@ def get_iterating_dictionary_name(
return node.iter.as_string()

return None


def get_subscript_const_value(node: astroid.Subscript) -> astroid.Const:
"""
Returns the value (subscript.slice) of a Subscript node,
also supports python <3.9 windows where node.slice might be an Index
node
:param node: Subscript Node to extract value from
:returns: Const Node containing subscript value
:raises InferredTypeError: if the subscript node cannot be inferred as a Const
"""
value = node.slice
if isinstance(value, astroid.Index):
value = value.value
inferred = safe_infer(value)
if not isinstance(inferred, astroid.Const):
raise InferredTypeError(
"Subscript.slice cannot be inferred as an astroid.Const"
)

return inferred
5 changes: 4 additions & 1 deletion pylint/testutils/lint_module_test.py
Expand Up @@ -52,7 +52,10 @@ def __init__(self, test_file: FunctionalTestFile, config: Optional[Config] = Non

def setUp(self):
if self._should_be_skipped_due_to_version():
pytest.skip("Test cannot run with Python %s." % sys.version.split(" ")[0])
pytest.skip(
"Test cannot run with Python %s."
% sys.version.split(" ", maxsplit=1)[0]
)
missing = []
for requirement in self._test_file.options["requires"]:
try:
Expand Down
84 changes: 84 additions & 0 deletions tests/functional/u/use/use_maxsplit_arg.py
@@ -0,0 +1,84 @@
"""Emit a message for accessing first/last element of string.split"""
# pylint: disable=line-too-long,missing-docstring,unsubscriptable-object,too-few-public-methods,invalid-name,redefined-builtin

# Test subscripting .split()
get_first = '1,2,3'.split(',')[0] # [use-maxsplit-arg]
get_last = '1,2,3'[::-1].split(',')[0] # [use-maxsplit-arg]

SEQ = '1,2,3'
get_first = SEQ.split(',')[0] # [use-maxsplit-arg]
get_last = SEQ.split(',')[-1] # [use-maxsplit-arg]
get_first = SEQ.rsplit(',')[0] # [use-maxsplit-arg]
get_last = SEQ.rsplit(',')[-1] # [use-maxsplit-arg]

# Don't suggest maxsplit=1 if not accessing the first or last element
get_mid = SEQ.split(',')[1]
get_mid = SEQ.split(',')[-2]


# Test varying maxsplit argument -- all these will be okay
# ## str.split() tests
good_split = '1,2,3'.split(sep=',', maxsplit=1)[-1]
good_split = '1,2,3'.split(sep=',', maxsplit=1)[0]
good_split = '1,2,3'.split(sep=',', maxsplit=2)[-1]
good_split = '1,2,3'.split(sep=',', maxsplit=2)[0]
good_split = '1,2,3'.split(sep=',', maxsplit=2)[1]

# ## str.rsplit() tests
good_split = '1,2,3'.rsplit(sep=',', maxsplit=1)[-1]
good_split = '1,2,3'.rsplit(sep=',', maxsplit=1)[0]
good_split = '1,2,3'.rsplit(sep=',', maxsplit=2)[-1]
good_split = '1,2,3'.rsplit(sep=',', maxsplit=2)[0]
good_split = '1,2,3'.rsplit(sep=',', maxsplit=2)[1]


# Tests on class attributes
class Foo():
class_str = '1,2,3'
def __init__(self):
self.my_str = '1,2,3'

def get_string(self) -> str:
return self.my_str

# Class attributes
get_first = Foo.class_str.split(',')[0] # [use-maxsplit-arg]
get_last = Foo.class_str.split(',')[-1] # [use-maxsplit-arg]
get_first = Foo.class_str.rsplit(',')[0] # [use-maxsplit-arg]
get_last = Foo.class_str.rsplit(',')[-1] # [use-maxsplit-arg]

get_mid = Foo.class_str.split(',')[1]
get_mid = Foo.class_str.split(',')[-2]


# Test with accessors
bar = Foo()
get_first = bar.get_string().split(',')[0] # [use-maxsplit-arg]
get_last = bar.get_string().split(',')[-1] # [use-maxsplit-arg]

get_mid = bar.get_string().split(',')[1]
get_mid = bar.get_string().split(',')[-2]


# Test with iterating over strings
list_of_strs = ["a", "b", "c", "d", "e", "f"]
for s in list_of_strs:
print(s.split(" ")[0]) # [use-maxsplit-arg]
print(s.split(" ")[-1]) # [use-maxsplit-arg]
print(s.split(" ")[-2])


# Test warning messages (matching and replacing .split / .rsplit)
class Bar():
split = '1,2,3'

# Error message should show Bar.split.split(',', maxsplit=1) or Bar.split.rsplit(',', maxsplit=1)
print(Bar.split.split(",")[0]) # [use-maxsplit-arg]
print(Bar.split.split(",")[-1]) # [use-maxsplit-arg]
print(Bar.split.rsplit(",")[0]) # [use-maxsplit-arg]
print(Bar.split.rsplit(",")[-1]) # [use-maxsplit-arg]

# Special cases
a = "1,2,3".split('\n')[0] # [use-maxsplit-arg]
a = "1,2,3".split('split')[-1] # [use-maxsplit-arg]
a = "1,2,3".rsplit('rsplit')[0] # [use-maxsplit-arg]
21 changes: 21 additions & 0 deletions tests/functional/u/use/use_maxsplit_arg.txt
@@ -0,0 +1,21 @@
use-maxsplit-arg:5:12::Use '1,2,3'.split(',', maxsplit=1)[0] instead
use-maxsplit-arg:6:11::"Use '1,2,3'[::-1].split(',', maxsplit=1)[0] instead"
use-maxsplit-arg:9:12::Use SEQ.split(',', maxsplit=1)[0] instead
use-maxsplit-arg:10:11::Use SEQ.rsplit(',', maxsplit=1)[-1] instead
use-maxsplit-arg:11:12::Use SEQ.split(',', maxsplit=1)[0] instead
use-maxsplit-arg:12:11::Use SEQ.rsplit(',', maxsplit=1)[-1] instead
use-maxsplit-arg:45:12::Use Foo.class_str.split(',', maxsplit=1)[0] instead
use-maxsplit-arg:46:11::Use Foo.class_str.rsplit(',', maxsplit=1)[-1] instead
use-maxsplit-arg:47:12::Use Foo.class_str.split(',', maxsplit=1)[0] instead
use-maxsplit-arg:48:11::Use Foo.class_str.rsplit(',', maxsplit=1)[-1] instead
use-maxsplit-arg:56:12::Use bar.get_string().split(',', maxsplit=1)[0] instead
use-maxsplit-arg:57:11::Use bar.get_string().rsplit(',', maxsplit=1)[-1] instead
use-maxsplit-arg:66:10::Use s.split(' ', maxsplit=1)[0] instead
use-maxsplit-arg:67:10::Use s.rsplit(' ', maxsplit=1)[-1] instead
use-maxsplit-arg:76:6::Use Bar.split.split(',', maxsplit=1)[0] instead
use-maxsplit-arg:77:6::Use Bar.split.rsplit(',', maxsplit=1)[-1] instead
use-maxsplit-arg:78:6::Use Bar.split.split(',', maxsplit=1)[0] instead
use-maxsplit-arg:79:6::Use Bar.split.rsplit(',', maxsplit=1)[-1] instead
use-maxsplit-arg:82:4::Use '1,2,3'.split('\n', maxsplit=1)[0] instead
use-maxsplit-arg:83:4::Use '1,2,3'.rsplit('split', maxsplit=1)[-1] instead
use-maxsplit-arg:84:4::Use '1,2,3'.split('rsplit', maxsplit=1)[0] instead

0 comments on commit 605afdb

Please sign in to comment.