Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[pylint] redefined-loop-name (W2901) #3022

Merged
merged 12 commits into from
Feb 22, 2023
96 changes: 96 additions & 0 deletions crates/ruff/resources/test/fixtures/pylint/redefined_loop_name.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
# For -> for, variable reused
matthewlloyd marked this conversation as resolved.
Show resolved Hide resolved
for i in []:
for i in []: # error
pass

# With -> for, variable reused
with None as i:
for i in []: # error
pass

# For -> with, variable reused
for i in []:
with None as i: # error
pass

# With -> with, variable reused
with None as i:
with None as i: # error
pass

# For -> for, different variable
for i in []:
for j in []: # ok
pass

# With -> with, different variable
with None as i:
with None as j: # ok
pass

# For -> for -> for, doubly nested variable reuse
for i in []:
for j in []:
for i in []: # error
pass

# For -> for -> for -> for, doubly nested variable reuse x2
for i in []:
for j in []:
for i in []: # error
for j in []: # error
pass

# For -> assignment
for i in []:
i = 5 # error

# For -> augmented assignment
for i in []:
i += 5 # error

# For -> annotated assignment
for i in []:
i: int = 5 # error

# Async for -> for, variable reused
async for i in []:
for i in []: # error
pass

# For -> async for, variable reused
for i in []:
async for i in []: # error
pass

# For -> for, outer loop unpacks tuple
for i, j in enumerate([]):
for i in []: # error
pass

# For -> for, inner loop unpacks tuple
for i in []:
for i, j in enumerate([]): # error
pass

# For -> for, both loops unpack tuple
for (i, (j, k)) in []:
for i, j in enumerate([]): # two errors
pass

# For else -> for, variable reused in else
for i in []:
pass
else:
for i in []: # no error
pass

# For -> for, ignore dummy variables
for _ in []:
for _ in []: # no error
pass

# For -> for, outer loop unpacks with asterisk
for i, *j in []:
for j in []: # error
pass
2 changes: 1 addition & 1 deletion crates/ruff/src/ast/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,7 @@ pub fn binding_range(binding: &Binding, locator: &Locator) -> Range {
}

// Return the ranges of `Name` tokens within a specified node.
pub fn find_names<T>(located: &Located<T>, locator: &Locator) -> Vec<Range> {
pub fn find_names<T, U>(located: &Located<T, U>, locator: &Locator) -> Vec<Range> {
let contents = locator.slice(&Range::from_located(located));
lexer::make_tokenizer_located(contents, located.location)
.flatten()
Expand Down
2 changes: 1 addition & 1 deletion crates/ruff/src/ast/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ impl Range {
}
}

pub fn from_located<T>(located: &Located<T>) -> Self {
pub fn from_located<T, U>(located: &Located<T, U>) -> Self {
Range::new(located.location, located.end_location.unwrap())
}

Expand Down
6 changes: 6 additions & 0 deletions crates/ruff/src/checkers/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1639,6 +1639,9 @@ where
self.current_stmt_parent().map(Into::into),
);
}
if self.settings.rules.enabled(&Rule::RedefinedLoopName) {
pylint::rules::redefined_loop_name(self, &Node::Stmt(stmt));
}
}
StmtKind::While { body, orelse, .. } => {
if self.settings.rules.enabled(&Rule::FunctionUsesLoopVariable) {
Expand Down Expand Up @@ -1683,6 +1686,9 @@ where
if self.settings.rules.enabled(&Rule::UselessElseOnLoop) {
pylint::rules::useless_else_on_loop(self, stmt, body, orelse);
}
if self.settings.rules.enabled(&Rule::RedefinedLoopName) {
pylint::rules::redefined_loop_name(self, &Node::Stmt(stmt));
}
if matches!(stmt.node, StmtKind::For { .. }) {
if self.settings.rules.enabled(&Rule::ReimplementedBuiltin) {
flake8_simplify::rules::convert_for_loop_to_any_all(
Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<Rule> {
(Pylint, "R0913") => Rule::TooManyArguments,
(Pylint, "R0912") => Rule::TooManyBranches,
(Pylint, "R0915") => Rule::TooManyStatements,
(Pylint, "W2901") => Rule::RedefinedLoopName,

// flake8-builtins
(Flake8Builtins, "001") => Rule::BuiltinVariableShadowing,
Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ ruff_macros::register_rules!(
rules::pylint::rules::TooManyArguments,
rules::pylint::rules::TooManyBranches,
rules::pylint::rules::TooManyStatements,
rules::pylint::rules::RedefinedLoopName,
// flake8-builtins
rules::flake8_builtins::rules::BuiltinVariableShadowing,
rules::flake8_builtins::rules::BuiltinArgumentShadowing,
Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/rules/pylint/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ mod tests {
#[test_case(Rule::BidirectionalUnicode, Path::new("bidirectional_unicode.py"); "PLE2502")]
#[test_case(Rule::BadStrStripCall, Path::new("bad_str_strip_call.py"); "PLE01310")]
#[test_case(Rule::YieldInInit, Path::new("yield_in_init.py"); "PLE0100")]
#[test_case(Rule::RedefinedLoopName, Path::new("redefined_loop_name.py"); "PLW2901")]
fn rules(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
2 changes: 2 additions & 0 deletions crates/ruff/src/rules/pylint/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ pub use magic_value_comparison::{magic_value_comparison, MagicValueComparison};
pub use merge_isinstance::{merge_isinstance, ConsiderMergingIsinstance};
pub use nonlocal_without_binding::NonlocalWithoutBinding;
pub use property_with_parameters::{property_with_parameters, PropertyWithParameters};
pub use redefined_loop_name::{redefined_loop_name, RedefinedLoopName};
pub use return_in_init::{return_in_init, ReturnInInit};
pub use too_many_arguments::{too_many_arguments, TooManyArguments};
pub use too_many_branches::{too_many_branches, TooManyBranches};
Expand Down Expand Up @@ -40,6 +41,7 @@ mod magic_value_comparison;
mod merge_isinstance;
mod nonlocal_without_binding;
mod property_with_parameters;
mod redefined_loop_name;
mod return_in_init;
mod too_many_arguments;
mod too_many_branches;
Expand Down
194 changes: 194 additions & 0 deletions crates/ruff/src/rules/pylint/rules/redefined_loop_name.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
use crate::ast::helpers::find_names;
use crate::ast::types::{Node, Range};
use crate::ast::visitor;
use crate::ast::visitor::Visitor;
use crate::checkers::ast::Checker;
use crate::registry::Diagnostic;
use crate::source_code::Locator;
use crate::violation::Violation;
use ruff_macros::{define_violation, derive_message_formats};
use rustpython_parser::ast::{Expr, Stmt, StmtKind, Withitem};
use std::iter::zip;

define_violation!(
/// ## What it does
/// Checks for variables defined in `for` loops and `with` statements that get overwritten
/// within the body, for example by another `for` loop or `with` statement or by direct
/// assignment.
///
/// ## Why is this bad?
/// Redefinition of a loop variable inside the loop's body causes its value to differ from
/// the original loop iteration for the remainder of the block, in a way that will likely
/// cause bugs.
///
/// In Python, unlike many other languages, `for` loops and `with` statements don't define
/// their own scopes. Therefore, a nested loop that uses the same target variable name as
/// an outer loop will reuse the same actual variable, and the value from the last
/// iteration will "leak out" into the remainder of the enclosing loop.
///
/// While this mistake is easy to spot in small examples, it can be hidden in larger
/// blocks of code where the definition and redefinition of the variable may not be
/// visible at the same time.
///
/// ## Example
/// ```python
/// for i in range(10):
/// i = 9
/// print(i) # prints 9 every iteration
///
/// for i in range(10):
/// for i in range(10): # original value overwritten
/// pass
/// print(i) # also prints 9 every iteration
///
/// with path1.open() as f:
/// with path2.open() as f:
/// f = path2.open()
/// print(f.readline()) # prints a line from path2
/// ```
charliermarsh marked this conversation as resolved.
Show resolved Hide resolved
pub struct RedefinedLoopName {
matthewlloyd marked this conversation as resolved.
Show resolved Hide resolved
pub name: String,
}
);
impl Violation for RedefinedLoopName {
#[derive_message_formats]
fn message(&self) -> String {
let RedefinedLoopName { name } = self;
format!("For loop or with statement variable `{name}` overwritten in body")
matthewlloyd marked this conversation as resolved.
Show resolved Hide resolved
}
}

struct InnerForWithAssignNamesVisitor<'a> {
locator: &'a Locator<'a>,
name_ranges: Vec<Range>,
}

impl<'a, 'b> Visitor<'b> for InnerForWithAssignNamesVisitor<'_>
where
'b: 'a,
{
fn visit_stmt(&mut self, stmt: &'b Stmt) {
match &stmt.node {
// For and async for.
StmtKind::For { target, .. } | StmtKind::AsyncFor { target, .. } => {
self.name_ranges
.extend(name_ranges_from_expr(target, self.locator));
}
// With.
StmtKind::With { items, .. } => {
self.name_ranges
.extend(name_ranges_from_with_items(items, self.locator));
}
// Assignment, augmented assignment, and annotated assignment.
StmtKind::Assign { targets, .. } => {
self.name_ranges
.extend(name_ranges_from_assign_targets(targets, self.locator));
}
StmtKind::AugAssign { target, .. } | StmtKind::AnnAssign { target, .. } => {
self.name_ranges
.extend(name_ranges_from_expr(target, self.locator));
}
_ => {}
}
visitor::walk_stmt(self, stmt);
matthewlloyd marked this conversation as resolved.
Show resolved Hide resolved
}
}

fn name_ranges_from_expr<'a, U>(target: &'a Expr<U>, locator: &Locator) -> Vec<Range> {
find_names(target, locator)
}

fn name_ranges_from_with_items<'a, U>(items: &'a [Withitem<U>], locator: &Locator) -> Vec<Range> {
items
.iter()
.filter_map(|item| {
item.optional_vars
.as_ref()
.map(|expr| find_names(&**expr, locator))
})
.flatten()
.collect()
matthewlloyd marked this conversation as resolved.
Show resolved Hide resolved
}

fn name_ranges_from_assign_targets<'a, U>(targets: &'a [Expr<U>], locator: &Locator) -> Vec<Range> {
targets
.iter()
.flat_map(|target| find_names(target, locator))
.collect()
}

/// PLW2901
pub fn redefined_loop_name<'a, 'b>(checker: &'a mut Checker<'b>, node: &Node<'b>)
where
'b: 'a,
{
let (outer_name_ranges, inner_name_ranges) = match node {
Node::Stmt(stmt) => match &stmt.node {
// With.
StmtKind::With { items, body, .. } => {
let name_ranges = name_ranges_from_with_items(items, checker.locator);
let mut visitor = InnerForWithAssignNamesVisitor {
locator: checker.locator,
name_ranges: vec![],
};
for stmt in body {
visitor.visit_stmt(stmt);
}
(name_ranges, visitor.name_ranges)
}
// For and async for.
StmtKind::For {
target,
body,
iter: _,
orelse: _,
matthewlloyd marked this conversation as resolved.
Show resolved Hide resolved
..
}
| StmtKind::AsyncFor {
target,
body,
iter: _,
orelse: _,
..
} => {
let name_ranges = name_ranges_from_expr(target, checker.locator);
let mut visitor = InnerForWithAssignNamesVisitor {
locator: checker.locator,
name_ranges: vec![],
};
for stmt in body {
visitor.visit_stmt(stmt);
}
(name_ranges, visitor.name_ranges)
}
_ => panic!(
"redefined_loop_name called on Statement that is not a With, For, or AsyncFor"
),
},
Node::Expr(_) => panic!("redefined_loop_name called on Node that is not a Statement"),
};

let outer_names: Vec<&str> = outer_name_ranges
.iter()
.map(|range| checker.locator.slice(range))
// Ignore dummy variables.
.filter(|name| !checker.settings.dummy_variable_rgx.is_match(name))
.collect();
let inner_names: Vec<&str> = inner_name_ranges
.iter()
.map(|range| checker.locator.slice(range))
.collect();

for outer_name in &outer_names {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Just out of interest, how closely does this match Pylint's own implementation? Did you look at the Pylint source, or is it implemented from scratch?)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was implemented from scratch, but after I learned Pylint has it too, I did look at the Pylint source later to compare. Pylint has a slightly more efficient algorithm, using a single recursion over the AST, keeping track of defined names using a stack, and pushing and popping the names on entering and leaving each for loop. This could probably be done with a deeper integration into ast.rs.

for (inner_range, inner_name) in zip(&inner_name_ranges, &inner_names) {
if inner_name.eq(outer_name) {
checker.diagnostics.push(Diagnostic::new(
RedefinedLoopName {
name: (*inner_name).to_string(),
},
*inner_range,
));
}
}
}
}