From 8aed23fe0aae5591d837782010555d5278415ddd Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 10 Jan 2023 21:23:48 -0500 Subject: [PATCH] Avoid B023 false-positives for some common builtins (#1776) This is based on the upstream work in https://github.com/PyCQA/flake8-bugbear/pull/303 and https://github.com/PyCQA/flake8-bugbear/pull/305/files. Resolves #1686. --- .../test/fixtures/flake8_bugbear/B023.py | 114 ++++++++++++++++-- .../rules/function_uses_loop_variable.rs | 101 +++++++++++----- ...__flake8_bugbear__tests__B023_B023.py.snap | 72 ++++++++++- 3 files changed, 248 insertions(+), 39 deletions(-) diff --git a/resources/test/fixtures/flake8_bugbear/B023.py b/resources/test/fixtures/flake8_bugbear/B023.py index b320302951d29..7a3221dd1ca70 100644 --- a/resources/test/fixtures/flake8_bugbear/B023.py +++ b/resources/test/fixtures/flake8_bugbear/B023.py @@ -25,10 +25,10 @@ def f_ok_1(x): def check_inside_functions_too(): - ls = [lambda: x for x in range(2)] - st = {lambda: x for x in range(2)} - gn = (lambda: x for x in range(2)) - dt = {x: lambda: x for x in range(2)} + ls = [lambda: x for x in range(2)] # error + st = {lambda: x for x in range(2)} # error + gn = (lambda: x for x in range(2)) # error + dt = {x: lambda: x for x in range(2)} # error async def pointless_async_iterable(): @@ -37,9 +37,9 @@ async def pointless_async_iterable(): async def container_for_problems(): async for x in pointless_async_iterable(): - functions.append(lambda: x) + functions.append(lambda: x) # error - [lambda: x async for x in pointless_async_iterable()] + [lambda: x async for x in pointless_async_iterable()] # error a = 10 @@ -47,10 +47,10 @@ async def container_for_problems(): while True: a = a_ = a - 1 b += 1 - functions.append(lambda: a) - functions.append(lambda: a_) - functions.append(lambda: b) - functions.append(lambda: c) # not a name error because of late binding! + functions.append(lambda: a) # error + functions.append(lambda: a_) # error + functions.append(lambda: b) # error + functions.append(lambda: c) # error, but not a name error due to late binding c: bool = a > 3 if not c: break @@ -58,7 +58,7 @@ async def container_for_problems(): # Nested loops should not duplicate reports for j in range(2): for k in range(3): - lambda: j * k + lambda: j * k # error for j, k, l in [(1, 2, 3)]: @@ -80,3 +80,95 @@ def explicit_capture(captured=var): for i in range(3): lambda: f"{i}" + + +# `query` is defined in the function, so also defining it in the loop should be OK. +for name in ["a", "b"]: + query = name + + def myfunc(x): + query = x + query_post = x + _ = query + _ = query_post + + query_post = name # in case iteration order matters + + +# Bug here because two dict comprehensions reference `name`, one of which is inside +# the lambda. This should be totally fine, of course. +_ = { + k: v + for k, v in reduce( + lambda data, event: merge_mappings( + [data, {name: f(caches, data, event) for name, f in xx}] + ), + events, + {name: getattr(group, name) for name in yy}, + ).items() + if k in backfill_fields +} + + +# OK to define lambdas if they're immediately consumed, typically as the `key=` +# argument or in a consumed `filter()` (even if a comprehension is better style) +for x in range(2): + # It's not a complete get-out-of-linting-free construct - these should fail: + min([None, lambda: x], key=repr) + sorted([None, lambda: x], key=repr) + any(filter(bool, [None, lambda: x])) + list(filter(bool, [None, lambda: x])) + all(reduce(bool, [None, lambda: x])) + + # But all these should be OK: + min(range(3), key=lambda y: x * y) + max(range(3), key=lambda y: x * y) + sorted(range(3), key=lambda y: x * y) + + any(map(lambda y: x < y, range(3))) + all(map(lambda y: x < y, range(3))) + set(map(lambda y: x < y, range(3))) + list(map(lambda y: x < y, range(3))) + tuple(map(lambda y: x < y, range(3))) + sorted(map(lambda y: x < y, range(3))) + frozenset(map(lambda y: x < y, range(3))) + + any(filter(lambda y: x < y, range(3))) + all(filter(lambda y: x < y, range(3))) + set(filter(lambda y: x < y, range(3))) + list(filter(lambda y: x < y, range(3))) + tuple(filter(lambda y: x < y, range(3))) + sorted(filter(lambda y: x < y, range(3))) + frozenset(filter(lambda y: x < y, range(3))) + + any(reduce(lambda y: x | y, range(3))) + all(reduce(lambda y: x | y, range(3))) + set(reduce(lambda y: x | y, range(3))) + list(reduce(lambda y: x | y, range(3))) + tuple(reduce(lambda y: x | y, range(3))) + sorted(reduce(lambda y: x | y, range(3))) + frozenset(reduce(lambda y: x | y, range(3))) + + import functools + + any(functools.reduce(lambda y: x | y, range(3))) + all(functools.reduce(lambda y: x | y, range(3))) + set(functools.reduce(lambda y: x | y, range(3))) + list(functools.reduce(lambda y: x | y, range(3))) + tuple(functools.reduce(lambda y: x | y, range(3))) + sorted(functools.reduce(lambda y: x | y, range(3))) + frozenset(functools.reduce(lambda y: x | y, range(3))) + +# OK because the lambda which references a loop variable is defined in a `return` +# statement, and after we return the loop variable can't be redefined. +# In principle we could do something fancy with `break`, but it's not worth it. +def iter_f(names): + for name in names: + if exists(name): + return lambda: name if exists(name) else None + + if foo(name): + return [lambda: name] # known false alarm + + if False: + return [lambda: i for i in range(3)] # error diff --git a/src/flake8_bugbear/rules/function_uses_loop_variable.rs b/src/flake8_bugbear/rules/function_uses_loop_variable.rs index befa32c99f92d..ac7be2fb58d1d 100644 --- a/src/flake8_bugbear/rules/function_uses_loop_variable.rs +++ b/src/flake8_bugbear/rules/function_uses_loop_variable.rs @@ -12,7 +12,9 @@ use crate::violations; #[derive(Default)] struct LoadedNamesVisitor<'a> { // Tuple of: name, defining expression, and defining range. - names: Vec<(&'a str, &'a Expr, Range)>, + loaded: Vec<(&'a str, &'a Expr, Range)>, + // Tuple of: name, defining expression, and defining range. + stored: Vec<(&'a str, &'a Expr, Range)>, } /// `Visitor` to collect all used identifiers in a statement. @@ -22,12 +24,11 @@ where { fn visit_expr(&mut self, expr: &'b Expr) { match &expr.node { - ExprKind::JoinedStr { .. } => { - visitor::walk_expr(self, expr); - } - ExprKind::Name { id, ctx } if matches!(ctx, ExprContext::Load) => { - self.names.push((id, expr, Range::from_located(expr))); - } + ExprKind::Name { id, ctx } => match ctx { + ExprContext::Load => self.loaded.push((id, expr, Range::from_located(expr))), + ExprContext::Store => self.stored.push((id, expr, Range::from_located(expr))), + ExprContext::Del => {} + }, _ => visitor::walk_expr(self, expr), } } @@ -36,6 +37,7 @@ where #[derive(Default)] struct SuspiciousVariablesVisitor<'a> { names: Vec<(&'a str, &'a Expr, Range)>, + safe_functions: Vec<&'a Expr>, } /// `Visitor` to collect all suspicious variables (those referenced in @@ -50,45 +52,90 @@ where | StmtKind::AsyncFunctionDef { args, body, .. } => { // Collect all loaded variable names. let mut visitor = LoadedNamesVisitor::default(); - for stmt in body { - visitor.visit_stmt(stmt); - } + visitor.visit_body(body); // Collect all argument names. - let arg_names = collect_arg_names(args); + let mut arg_names = collect_arg_names(args); + arg_names.extend(visitor.stored.iter().map(|(id, ..)| id)); // Treat any non-arguments as "suspicious". self.names.extend( visitor - .names - .into_iter() + .loaded + .iter() .filter(|(id, ..)| !arg_names.contains(id)), ); } - _ => visitor::walk_stmt(self, stmt), + StmtKind::Return { value: Some(value) } => { + // Mark `return lambda: x` as safe. + if matches!(value.node, ExprKind::Lambda { .. }) { + self.safe_functions.push(value); + } + } + _ => {} } + visitor::walk_stmt(self, stmt); } fn visit_expr(&mut self, expr: &'b Expr) { match &expr.node { + ExprKind::Call { + func, + args, + keywords, + } => { + if let ExprKind::Name { id, .. } = &func.node { + if id == "filter" || id == "reduce" || id == "map" { + for arg in args { + if matches!(arg.node, ExprKind::Lambda { .. }) { + self.safe_functions.push(arg); + } + } + } + } + if let ExprKind::Attribute { value, attr, .. } = &func.node { + if attr == "reduce" { + if let ExprKind::Name { id, .. } = &value.node { + if id == "functools" { + for arg in args { + if matches!(arg.node, ExprKind::Lambda { .. }) { + self.safe_functions.push(arg); + } + } + } + } + } + } + for keyword in keywords { + if keyword.node.arg.as_ref().map_or(false, |arg| arg == "key") + && matches!(keyword.node.value.node, ExprKind::Lambda { .. }) + { + self.safe_functions.push(&keyword.node.value); + } + } + } ExprKind::Lambda { args, body } => { - // Collect all loaded variable names. - let mut visitor = LoadedNamesVisitor::default(); - visitor.visit_expr(body); + if !self.safe_functions.contains(&expr) { + // Collect all loaded variable names. + let mut visitor = LoadedNamesVisitor::default(); + visitor.visit_expr(body); - // Collect all argument names. - let arg_names = collect_arg_names(args); + // Collect all argument names. + let mut arg_names = collect_arg_names(args); + arg_names.extend(visitor.stored.iter().map(|(id, ..)| id)); - // Treat any non-arguments as "suspicious". - self.names.extend( - visitor - .names - .into_iter() - .filter(|(id, ..)| !arg_names.contains(id)), - ); + // Treat any non-arguments as "suspicious". + self.names.extend( + visitor + .loaded + .iter() + .filter(|(id, ..)| !arg_names.contains(id)), + ); + } } - _ => visitor::walk_expr(self, expr), + _ => {} } + visitor::walk_expr(self, expr); } } diff --git a/src/flake8_bugbear/snapshots/ruff__flake8_bugbear__tests__B023_B023.py.snap b/src/flake8_bugbear/snapshots/ruff__flake8_bugbear__tests__B023_B023.py.snap index a690432c00d5f..a4378782760a6 100644 --- a/src/flake8_bugbear/snapshots/ruff__flake8_bugbear__tests__B023_B023.py.snap +++ b/src/flake8_bugbear/snapshots/ruff__flake8_bugbear__tests__B023_B023.py.snap @@ -1,6 +1,6 @@ --- source: src/flake8_bugbear/mod.rs -expression: checks +expression: diagnostics --- - kind: FunctionUsesLoopVariable: x @@ -172,4 +172,74 @@ expression: checks column: 16 fix: ~ parent: ~ +- kind: + FunctionUsesLoopVariable: x + location: + row: 117 + column: 23 + end_location: + row: 117 + column: 24 + fix: ~ + parent: ~ +- kind: + FunctionUsesLoopVariable: x + location: + row: 118 + column: 26 + end_location: + row: 118 + column: 27 + fix: ~ + parent: ~ +- kind: + FunctionUsesLoopVariable: x + location: + row: 119 + column: 36 + end_location: + row: 119 + column: 37 + fix: ~ + parent: ~ +- kind: + FunctionUsesLoopVariable: x + location: + row: 120 + column: 37 + end_location: + row: 120 + column: 38 + fix: ~ + parent: ~ +- kind: + FunctionUsesLoopVariable: x + location: + row: 121 + column: 36 + end_location: + row: 121 + column: 37 + fix: ~ + parent: ~ +- kind: + FunctionUsesLoopVariable: name + location: + row: 171 + column: 28 + end_location: + row: 171 + column: 32 + fix: ~ + parent: ~ +- kind: + FunctionUsesLoopVariable: i + location: + row: 174 + column: 28 + end_location: + row: 174 + column: 29 + fix: ~ + parent: ~