diff --git a/README.rst b/README.rst index bde5850..600e96c 100644 --- a/README.rst +++ b/README.rst @@ -150,6 +150,9 @@ No exceptions will be suppressed and therefore this context manager is redundant N.B. this rule currently does not flag `suppress` calls to avoid potential false positives due to similarly named user-defined functions. +**B023**: Functions defined inside a loop must not use variables redefined in +the loop, because `late-binding closures are a classic gotcha +`__. Opinionated warnings ~~~~~~~~~~~~~~~~~~~~ diff --git a/bugbear.py b/bugbear.py index f1dcf9e..5243fbf 100644 --- a/bugbear.py +++ b/bugbear.py @@ -198,6 +198,12 @@ def _to_name_str(node): return _to_name_str(node.value) +def names_from_assignments(node): + for name in ast.walk(node): + if isinstance(name, ast.Name): + yield name.id + + def _typesafe_issubclass(cls, class_or_tuple): try: return issubclass(cls, class_or_tuple) @@ -520,6 +526,44 @@ def check_for_b020(self, node): n = targets.names[name][0] self.errors.append(B020(n.lineno, n.col_offset, vars=(name,))) + def check_for_b023(self, loop_node): + """Check that functions (including lambdas) do not use loop variables. + + https://docs.python-guide.org/writing/gotchas/#late-binding-closures from + functions - usually but not always lambdas - defined inside a loop are a + classic source of bugs. + + For each use of a variable inside a function defined inside a loop, we + emit a warning if that variable is reassigned on each loop iteration + (outside the function). This includes but is not limited to explicit + loop variables like the `x` in `for x in range(3):`. + """ + # Because most loops don't contain functions, it's most efficient to + # implement this "backwards": first we find all the candidate variable + # uses, and then if there are any we check for assignment of those names + # inside the loop body. + suspicious_variables = [] + for node in ast.walk(loop_node): + if isinstance(node, (ast.AsyncFunctionDef, ast.FunctionDef, ast.Lambda)): + argnames = set(names_from_assignments(node.args)) + for name in ast.walk(node): + if isinstance(name, ast.Name) and name.id not in argnames: + err = B023(name.lineno, name.col_offset, vars=(name.id,)) + suspicious_variables.append((name.id, err)) + + if suspicious_variables: + reassigned_in_loop = set() + loop_targets = (ast.For, ast.AsyncFor, ast.comprehension) + for _, node in self.walk_function_body(loop_node): + if isinstance(node, (ast.Assign)): + reassigned_in_loop.update(names_from_assignments(node.targets)) + if isinstance(node, loop_targets + (ast.AnnAssign, ast.AugAssign)): + reassigned_in_loop.update(names_from_assignments(node.target)) + + for name, err in suspicious_variables: + if name in reassigned_in_loop: + self.errors.append(err) + def check_for_b904(self, node): """Checks `raise` without `from` inside an `except` clause. @@ -1041,6 +1085,15 @@ def visit_Lambda(self, node): ) ) +B023 = Error( + message=( + "B023 Function definition does not bind loop variable {!r}. " + "This means that invoking functions defined inside the loop will " + "always use the value of this variable from the last iteration. See " + "https://docs.python-guide.org/writing/gotchas/#late-binding-closures" + ) +) + # Warnings disabled by default. B901 = Error( message=(