Skip to content

Commit

Permalink
Pyupgrade: Format specifiers (#1594)
Browse files Browse the repository at this point in the history
A part of #827. Posting this for visibility. Still has some work to do
to be done.

Things that still need done before this is ready:

- [x] Does not work when the item is being assigned to a variable
- [x] Does not work if being used in a function call
- [x] Fix incorrectly removed calls in the function
- [x] Has not been tested with pyupgrade negative test cases

Tests from pyupgrade can be seen here:
https://github.com/asottile/pyupgrade/blob/main/tests/features/format_literals_test.py

Co-authored-by: Charlie Marsh <charlie.r.marsh@gmail.com>
  • Loading branch information
colin99d and charliermarsh committed Jan 11, 2023
1 parent f1a5e53 commit c016c41
Show file tree
Hide file tree
Showing 14 changed files with 472 additions and 47 deletions.
1 change: 1 addition & 0 deletions README.md
Expand Up @@ -702,6 +702,7 @@ For more, see [pyupgrade](https://pypi.org/project/pyupgrade/3.2.0/) on PyPI.
| UP027 | RewriteListComprehension | Replace unpacked list comprehension with a generator expression | 🛠 |
| UP028 | RewriteYieldFrom | Replace `yield` over `for` loop with `yield from` | 🛠 |
| UP029 | UnnecessaryBuiltinImport | Unnecessary builtin import: `...` | 🛠 |
| UP030 | FormatLiterals | Use implicit references for positional format fields | 🛠 |

### pep8-naming (N)

Expand Down
36 changes: 36 additions & 0 deletions resources/test/fixtures/pyupgrade/UP030_0.py
@@ -0,0 +1,36 @@
# Invalid calls; errors expected.

"{0}" "{1}" "{2}".format(1, 2, 3)

"a {3} complicated {1} string with {0} {2}".format(
"first", "second", "third", "fourth"
)

'{0}'.format(1)

'{0:x}'.format(30)

x = '{0}'.format(1)

'''{0}\n{1}\n'''.format(1, 2)

x = "foo {0}" \
"bar {1}".format(1, 2)

("{0}").format(1)

"\N{snowman} {0}".format(1)

'{' '0}'.format(1)

# These will not change because we are waiting for libcst to fix this issue:
# https://github.com/Instagram/LibCST/issues/846
print(
'foo{0}'
'bar{1}'.format(1, 2)
)

print(
'foo{0}' # ohai\n"
'bar{1}'.format(1, 2)
)
23 changes: 23 additions & 0 deletions resources/test/fixtures/pyupgrade/UP030_1.py
@@ -0,0 +1,23 @@
# Valid calls; no errors expected.

'{}'.format(1)


x = ('{0} {1}',)

'{0} {0}'.format(1)

'{0:<{1}}'.format(1, 4)

f"{0}".format(a)

f"{0}".format(1)

print(f"{0}".format(1))

# I did not include the following tests because ruff does not seem to work with
# invalid python syntax (which is a good thing)

# "{0}"format(1)
# '{'.format(1)", "'}'.format(1)
# ("{0}" # {1}\n"{2}").format(1, 2, 3)
2 changes: 2 additions & 0 deletions ruff.schema.json
Expand Up @@ -1606,6 +1606,8 @@
"UP027",
"UP028",
"UP029",
"UP03",
"UP030",
"W",
"W2",
"W29",
Expand Down
6 changes: 6 additions & 0 deletions src/checkers/ast.rs
Expand Up @@ -1830,6 +1830,8 @@ where
|| self.settings.enabled.contains(&RuleCode::F523)
|| self.settings.enabled.contains(&RuleCode::F524)
|| self.settings.enabled.contains(&RuleCode::F525)
// pyupgrade
|| self.settings.enabled.contains(&RuleCode::UP030)
{
if let ExprKind::Attribute { value, attr, .. } = &func.node {
if let ExprKind::Constant {
Expand Down Expand Up @@ -1876,6 +1878,10 @@ where
self, &summary, location,
);
}

if self.settings.enabled.contains(&RuleCode::UP030) {
pyupgrade::rules::format_literals(self, &summary, expr);
}
}
}
}
Expand Down
19 changes: 18 additions & 1 deletion src/cst/matchers.rs
@@ -1,5 +1,7 @@
use anyhow::{bail, Result};
use libcst_native::{Expr, Import, ImportFrom, Module, SmallStatement, Statement};
use libcst_native::{
Call, Expr, Expression, Import, ImportFrom, Module, SmallStatement, Statement,
};

pub fn match_module(module_text: &str) -> Result<Module> {
match libcst_native::parse_module(module_text, None) {
Expand All @@ -8,6 +10,13 @@ pub fn match_module(module_text: &str) -> Result<Module> {
}
}

pub fn match_expression(expression_text: &str) -> Result<Expression> {
match libcst_native::parse_expression(expression_text) {
Ok(expression) => Ok(expression),
Err(_) => bail!("Failed to extract CST from source"),
}
}

pub fn match_expr<'a, 'b>(module: &'a mut Module<'b>) -> Result<&'a mut Expr<'b>> {
if let Some(Statement::Simple(expr)) = module.body.first_mut() {
if let Some(SmallStatement::Expr(expr)) = expr.body.first_mut() {
Expand Down Expand Up @@ -43,3 +52,11 @@ pub fn match_import_from<'a, 'b>(module: &'a mut Module<'b>) -> Result<&'a mut I
bail!("Expected Statement::Simple")
}
}

pub fn match_call<'a, 'b>(expression: &'a mut Expression<'b>) -> Result<&'a mut Call<'b>> {
if let Expression::Call(call) = expression {
Ok(call)
} else {
bail!("Expected SmallStatement::Expr")
}
}
44 changes: 25 additions & 19 deletions src/pyflakes/format.rs
@@ -1,7 +1,6 @@
//! Implements helper functions for using vendored/format.rs
use std::convert::TryFrom;

use rustc_hash::FxHashSet;
use rustpython_common::format::{
FieldName, FieldType, FormatParseError, FormatPart, FormatString, FromTemplate,
};
Expand All @@ -21,10 +20,12 @@ pub(crate) fn error_to_string(err: &FormatParseError) -> String {
.to_string()
}

#[derive(Debug)]
pub(crate) struct FormatSummary {
pub autos: FxHashSet<usize>,
pub indexes: FxHashSet<usize>,
pub keywords: FxHashSet<String>,
pub autos: Vec<usize>,
pub indexes: Vec<usize>,
pub keywords: Vec<String>,
pub has_nested_parts: bool,
}

impl TryFrom<&str> for FormatSummary {
Expand All @@ -33,9 +34,10 @@ impl TryFrom<&str> for FormatSummary {
fn try_from(literal: &str) -> Result<Self, Self::Error> {
let format_string = FormatString::from_str(literal)?;

let mut autos = FxHashSet::default();
let mut indexes = FxHashSet::default();
let mut keywords = FxHashSet::default();
let mut autos = Vec::new();
let mut indexes = Vec::new();
let mut keywords = Vec::new();
let mut has_nested_parts = false;

for format_part in format_string.format_parts {
let FormatPart::Field {
Expand All @@ -47,9 +49,9 @@ impl TryFrom<&str> for FormatSummary {
};
let parsed = FieldName::parse(&field_name)?;
match parsed.field_type {
FieldType::Auto => autos.insert(autos.len()),
FieldType::Index(i) => indexes.insert(i),
FieldType::Keyword(k) => keywords.insert(k),
FieldType::Auto => autos.push(autos.len()),
FieldType::Index(i) => indexes.push(i),
FieldType::Keyword(k) => keywords.push(k),
};

let nested = FormatString::from_str(&format_spec)?;
Expand All @@ -59,17 +61,19 @@ impl TryFrom<&str> for FormatSummary {
};
let parsed = FieldName::parse(&field_name)?;
match parsed.field_type {
FieldType::Auto => autos.insert(autos.len()),
FieldType::Index(i) => indexes.insert(i),
FieldType::Keyword(k) => keywords.insert(k),
FieldType::Auto => autos.push(autos.len()),
FieldType::Index(i) => indexes.push(i),
FieldType::Keyword(k) => keywords.push(k),
};
has_nested_parts = true;
}
}

Ok(FormatSummary {
autos,
indexes,
keywords,
has_nested_parts,
})
}
}
Expand All @@ -82,9 +86,9 @@ mod tests {
fn test_format_summary() {
let literal = "foo{foo}a{}b{2}c{2}d{1}{}{}e{bar}{foo}f{spam}";

let expected_autos = [0usize, 1usize, 2usize].into_iter().collect();
let expected_indexes = [1usize, 2usize].into_iter().collect();
let expected_keywords = ["foo", "bar", "spam"]
let expected_autos = [0usize, 1usize, 2usize].to_vec();
let expected_indexes = [2usize, 2usize, 1usize].to_vec();
let expected_keywords: Vec<_> = ["foo", "bar", "foo", "spam"]
.into_iter()
.map(String::from)
.collect();
Expand All @@ -94,15 +98,16 @@ mod tests {
assert_eq!(format_summary.autos, expected_autos);
assert_eq!(format_summary.indexes, expected_indexes);
assert_eq!(format_summary.keywords, expected_keywords);
assert!(!format_summary.has_nested_parts);
}

#[test]
fn test_format_summary_nested() {
let literal = "foo{foo}a{:{}{}}b{2:{3}{4}}c{2}d{1}{}e{bar:{spam}{eggs}}";

let expected_autos = [0usize, 1usize, 2usize, 3usize].into_iter().collect();
let expected_indexes = [1usize, 2usize, 3usize, 4usize].into_iter().collect();
let expected_keywords = ["foo", "bar", "spam", "eggs"]
let expected_autos = [0usize, 1usize, 2usize, 3usize].to_vec();
let expected_indexes = [2usize, 3usize, 4usize, 2usize, 1usize].to_vec();
let expected_keywords: Vec<_> = ["foo", "bar", "spam", "eggs"]
.into_iter()
.map(String::from)
.collect();
Expand All @@ -112,6 +117,7 @@ mod tests {
assert_eq!(format_summary.autos, expected_autos);
assert_eq!(format_summary.indexes, expected_indexes);
assert_eq!(format_summary.keywords, expected_keywords);
assert!(format_summary.has_nested_parts);
}

#[test]
Expand Down
2 changes: 2 additions & 0 deletions src/pyupgrade/mod.rs
Expand Up @@ -50,6 +50,8 @@ mod tests {
#[test_case(RuleCode::UP028, Path::new("UP028_0.py"); "UP028_0")]
#[test_case(RuleCode::UP028, Path::new("UP028_1.py"); "UP028_1")]
#[test_case(RuleCode::UP029, Path::new("UP029.py"); "UP029")]
#[test_case(RuleCode::UP030, Path::new("UP030_0.py"); "UP030_0")]
#[test_case(RuleCode::UP030, Path::new("UP030_1.py"); "UP030_1")]
fn rules(rule_code: RuleCode, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.as_ref(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
118 changes: 118 additions & 0 deletions src/pyupgrade/rules/format_literals.rs
@@ -0,0 +1,118 @@
use anyhow::{anyhow, bail, Result};
use libcst_native::{Arg, Codegen, CodegenState, Expression};
use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_ast::Expr;

use crate::ast::types::Range;
use crate::autofix::Fix;
use crate::checkers::ast::Checker;
use crate::cst::matchers::{match_call, match_expression};
use crate::pyflakes::format::FormatSummary;
use crate::registry::Diagnostic;
use crate::violations;

// An opening curly brace, followed by any integer, followed by any text,
// followed by a closing brace.
static FORMAT_SPECIFIER: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\{(?P<int>\d+)(?P<fmt>.*?)}").unwrap());

/// Returns a string without the format specifiers.
/// Ex. "Hello {0} {1}" -> "Hello {} {}"
fn remove_specifiers(raw_specifiers: &str) -> String {
FORMAT_SPECIFIER
.replace_all(raw_specifiers, "{$fmt}")
.to_string()
}

/// Return the corrected argument vector.
fn generate_arguments<'a>(
old_args: &[Arg<'a>],
correct_order: &'a [usize],
) -> Result<Vec<Arg<'a>>> {
let mut new_args: Vec<Arg> = Vec::with_capacity(old_args.len());
for (idx, given) in correct_order.iter().enumerate() {
// We need to keep the formatting in the same order but move the values.
let values = old_args
.get(*given)
.ok_or_else(|| anyhow!("Failed to extract argument at: {given}"))?;
let formatting = old_args
.get(idx)
.ok_or_else(|| anyhow!("Failed to extract argument at: {idx}"))?;
let new_arg = Arg {
value: values.value.clone(),
comma: formatting.comma.clone(),
equal: None,
keyword: None,
star: values.star,
whitespace_after_star: formatting.whitespace_after_star.clone(),
whitespace_after_arg: formatting.whitespace_after_arg.clone(),
};
new_args.push(new_arg);
}
Ok(new_args)
}

/// Returns the corrected function call.
fn generate_call(module_text: &str, correct_order: &[usize]) -> Result<String> {
let mut expression = match_expression(module_text)?;
let mut call = match_call(&mut expression)?;

// Fix the call arguments.
call.args = generate_arguments(&call.args, correct_order)?;

// Fix the string itself.
let Expression::Attribute(item) = &*call.func else {
panic!("Expected: Expression::Attribute")
};

let mut state = CodegenState::default();
item.codegen(&mut state);
let cleaned = remove_specifiers(&state.to_string());

call.func = Box::new(match_expression(&cleaned)?);

let mut state = CodegenState::default();
expression.codegen(&mut state);
if module_text == state.to_string() {
// Ex) `'{' '0}'.format(1)`
bail!("Failed to generate call expression for: {module_text}")
}
Ok(state.to_string())
}

/// UP030
pub(crate) fn format_literals(checker: &mut Checker, summary: &FormatSummary, expr: &Expr) {
// The format we expect is, e.g.: `"{0} {1}".format(...)`
if summary.has_nested_parts {
return;
}
if !summary.keywords.is_empty() {
return;
}
if !summary.autos.is_empty() {
return;
}
if !(0..summary.indexes.len()).all(|index| summary.indexes.contains(&index)) {
return;
}

let mut diagnostic = Diagnostic::new(violations::FormatLiterals, Range::from_located(expr));
if checker.patch(diagnostic.kind.code()) {
// Currently, the only issue we know of is in LibCST:
// https://github.com/Instagram/LibCST/issues/846
if let Ok(contents) = generate_call(
&checker
.locator
.slice_source_code_range(&Range::from_located(expr)),
&summary.indexes,
) {
diagnostic.amend(Fix::replacement(
contents,
expr.location,
expr.end_location.unwrap(),
));
};
}
checker.diagnostics.push(diagnostic);
}

0 comments on commit c016c41

Please sign in to comment.