Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix determination of f-string expression spans #2654

Merged
merged 12 commits into from Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Expand Up @@ -15,6 +15,7 @@
times, like `match re.match()` (#2661)
- Fix assignment to environment variables in Jupyter Notebooks (#2642)
- Add `flake8-simplify` and `flake8-comprehensions` plugins (#2653)
- Fix determination of f-string expression spans (#2654)
JelleZijlstra marked this conversation as resolved.
Show resolved Hide resolved
- Fix parser error location on invalid syntax in a `match` statement (#2649)

## 21.11b1
Expand Down
72 changes: 55 additions & 17 deletions src/black/trans.py
Expand Up @@ -942,6 +942,57 @@ def _get_max_string_length(self, line: Line, string_idx: int) -> int:
return max_string_length


def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
    """
    Yields spans corresponding to expressions in a given f-string.
    Spans are half-open ranges (left inclusive, right exclusive).
    Assumes the input string is a valid f-string, but will not crash if the input
    string is invalid.
    """
    stack: List[int] = []  # our curly paren stack: indices of unmatched "{"
    i = 0
    while i < len(s):
        if s[i] == "{":
            # if we're in a string part of the f-string, ignore escaped curly braces
            # ("{{" is a literal "{", not the start of an expression)
            if not stack and i + 1 < len(s) and s[i + 1] == "{":
                i += 2
                continue
            stack.append(i)
            i += 1
            continue

        if s[i] == "}":
            # a stray "}" outside any expression (e.g. the "}}" escape) is skipped
            if not stack:
                i += 1
                continue
            j = stack.pop()
            # we've made it back out of the expression! yield the span
            if not stack:
                yield (j, i + 1)
            i += 1
            continue

        # if we're in an expression part of the f-string, fast forward through strings
        # note that backslashes are not legal in the expression portion of f-strings,
        # so a quote delimiter can only be closed by the same bare delimiter
        if stack:
            delim = None
            if s[i : i + 3] in ("'''", '"""'):
                delim = s[i : i + 3]
            elif s[i] in ("'", '"'):
                delim = s[i]
            if delim:
                i += len(delim)
                # scan to the matching closing delimiter (or end of input, for
                # invalid f-strings)
                while i < len(s) and s[i : i + len(delim)] != delim:
                    i += 1
                i += len(delim)
                continue
        i += 1


def fstring_contains_expr(s: str) -> bool:
    """Return True if f-string body ``s`` contains at least one expression span."""
    for _ in iter_fexpr_spans(s):
        return True
    return False


class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
"""
StringTransformer that splits "atom" strings (i.e. strings which exist on
Expand Down Expand Up @@ -981,17 +1032,6 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
"""

MIN_SUBSTR_SIZE: Final = 6
# Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
RE_FEXPR: Final = r"""
(?<!\{) (?:\{\{)* \{ (?!\{)
(?:
[^\{\}]
| \{\{
| \}\}
| (?R)
)+
\}
"""

def do_splitter_match(self, line: Line) -> TMatchResult:
LL = line.leaves
Expand Down Expand Up @@ -1058,8 +1098,8 @@ def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
# contain any f-expressions, but ONLY if the original f-string
# contains at least one f-expression. Otherwise, we will alter the AST
# of the program.
drop_pointless_f_prefix = ("f" in prefix) and re.search(
self.RE_FEXPR, LL[string_idx].value, re.VERBOSE
drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr(
LL[string_idx].value
)

first_string_line = True
Expand Down Expand Up @@ -1299,9 +1339,7 @@ def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
"""
if "f" not in get_string_prefix(string).lower():
return

for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
yield match.span()
yield from iter_fexpr_spans(string)

def _get_illegal_split_indices(self, string: str) -> Set[Index]:
illegal_indices: Set[Index] = set()
Expand Down Expand Up @@ -1417,7 +1455,7 @@ def _normalize_f_string(self, string: str, prefix: str) -> str:
"""
assert_is_leaf_string(string)

if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE):
if "f" in prefix and not fstring_contains_expr(string):
new_prefix = prefix.replace("f", "")

temp = string[len(prefix) :]
Expand Down
50 changes: 50 additions & 0 deletions tests/test_trans.py
@@ -0,0 +1,50 @@
from typing import List, Tuple
from black.trans import iter_fexpr_spans


def test_fexpr_spans() -> None:
    """Verify iter_fexpr_spans yields exact expression spans for curated f-strings."""

    def check(
        string: str, expected_spans: List[Tuple[int, int]], expected_slices: List[str]
    ) -> None:
        spans = list(iter_fexpr_spans(string))

        # Checking slices isn't strictly necessary, but it's easier to verify at
        # a glance than only spans
        assert len(spans) == len(expected_slices)
        # NOTE: renamed from `slice` to avoid shadowing the builtin
        for (i, j), expected_slice in zip(spans, expected_slices):
            assert len(string[i:j]) == j - i
            assert string[i:j] == expected_slice

        assert spans == expected_spans

    # Most of these test cases omit the leading 'f' and leading / closing quotes
    # for convenience
    # Some additional property-based tests can be found in
    # https://github.com/psf/black/pull/2654#issuecomment-981411748
    check("""{var}""", [(0, 5)], ["{var}"])
    check("""f'{var}'""", [(2, 7)], ["{var}"])
    check("""f'{1 + f() + 2 + "asdf"}'""", [(2, 24)], ["""{1 + f() + 2 + "asdf"}"""])
    check("""text {var} text""", [(5, 10)], ["{var}"])
    check("""text {{ {var} }} text""", [(8, 13)], ["{var}"])
    check("""{a} {b} {c}""", [(0, 3), (4, 7), (8, 11)], ["{a}", "{b}", "{c}"])
    check("""f'{a} {b} {c}'""", [(2, 5), (6, 9), (10, 13)], ["{a}", "{b}", "{c}"])
    check("""{ {} }""", [(0, 6)], ["{ {} }"])
    check("""{ {{}} }""", [(0, 8)], ["{ {{}} }"])
    check("""{ {{{}}} }""", [(0, 10)], ["{ {{{}}} }"])
    check("""{{ {{{}}} }}""", [(5, 7)], ["{}"])
    check("""{{ {{{var}}} }}""", [(5, 10)], ["{var}"])
    check("""{f"{0}"}""", [(0, 8)], ["""{f"{0}"}"""])
    check("""{"'"}""", [(0, 5)], ["""{"'"}"""])
    check("""{"{"}""", [(0, 5)], ["""{"{"}"""])
    check("""{"}"}""", [(0, 5)], ["""{"}"}"""])
    check("""{"{{"}""", [(0, 6)], ["""{"{{"}"""])
    check("""{''' '''}""", [(0, 9)], ["""{''' '''}"""])
    check("""{'''{'''}""", [(0, 9)], ["""{'''{'''}"""])
    check("""{''' {'{ '''}""", [(0, 13)], ["""{''' {'{ '''}"""])
    check(
        '''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-'y\\'\'\'\'''',
        [(5, 33)],
        ['''{f"""*{f"+{f'.{x}.'}+"}*"""}'''],
    )
    check(r"""{}{""", [(0, 2)], ["{}"])
    check("""f"{'{'''''''''}\"""", [(2, 15)], ["{'{'''''''''}"])