From 71df758d70aa7575678d5925887c13960f9978e7 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 28 Nov 2021 23:30:23 -0800 Subject: [PATCH 01/11] Remove regex dependency for determining f-string expression spans --- src/black/trans.py | 64 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/src/black/trans.py b/src/black/trans.py index d918ef111a2..f5a6fc70dbc 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -942,6 +942,48 @@ def _get_max_string_length(self, line: Line, string_idx: int) -> int: return max_string_length +def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]: + """ + Yields spans corresponding to expressions in a given f-string. + Assumes the input string is a valid f-string. + """ + stack = [] # our curly paren stack + i = 0 + while i < len(s): + if s[i] == "{": + # if we're in a string part of the f-string, ignore escaped curly braces + if not stack and i + 1 < len(s) and s[i + 1] == "{": + i += 2 + continue + stack.append(i) + i += 1 + continue + + if s[i] == "}": + if not stack: + i += 1 + continue + j = stack.pop() + # we've made it back out of the expression! yield the span + if not stack: + yield (j, i + 1) + i += 1 + continue + + # if we're in an expression part of the f-string, fast forward through strings + if stack: + delim = None + if s[i: i + 3] in ("'''", '"""'): + delim = s[i: i + 3] + elif s[i] in ("'", '"'): + delim = s[i] + if delim: + i += len(delim) + while i < len(s) and s[i:i + len(delim)] != delim: + i += 2 if s[i] == "\\" else 1 + i += 1 + + class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): """ StringTransformer that splits "atom" strings (i.e. strings which exist on @@ -981,17 +1023,6 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): """ MIN_SUBSTR_SIZE: Final = 6 - # Matches an "f-expression" (e.g. {var}) that might be found in an f-string. - RE_FEXPR: Final = r""" - (? TMatchResult: LL = line.leaves @@ -1058,8 +1089,9 @@ def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]: # contain any f-expressions, but ONLY if the original f-string # contains at least one f-expression. Otherwise, we will alter the AST # of the program. - drop_pointless_f_prefix = ("f" in prefix) and re.search( - self.RE_FEXPR, LL[string_idx].value, re.VERBOSE + drop_pointless_f_prefix = ( + ("f" in prefix) + and any(True for _ in iter_fexpr_spans(LL[string_idx].value)) ) first_string_line = True @@ -1299,9 +1331,7 @@ def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]: """ if "f" not in get_string_prefix(string).lower(): return - - for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE): - yield match.span() + yield from iter_fexpr_spans(string) def _get_illegal_split_indices(self, string: str) -> Set[Index]: illegal_indices: Set[Index] = set() @@ -1417,7 +1447,7 @@ def _normalize_f_string(self, string: str, prefix: str) -> str: """ assert_is_leaf_string(string) - if "f" in prefix and not re.search(self.RE_FEXPR, string, re.VERBOSE): + if "f" in prefix and not any(True for _ in iter_fexpr_spans(string)): new_prefix = prefix.replace("f", "") temp = string[len(prefix) :] From 869474030882f9f602afa5de80795c01b24fd65b Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 28 Nov 2021 23:38:24 -0800 Subject: [PATCH 02/11] remove backslash code --- src/black/trans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/black/trans.py b/src/black/trans.py index f5a6fc70dbc..83e3e7960e5 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -980,7 +980,7 @@ def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]: if delim: i += len(delim) while i < len(s) and s[i:i + len(delim)] != delim: - i += 2 if s[i] == "\\" else 1 + i += 1 i += 1 From 9aa1af45d7cf43aee1e2f2dbad124dabb989e1b4 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 28 Nov 2021 23:40:15 -0800 Subject: [PATCH 03/11] lint --- src/black/trans.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/black/trans.py b/src/black/trans.py index 83e3e7960e5..5af19e1afe3 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -947,7 +947,7 @@ def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]: Yields spans corresponding to expressions in a given f-string. Assumes the input string is a valid f-string. """ - stack = [] # our curly paren stack + stack: List[int] = [] # our curly paren stack i = 0 while i < len(s): if s[i] == "{": @@ -973,17 +973,21 @@ def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]: # if we're in an expression part of the f-string, fast forward through strings if stack: delim = None - if s[i: i + 3] in ("'''", '"""'): - delim = s[i: i + 3] + if s[i : i + 3] in ("'''", '"""'): + delim = s[i : i + 3] elif s[i] in ("'", '"'): delim = s[i] if delim: i += len(delim) - while i < len(s) and s[i:i + len(delim)] != delim: + while i < len(s) and s[i : i + len(delim)] != delim: i += 1 i += 1 +def fstring_contains_expr(s: str) -> bool: + return any(True for _ in iter_fexpr_spans(s)) + + class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): """ StringTransformer that splits "atom" strings (i.e. strings which exist on @@ -1089,9 +1093,8 @@ def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]: # contain any f-expressions, but ONLY if the original f-string # contains at least one f-expression. Otherwise, we will alter the AST # of the program. - drop_pointless_f_prefix = ( - ("f" in prefix) - and any(True for _ in iter_fexpr_spans(LL[string_idx].value)) + drop_pointless_f_prefix = ("f" in prefix) and fstring_contains_expr( + LL[string_idx].value ) first_string_line = True @@ -1447,7 +1450,7 @@ def _normalize_f_string(self, string: str, prefix: str) -> str: """ assert_is_leaf_string(string) - if "f" in prefix and not any(True for _ in iter_fexpr_spans(string)): + if "f" in prefix and not fstring_contains_expr(string): new_prefix = prefix.replace("f", "") temp = string[len(prefix) :] From 62f80135383f52d82456ef4553514b3d740d2428 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Sun, 28 Nov 2021 23:53:14 -0800 Subject: [PATCH 04/11] add a comment --- src/black/trans.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/black/trans.py b/src/black/trans.py index 5af19e1afe3..0060aef4889 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -971,6 +971,7 @@ def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]: continue # if we're in an expression part of the f-string, fast forward through strings + # note that backslashes are not legal in the expression portion of f-strings if stack: delim = None if s[i : i + 3] in ("'''", '"""'): From d3f6b60993905b579fcc160a48e6f7d4e46e8d52 Mon Sep 17 00:00:00 2001 From: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Date: Mon, 29 Nov 2021 11:33:00 -0800 Subject: [PATCH 05/11] Update src/black/trans.py Co-authored-by: Jelle Zijlstra --- src/black/trans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/black/trans.py b/src/black/trans.py index 0060aef4889..17ab6e715bf 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -986,7 +986,7 @@ def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]: def fstring_contains_expr(s: str) -> bool: - return any(True for _ in iter_fexpr_spans(s)) + return any(iter_fexpr_spans(s)) class StringSplitter(BaseStringSplitter, CustomSplitMapMixin): From 5a63aec96c348b698b09875e4d545ab8090679ff Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Mon, 29 Nov 2021 00:49:09 -0800 Subject: [PATCH 06/11] fix an issue --- src/black/trans.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/black/trans.py b/src/black/trans.py index 17ab6e715bf..9f2609271d8 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -982,6 +982,8 @@ def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]: i += len(delim) while i < len(s) and s[i : i + len(delim)] != delim: i += 1 + i += len(delim) + continue i += 1 From b0872cd97d1153deb3e2edd9039d00d6f2566713 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Mon, 29 Nov 2021 11:42:15 -0800 Subject: [PATCH 07/11] document half open --- src/black/trans.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/black/trans.py b/src/black/trans.py index 9f2609271d8..3aef3abb8e5 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -945,6 +945,7 @@ def _get_max_string_length(self, line: Line, string_idx: int) -> int: def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]: """ Yields spans corresponding to expressions in a given f-string. + Spans are half-open ranges (left inclusive, right exclusive). Assumes the input string is a valid f-string. """ stack: List[int] = [] # our curly paren stack From 7fb4f81b04bf91eafb8b0c5053ca737b815dcdb9 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Tue, 30 Nov 2021 22:29:43 -0800 Subject: [PATCH 08/11] commit tests --- tests/test_trans.py | 50 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tests/test_trans.py diff --git a/tests/test_trans.py b/tests/test_trans.py new file mode 100644 index 00000000000..a1666a9c166 --- /dev/null +++ b/tests/test_trans.py @@ -0,0 +1,50 @@ +from typing import List, Tuple +from black.trans import iter_fexpr_spans + + +def test_fexpr_spans() -> None: + def check( + string: str, expected_spans: List[Tuple[int, int]], expected_slices: List[str] + ) -> None: + spans = list(iter_fexpr_spans(string)) + + # Checking slices isn't strictly necessary, but it's easier to verify at + # a glance than only spans + assert len(spans) == len(expected_slices) + for (i, j), slice in zip(spans, expected_slices): + assert len(string[i:j]) == j - i + assert string[i:j] == slice + + assert spans == expected_spans + + # Most of these test cases omit the leading 'f' and leading / closing quotes + # for convenience + # Some additional property-based tests can be found in + # https://github.com/psf/black/pull/2654#issuecomment-981411748 + check("""{var}""", [(0, 5)], ["{var}"]) + check("""f'{var}'""", [(2, 7)], ["{var}"]) + check("""f'{1 + f() + 2 + "asdf"}'""", [(2, 24)], ["""{1 + f() + 2 + "asdf"}"""]) + check("""text {var} text""", [(5, 10)], ["{var}"]) + check("""text {{ {var} }} text""", [(8, 13)], ["{var}"]) + check("""{a} {b} {c}""", [(0, 3), (4, 7), (8, 11)], ["{a}", "{b}", "{c}"]) + check("""f'{a} {b} {c}'""", [(2, 5), (6, 9), (10, 13)], ["{a}", "{b}", "{c}"]) + check("""{ {} }""", [(0, 6)], ["{ {} }"]) + check("""{ {{}} }""", [(0, 8)], ["{ {{}} }"]) + check("""{ {{{}}} }""", [(0, 10)], ["{ {{{}}} }"]) + check("""{{ {{{}}} }}""", [(5, 7)], ["{}"]) + check("""{{ {{{var}}} }}""", [(5, 10)], ["{var}"]) + check("""{f"{0}"}""", [(0, 8)], ["""{f"{0}"}"""]) + check("""{"'"}""", [(0, 5)], ["""{"'"}"""]) + check("""{"{"}""", [(0, 5)], ["""{"{"}"""]) + check("""{"}"}""", [(0, 5)], ["""{"}"}"""]) + check("""{"{{"}""", [(0, 6)], ["""{"{{"}"""]) + check("""{''' '''}""", [(0, 9)], ["""{''' '''}"""]) + check("""{'''{'''}""", [(0, 9)], ["""{'''{'''}"""]) + check("""{''' {'{ '''}""", [(0, 13)], ["""{''' {'{ '''}"""]) + check( + '''f\'\'\'-{f"""*{f"+{f'.{x}.'}+"}*"""}-'y\\'\'\'\'''', + [(5, 33)], + ['''{f"""*{f"+{f'.{x}.'}+"}*"""}'''], + ) + check(r"""{}{""", [(0, 2)], ["{}"]) + check("""f"{'{'''''''''}\"""", [(2, 15)], ["{'{'''''''''}"]) From a9fa1c611782ae58e757065c0faea47396b684fd Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Tue, 30 Nov 2021 22:40:37 -0800 Subject: [PATCH 09/11] changelog --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 235919ec7ac..61b3dc8679d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,6 +8,7 @@ - Fixed `match` statements with open sequence subjects, like `match a, b:` (#2639) - Fixed assignment to environment variables in Jupyter Notebooks (#2642) - Add `flake8-simplify` and `flake8-comprehensions` plugins (#2653) +- Fixed determination of f-string expression spans (#2654) ## 21.11b1 From 2202185b11f638329d74133da30083f275adc882 Mon Sep 17 00:00:00 2001 From: hauntsaninja <> Date: Tue, 30 Nov 2021 22:42:49 -0800 Subject: [PATCH 10/11] make comment more accurate --- src/black/trans.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/black/trans.py b/src/black/trans.py index 3aef3abb8e5..74d73aa8edb 100644 --- a/src/black/trans.py +++ b/src/black/trans.py @@ -946,7 +946,8 @@ def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]: """ Yields spans corresponding to expressions in a given f-string. Spans are half-open ranges (left inclusive, right exclusive). - Assumes the input string is a valid f-string. + Assumes the input string is a valid f-string, but will not crash if the input + string is invalid. """ stack: List[int] = [] # our curly paren stack i = 0 From 4f0c229ac3ae51e7c08b4dac78fe6f999033bd8b Mon Sep 17 00:00:00 2001 From: Jelle Zijlstra Date: Wed, 1 Dec 2021 06:50:09 -0800 Subject: [PATCH 11/11] Update CHANGES.md --- CHANGES.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index b858962eb9f..5b648225ad7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,7 +15,7 @@ times, like `match re.match()` (#2661) - Fix assignment to environment variables in Jupyter Notebooks (#2642) - Add `flake8-simplify` and `flake8-comprehensions` plugins (#2653) -- Fixed determination of f-string expression spans (#2654) +- Fix determination of f-string expression spans (#2654) - Fix parser error location on invalid syntax in a `match` statement (#2649) ## 21.11b1