Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support named escapes (\N{...}) in string processing #2319

Merged
merged 12 commits into from Jun 9, 2021
2 changes: 2 additions & 0 deletions CHANGES.md
Expand Up @@ -13,6 +13,8 @@
- Fix incorrect custom breakpoint indices when string group contains fake f-strings
(#2311)
- Fix regression where `R` prefixes would be lowercased for docstrings (#2285)
- Fix handling of named escapes (`\N{...}`) when `--experimental-string-processing` is
used (#2319)

## 21.5b2

Expand Down
45 changes: 45 additions & 0 deletions src/black/trans.py
Expand Up @@ -1243,6 +1243,40 @@ def more_splits_should_be_made() -> bool:
last_line.comments = line.comments.copy()
yield Ok(last_line)

def _get_nameescape_slices(self, string: str) -> List[Tuple[int, int]]:
"""
Returns:
List of all ranges of @string which, if @string were to be split there,
would result in the splitting of an \\N{...} expression (which is NOT
allowed).
"""
slices = []
# True - the previous backslash was unescaped
# False - the previous backslash was escaped *or* there was no backslash
previous_was_unescaped_backslash = False
it = iter(enumerate(string))
for idx, c in it:
if c == "\\":
previous_was_unescaped_backslash = not previous_was_unescaped_backslash
continue
if not previous_was_unescaped_backslash or c != "N":
previous_was_unescaped_backslash = False
continue
Jackenmen marked this conversation as resolved.
Show resolved Hide resolved
previous_was_unescaped_backslash = False

start = idx - 1 # the position of backslash before \N{...}
for idx, c in it:
if c == "}":
end = idx
break
else:
# malformed nameescape expression?
# should have been detected by AST parsing earlier...
raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
slices.append((start, end))

return slices

def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
"""
This method contains the algorithm that StringSplitter uses to
Expand Down Expand Up @@ -1307,6 +1341,16 @@ def breaks_fstring_expression(i: Index) -> bool:

return False

nameescape_slices = self._get_nameescape_slices(string)

def breaks_nameescape_expression(i: Index) -> bool:
"""
Returns:
True iff returning @i would result in the splitting of an
\\N{...} expression (which is NOT allowed).
"""
return any(start <= i < end for start, end in nameescape_slices)

def passes_all_checks(i: Index) -> bool:
"""
Returns:
Expand All @@ -1330,6 +1374,7 @@ def passes_all_checks(i: Index) -> bool:
and is_not_escaped
and is_big_enough
and not breaks_fstring_expression(i)
and not breaks_nameescape_expression(i)
)

# First, we check all indices BELOW @max_break_idx.
Expand Down
72 changes: 72 additions & 0 deletions tests/data/long_strings.py
Expand Up @@ -207,6 +207,38 @@ def foo():
" of it."
)

string_with_nameescape = (
"........................................................................ \N{LAO KO LA}"
)

string_with_nameescape = (
"........................................................................... \N{LAO KO LA}"
)

string_with_nameescape = (
"............................................................................ \N{LAO KO LA}"
)

Jackenmen marked this conversation as resolved.
Show resolved Hide resolved
string_with_nameescape_and_escaped_backslash = (
"...................................................................... \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
"......................................................................... \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
".......................................................................... \\\N{LAO KO LA}"
)

string_with_escaped_nameescape = (
"........................................................................ \\N{LAO KO LA}"
)

string_with_escaped_nameescape = (
"........................................................................... \\N{LAO KO LA}"
)


# output

Expand Down Expand Up @@ -587,3 +619,43 @@ def foo():
"This is a really long string that can't be merged because it has a likely pragma at the end" # pylint: disable=some-pylint-check
" of it."
)

string_with_nameescape = (
"........................................................................"
" \N{LAO KO LA}"
)

string_with_nameescape = (
"..........................................................................."
" \N{LAO KO LA}"
)

string_with_nameescape = (
"............................................................................"
" \N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
"......................................................................"
" \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
"........................................................................."
" \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
".........................................................................."
" \\\N{LAO KO LA}"
)

string_with_escaped_nameescape = (
"........................................................................ \\N{LAO"
" KO LA}"
)

string_with_escaped_nameescape = (
"..........................................................................."
" \\N{LAO KO LA}"
)
8 changes: 8 additions & 0 deletions tests/data/long_strings__regression.py
Expand Up @@ -514,6 +514,10 @@ async def foo(self):

x = F"This is a long string which contains an f-expr that should not split {{{[i for i in range(5)]}}}."

x = (
"\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}"
)


# output

Expand Down Expand Up @@ -1142,3 +1146,7 @@ async def foo(self):
"This is a long string which contains an f-expr that should not split"
f" {{{[i for i in range(5)]}}}."
)

x = (
"\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}"
)