Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support named escapes (\N{...}) in string processing #2319

Merged
merged 12 commits into from Jun 9, 2021
2 changes: 2 additions & 0 deletions CHANGES.md
Expand Up @@ -13,6 +13,8 @@
- Fix incorrect custom breakpoint indices when string group contains fake f-strings
(#2311)
- Fix regression where `R` prefixes would be lowercased for docstrings (#2285)
- Fix handling of named escapes (`\N{...}`) when `--experimental-string-processing` is
used (#2319)

## 21.5b2

Expand Down
92 changes: 62 additions & 30 deletions src/black/trans.py
Expand Up @@ -15,6 +15,7 @@
List,
Optional,
Sequence,
Set,
Tuple,
TypeVar,
Union,
Expand Down Expand Up @@ -1243,6 +1244,62 @@ def more_splits_should_be_made() -> bool:
last_line.comments = line.comments.copy()
yield Ok(last_line)

def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
    r"""
    Locate every named escape (``\N{...}``) contained in @string.

    Yields:
        One (begin, end) pair per named escape, where begin is the index of
        the backslash that opens the escape and end is the index of the "}"
        that closes it. Splitting @string anywhere inside such a range would
        cut the escape in two (which is NOT allowed).

    Raises:
        RuntimeError: if an escape is opened but no "}" follows it.
    """
    # True only while the character just consumed was a backslash that is not
    # itself escaped by the backslash before it.
    backslash_pending = False
    chars = enumerate(string)
    for pos, char in chars:
        if char == "\\":
            # Each consecutive backslash flips the state: an odd run leaves a
            # live backslash, an even run leaves none.
            backslash_pending = not backslash_pending
            continue

        opens_escape = backslash_pending and char == "N"
        backslash_pending = False
        if not opens_escape:
            continue

        begin = pos - 1  # index of the backslash in front of the "N"
        # Keep consuming the *same* iterator so the outer loop resumes right
        # after the closing brace.
        end = next((j for j, ch in chars if ch == "}"), None)
        if end is None:
            # Malformed escape; the code that parsed the source earlier is
            # expected to have rejected it before we get here.
            raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
        yield begin, end

def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
    """
    Locate every f-expression contained in @string.

    Yields:
        One (start, end) pair per match of self.RE_FEXPR, exactly as reported
        by the match's span() (so end is one past the last matched
        character). Splitting @string inside such a range would cut an
        f-expression in two (which is NOT allowed).

        Nothing is yielded when the string prefix contains no "f"/"F".
    """
    prefix = get_string_prefix(string)
    if "f" in prefix.lower():
        yield from (
            found.span()
            for found in re.finditer(self.RE_FEXPR, string, re.VERBOSE)
        )

def _get_illegal_split_indices(self, string: str) -> Set[Index]:
    """
    Collect every index of @string that falls inside an unsplittable range.

    Returns:
        The set of all indices i with begin <= i <= end for any (begin, end)
        pair produced by self._iter_fexpr_slices or
        self._iter_nameescape_slices.
    """
    # NOTE(review): both producers are treated as inclusive ranges here. The
    # f-expression pairs come from a regex span whose end is exclusive, so the
    # index just past each f-expression is marked too -- confirm this is
    # intended.
    forbidden: Set[Index] = set()
    for find_slices in (self._iter_fexpr_slices, self._iter_nameescape_slices):
        for begin, end in find_slices(string):
            forbidden.update(range(begin, end + 1))
    return forbidden

def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
"""
This method contains the algorithm that StringSplitter uses to
Expand Down Expand Up @@ -1272,40 +1329,15 @@ def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
assert is_valid_index(max_break_idx)
assert_is_leaf_string(string)

_fexpr_slices: Optional[List[Tuple[Index, Index]]] = None

def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
"""
Yields:
All ranges of @string which, if @string were to be split there,
would result in the splitting of an f-expression (which is NOT
allowed).
"""
nonlocal _fexpr_slices

if _fexpr_slices is None:
_fexpr_slices = []
for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
_fexpr_slices.append(match.span())

yield from _fexpr_slices

is_fstring = "f" in get_string_prefix(string).lower()
_illegal_split_indices = self._get_illegal_split_indices(string)

def breaks_fstring_expression(i: Index) -> bool:
def breaks_unsplittable_expression(i: Index) -> bool:
"""
Returns:
True iff returning @i would result in the splitting of an
f-expression (which is NOT allowed).
unsplittable expression (which is NOT allowed).
"""
if not is_fstring:
return False

for (start, end) in fexpr_slices():
if start <= i < end:
return True

return False
return i in _illegal_split_indices

def passes_all_checks(i: Index) -> bool:
"""
Expand All @@ -1329,7 +1361,7 @@ def passes_all_checks(i: Index) -> bool:
is_space
and is_not_escaped
and is_big_enough
and not breaks_fstring_expression(i)
and not breaks_unsplittable_expression(i)
)

# First, we check all indices BELOW @max_break_idx.
Expand Down
72 changes: 72 additions & 0 deletions tests/data/long_strings.py
Expand Up @@ -207,6 +207,38 @@ def foo():
" of it."
)

string_with_nameescape = (
"........................................................................ \N{LAO KO LA}"
)

string_with_nameescape = (
"........................................................................... \N{LAO KO LA}"
)

string_with_nameescape = (
"............................................................................ \N{LAO KO LA}"
)

Jackenmen marked this conversation as resolved.
Show resolved Hide resolved
string_with_nameescape_and_escaped_backslash = (
"...................................................................... \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
"......................................................................... \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
".......................................................................... \\\N{LAO KO LA}"
)

string_with_escaped_nameescape = (
"........................................................................ \\N{LAO KO LA}"
)

string_with_escaped_nameescape = (
"........................................................................... \\N{LAO KO LA}"
)


# output

Expand Down Expand Up @@ -587,3 +619,43 @@ def foo():
"This is a really long string that can't be merged because it has a likely pragma at the end" # pylint: disable=some-pylint-check
" of it."
)

string_with_nameescape = (
"........................................................................"
" \N{LAO KO LA}"
)

string_with_nameescape = (
"..........................................................................."
" \N{LAO KO LA}"
)

string_with_nameescape = (
"............................................................................"
" \N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
"......................................................................"
" \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
"........................................................................."
" \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
".........................................................................."
" \\\N{LAO KO LA}"
)

string_with_escaped_nameescape = (
"........................................................................ \\N{LAO"
" KO LA}"
)

string_with_escaped_nameescape = (
"..........................................................................."
" \\N{LAO KO LA}"
)
8 changes: 8 additions & 0 deletions tests/data/long_strings__regression.py
Expand Up @@ -514,6 +514,10 @@ async def foo(self):

x = F"This is a long string which contains an f-expr that should not split {{{[i for i in range(5)]}}}."

x = (
"\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}"
)


# output

Expand Down Expand Up @@ -1142,3 +1146,7 @@ async def foo(self):
"This is a long string which contains an f-expr that should not split"
f" {{{[i for i in range(5)]}}}."
)

x = (
"\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}"
)