Skip to content

Commit

Permalink
Support named escapes (\N{...}) in string processing (#2319)
Browse files Browse the repository at this point in the history
Co-authored-by: Felix Hildén <felix.hilden@gmail.com>
Co-authored-by: Jelle Zijlstra <jelle.zijlstra@gmail.com>
  • Loading branch information
3 people committed Jun 9, 2021
1 parent 229498e commit 62402a3
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 30 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Expand Up @@ -13,6 +13,8 @@
- Fix incorrect custom breakpoint indices when string group contains fake f-strings
(#2311)
- Fix regression where `R` prefixes would be lowercased for docstrings (#2285)
- Fix handling of named escapes (`\N{...}`) when `--experimental-string-processing` is
used (#2319)

## 21.5b2

Expand Down
91 changes: 61 additions & 30 deletions src/black/trans.py
Expand Up @@ -15,6 +15,7 @@
List,
Optional,
Sequence,
Set,
Tuple,
TypeVar,
Union,
Expand Down Expand Up @@ -1243,6 +1244,61 @@ def more_splits_should_be_made() -> bool:
last_line.comments = line.comments.copy()
yield Ok(last_line)

def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
"""
Yields:
All ranges of @string which, if @string were to be split there,
would result in the splitting of an \\N{...} expression (which is NOT
allowed).
"""
# True - the previous backslash was unescaped
# False - the previous backslash was escaped *or* there was no backslash
previous_was_unescaped_backslash = False
it = iter(enumerate(string))
for idx, c in it:
if c == "\\":
previous_was_unescaped_backslash = not previous_was_unescaped_backslash
continue
if not previous_was_unescaped_backslash or c != "N":
previous_was_unescaped_backslash = False
continue
previous_was_unescaped_backslash = False

begin = idx - 1 # the position of backslash before \N{...}
for idx, c in it:
if c == "}":
end = idx
break
else:
# malformed nameescape expression?
# should have been detected by AST parsing earlier...
raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
yield begin, end

def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
    """
    Yields:
        The (start, end) span of every match of self.RE_FEXPR in @string.
        Splitting @string inside such a span would cut an f-expression in
        two (which is NOT allowed). Nothing is yielded when the prefix of
        @string does not contain an "f" (in either case).
    """
    prefix = get_string_prefix(string).lower()
    if "f" in prefix:
        fexpr_matches = re.finditer(self.RE_FEXPR, string, re.VERBOSE)
        yield from (fexpr.span() for fexpr in fexpr_matches)

def _get_illegal_split_indices(self, string: str) -> Set[Index]:
illegal_indices: Set[Index] = set()
iterators = [
self._iter_fexpr_slices(string),
self._iter_nameescape_slices(string),
]
for it in iterators:
for begin, end in it:
illegal_indices.update(range(begin, end + 1))
return illegal_indices

def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
"""
This method contains the algorithm that StringSplitter uses to
Expand Down Expand Up @@ -1272,40 +1328,15 @@ def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
assert is_valid_index(max_break_idx)
assert_is_leaf_string(string)

_fexpr_slices: Optional[List[Tuple[Index, Index]]] = None

def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
"""
Yields:
All ranges of @string which, if @string were to be split there,
would result in the splitting of an f-expression (which is NOT
allowed).
"""
nonlocal _fexpr_slices

if _fexpr_slices is None:
_fexpr_slices = []
for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
_fexpr_slices.append(match.span())

yield from _fexpr_slices

is_fstring = "f" in get_string_prefix(string).lower()
_illegal_split_indices = self._get_illegal_split_indices(string)

def breaks_fstring_expression(i: Index) -> bool:
def breaks_unsplittable_expression(i: Index) -> bool:
"""
Returns:
True iff returning @i would result in the splitting of an
f-expression (which is NOT allowed).
unsplittable expression (which is NOT allowed).
"""
if not is_fstring:
return False

for (start, end) in fexpr_slices():
if start <= i < end:
return True

return False
return i in _illegal_split_indices

def passes_all_checks(i: Index) -> bool:
"""
Expand All @@ -1329,7 +1360,7 @@ def passes_all_checks(i: Index) -> bool:
is_space
and is_not_escaped
and is_big_enough
and not breaks_fstring_expression(i)
and not breaks_unsplittable_expression(i)
)

# First, we check all indices BELOW @max_break_idx.
Expand Down
72 changes: 72 additions & 0 deletions tests/data/long_strings.py
Expand Up @@ -207,6 +207,38 @@ def foo():
" of it."
)

string_with_nameescape = (
"........................................................................ \N{LAO KO LA}"
)

string_with_nameescape = (
"........................................................................... \N{LAO KO LA}"
)

string_with_nameescape = (
"............................................................................ \N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
"...................................................................... \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
"......................................................................... \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
".......................................................................... \\\N{LAO KO LA}"
)

string_with_escaped_nameescape = (
"........................................................................ \\N{LAO KO LA}"
)

string_with_escaped_nameescape = (
"........................................................................... \\N{LAO KO LA}"
)


# output

Expand Down Expand Up @@ -587,3 +619,43 @@ def foo():
"This is a really long string that can't be merged because it has a likely pragma at the end" # pylint: disable=some-pylint-check
" of it."
)

string_with_nameescape = (
"........................................................................"
" \N{LAO KO LA}"
)

string_with_nameescape = (
"..........................................................................."
" \N{LAO KO LA}"
)

string_with_nameescape = (
"............................................................................"
" \N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
"......................................................................"
" \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
"........................................................................."
" \\\N{LAO KO LA}"
)

string_with_nameescape_and_escaped_backslash = (
".........................................................................."
" \\\N{LAO KO LA}"
)

string_with_escaped_nameescape = (
"........................................................................ \\N{LAO"
" KO LA}"
)

string_with_escaped_nameescape = (
"..........................................................................."
" \\N{LAO KO LA}"
)
8 changes: 8 additions & 0 deletions tests/data/long_strings__regression.py
Expand Up @@ -514,6 +514,10 @@ async def foo(self):

x = F"This is a long string which contains an f-expr that should not split {{{[i for i in range(5)]}}}."

x = (
"\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}"
)


# output

Expand Down Expand Up @@ -1142,3 +1146,7 @@ async def foo(self):
"This is a long string which contains an f-expr that should not split"
f" {{{[i for i in range(5)]}}}."
)

x = (
"\N{BLACK RIGHT-POINTING TRIANGLE WITH DOUBLE VERTICAL BAR}\N{VARIATION SELECTOR-16}"
)

0 comments on commit 62402a3

Please sign in to comment.