Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support named escapes (\N{...}) in string processing #2319

Merged
merged 12 commits into from Jun 9, 2021
83 changes: 35 additions & 48 deletions src/black/trans.py
Expand Up @@ -15,6 +15,7 @@
List,
Optional,
Sequence,
Set,
Tuple,
TypeVar,
Union,
Expand Down Expand Up @@ -1243,14 +1244,13 @@ def more_splits_should_be_made() -> bool:
last_line.comments = line.comments.copy()
yield Ok(last_line)

def _get_nameescape_slices(self, string: str) -> List[Tuple[int, int]]:
def _iter_nameescape_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
"""
Returns:
List of all ranges of @string which, if @string were to be split there,
Yields:
All ranges of @string which, if @string were to be split there,
would result in the splitting of an \\N{...} expression (which is NOT
allowed).
"""
slices = []
# True - the previous backslash was unescaped
# False - the previous backslash was escaped *or* there was no backslash
previous_was_unescaped_backslash = False
Expand All @@ -1264,7 +1264,7 @@ def _get_nameescape_slices(self, string: str) -> List[Tuple[int, int]]:
continue
Jackenmen marked this conversation as resolved.
Show resolved Hide resolved
previous_was_unescaped_backslash = False

start = idx - 1 # the position of backslash before \N{...}
begin = idx - 1 # the position of backslash before \N{...}
for idx, c in it:
if c == "}":
end = idx
Expand All @@ -1273,9 +1273,32 @@ def _get_nameescape_slices(self, string: str) -> List[Tuple[int, int]]:
# malformed nameescape expression?
# should have been detected by AST parsing earlier...
raise RuntimeError(f"{self.__class__.__name__} LOGIC ERROR!")
slices.append((start, end))
yield begin, end

def _iter_fexpr_slices(self, string: str) -> Iterator[Tuple[Index, Index]]:
    """
    Yields:
        All ranges of @string which, if @string were to be split there,
        would result in the splitting of an f-expression (which is NOT
        allowed).

        Each range is the ``(start, end)`` pair returned by
        ``match.span()`` for one match of ``self.RE_FEXPR``. Nothing is
        yielded when the string prefix contains no "f" / "F".
    """
    # Only f-strings can contain f-expressions; skip the regex scan otherwise.
    if "f" not in get_string_prefix(string).lower():
        return

    for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
        yield match.span()

def _get_illegal_split_indices(self, string: str) -> Set[Index]:
illegal_indices: Set[Index] = set()
iterators = [
self._iter_fexpr_slices(string),
self._iter_nameescape_slices(string),
]
for it in iterators:
for begin, end in it:
for idx in range(begin, end + 1):
illegal_indices.add(idx)
Jackenmen marked this conversation as resolved.
Show resolved Hide resolved
return illegal_indices

def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
"""
Expand Down Expand Up @@ -1306,50 +1329,15 @@ def _get_break_idx(self, string: str, max_break_idx: int) -> Optional[int]:
assert is_valid_index(max_break_idx)
assert_is_leaf_string(string)

_fexpr_slices: Optional[List[Tuple[Index, Index]]] = None

def fexpr_slices() -> Iterator[Tuple[Index, Index]]:
"""
Yields:
All ranges of @string which, if @string were to be split there,
would result in the splitting of an f-expression (which is NOT
allowed).
"""
nonlocal _fexpr_slices

if _fexpr_slices is None:
_fexpr_slices = []
for match in re.finditer(self.RE_FEXPR, string, re.VERBOSE):
_fexpr_slices.append(match.span())

yield from _fexpr_slices

is_fstring = "f" in get_string_prefix(string).lower()

def breaks_fstring_expression(i: Index) -> bool:
"""
Returns:
True iff returning @i would result in the splitting of an
f-expression (which is NOT allowed).
"""
if not is_fstring:
return False

for (start, end) in fexpr_slices():
if start <= i < end:
return True

return False

nameescape_slices = self._get_nameescape_slices(string)
_illegal_split_indices = self._get_illegal_split_indices(string)

def breaks_nameescape_expression(i: Index) -> bool:
def breaks_unsplittable_expression(i: Index) -> bool:
"""
Returns:
True iff returning @i would result in the splitting of an
\\N{...} expression (which is NOT allowed).
unsplittable expression (which is NOT allowed).
"""
return any(start <= i < end for start, end in nameescape_slices)
return i in _illegal_split_indices

def passes_all_checks(i: Index) -> bool:
"""
Expand All @@ -1373,8 +1361,7 @@ def passes_all_checks(i: Index) -> bool:
is_space
and is_not_escaped
and is_big_enough
and not breaks_fstring_expression(i)
and not breaks_nameescape_expression(i)
and not breaks_unsplittable_expression(i)
)

# First, we check all indices BELOW @max_break_idx.
Expand Down