Skip to content

Commit

Permalink
Make sure to split lines that start with a string operator (#2286)
Browse files Browse the repository at this point in the history
Fixes #2284
  • Loading branch information
bbugyi200 committed May 30, 2021
1 parent eec44f5 commit 4ca4407
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 21 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Expand Up @@ -11,6 +11,7 @@
- Restored compatibility with Click 8.0 on Python 3.6 when LANG=C used (#2227)
- Add extra uvloop install + import support if in python env (#2258)
- Fix --experimental-string-processing crash when matching parens are not found (#2283)
- Make sure to split lines that start with a string operator (#2286)

### _Blackd_

Expand Down
78 changes: 57 additions & 21 deletions src/black/trans.py
Expand Up @@ -920,9 +920,9 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
lines by themselves).
Requirements:
* The line consists ONLY of a single string (with the exception of a
'+' symbol which MAY exist at the start of the line), MAYBE a string
trailer, and MAYBE a trailing comma.
* The line consists ONLY of a single string (possibly prefixed by a
string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
a trailing comma.
AND
* All of the requirements listed in BaseStringSplitter's docstring.
Expand Down Expand Up @@ -952,6 +952,16 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
CustomSplit objects and add them to the custom split map.
"""

# Token types that may legally prefix the string on a line handled by this
# splitter (e.g. `+ "..."`, `== "..."`).  The NAME keywords 'in' / 'not in'
# are matched separately in do_splitter_match, since they are NAME tokens
# rather than operator tokens.
STRING_OPERATORS = [
token.PLUS,
token.STAR,
token.EQEQUAL,
token.NOTEQUAL,
token.LESS,
token.LESSEQUAL,
token.GREATER,
token.GREATEREQUAL,
]
# Substrings shorter than this are never produced by a split; keeps the
# splitter from emitting tiny string fragments.
MIN_SUBSTR_SIZE = 6
# Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
RE_FEXPR = r"""
Expand All @@ -972,8 +982,20 @@ def do_splitter_match(self, line: Line) -> TMatchResult:

idx = 0

# The first leaf MAY be a '+' symbol...
if is_valid_index(idx) and LL[idx].type == token.PLUS:
# The first two leaves MAY be the 'not in' keywords...
if (
is_valid_index(idx)
and is_valid_index(idx + 1)
and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
and str(LL[idx]) + str(LL[idx + 1]) == "not in"
):
idx += 2
# Else the first leaf MAY be a string operator symbol or the 'in' keyword...
elif is_valid_index(idx) and (
LL[idx].type in self.STRING_OPERATORS
or LL[idx].type == token.NAME
and str(LL[idx]) == "in"
):
idx += 1

# The next/first leaf MAY be an empty LPAR...
Expand Down Expand Up @@ -1023,23 +1045,26 @@ def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
)

first_string_line = True
starts_with_plus = LL[0].type == token.PLUS

def line_needs_plus() -> bool:
return first_string_line and starts_with_plus
string_op_leaves = self._get_string_operator_leaves(LL)
string_op_leaves_length = (
sum([len(str(prefix_leaf)) for prefix_leaf in string_op_leaves]) + 1
if string_op_leaves
else 0
)

def maybe_append_plus(new_line: Line) -> None:
def maybe_append_string_operators(new_line: Line) -> None:
"""
Side Effects:
If @line starts with a plus and this is the first line we are
constructing, this function appends a PLUS leaf to @new_line
and replaces the old PLUS leaf in the node structure. Otherwise
this function does nothing.
If @line starts with a string operator and this is the first
line we are constructing, this function appends the string
operator to @new_line and replaces the old string operator leaf
in the node structure. Otherwise this function does nothing.
"""
if line_needs_plus():
plus_leaf = Leaf(token.PLUS, "+")
replace_child(LL[0], plus_leaf)
new_line.append(plus_leaf)
maybe_prefix_leaves = string_op_leaves if first_string_line else []
for i, prefix_leaf in enumerate(maybe_prefix_leaves):
replace_child(LL[i], prefix_leaf)
new_line.append(prefix_leaf)

ends_with_comma = (
is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
Expand All @@ -1054,7 +1079,7 @@ def max_last_string() -> int:
result = self.line_length
result -= line.depth * 4
result -= 1 if ends_with_comma else 0
result -= 2 if line_needs_plus() else 0
result -= string_op_leaves_length
return result

# --- Calculate Max Break Index (for string value)
Expand Down Expand Up @@ -1103,7 +1128,7 @@ def more_splits_should_be_made() -> bool:
break_idx = csplit.break_idx
else:
# Algorithmic Split (automatic)
max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx
max_bidx = max_break_idx - string_op_leaves_length
maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
if maybe_break_idx is None:
# If we are unable to algorithmically determine a good split
Expand Down Expand Up @@ -1148,7 +1173,7 @@ def more_splits_should_be_made() -> bool:

# --- Construct `next_line`
next_line = line.clone()
maybe_append_plus(next_line)
maybe_append_string_operators(next_line)
next_line.append(next_leaf)
string_line_results.append(Ok(next_line))

Expand All @@ -1169,7 +1194,7 @@ def more_splits_should_be_made() -> bool:
self._maybe_normalize_string_quotes(rest_leaf)

last_line = line.clone()
maybe_append_plus(last_line)
maybe_append_string_operators(last_line)

# If there are any leaves to the right of the target string...
if is_valid_index(string_idx + 1):
Expand Down Expand Up @@ -1345,6 +1370,17 @@ def _normalize_f_string(self, string: str, prefix: str) -> str:
else:
return string

def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
    """Return fresh copies of the leading "string operator" leaves of @leaves.

    A "string operator" leaf is any token whose type is listed in
    self.STRING_OPERATORS (e.g. '+', '==') or a NAME token (used for the
    'in' / 'not in' keywords).  Scanning stops at the first leaf that is
    neither, which for a line matched by do_splitter_match is the string
    itself.

    Returns:
        A (possibly empty) list of new Leaf objects mirroring the prefix
        operator leaves, with surrounding whitespace stripped.
    """
    LL = list(leaves)

    string_op_leaves = []
    i = 0
    # Bound the scan by len(LL) so a pathological input consisting only of
    # operator/NAME leaves cannot raise IndexError; for lines pre-matched by
    # do_splitter_match the loop always terminates at the string token first.
    #
    # NOTE(review): any NAME token is accepted here, not just 'in'/'not' --
    # callers only invoke this on pre-matched lines, so this appears safe in
    # practice; confirm before reusing elsewhere.
    while i < len(LL) and LL[i].type in self.STRING_OPERATORS + [token.NAME]:
        # Copy the leaf (stripping whitespace) so the caller can re-parent
        # the copy into a new line without mutating the original node tree.
        prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
        string_op_leaves.append(prefix_leaf)
        i += 1
    return string_op_leaves


class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
"""
Expand Down
84 changes: 84 additions & 0 deletions tests/data/long_strings__regression.py
Expand Up @@ -406,6 +406,40 @@ def _legacy_listen_examples():
}
)


assert str(suffix_arr) == (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) != (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) <= (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) >= (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) < (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) > (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
assert str(suffix_arr) not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"

# output


Expand Down Expand Up @@ -906,3 +940,53 @@ def _legacy_listen_examples():
"since": since,
}
)


assert (
str(suffix_arr)
== "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
!= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
<= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
>= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
< "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
> "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
" 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$',"
" 'ykangaroo$']"
)
assert (
str(suffix_arr)
not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
" 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$',"
" 'rykangaroo$', 'ykangaroo$']"
)

0 comments on commit 4ca4407

Please sign in to comment.