Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Make sure to split lines that start with a string operator #2286

Merged
merged 5 commits into from May 30, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.md
Expand Up @@ -11,6 +11,7 @@
- Restored compatibility with Click 8.0 on Python 3.6 when LANG=C used (#2227)
- Add extra uvloop install + import support if in python env (#2258)
- Fix --experimental-string-processing crash when matching parens are not found (#2283)
- Make sure to split lines that start with a string operator (#2286)

### _Blackd_

Expand Down
78 changes: 57 additions & 21 deletions src/black/trans.py
Expand Up @@ -920,9 +920,9 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
lines by themselves).

Requirements:
* The line consists ONLY of a single string (with the exception of a
'+' symbol which MAY exist at the start of the line), MAYBE a string
trailer, and MAYBE a trailing comma.
* The line consists ONLY of a single string (possibly prefixed by a
string operator [e.g. '+' or '==']), MAYBE a string trailer, and MAYBE
a trailing comma.
AND
* All of the requirements listed in BaseStringSplitter's docstring.

Expand Down Expand Up @@ -952,6 +952,16 @@ class StringSplitter(CustomSplitMapMixin, BaseStringSplitter):
CustomSplit objects and add them to the custom split map.
"""

STRING_OPERATORS = [
token.PLUS,
token.STAR,
token.EQEQUAL,
token.NOTEQUAL,
token.LESS,
token.LESSEQUAL,
token.GREATER,
token.GREATEREQUAL,
]
MIN_SUBSTR_SIZE = 6
# Matches an "f-expression" (e.g. {var}) that might be found in an f-string.
RE_FEXPR = r"""
Expand All @@ -972,8 +982,20 @@ def do_splitter_match(self, line: Line) -> TMatchResult:

idx = 0

# The first leaf MAY be a '+' symbol...
if is_valid_index(idx) and LL[idx].type == token.PLUS:
# The first two leaves MAY be the 'not in' keywords...
if (
is_valid_index(idx)
and is_valid_index(idx + 1)
and [LL[idx].type, LL[idx + 1].type] == [token.NAME, token.NAME]
and str(LL[idx]) + str(LL[idx + 1]) == "not in"
):
idx += 2
# Else the first leaf MAY be a string operator symbol or the 'in' keyword...
elif is_valid_index(idx) and (
LL[idx].type in self.STRING_OPERATORS
or LL[idx].type == token.NAME
and str(LL[idx]) == "in"
):
idx += 1

# The next/first leaf MAY be an empty LPAR...
Expand Down Expand Up @@ -1023,23 +1045,26 @@ def do_transform(self, line: Line, string_idx: int) -> Iterator[TResult[Line]]:
)

first_string_line = True
starts_with_plus = LL[0].type == token.PLUS

def line_needs_plus() -> bool:
return first_string_line and starts_with_plus
string_op_leaves = self._get_string_operator_leaves(LL)
string_op_leaves_length = (
sum([len(str(prefix_leaf)) for prefix_leaf in string_op_leaves]) + 1
if string_op_leaves
else 0
)

def maybe_append_plus(new_line: Line) -> None:
def maybe_append_string_operators(new_line: Line) -> None:
"""
Side Effects:
If @line starts with a plus and this is the first line we are
constructing, this function appends a PLUS leaf to @new_line
and replaces the old PLUS leaf in the node structure. Otherwise
this function does nothing.
If @line starts with a string operator and this is the first
line we are constructing, this function appends the string
operator to @new_line and replaces the old string operator leaf
in the node structure. Otherwise this function does nothing.
"""
if line_needs_plus():
plus_leaf = Leaf(token.PLUS, "+")
replace_child(LL[0], plus_leaf)
new_line.append(plus_leaf)
maybe_prefix_leaves = string_op_leaves if first_string_line else []
for i, prefix_leaf in enumerate(maybe_prefix_leaves):
replace_child(LL[i], prefix_leaf)
new_line.append(prefix_leaf)

ends_with_comma = (
is_valid_index(string_idx + 1) and LL[string_idx + 1].type == token.COMMA
Expand All @@ -1054,7 +1079,7 @@ def max_last_string() -> int:
result = self.line_length
result -= line.depth * 4
result -= 1 if ends_with_comma else 0
result -= 2 if line_needs_plus() else 0
result -= string_op_leaves_length
return result

# --- Calculate Max Break Index (for string value)
Expand Down Expand Up @@ -1103,7 +1128,7 @@ def more_splits_should_be_made() -> bool:
break_idx = csplit.break_idx
else:
# Algorithmic Split (automatic)
max_bidx = max_break_idx - 2 if line_needs_plus() else max_break_idx
max_bidx = max_break_idx - string_op_leaves_length
maybe_break_idx = self._get_break_idx(rest_value, max_bidx)
if maybe_break_idx is None:
# If we are unable to algorithmically determine a good split
Expand Down Expand Up @@ -1148,7 +1173,7 @@ def more_splits_should_be_made() -> bool:

# --- Construct `next_line`
next_line = line.clone()
maybe_append_plus(next_line)
maybe_append_string_operators(next_line)
next_line.append(next_leaf)
string_line_results.append(Ok(next_line))

Expand All @@ -1169,7 +1194,7 @@ def more_splits_should_be_made() -> bool:
self._maybe_normalize_string_quotes(rest_leaf)

last_line = line.clone()
maybe_append_plus(last_line)
maybe_append_string_operators(last_line)

# If there are any leaves to the right of the target string...
if is_valid_index(string_idx + 1):
Expand Down Expand Up @@ -1345,6 +1370,17 @@ def _normalize_f_string(self, string: str, prefix: str) -> str:
else:
return string

def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:
LL = list(leaves)

string_op_leaves = []
i = 0
while LL[i].type in self.STRING_OPERATORS + [token.NAME]:
prefix_leaf = Leaf(LL[i].type, str(LL[i]).strip())
string_op_leaves.append(prefix_leaf)
i += 1
return string_op_leaves


class StringParenWrapper(CustomSplitMapMixin, BaseStringSplitter):
"""
Expand Down
84 changes: 84 additions & 0 deletions tests/data/long_strings__regression.py
Expand Up @@ -406,6 +406,40 @@ def _legacy_listen_examples():
}
)


assert str(suffix_arr) == (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) != (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) <= (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) >= (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) < (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) > (
"['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert str(suffix_arr) in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
assert str(suffix_arr) not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"

# output


Expand Down Expand Up @@ -906,3 +940,53 @@ def _legacy_listen_examples():
"since": since,
}
)


assert (
str(suffix_arr)
== "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
!= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
<= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
>= "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
< "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
> "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', "
"'grykangaroo$', 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', "
"'o$', 'oo$', 'roo$', 'rykangaroo$', 'ykangaroo$']"
)
assert (
str(suffix_arr)
in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
" 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$', 'rykangaroo$',"
" 'ykangaroo$']"
)
assert (
str(suffix_arr)
not in "['$', 'angaroo$', 'angrykangaroo$', 'aroo$', 'garoo$', 'grykangaroo$',"
" 'kangaroo$', 'ngaroo$', 'ngrykangaroo$', 'o$', 'oo$', 'roo$',"
" 'rykangaroo$', 'ykangaroo$']"
)