Skip to content

Commit

Permalink
Add parens around implicit string concatenations where it increases r…
Browse files Browse the repository at this point in the history
…eadability (#3162)

Adds parentheses around implicit string concatenations when they are inside
a list, set, or tuple, except when the string is the only element and there
is no trailing comma.

Looking at the order of the transformers here, we need to "wrap in
parens" before string_split runs. So my solution is to introduce a
"collaboration" between StringSplitter and StringParenWrapper where the
splitter "skips" the split until the wrapper adds the parens (and then
the line after the paren is split by StringSplitter) in another pass.

I have also considered an alternative approach, where I tried to add a
different "string paren wrapper" class, and it runs before string_split.
Then I found out it requires a different do_transform implementation
than StringParenWrapper.do_transform, since the latter assumes it runs
after the delimiter_split transform. So I stopped researching that
route.

Originally, function calls were also included in this change, but given that
a missing comma there should usually result in a runtime error, and given the
scary number of changes this causes in downstream code, they were removed in
later revisions.
  • Loading branch information
yilei committed Aug 31, 2022
1 parent c0cc19b commit ba618a3
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 44 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Expand Up @@ -26,6 +26,8 @@
normalized as expected (#3168)
- When using `--skip-magic-trailing-comma` or `-C`, trailing commas are stripped from
subscript expressions with more than 1 element (#3209)
- Implicitly concatenated strings inside a list, set, or tuple are now wrapped inside
parentheses (#3162)
- Fix a string merging/split issue when a comment is present in the middle of implicitly
concatenated strings on its own line (#3227)

Expand Down
50 changes: 48 additions & 2 deletions src/black/trans.py
Expand Up @@ -1043,6 +1043,41 @@ def _get_max_string_length(self, line: Line, string_idx: int) -> int:
max_string_length = self.line_length - offset
return max_string_length

@staticmethod
def _prefer_paren_wrap_match(LL: List[Leaf]) -> Optional[int]:
    """
    Decide whether the line's leading string "prefers" to be wrapped in
    parentheses before it is split.

    Returns:
        0 (the index of the matched string in @LL), if this line matches the
        "prefer paren wrap" statement requirements listed in the
        'Requirements' section of the StringParenWrapper class's docstring.
            OR
        None, otherwise.
    """
    first_leaf = LL[0]

    # Only lines that begin with a string are candidates.
    if first_leaf.type != token.STRING:
        return None

    # Grammar symbols for the contents of list, set/dict, and tuple literals.
    container_symbols = (
        syms.listmaker,
        syms.dictsetmaker,
        syms.testlist_gexp,
    )
    # The string must be an immediate child of a list/set/tuple literal; the
    # parent's own parent type is accepted as well.
    if (
        parent_type(first_leaf) not in container_symbols
        and parent_type(first_leaf.parent) not in container_symbols
    ):
        return None

    # Each neighbor must be either absent (the string is the first/last
    # child) or a comma; any other neighbor disqualifies the line.
    for neighbor in (first_leaf.prev_sibling, first_leaf.next_sibling):
        if neighbor and neighbor.type != token.COMMA:
            return None

    return 0


def iter_fexpr_spans(s: str) -> Iterator[Tuple[int, int]]:
"""
Expand Down Expand Up @@ -1138,6 +1173,9 @@ class StringSplitter(BaseStringSplitter, CustomSplitMapMixin):
def do_splitter_match(self, line: Line) -> TMatchResult:
LL = line.leaves

if self._prefer_paren_wrap_match(LL) is not None:
return TErr("Line needs to be wrapped in parens first.")

is_valid_index = is_valid_index_factory(LL)

idx = 0
Expand Down Expand Up @@ -1583,8 +1621,7 @@ def _get_string_operator_leaves(self, leaves: Iterable[Leaf]) -> List[Leaf]:

class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
"""
StringTransformer that splits non-"atom" strings (i.e. strings that do not
exist on lines by themselves).
StringTransformer that wraps strings in parens and then splits at the LPAR.
Requirements:
All of the requirements listed in BaseStringSplitter's docstring in
Expand All @@ -1604,6 +1641,11 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
OR
* The line is a dictionary key assignment where some valid key is being
assigned the value of some string.
OR
* The line starts with an "atom" string that prefers to be wrapped in
parens. It's preferred to be wrapped when it is an immediate child of
a list/set/tuple literal, AND the string is surrounded by commas (or is
the first/last child).
Transformations:
The chosen string is wrapped in parentheses and then split at the LPAR.
Expand All @@ -1628,6 +1670,9 @@ class StringParenWrapper(BaseStringSplitter, CustomSplitMapMixin):
changed such that it no longer needs to be given its own line,
StringParenWrapper relies on StringParenStripper to clean up the
parentheses it created.
For "atom" strings that prefer to be wrapped in parens, StringParenWrapper
requires StringSplitter to hold the split until the string is wrapped in parens.
"""

def do_splitter_match(self, line: Line) -> TMatchResult:
Expand All @@ -1644,6 +1689,7 @@ def do_splitter_match(self, line: Line) -> TMatchResult:
or self._assert_match(LL)
or self._assign_match(LL)
or self._dict_match(LL)
or self._prefer_paren_wrap_match(LL)
)

if string_idx is not None:
Expand Down
46 changes: 30 additions & 16 deletions tests/data/preview/comments7.py
Expand Up @@ -226,39 +226,53 @@ class C:
# metadata_version errors.
(
{},
"None is an invalid value for Metadata-Version. Error: This field is"
" required. see"
" https://packaging.python.org/specifications/core-metadata",
(
"None is an invalid value for Metadata-Version. Error: This field"
" is required. see"
" https://packaging.python.org/specifications/core-metadata"
),
),
(
{"metadata_version": "-1"},
"'-1' is an invalid value for Metadata-Version. Error: Unknown Metadata"
" Version see"
" https://packaging.python.org/specifications/core-metadata",
(
"'-1' is an invalid value for Metadata-Version. Error: Unknown"
" Metadata Version see"
" https://packaging.python.org/specifications/core-metadata"
),
),
# name errors.
(
{"metadata_version": "1.2"},
"'' is an invalid value for Name. Error: This field is required. see"
" https://packaging.python.org/specifications/core-metadata",
(
"'' is an invalid value for Name. Error: This field is required."
" see https://packaging.python.org/specifications/core-metadata"
),
),
(
{"metadata_version": "1.2", "name": "foo-"},
"'foo-' is an invalid value for Name. Error: Must start and end with a"
" letter or numeral and contain only ascii numeric and '.', '_' and"
" '-'. see https://packaging.python.org/specifications/core-metadata",
(
"'foo-' is an invalid value for Name. Error: Must start and end"
" with a letter or numeral and contain only ascii numeric and '.',"
" '_' and '-'. see"
" https://packaging.python.org/specifications/core-metadata"
),
),
# version errors.
(
{"metadata_version": "1.2", "name": "example"},
"'' is an invalid value for Version. Error: This field is required. see"
" https://packaging.python.org/specifications/core-metadata",
(
"'' is an invalid value for Version. Error: This field is required."
" see https://packaging.python.org/specifications/core-metadata"
),
),
(
{"metadata_version": "1.2", "name": "example", "version": "dog"},
"'dog' is an invalid value for Version. Error: Must start and end with"
" a letter or numeral and contain only ascii numeric and '.', '_' and"
" '-'. see https://packaging.python.org/specifications/core-metadata",
(
"'dog' is an invalid value for Version. Error: Must start and end"
" with a letter or numeral and contain only ascii numeric and '.',"
" '_' and '-'. see"
" https://packaging.python.org/specifications/core-metadata"
),
),
],
)
Expand Down
81 changes: 79 additions & 2 deletions tests/data/preview/long_strings.py
Expand Up @@ -18,6 +18,18 @@

D4 = {"A long and ridiculous {}".format(string_key): "This is a really really really long string that has to go i,side of a dictionary. It is soooo bad.", some_func("calling", "some", "stuff"): "This is a really really really long string that has to go inside of a dictionary. It is {soooo} bad (#{x}).".format(sooo="soooo", x=2), "A %s %s" % ("formatted", "string"): "This is a really really really long string that has to go inside of a dictionary. It is %s bad (#%d)." % ("soooo", 2)}

L1 = ["The is a short string", "This is a really long string that can't possibly be expected to fit all together on one line. Also it is inside a list literal, so it's expected to be wrapped in parens when spliting to avoid implicit str concatenation.", short_call("arg", {"key": "value"}), "This is another really really (not really) long string that also can't be expected to fit on one line and is, like the other string, inside a list literal.", ("parens should be stripped for short string in list")]

L2 = ["This is a really long string that can't be expected to fit in one line and is the only child of a list literal."]

S1 = {"The is a short string", "This is a really long string that can't possibly be expected to fit all together on one line. Also it is inside a set literal, so it's expected to be wrapped in parens when spliting to avoid implicit str concatenation.", short_call("arg", {"key": "value"}), "This is another really really (not really) long string that also can't be expected to fit on one line and is, like the other string, inside a set literal.", ("parens should be stripped for short string in set")}

S2 = {"This is a really long string that can't be expected to fit in one line and is the only child of a set literal."}

T1 = ("The is a short string", "This is a really long string that can't possibly be expected to fit all together on one line. Also it is inside a tuple literal, so it's expected to be wrapped in parens when spliting to avoid implicit str concatenation.", short_call("arg", {"key": "value"}), "This is another really really (not really) long string that also can't be expected to fit on one line and is, like the other string, inside a tuple literal.", ("parens should be stripped for short string in list"))

T2 = ("This is a really long string that can't be expected to fit in one line and is the only child of a tuple literal.",)

func_with_keywords(my_arg, my_kwarg="Long keyword strings also need to be wrapped, but they will probably need to be handled a little bit differently.")

bad_split1 = (
Expand Down Expand Up @@ -109,7 +121,7 @@

comment_string = "Long lines with inline comments should have their comments appended to the reformatted string's enclosing right parentheses." # This comment gets thrown to the top.

arg_comment_string = print("Long lines with inline comments which are apart of (and not the only member of) an argument list should have their comments appended to the reformatted string's enclosing left parentheses.", # This comment stays on the bottom.
arg_comment_string = print("Long lines with inline comments which are apart of (and not the only member of) an argument list should have their comments appended to the reformatted string's enclosing left parentheses.", # This comment gets thrown to the top.
"Arg #2", "Arg #3", "Arg #4", "Arg #5")

pragma_comment_string1 = "Lines which end with an inline pragma comment of the form `# <pragma>: <...>` should be left alone." # noqa: E501
Expand Down Expand Up @@ -345,6 +357,71 @@ def foo():
% ("soooo", 2),
}

L1 = [
"The is a short string",
(
"This is a really long string that can't possibly be expected to fit all"
" together on one line. Also it is inside a list literal, so it's expected to"
" be wrapped in parens when spliting to avoid implicit str concatenation."
),
short_call("arg", {"key": "value"}),
(
"This is another really really (not really) long string that also can't be"
" expected to fit on one line and is, like the other string, inside a list"
" literal."
),
"parens should be stripped for short string in list",
]

L2 = [
"This is a really long string that can't be expected to fit in one line and is the"
" only child of a list literal."
]

S1 = {
"The is a short string",
(
"This is a really long string that can't possibly be expected to fit all"
" together on one line. Also it is inside a set literal, so it's expected to be"
" wrapped in parens when spliting to avoid implicit str concatenation."
),
short_call("arg", {"key": "value"}),
(
"This is another really really (not really) long string that also can't be"
" expected to fit on one line and is, like the other string, inside a set"
" literal."
),
"parens should be stripped for short string in set",
}

S2 = {
"This is a really long string that can't be expected to fit in one line and is the"
" only child of a set literal."
}

T1 = (
"The is a short string",
(
"This is a really long string that can't possibly be expected to fit all"
" together on one line. Also it is inside a tuple literal, so it's expected to"
" be wrapped in parens when spliting to avoid implicit str concatenation."
),
short_call("arg", {"key": "value"}),
(
"This is another really really (not really) long string that also can't be"
" expected to fit on one line and is, like the other string, inside a tuple"
" literal."
),
"parens should be stripped for short string in list",
)

T2 = (
(
"This is a really long string that can't be expected to fit in one line and is"
" the only child of a tuple literal."
),
)

func_with_keywords(
my_arg,
my_kwarg=(
Expand Down Expand Up @@ -487,7 +564,7 @@ def foo():
arg_comment_string = print(
"Long lines with inline comments which are apart of (and not the only member of) an"
" argument list should have their comments appended to the reformatted string's"
" enclosing left parentheses.", # This comment stays on the bottom.
" enclosing left parentheses.", # This comment gets thrown to the top.
"Arg #2",
"Arg #3",
"Arg #4",
Expand Down
32 changes: 20 additions & 12 deletions tests/data/preview/long_strings__regression.py
Expand Up @@ -763,20 +763,28 @@ def xxxx_xxx_xx_xxxxxxxxxx_xxxx_xxxxxxxxx(xxxx):

some_dictionary = {
"xxxxx006": [
"xxx-xxx"
" xxxxx3xxxx1xx2xxxxxxxxxxxxxx0xx6xxxxxxxxxx2xxxxxx9xxxxxxxxxx0xxxxx1xxx2x/xx9xx6+x+xxxxxxxxxxxxxx4xxxxxxxxxxxxxxxxxxxxx43xxx2xx2x4x++xxx6xxxxxxxxx+xxxxx/xx9x+xxxxxxxxxxxxxx8x15xxxxxxxxxxxxxxxxx82xx/xxxxxxxxxxxxxx/x5xxxxxxxxxxxxxx6xxxxxx74x4/xxx4x+xxxxxxxxx2xxxxxxxx87xxxxx4xxxxxxxx3xx0xxxxx4xxx1xx9xx5xxxxxxx/xxxxx5xx6xx4xxxx1x/x2xxxxxxxxxxxx64xxxxxxx1x0xx5xxxxxxxxxxxxxx=="
" xxxxx000 xxxxxxxxxx\n",
"xxx-xxx"
" xxxxx3xxxx1xx2xxxxxxxxxxxxxx6xxxxxxxxxxxxxx9xxxxxxxxxxxxx3xxx9xxxxxxxxxxxxxxxx0xxxxxxxxxxxxxxxxx2xxxx2xxx6xxxxx/xx54xxxxxxxxx4xxx3xxxxxx9xx3xxxxx39xxxxxxxxx5xx91xxxx7xxxxxx8xxxxxxxxxxxxxxxx9xxx93xxxxxxxxxxxxxxxxx7xxx8xx8xx4/x1xxxxx1x3xxxxxxxxxxxxx3xxxxxx9xx4xx4x7xxxxxxxxxxxxx1xxxxxxxxx7xxxxxxxxxxxxxx4xx6xxxxxxxxx9xxx7xxxx2xxxxxxxxxxxxxxxxxxxxxx8xxxxxxxxxxxxxxxxxxxx6xx=="
" xxxxx010 xxxxxxxxxx\n",
(
"xxx-xxx"
" xxxxx3xxxx1xx2xxxxxxxxxxxxxx0xx6xxxxxxxxxx2xxxxxx9xxxxxxxxxx0xxxxx1xxx2x/xx9xx6+x+xxxxxxxxxxxxxx4xxxxxxxxxxxxxxxxxxxxx43xxx2xx2x4x++xxx6xxxxxxxxx+xxxxx/xx9x+xxxxxxxxxxxxxx8x15xxxxxxxxxxxxxxxxx82xx/xxxxxxxxxxxxxx/x5xxxxxxxxxxxxxx6xxxxxx74x4/xxx4x+xxxxxxxxx2xxxxxxxx87xxxxx4xxxxxxxx3xx0xxxxx4xxx1xx9xx5xxxxxxx/xxxxx5xx6xx4xxxx1x/x2xxxxxxxxxxxx64xxxxxxx1x0xx5xxxxxxxxxxxxxx=="
" xxxxx000 xxxxxxxxxx\n"
),
(
"xxx-xxx"
" xxxxx3xxxx1xx2xxxxxxxxxxxxxx6xxxxxxxxxxxxxx9xxxxxxxxxxxxx3xxx9xxxxxxxxxxxxxxxx0xxxxxxxxxxxxxxxxx2xxxx2xxx6xxxxx/xx54xxxxxxxxx4xxx3xxxxxx9xx3xxxxx39xxxxxxxxx5xx91xxxx7xxxxxx8xxxxxxxxxxxxxxxx9xxx93xxxxxxxxxxxxxxxxx7xxx8xx8xx4/x1xxxxx1x3xxxxxxxxxxxxx3xxxxxx9xx4xx4x7xxxxxxxxxxxxx1xxxxxxxxx7xxxxxxxxxxxxxx4xx6xxxxxxxxx9xxx7xxxx2xxxxxxxxxxxxxxxxxxxxxx8xxxxxxxxxxxxxxxxxxxx6xx=="
" xxxxx010 xxxxxxxxxx\n"
),
],
"xxxxx016": [
"xxx-xxx"
" xxxxx3xxxx1xx2xxxxxxxxxxxxxx0xx6xxxxxxxxxx2xxxxxx9xxxxxxxxxx0xxxxx1xxx2x/xx9xx6+x+xxxxxxxxxxxxxx4xxxxxxxxxxxxxxxxxxxxx43xxx2xx2x4x++xxx6xxxxxxxxx+xxxxx/xx9x+xxxxxxxxxxxxxx8x15xxxxxxxxxxxxxxxxx82xx/xxxxxxxxxxxxxx/x5xxxxxxxxxxxxxx6xxxxxx74x4/xxx4x+xxxxxxxxx2xxxxxxxx87xxxxx4xxxxxxxx3xx0xxxxx4xxx1xx9xx5xxxxxxx/xxxxx5xx6xx4xxxx1x/x2xxxxxxxxxxxx64xxxxxxx1x0xx5xxxxxxxxxxxxxx=="
" xxxxx000 xxxxxxxxxx\n",
"xxx-xxx"
" xxxxx3xxxx1xx2xxxxxxxxxxxxxx6xxxxxxxxxxxxxx9xxxxxxxxxxxxx3xxx9xxxxxxxxxxxxxxxx0xxxxxxxxxxxxxxxxx2xxxx2xxx6xxxxx/xx54xxxxxxxxx4xxx3xxxxxx9xx3xxxxx39xxxxxxxxx5xx91xxxx7xxxxxx8xxxxxxxxxxxxxxxx9xxx93xxxxxxxxxxxxxxxxx7xxx8xx8xx4/x1xxxxx1x3xxxxxxxxxxxxx3xxxxxx9xx4xx4x7xxxxxxxxxxxxx1xxxxxxxxx7xxxxxxxxxxxxxx4xx6xxxxxxxxx9xxx7xxxx2xxxxxxxxxxxxxxxxxxxxxx8xxxxxxxxxxxxxxxxxxxx6xx=="
" xxxxx010 xxxxxxxxxx\n",
(
"xxx-xxx"
" xxxxx3xxxx1xx2xxxxxxxxxxxxxx0xx6xxxxxxxxxx2xxxxxx9xxxxxxxxxx0xxxxx1xxx2x/xx9xx6+x+xxxxxxxxxxxxxx4xxxxxxxxxxxxxxxxxxxxx43xxx2xx2x4x++xxx6xxxxxxxxx+xxxxx/xx9x+xxxxxxxxxxxxxx8x15xxxxxxxxxxxxxxxxx82xx/xxxxxxxxxxxxxx/x5xxxxxxxxxxxxxx6xxxxxx74x4/xxx4x+xxxxxxxxx2xxxxxxxx87xxxxx4xxxxxxxx3xx0xxxxx4xxx1xx9xx5xxxxxxx/xxxxx5xx6xx4xxxx1x/x2xxxxxxxxxxxx64xxxxxxx1x0xx5xxxxxxxxxxxxxx=="
" xxxxx000 xxxxxxxxxx\n"
),
(
"xxx-xxx"
" xxxxx3xxxx1xx2xxxxxxxxxxxxxx6xxxxxxxxxxxxxx9xxxxxxxxxxxxx3xxx9xxxxxxxxxxxxxxxx0xxxxxxxxxxxxxxxxx2xxxx2xxx6xxxxx/xx54xxxxxxxxx4xxx3xxxxxx9xx3xxxxx39xxxxxxxxx5xx91xxxx7xxxxxx8xxxxxxxxxxxxxxxx9xxx93xxxxxxxxxxxxxxxxx7xxx8xx8xx4/x1xxxxx1x3xxxxxxxxxxxxx3xxxxxx9xx4xx4x7xxxxxxxxxxxxx1xxxxxxxxx7xxxxxxxxxxxxxx4xx6xxxxxxxxx9xxx7xxxx2xxxxxxxxxxxxxxxxxxxxxx8xxxxxxxxxxxxxxxxxxxx6xx=="
" xxxxx010 xxxxxxxxxx\n"
),
],
}

Expand Down

0 comments on commit ba618a3

Please sign in to comment.