Prefer splitting right hand side of assignment statements.

This Cherrypicks the changes from psf#3368 before this can be merged upstream. PiperOrigin-RevId: 493104368
google · Dec 6, 2022 · 228fff8 · 228fff8
1 parent 8831847
commit 228fff8
Show file tree

Hide file tree

Showing 4 changed files with 295 additions and 44 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -4,7 +4,9 @@ All notable changes to Pyink are recorded here.
 
 ## Unreleased
 
-Nothing notable unreleased.
+* Prefer splitting right hand side of assignment statements
+  (see [psf/black#1498](https://github.com/psf/black/issues/1498), this is being
+  upstreamed in [psf/black/pull/3368](https://github.com/psf/black/pull/3368)).
 
 ## 22.10.0
 

diff --git a/README.md b/README.md
@@ -121,6 +121,23 @@ patches as possible in the future.
             ...
     ```
 
+*   (Not yet released) Prefer splitting right hand side of assignment statements
+    (see [psf/black#1498](https://github.com/psf/black/issues/1498), this is
+    being upstreamed in
+    [psf/black/pull/3368](https://github.com/psf/black/pull/3368)). Example:
+
+    ```python
+    # Pyink:
+    some_dictionary["some_key"] = (
+        some_long_expression_causing_long_line
+    )
+
+    # Black:
+    some_dictionary[
+        "some_key"
+    ] = some_long_expression_causing_long_line
+    ```
+
 # How do I use *Pyink*?
 
 Same as `black`, except you'll use `pyink`. All `black` command line options are

diff --git a/patches/pyink.patch b/patches/pyink.patch
@@ -335,9 +335,11 @@
              str(mode),
 --- a/linegen.py
 +++ b/linegen.py
-@@ -3,11 +3,12 @@ Generating lines of code.
+@@ -2,12 +2,14 @@
+ Generating lines of code.
  """
  import sys
++from dataclasses import dataclass
  from functools import partial, wraps
 -from typing import Collection, Iterator, List, Optional, Set, Union, cast
 +from typing import Collection, Iterator, List, Literal, Optional, Set, Union, cast
@@ -349,7 +351,15 @@
      Line,
      append_leaves,
      can_be_split,
-@@ -70,6 +71,9 @@ LeafID = int
+@@ -18,6 +20,7 @@ from pyink.lines import (
+ from pyink.mode import Feature, Mode, Preview
+ from pyink.nodes import (
+     ASSIGNMENTS,
++    BRACKETS,
+     CLOSING_BRACKETS,
+     OPENING_BRACKETS,
+     RARROW,
+@@ -70,6 +73,9 @@ LeafID = int
  LN = Union[Leaf, Node]
 
 
@@ -359,7 +369,7 @@
  class CannotSplit(CannotTransform):
      """A readable split that fits the allotted line length is impossible."""
 
-@@ -88,7 +92,9 @@ class LineGenerator(Visitor[Line]):
+@@ -88,7 +94,9 @@ class LineGenerator(Visitor[Line]):
          self.current_line: Line
          self.__post_init__()
 
@@ -370,7 +380,7 @@
          """Generate a line.
 
          If the line is empty, only emit if it makes sense.
-@@ -97,11 +103,20 @@ class LineGenerator(Visitor[Line]):
+@@ -97,11 +105,20 @@ class LineGenerator(Visitor[Line]):
          If any lines were generated, set up a new current_line.
          """
          if not self.current_line:
@@ -393,7 +403,7 @@
          yield complete_line
 
      def visit_default(self, node: LN) -> Iterator[Line]:
-@@ -127,7 +142,9 @@ class LineGenerator(Visitor[Line]):
+@@ -127,7 +144,9 @@ class LineGenerator(Visitor[Line]):
              normalize_prefix(node, inside_brackets=any_open_brackets)
              if self.mode.string_normalization and node.type == token.STRING:
                  node.value = normalize_string_prefix(node.value)
@@ -404,7 +414,7 @@
              if node.type == token.NUMBER:
                  normalize_numeric_literal(node)
              if node.type not in WHITESPACE:
-@@ -137,7 +154,7 @@ class LineGenerator(Visitor[Line]):
+@@ -137,7 +156,7 @@ class LineGenerator(Visitor[Line]):
      def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
          """Increase indentation level, maybe yield a line."""
          # In blib2to3 INDENT never holds comments.
@@ -413,7 +423,7 @@
          yield from self.visit_default(node)
 
      def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
-@@ -152,7 +169,7 @@ class LineGenerator(Visitor[Line]):
+@@ -152,7 +171,7 @@ class LineGenerator(Visitor[Line]):
          yield from self.visit_default(node)
 
          # Finally, emit the dedent.
@@ -422,7 +432,7 @@
 
      def visit_stmt(
          self, node: Node, keywords: Set[str], parens: Set[str]
-@@ -230,9 +247,9 @@ class LineGenerator(Visitor[Line]):
+@@ -230,9 +249,9 @@ class LineGenerator(Visitor[Line]):
              if self.mode.is_pyi and is_stub_body(node):
                  yield from self.visit_default(node)
              else:
@@ -434,7 +444,7 @@
 
          else:
              if (
-@@ -338,7 +355,9 @@ class LineGenerator(Visitor[Line]):
+@@ -338,7 +357,9 @@ class LineGenerator(Visitor[Line]):
                      # formatting as visit_default() is called *after*. To avoid a
                      # situation where this function formats a docstring differently on
                      # the second pass, normalize it early.
@@ -445,7 +455,7 @@
                  else:
                      docstring = leaf.value
              else:
-@@ -355,7 +374,7 @@ class LineGenerator(Visitor[Line]):
+@@ -355,7 +376,7 @@ class LineGenerator(Visitor[Line]):
              quote_len = 1 if docstring[1] != quote_char else 3
              docstring = docstring[quote_len:-quote_len]
              docstring_started_empty = not docstring
@@ -454,7 +464,7 @@
 
              if is_multiline_string(leaf):
                  docstring = fix_docstring(docstring, indent)
-@@ -439,7 +458,8 @@ class LineGenerator(Visitor[Line]):
+@@ -439,7 +460,8 @@ class LineGenerator(Visitor[Line]):
          self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
          self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
          self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
@@ -464,7 +474,7 @@
          self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
          self.visit_async_funcdef = self.visit_async_stmt
          self.visit_decorated = self.visit_decorators
-@@ -466,10 +486,11 @@ def transform_line(
+@@ -466,10 +488,11 @@ def transform_line(
 
      ll = mode.line_length
      sn = mode.string_normalization
@@ -480,7 +490,47 @@
 
      transformers: List[Transformer]
      if (
-@@ -648,18 +669,56 @@ def right_hand_split(
+@@ -609,6 +632,17 @@ def left_hand_split(line: Line, _feature
+             yield result
+
+
++@dataclass
++class _RHSResult:
++    """Intermediate split result from a right hand split."""
++
++    head: Line
++    body: Line
++    tail: Line
++    opening_bracket: Leaf
++    closing_bracket: Leaf
++
++
+ def right_hand_split(
+     line: Line,
+     line_length: int,
+@@ -623,6 +657,21 @@ def right_hand_split(
+
+     Note: running this function modifies `bracket_depth` on the leaves of `line`.
+     """
++    rhs_result = _first_right_hand_split(line, omit=omit)
++    yield from _maybe_split_omitting_optional_parens(
++        rhs_result, line, line_length, features=features, omit=omit
++    )
++
++
++def _first_right_hand_split(
++    line: Line,
++    omit: Collection[LeafID] = (),
++) -> _RHSResult:
++    """Split the line into head, body, tail starting with the last bracket pair.
++    Note: this function should not have side effects. It's replied upon by
++    _maybe_split_omitting_optional_parens to get an opinion whether to prefer
++    splitting on the right side of an assignment statement.
++    """
+     tail_leaves: List[Leaf] = []
+     body_leaves: List[Leaf] = []
+     head_leaves: List[Leaf] = []
+@@ -648,41 +697,113 @@ def right_hand_split(
      tail_leaves.reverse()
      body_leaves.reverse()
      head_leaves.reverse()
@@ -529,42 +579,133 @@
 +        )
 +    tail = bracket_split_build_line(tail_leaves, line, opening_brackets[0])
      bracket_split_succeeded_or_raise(head, body, tail)
++    return _RHSResult(head, body, tail, opening_bracket, closing_bracket)
++
++
++def _maybe_split_omitting_optional_parens(
++    rhs: _RHSResult,
++    line: Line,
++    line_length: int,
++    features: Collection[Feature] = (),
++    omit: Collection[LeafID] = (),
++) -> Iterator[Line]:
      if (
          Feature.FORCE_OPTIONAL_PARENTHESES not in features
          # the opening bracket is an optional paren
 -        and opening_bracket.type == token.LPAR
 -        and not opening_bracket.value
-+        and opening_brackets[0].type == token.LPAR
-+        and not opening_brackets[0].value
++        and rhs.opening_bracket.type == token.LPAR
++        and not rhs.opening_bracket.value
          # the closing bracket is an optional paren
 -        and closing_bracket.type == token.RPAR
 -        and not closing_bracket.value
-+        and closing_brackets[-1].type == token.RPAR
-+        and not closing_brackets[-1].value
++        and rhs.closing_bracket.type == token.RPAR
++        and not rhs.closing_bracket.value
          # it's not an import (optional parens are the only thing we can split on
          # in this case; attempting a split without them is a waste of time)
          and not line.is_import
-@@ -668,7 +727,7 @@ def right_hand_split(
+         # there are no standalone comments in the body
+-        and not body.contains_standalone_comments(0)
++        and not rhs.body.contains_standalone_comments(0)
          # and we can actually remove the parens
-         and can_omit_invisible_parens(body, line_length)
+-        and can_omit_invisible_parens(body, line_length)
++        and can_omit_invisible_parens(rhs.body, line_length)
      ):
 -        omit = {id(closing_bracket), *omit}
-+        omit = {*{id(b) for b in closing_brackets}, *omit}
++        omit = {id(rhs.closing_bracket), *omit}
          try:
-             yield from right_hand_split(line, line_length, features=features, omit=omit)
-             return
-@@ -690,8 +749,8 @@ def right_hand_split(
+-            yield from right_hand_split(line, line_length, features=features, omit=omit)
+-            return
++            # The _RHSResult Omitting Optional Parens.
++            rhs_oop = _first_right_hand_split(line, omit=omit)
++            if not (
++                line.mode.is_pyink
++                # the split is right after `=`
++                and len(rhs.head.leaves) >= 2
++                and rhs.head.leaves[-2].type == token.EQUAL
++                # the left side of assignement contains brackets
++                and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1])
++                # the left side of assignment is short enough (the -1 is for the ending
++                # optional paren)
++                and is_line_short_enough(rhs.head, line_length=line_length - 1)
++                # the left side of assignment won't explode further because of magic
++                # trailing comma
++                and rhs.head.magic_trailing_comma is None
++                # the split by omitting optional parens isn't preferred by some other
++                # reason
++                and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length)
++            ):
++                yield from _maybe_split_omitting_optional_parens(
++                    rhs_oop, line, line_length, features=features, omit=omit
++                )
++                return
+
+         except CannotSplit as e:
+             if not (
+-                can_be_split(body)
+-                or is_line_short_enough(body, line_length=line_length)
++                can_be_split(rhs.body)
++                or is_line_short_enough(rhs.body, line_length=line_length)
+             ):
+                 raise CannotSplit(
+                     "Splitting failed, body is still too long and can't be split."
+                 ) from e
+
+-            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
++            elif (
++                rhs.head.contains_multiline_strings()
++                or rhs.tail.contains_multiline_strings()
++            ):
+                 raise CannotSplit(
+                     "The current optional pair of parentheses is bound to fail to"
+                     " satisfy the splitting algorithm because the head or the tail"
+@@ -690,13 +811,42 @@ def right_hand_split(
                      " line."
                  ) from e
 
 -    ensure_visible(opening_bracket)
 -    ensure_visible(closing_bracket)
-+    ensure_visible(opening_brackets[-1])
-+    ensure_visible(closing_brackets[0])
-     for result in (head, body, tail):
+-    for result in (head, body, tail):
++    ensure_visible(rhs.opening_bracket)
++    ensure_visible(rhs.closing_bracket)
++    for result in (rhs.head, rhs.body, rhs.tail):
          if result:
              yield result
-@@ -734,7 +793,7 @@ def bracket_split_build_line(
+
+
++def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
++    """
++    Returns whether we should prefer the result from a split omitting optional parens.
++    """
++    has_closing_bracket_after_assign = False
++    for leaf in reversed(rhs_oop.head.leaves):
++        if leaf.type == token.EQUAL:
++            break
++        if leaf.type in CLOSING_BRACKETS:
++            has_closing_bracket_after_assign = True
++            break
++    return (
++        # contains matching brackets after the `=` (done by checking there is a
++        # closing bracket)
++        has_closing_bracket_after_assign
++        or (
++            # the split is actually from inside the optional parens (done by checking
++            # the first line still contains the `=`)
++            any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves)
++            # the first line is short enough
++            and is_line_short_enough(rhs_oop.head, line_length=line_length)
++        )
++        # contains unsplittable type ignore
++        or rhs_oop.head.contains_unsplittable_type_ignore()
++        or rhs_oop.body.contains_unsplittable_type_ignore()
++        or rhs_oop.tail.contains_unsplittable_type_ignore()
++    )
++
++
+ def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
+     """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
+
+@@ -734,7 +884,7 @@ def bracket_split_build_line(
      result = Line(mode=original.mode, depth=original.depth)
      if is_body:
          result.inside_brackets = True
@@ -573,7 +714,7 @@
          if leaves:
              # Since body is a new indent level, remove spurious leading whitespace.
              normalize_prefix(leaves[0], inside_brackets=True)
-@@ -1196,7 +1255,7 @@ def generate_trailers_to_omit(line: Line
+@@ -1196,7 +1346,7 @@ def generate_trailers_to_omit(line: Line
      if not line.magic_trailing_comma:
          yield omit