From 228fff8eda665f3ca67084bf2193b13fa2976161 Mon Sep 17 00:00:00 2001
From: Yilei Yang <yileiyang@google.com>
Date: Mon, 5 Dec 2022 13:55:29 -0800
Subject: [PATCH] Prefer splitting right hand side of assignment statements.

This Cherrypicks the changes from https://github.com/psf/black/pull/3368 before this can be merged upstream.

PiperOrigin-RevId: 493104368
---
 CHANGES.md           |   4 +-
 README.md            |  17 ++++
 patches/pyink.patch  | 197 +++++++++++++++++++++++++++++++++++++------
 src/pyink/linegen.py | 121 ++++++++++++++++++++++----
 4 files changed, 295 insertions(+), 44 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 2f0ccd9f162..5d0645ffa75 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -4,7 +4,9 @@ All notable changes to Pyink are recorded here.
 
 ## Unreleased
 
-Nothing notable unreleased.
+* Prefer splitting right hand side of assignment statements
+  (see [psf/black#1498](https://github.com/psf/black/issues/1498), this is being
+  upstreamed in [psf/black/pull/3368](https://github.com/psf/black/pull/3368)).
 
 ## 22.10.0
 
diff --git a/README.md b/README.md
index d8627f800d6..f8c88e59512 100644
--- a/README.md
+++ b/README.md
@@ -121,6 +121,23 @@ patches as possible in the future.
             ...
     ```
 
+*   (Not yet released) Prefer splitting right hand side of assignment statements
+    (see [psf/black#1498](https://github.com/psf/black/issues/1498), this is
+    being upstreamed in
+    [psf/black/pull/3368](https://github.com/psf/black/pull/3368)). Example:
+
+    ```python
+    # Pyink:
+    some_dictionary["some_key"] = (
+        some_long_expression_causing_long_line
+    )
+
+    # Black:
+    some_dictionary[
+        "some_key"
+    ] = some_long_expression_causing_long_line
+    ```
+
 # How do I use *Pyink*?
 
 Same as `black`, except you'll use `pyink`. All `black` command line options are
diff --git a/patches/pyink.patch b/patches/pyink.patch
index aace123c335..9d0013335e1 100644
--- a/patches/pyink.patch
+++ b/patches/pyink.patch
@@ -335,9 +335,11 @@
              str(mode),
 --- a/linegen.py
 +++ b/linegen.py
-@@ -3,11 +3,12 @@ Generating lines of code.
+@@ -2,12 +2,14 @@
+ Generating lines of code.
  """
  import sys
++from dataclasses import dataclass
  from functools import partial, wraps
 -from typing import Collection, Iterator, List, Optional, Set, Union, cast
 +from typing import Collection, Iterator, List, Literal, Optional, Set, Union, cast
@@ -349,7 +351,15 @@
      Line,
      append_leaves,
      can_be_split,
-@@ -70,6 +71,9 @@ LeafID = int
+@@ -18,6 +20,7 @@ from pyink.lines import (
+ from pyink.mode import Feature, Mode, Preview
+ from pyink.nodes import (
+     ASSIGNMENTS,
++    BRACKETS,
+     CLOSING_BRACKETS,
+     OPENING_BRACKETS,
+     RARROW,
+@@ -70,6 +73,9 @@ LeafID = int
  LN = Union[Leaf, Node]
  
  
@@ -359,7 +369,7 @@
  class CannotSplit(CannotTransform):
      """A readable split that fits the allotted line length is impossible."""
  
-@@ -88,7 +92,9 @@ class LineGenerator(Visitor[Line]):
+@@ -88,7 +94,9 @@ class LineGenerator(Visitor[Line]):
          self.current_line: Line
          self.__post_init__()
  
@@ -370,7 +380,7 @@
          """Generate a line.
  
          If the line is empty, only emit if it makes sense.
-@@ -97,11 +103,20 @@ class LineGenerator(Visitor[Line]):
+@@ -97,11 +105,20 @@ class LineGenerator(Visitor[Line]):
          If any lines were generated, set up a new current_line.
          """
          if not self.current_line:
@@ -393,7 +403,7 @@
          yield complete_line
  
      def visit_default(self, node: LN) -> Iterator[Line]:
-@@ -127,7 +142,9 @@ class LineGenerator(Visitor[Line]):
+@@ -127,7 +144,9 @@ class LineGenerator(Visitor[Line]):
              normalize_prefix(node, inside_brackets=any_open_brackets)
              if self.mode.string_normalization and node.type == token.STRING:
                  node.value = normalize_string_prefix(node.value)
@@ -404,7 +414,7 @@
              if node.type == token.NUMBER:
                  normalize_numeric_literal(node)
              if node.type not in WHITESPACE:
-@@ -137,7 +154,7 @@ class LineGenerator(Visitor[Line]):
+@@ -137,7 +156,7 @@ class LineGenerator(Visitor[Line]):
      def visit_INDENT(self, node: Leaf) -> Iterator[Line]:
          """Increase indentation level, maybe yield a line."""
          # In blib2to3 INDENT never holds comments.
@@ -413,7 +423,7 @@
          yield from self.visit_default(node)
  
      def visit_DEDENT(self, node: Leaf) -> Iterator[Line]:
-@@ -152,7 +169,7 @@ class LineGenerator(Visitor[Line]):
+@@ -152,7 +171,7 @@ class LineGenerator(Visitor[Line]):
          yield from self.visit_default(node)
  
          # Finally, emit the dedent.
@@ -422,7 +432,7 @@
  
      def visit_stmt(
          self, node: Node, keywords: Set[str], parens: Set[str]
-@@ -230,9 +247,9 @@ class LineGenerator(Visitor[Line]):
+@@ -230,9 +249,9 @@ class LineGenerator(Visitor[Line]):
              if self.mode.is_pyi and is_stub_body(node):
                  yield from self.visit_default(node)
              else:
@@ -434,7 +444,7 @@
  
          else:
              if (
-@@ -338,7 +355,9 @@ class LineGenerator(Visitor[Line]):
+@@ -338,7 +357,9 @@ class LineGenerator(Visitor[Line]):
                      # formatting as visit_default() is called *after*. To avoid a
                      # situation where this function formats a docstring differently on
                      # the second pass, normalize it early.
@@ -445,7 +455,7 @@
                  else:
                      docstring = leaf.value
              else:
-@@ -355,7 +374,7 @@ class LineGenerator(Visitor[Line]):
+@@ -355,7 +376,7 @@ class LineGenerator(Visitor[Line]):
              quote_len = 1 if docstring[1] != quote_char else 3
              docstring = docstring[quote_len:-quote_len]
              docstring_started_empty = not docstring
@@ -454,7 +464,7 @@
  
              if is_multiline_string(leaf):
                  docstring = fix_docstring(docstring, indent)
-@@ -439,7 +458,8 @@ class LineGenerator(Visitor[Line]):
+@@ -439,7 +460,8 @@ class LineGenerator(Visitor[Line]):
          self.visit_classdef = partial(v, keywords={"class"}, parens=Ø)
          self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS)
          self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"})
@@ -464,7 +474,7 @@
          self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"})
          self.visit_async_funcdef = self.visit_async_stmt
          self.visit_decorated = self.visit_decorators
-@@ -466,10 +486,11 @@ def transform_line(
+@@ -466,10 +488,11 @@ def transform_line(
  
      ll = mode.line_length
      sn = mode.string_normalization
@@ -480,7 +490,47 @@
  
      transformers: List[Transformer]
      if (
-@@ -648,18 +669,56 @@ def right_hand_split(
+@@ -609,6 +632,17 @@ def left_hand_split(line: Line, _feature
+             yield result
+ 
+ 
++@dataclass
++class _RHSResult:
++    """Intermediate split result from a right hand split."""
++
++    head: Line
++    body: Line
++    tail: Line
++    opening_bracket: Leaf
++    closing_bracket: Leaf
++
++
+ def right_hand_split(
+     line: Line,
+     line_length: int,
+@@ -623,6 +657,21 @@ def right_hand_split(
+ 
+     Note: running this function modifies `bracket_depth` on the leaves of `line`.
+     """
++    rhs_result = _first_right_hand_split(line, omit=omit)
++    yield from _maybe_split_omitting_optional_parens(
++        rhs_result, line, line_length, features=features, omit=omit
++    )
++
++
++def _first_right_hand_split(
++    line: Line,
++    omit: Collection[LeafID] = (),
++) -> _RHSResult:
++    """Split the line into head, body, tail starting with the last bracket pair.
++    Note: this function should not have side effects. It's replied upon by
++    _maybe_split_omitting_optional_parens to get an opinion whether to prefer
++    splitting on the right side of an assignment statement.
++    """
+     tail_leaves: List[Leaf] = []
+     body_leaves: List[Leaf] = []
+     head_leaves: List[Leaf] = []
+@@ -648,41 +697,113 @@ def right_hand_split(
      tail_leaves.reverse()
      body_leaves.reverse()
      head_leaves.reverse()
@@ -529,42 +579,133 @@
 +        )
 +    tail = bracket_split_build_line(tail_leaves, line, opening_brackets[0])
      bracket_split_succeeded_or_raise(head, body, tail)
++    return _RHSResult(head, body, tail, opening_bracket, closing_bracket)
++
++
++def _maybe_split_omitting_optional_parens(
++    rhs: _RHSResult,
++    line: Line,
++    line_length: int,
++    features: Collection[Feature] = (),
++    omit: Collection[LeafID] = (),
++) -> Iterator[Line]:
      if (
          Feature.FORCE_OPTIONAL_PARENTHESES not in features
          # the opening bracket is an optional paren
 -        and opening_bracket.type == token.LPAR
 -        and not opening_bracket.value
-+        and opening_brackets[0].type == token.LPAR
-+        and not opening_brackets[0].value
++        and rhs.opening_bracket.type == token.LPAR
++        and not rhs.opening_bracket.value
          # the closing bracket is an optional paren
 -        and closing_bracket.type == token.RPAR
 -        and not closing_bracket.value
-+        and closing_brackets[-1].type == token.RPAR
-+        and not closing_brackets[-1].value
++        and rhs.closing_bracket.type == token.RPAR
++        and not rhs.closing_bracket.value
          # it's not an import (optional parens are the only thing we can split on
          # in this case; attempting a split without them is a waste of time)
          and not line.is_import
-@@ -668,7 +727,7 @@ def right_hand_split(
+         # there are no standalone comments in the body
+-        and not body.contains_standalone_comments(0)
++        and not rhs.body.contains_standalone_comments(0)
          # and we can actually remove the parens
-         and can_omit_invisible_parens(body, line_length)
+-        and can_omit_invisible_parens(body, line_length)
++        and can_omit_invisible_parens(rhs.body, line_length)
      ):
 -        omit = {id(closing_bracket), *omit}
-+        omit = {*{id(b) for b in closing_brackets}, *omit}
++        omit = {id(rhs.closing_bracket), *omit}
          try:
-             yield from right_hand_split(line, line_length, features=features, omit=omit)
-             return
-@@ -690,8 +749,8 @@ def right_hand_split(
+-            yield from right_hand_split(line, line_length, features=features, omit=omit)
+-            return
++            # The _RHSResult Omitting Optional Parens.
++            rhs_oop = _first_right_hand_split(line, omit=omit)
++            if not (
++                line.mode.is_pyink
++                # the split is right after `=`
++                and len(rhs.head.leaves) >= 2
++                and rhs.head.leaves[-2].type == token.EQUAL
++                # the left side of assignement contains brackets
++                and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1])
++                # the left side of assignment is short enough (the -1 is for the ending
++                # optional paren)
++                and is_line_short_enough(rhs.head, line_length=line_length - 1)
++                # the left side of assignment won't explode further because of magic
++                # trailing comma
++                and rhs.head.magic_trailing_comma is None
++                # the split by omitting optional parens isn't preferred by some other
++                # reason
++                and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length)
++            ):
++                yield from _maybe_split_omitting_optional_parens(
++                    rhs_oop, line, line_length, features=features, omit=omit
++                )
++                return
+ 
+         except CannotSplit as e:
+             if not (
+-                can_be_split(body)
+-                or is_line_short_enough(body, line_length=line_length)
++                can_be_split(rhs.body)
++                or is_line_short_enough(rhs.body, line_length=line_length)
+             ):
+                 raise CannotSplit(
+                     "Splitting failed, body is still too long and can't be split."
+                 ) from e
+ 
+-            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
++            elif (
++                rhs.head.contains_multiline_strings()
++                or rhs.tail.contains_multiline_strings()
++            ):
+                 raise CannotSplit(
+                     "The current optional pair of parentheses is bound to fail to"
+                     " satisfy the splitting algorithm because the head or the tail"
+@@ -690,13 +811,42 @@ def right_hand_split(
                      " line."
                  ) from e
  
 -    ensure_visible(opening_bracket)
 -    ensure_visible(closing_bracket)
-+    ensure_visible(opening_brackets[-1])
-+    ensure_visible(closing_brackets[0])
-     for result in (head, body, tail):
+-    for result in (head, body, tail):
++    ensure_visible(rhs.opening_bracket)
++    ensure_visible(rhs.closing_bracket)
++    for result in (rhs.head, rhs.body, rhs.tail):
          if result:
              yield result
-@@ -734,7 +793,7 @@ def bracket_split_build_line(
+ 
+ 
++def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
++    """
++    Returns whether we should prefer the result from a split omitting optional parens.
++    """
++    has_closing_bracket_after_assign = False
++    for leaf in reversed(rhs_oop.head.leaves):
++        if leaf.type == token.EQUAL:
++            break
++        if leaf.type in CLOSING_BRACKETS:
++            has_closing_bracket_after_assign = True
++            break
++    return (
++        # contains matching brackets after the `=` (done by checking there is a
++        # closing bracket)
++        has_closing_bracket_after_assign
++        or (
++            # the split is actually from inside the optional parens (done by checking
++            # the first line still contains the `=`)
++            any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves)
++            # the first line is short enough
++            and is_line_short_enough(rhs_oop.head, line_length=line_length)
++        )
++        # contains unsplittable type ignore
++        or rhs_oop.head.contains_unsplittable_type_ignore()
++        or rhs_oop.body.contains_unsplittable_type_ignore()
++        or rhs_oop.tail.contains_unsplittable_type_ignore()
++    )
++
++
+ def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
+     """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.
+ 
+@@ -734,7 +884,7 @@ def bracket_split_build_line(
      result = Line(mode=original.mode, depth=original.depth)
      if is_body:
          result.inside_brackets = True
@@ -573,7 +714,7 @@
          if leaves:
              # Since body is a new indent level, remove spurious leading whitespace.
              normalize_prefix(leaves[0], inside_brackets=True)
-@@ -1196,7 +1255,7 @@ def generate_trailers_to_omit(line: Line
+@@ -1196,7 +1346,7 @@ def generate_trailers_to_omit(line: Line
      if not line.magic_trailing_comma:
          yield omit
  
diff --git a/src/pyink/linegen.py b/src/pyink/linegen.py
index 665c6baf544..3cde2d09e4d 100644
--- a/src/pyink/linegen.py
+++ b/src/pyink/linegen.py
@@ -2,6 +2,7 @@
 Generating lines of code.
 """
 import sys
+from dataclasses import dataclass
 from functools import partial, wraps
 from typing import Collection, Iterator, List, Literal, Optional, Set, Union, cast
 
@@ -19,6 +20,7 @@
 from pyink.mode import Feature, Mode, Preview
 from pyink.nodes import (
     ASSIGNMENTS,
+    BRACKETS,
     CLOSING_BRACKETS,
     OPENING_BRACKETS,
     RARROW,
@@ -621,6 +623,17 @@ def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator
             yield result
 
 
+@dataclass
+class _RHSResult:
+    """Intermediate split result from a right hand split."""
+
+    head: Line
+    body: Line
+    tail: Line
+    opening_bracket: Leaf
+    closing_bracket: Leaf
+
+
 def right_hand_split(
     line: Line,
     line_length: int,
@@ -635,6 +648,21 @@ def right_hand_split(
 
     Note: running this function modifies `bracket_depth` on the leaves of `line`.
     """
+    rhs_result = _first_right_hand_split(line, omit=omit)
+    yield from _maybe_split_omitting_optional_parens(
+        rhs_result, line, line_length, features=features, omit=omit
+    )
+
+
+def _first_right_hand_split(
+    line: Line,
+    omit: Collection[LeafID] = (),
+) -> _RHSResult:
+    """Split the line into head, body, tail starting with the last bracket pair.
+    Note: this function should not have side effects. It's replied upon by
+    _maybe_split_omitting_optional_parens to get an opinion whether to prefer
+    splitting on the right side of an assignment statement.
+    """
     tail_leaves: List[Leaf] = []
     body_leaves: List[Leaf] = []
     head_leaves: List[Leaf] = []
@@ -702,37 +730,71 @@ def right_hand_split(
         )
     tail = bracket_split_build_line(tail_leaves, line, opening_brackets[0])
     bracket_split_succeeded_or_raise(head, body, tail)
+    return _RHSResult(head, body, tail, opening_bracket, closing_bracket)
+
+
+def _maybe_split_omitting_optional_parens(
+    rhs: _RHSResult,
+    line: Line,
+    line_length: int,
+    features: Collection[Feature] = (),
+    omit: Collection[LeafID] = (),
+) -> Iterator[Line]:
     if (
         Feature.FORCE_OPTIONAL_PARENTHESES not in features
         # the opening bracket is an optional paren
-        and opening_brackets[0].type == token.LPAR
-        and not opening_brackets[0].value
+        and rhs.opening_bracket.type == token.LPAR
+        and not rhs.opening_bracket.value
         # the closing bracket is an optional paren
-        and closing_brackets[-1].type == token.RPAR
-        and not closing_brackets[-1].value
+        and rhs.closing_bracket.type == token.RPAR
+        and not rhs.closing_bracket.value
         # it's not an import (optional parens are the only thing we can split on
         # in this case; attempting a split without them is a waste of time)
         and not line.is_import
         # there are no standalone comments in the body
-        and not body.contains_standalone_comments(0)
+        and not rhs.body.contains_standalone_comments(0)
         # and we can actually remove the parens
-        and can_omit_invisible_parens(body, line_length)
+        and can_omit_invisible_parens(rhs.body, line_length)
     ):
-        omit = {*{id(b) for b in closing_brackets}, *omit}
+        omit = {id(rhs.closing_bracket), *omit}
         try:
-            yield from right_hand_split(line, line_length, features=features, omit=omit)
-            return
+            # The _RHSResult Omitting Optional Parens.
+            rhs_oop = _first_right_hand_split(line, omit=omit)
+            if not (
+                line.mode.is_pyink
+                # the split is right after `=`
+                and len(rhs.head.leaves) >= 2
+                and rhs.head.leaves[-2].type == token.EQUAL
+                # the left side of assignement contains brackets
+                and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1])
+                # the left side of assignment is short enough (the -1 is for the ending
+                # optional paren)
+                and is_line_short_enough(rhs.head, line_length=line_length - 1)
+                # the left side of assignment won't explode further because of magic
+                # trailing comma
+                and rhs.head.magic_trailing_comma is None
+                # the split by omitting optional parens isn't preferred by some other
+                # reason
+                and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length)
+            ):
+                yield from _maybe_split_omitting_optional_parens(
+                    rhs_oop, line, line_length, features=features, omit=omit
+                )
+                return
 
         except CannotSplit as e:
             if not (
-                can_be_split(body)
-                or is_line_short_enough(body, line_length=line_length)
+                can_be_split(rhs.body)
+                or is_line_short_enough(rhs.body, line_length=line_length)
             ):
                 raise CannotSplit(
                     "Splitting failed, body is still too long and can't be split."
                 ) from e
 
-            elif head.contains_multiline_strings() or tail.contains_multiline_strings():
+            elif (
+                rhs.head.contains_multiline_strings()
+                or rhs.tail.contains_multiline_strings()
+            ):
                 raise CannotSplit(
                     "The current optional pair of parentheses is bound to fail to"
                     " satisfy the splitting algorithm because the head or the tail"
@@ -740,13 +802,42 @@ def right_hand_split(
                     " line."
                 ) from e
 
-    ensure_visible(opening_brackets[-1])
-    ensure_visible(closing_brackets[0])
-    for result in (head, body, tail):
+    ensure_visible(rhs.opening_bracket)
+    ensure_visible(rhs.closing_bracket)
+    for result in (rhs.head, rhs.body, rhs.tail):
         if result:
             yield result
 
 
+def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool:
+    """
+    Returns whether we should prefer the result from a split omitting optional parens.
+    """
+    has_closing_bracket_after_assign = False
+    for leaf in reversed(rhs_oop.head.leaves):
+        if leaf.type == token.EQUAL:
+            break
+        if leaf.type in CLOSING_BRACKETS:
+            has_closing_bracket_after_assign = True
+            break
+    return (
+        # contains matching brackets after the `=` (done by checking there is a
+        # closing bracket)
+        has_closing_bracket_after_assign
+        or (
+            # the split is actually from inside the optional parens (done by checking
+            # the first line still contains the `=`)
+            any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves)
+            # the first line is short enough
+            and is_line_short_enough(rhs_oop.head, line_length=line_length)
+        )
+        # contains unsplittable type ignore
+        or rhs_oop.head.contains_unsplittable_type_ignore()
+        or rhs_oop.body.contains_unsplittable_type_ignore()
+        or rhs_oop.tail.contains_unsplittable_type_ignore()
+    )
+
+
 def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None:
     """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.