From 228fff8eda665f3ca67084bf2193b13fa2976161 Mon Sep 17 00:00:00 2001 From: Yilei Yang Date: Mon, 5 Dec 2022 13:55:29 -0800 Subject: [PATCH] Prefer splitting right hand side of assignment statements. This Cherrypicks the changes from https://github.com/psf/black/pull/3368 before this can be merged upstream. PiperOrigin-RevId: 493104368 --- CHANGES.md | 4 +- README.md | 17 ++++ patches/pyink.patch | 197 +++++++++++++++++++++++++++++++++++++------ src/pyink/linegen.py | 121 ++++++++++++++++++++++---- 4 files changed, 295 insertions(+), 44 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 2f0ccd9f162..5d0645ffa75 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,7 +4,9 @@ All notable changes to Pyink are recorded here. ## Unreleased -Nothing notable unreleased. +* Prefer splitting right hand side of assignment statements + (see [psf/black#1498](https://github.com/psf/black/issues/1498), this is being + upstreamed in [psf/black/pull/3368](https://github.com/psf/black/pull/3368)). ## 22.10.0 diff --git a/README.md b/README.md index d8627f800d6..f8c88e59512 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,23 @@ patches as possible in the future. ... ``` +* (Not yet released) Prefer splitting right hand side of assignment statements + (see [psf/black#1498](https://github.com/psf/black/issues/1498), this is + being upstreamed in + [psf/black/pull/3368](https://github.com/psf/black/pull/3368)). Example: + + ```python + # Pyink: + some_dictionary["some_key"] = ( + some_long_expression_causing_long_line + ) + + # Black: + some_dictionary[ + "some_key" + ] = some_long_expression_causing_long_line + ``` + # How do I use *Pyink*? Same as `black`, except you'll use `pyink`. All `black` command line options are diff --git a/patches/pyink.patch b/patches/pyink.patch index aace123c335..9d0013335e1 100644 --- a/patches/pyink.patch +++ b/patches/pyink.patch @@ -335,9 +335,11 @@ str(mode), --- a/linegen.py +++ b/linegen.py -@@ -3,11 +3,12 @@ Generating lines of code. +@@ -2,12 +2,14 @@ + Generating lines of code. """ import sys ++from dataclasses import dataclass from functools import partial, wraps -from typing import Collection, Iterator, List, Optional, Set, Union, cast +from typing import Collection, Iterator, List, Literal, Optional, Set, Union, cast @@ -349,7 +351,15 @@ Line, append_leaves, can_be_split, -@@ -70,6 +71,9 @@ LeafID = int +@@ -18,6 +20,7 @@ from pyink.lines import ( + from pyink.mode import Feature, Mode, Preview + from pyink.nodes import ( + ASSIGNMENTS, ++ BRACKETS, + CLOSING_BRACKETS, + OPENING_BRACKETS, + RARROW, +@@ -70,6 +73,9 @@ LeafID = int LN = Union[Leaf, Node] @@ -359,7 +369,7 @@ class CannotSplit(CannotTransform): """A readable split that fits the allotted line length is impossible.""" -@@ -88,7 +92,9 @@ class LineGenerator(Visitor[Line]): +@@ -88,7 +94,9 @@ class LineGenerator(Visitor[Line]): self.current_line: Line self.__post_init__() @@ -370,7 +380,7 @@ """Generate a line. If the line is empty, only emit if it makes sense. -@@ -97,11 +103,20 @@ class LineGenerator(Visitor[Line]): +@@ -97,11 +105,20 @@ class LineGenerator(Visitor[Line]): If any lines were generated, set up a new current_line. """ if not self.current_line: @@ -393,7 +403,7 @@ yield complete_line def visit_default(self, node: LN) -> Iterator[Line]: -@@ -127,7 +142,9 @@ class LineGenerator(Visitor[Line]): +@@ -127,7 +144,9 @@ class LineGenerator(Visitor[Line]): normalize_prefix(node, inside_brackets=any_open_brackets) if self.mode.string_normalization and node.type == token.STRING: node.value = normalize_string_prefix(node.value) @@ -404,7 +414,7 @@ if node.type == token.NUMBER: normalize_numeric_literal(node) if node.type not in WHITESPACE: -@@ -137,7 +154,7 @@ class LineGenerator(Visitor[Line]): +@@ -137,7 +156,7 @@ class LineGenerator(Visitor[Line]): def visit_INDENT(self, node: Leaf) -> Iterator[Line]: """Increase indentation level, maybe yield a line.""" # In blib2to3 INDENT never holds comments. @@ -413,7 +423,7 @@ yield from self.visit_default(node) def visit_DEDENT(self, node: Leaf) -> Iterator[Line]: -@@ -152,7 +169,7 @@ class LineGenerator(Visitor[Line]): +@@ -152,7 +171,7 @@ class LineGenerator(Visitor[Line]): yield from self.visit_default(node) # Finally, emit the dedent. @@ -422,7 +432,7 @@ def visit_stmt( self, node: Node, keywords: Set[str], parens: Set[str] -@@ -230,9 +247,9 @@ class LineGenerator(Visitor[Line]): +@@ -230,9 +249,9 @@ class LineGenerator(Visitor[Line]): if self.mode.is_pyi and is_stub_body(node): yield from self.visit_default(node) else: @@ -434,7 +444,7 @@ else: if ( -@@ -338,7 +355,9 @@ class LineGenerator(Visitor[Line]): +@@ -338,7 +357,9 @@ class LineGenerator(Visitor[Line]): # formatting as visit_default() is called *after*. To avoid a # situation where this function formats a docstring differently on # the second pass, normalize it early. @@ -445,7 +455,7 @@ else: docstring = leaf.value else: -@@ -355,7 +374,7 @@ class LineGenerator(Visitor[Line]): +@@ -355,7 +376,7 @@ class LineGenerator(Visitor[Line]): quote_len = 1 if docstring[1] != quote_char else 3 docstring = docstring[quote_len:-quote_len] docstring_started_empty = not docstring @@ -454,7 +464,7 @@ if is_multiline_string(leaf): docstring = fix_docstring(docstring, indent) -@@ -439,7 +458,8 @@ class LineGenerator(Visitor[Line]): +@@ -439,7 +460,8 @@ class LineGenerator(Visitor[Line]): self.visit_classdef = partial(v, keywords={"class"}, parens=Ø) self.visit_expr_stmt = partial(v, keywords=Ø, parens=ASSIGNMENTS) self.visit_return_stmt = partial(v, keywords={"return"}, parens={"return"}) @@ -464,7 +474,7 @@ self.visit_del_stmt = partial(v, keywords=Ø, parens={"del"}) self.visit_async_funcdef = self.visit_async_stmt self.visit_decorated = self.visit_decorators -@@ -466,10 +486,11 @@ def transform_line( +@@ -466,10 +488,11 @@ def transform_line( ll = mode.line_length sn = mode.string_normalization @@ -480,7 +490,47 @@ transformers: List[Transformer] if ( -@@ -648,18 +669,56 @@ def right_hand_split( +@@ -609,6 +632,17 @@ def left_hand_split(line: Line, _feature + yield result + + ++@dataclass ++class _RHSResult: ++ """Intermediate split result from a right hand split.""" ++ ++ head: Line ++ body: Line ++ tail: Line ++ opening_bracket: Leaf ++ closing_bracket: Leaf ++ ++ + def right_hand_split( + line: Line, + line_length: int, +@@ -623,6 +657,21 @@ def right_hand_split( + + Note: running this function modifies `bracket_depth` on the leaves of `line`. + """ ++ rhs_result = _first_right_hand_split(line, omit=omit) ++ yield from _maybe_split_omitting_optional_parens( ++ rhs_result, line, line_length, features=features, omit=omit ++ ) ++ ++ ++def _first_right_hand_split( ++ line: Line, ++ omit: Collection[LeafID] = (), ++) -> _RHSResult: ++ """Split the line into head, body, tail starting with the last bracket pair. ++ Note: this function should not have side effects. It's replied upon by ++ _maybe_split_omitting_optional_parens to get an opinion whether to prefer ++ splitting on the right side of an assignment statement. ++ """ + tail_leaves: List[Leaf] = [] + body_leaves: List[Leaf] = [] + head_leaves: List[Leaf] = [] +@@ -648,41 +697,113 @@ def right_hand_split( tail_leaves.reverse() body_leaves.reverse() head_leaves.reverse() @@ -529,42 +579,133 @@ + ) + tail = bracket_split_build_line(tail_leaves, line, opening_brackets[0]) bracket_split_succeeded_or_raise(head, body, tail) ++ return _RHSResult(head, body, tail, opening_bracket, closing_bracket) ++ ++ ++def _maybe_split_omitting_optional_parens( ++ rhs: _RHSResult, ++ line: Line, ++ line_length: int, ++ features: Collection[Feature] = (), ++ omit: Collection[LeafID] = (), ++) -> Iterator[Line]: if ( Feature.FORCE_OPTIONAL_PARENTHESES not in features # the opening bracket is an optional paren - and opening_bracket.type == token.LPAR - and not opening_bracket.value -+ and opening_brackets[0].type == token.LPAR -+ and not opening_brackets[0].value ++ and rhs.opening_bracket.type == token.LPAR ++ and not rhs.opening_bracket.value # the closing bracket is an optional paren - and closing_bracket.type == token.RPAR - and not closing_bracket.value -+ and closing_brackets[-1].type == token.RPAR -+ and not closing_brackets[-1].value ++ and rhs.closing_bracket.type == token.RPAR ++ and not rhs.closing_bracket.value # it's not an import (optional parens are the only thing we can split on # in this case; attempting a split without them is a waste of time) and not line.is_import -@@ -668,7 +727,7 @@ def right_hand_split( + # there are no standalone comments in the body +- and not body.contains_standalone_comments(0) ++ and not rhs.body.contains_standalone_comments(0) # and we can actually remove the parens - and can_omit_invisible_parens(body, line_length) +- and can_omit_invisible_parens(body, line_length) ++ and can_omit_invisible_parens(rhs.body, line_length) ): - omit = {id(closing_bracket), *omit} -+ omit = {*{id(b) for b in closing_brackets}, *omit} ++ omit = {id(rhs.closing_bracket), *omit} try: - yield from right_hand_split(line, line_length, features=features, omit=omit) - return -@@ -690,8 +749,8 @@ def right_hand_split( +- yield from right_hand_split(line, line_length, features=features, omit=omit) +- return ++ # The _RHSResult Omitting Optional Parens. ++ rhs_oop = _first_right_hand_split(line, omit=omit) ++ if not ( ++ line.mode.is_pyink ++ # the split is right after `=` ++ and len(rhs.head.leaves) >= 2 ++ and rhs.head.leaves[-2].type == token.EQUAL ++ # the left side of assignement contains brackets ++ and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1]) ++ # the left side of assignment is short enough (the -1 is for the ending ++ # optional paren) ++ and is_line_short_enough(rhs.head, line_length=line_length - 1) ++ # the left side of assignment won't explode further because of magic ++ # trailing comma ++ and rhs.head.magic_trailing_comma is None ++ # the split by omitting optional parens isn't preferred by some other ++ # reason ++ and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length) ++ ): ++ yield from _maybe_split_omitting_optional_parens( ++ rhs_oop, line, line_length, features=features, omit=omit ++ ) ++ return + + except CannotSplit as e: + if not ( +- can_be_split(body) +- or is_line_short_enough(body, line_length=line_length) ++ can_be_split(rhs.body) ++ or is_line_short_enough(rhs.body, line_length=line_length) + ): + raise CannotSplit( + "Splitting failed, body is still too long and can't be split." + ) from e + +- elif head.contains_multiline_strings() or tail.contains_multiline_strings(): ++ elif ( ++ rhs.head.contains_multiline_strings() ++ or rhs.tail.contains_multiline_strings() ++ ): + raise CannotSplit( + "The current optional pair of parentheses is bound to fail to" + " satisfy the splitting algorithm because the head or the tail" +@@ -690,13 +811,42 @@ def right_hand_split( " line." ) from e - ensure_visible(opening_bracket) - ensure_visible(closing_bracket) -+ ensure_visible(opening_brackets[-1]) -+ ensure_visible(closing_brackets[0]) - for result in (head, body, tail): +- for result in (head, body, tail): ++ ensure_visible(rhs.opening_bracket) ++ ensure_visible(rhs.closing_bracket) ++ for result in (rhs.head, rhs.body, rhs.tail): if result: yield result -@@ -734,7 +793,7 @@ def bracket_split_build_line( + + ++def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool: ++ """ ++ Returns whether we should prefer the result from a split omitting optional parens. ++ """ ++ has_closing_bracket_after_assign = False ++ for leaf in reversed(rhs_oop.head.leaves): ++ if leaf.type == token.EQUAL: ++ break ++ if leaf.type in CLOSING_BRACKETS: ++ has_closing_bracket_after_assign = True ++ break ++ return ( ++ # contains matching brackets after the `=` (done by checking there is a ++ # closing bracket) ++ has_closing_bracket_after_assign ++ or ( ++ # the split is actually from inside the optional parens (done by checking ++ # the first line still contains the `=`) ++ any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves) ++ # the first line is short enough ++ and is_line_short_enough(rhs_oop.head, line_length=line_length) ++ ) ++ # contains unsplittable type ignore ++ or rhs_oop.head.contains_unsplittable_type_ignore() ++ or rhs_oop.body.contains_unsplittable_type_ignore() ++ or rhs_oop.tail.contains_unsplittable_type_ignore() ++ ) ++ ++ + def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None: + """Raise :exc:`CannotSplit` if the last left- or right-hand split failed. + +@@ -734,7 +884,7 @@ def bracket_split_build_line( result = Line(mode=original.mode, depth=original.depth) if is_body: result.inside_brackets = True @@ -573,7 +714,7 @@ if leaves: # Since body is a new indent level, remove spurious leading whitespace. normalize_prefix(leaves[0], inside_brackets=True) -@@ -1196,7 +1255,7 @@ def generate_trailers_to_omit(line: Line +@@ -1196,7 +1346,7 @@ def generate_trailers_to_omit(line: Line if not line.magic_trailing_comma: yield omit diff --git a/src/pyink/linegen.py b/src/pyink/linegen.py index 665c6baf544..3cde2d09e4d 100644 --- a/src/pyink/linegen.py +++ b/src/pyink/linegen.py @@ -2,6 +2,7 @@ Generating lines of code. """ import sys +from dataclasses import dataclass from functools import partial, wraps from typing import Collection, Iterator, List, Literal, Optional, Set, Union, cast @@ -19,6 +20,7 @@ from pyink.mode import Feature, Mode, Preview from pyink.nodes import ( ASSIGNMENTS, + BRACKETS, CLOSING_BRACKETS, OPENING_BRACKETS, RARROW, @@ -621,6 +623,17 @@ def left_hand_split(line: Line, _features: Collection[Feature] = ()) -> Iterator yield result +@dataclass +class _RHSResult: + """Intermediate split result from a right hand split.""" + + head: Line + body: Line + tail: Line + opening_bracket: Leaf + closing_bracket: Leaf + + def right_hand_split( line: Line, line_length: int, @@ -635,6 +648,21 @@ def right_hand_split( Note: running this function modifies `bracket_depth` on the leaves of `line`. """ + rhs_result = _first_right_hand_split(line, omit=omit) + yield from _maybe_split_omitting_optional_parens( + rhs_result, line, line_length, features=features, omit=omit + ) + + +def _first_right_hand_split( + line: Line, + omit: Collection[LeafID] = (), +) -> _RHSResult: + """Split the line into head, body, tail starting with the last bracket pair. + Note: this function should not have side effects. It's replied upon by + _maybe_split_omitting_optional_parens to get an opinion whether to prefer + splitting on the right side of an assignment statement. + """ tail_leaves: List[Leaf] = [] body_leaves: List[Leaf] = [] head_leaves: List[Leaf] = [] @@ -702,37 +730,71 @@ def right_hand_split( ) tail = bracket_split_build_line(tail_leaves, line, opening_brackets[0]) bracket_split_succeeded_or_raise(head, body, tail) + return _RHSResult(head, body, tail, opening_bracket, closing_bracket) + + +def _maybe_split_omitting_optional_parens( + rhs: _RHSResult, + line: Line, + line_length: int, + features: Collection[Feature] = (), + omit: Collection[LeafID] = (), +) -> Iterator[Line]: if ( Feature.FORCE_OPTIONAL_PARENTHESES not in features # the opening bracket is an optional paren - and opening_brackets[0].type == token.LPAR - and not opening_brackets[0].value + and rhs.opening_bracket.type == token.LPAR + and not rhs.opening_bracket.value # the closing bracket is an optional paren - and closing_brackets[-1].type == token.RPAR - and not closing_brackets[-1].value + and rhs.closing_bracket.type == token.RPAR + and not rhs.closing_bracket.value # it's not an import (optional parens are the only thing we can split on # in this case; attempting a split without them is a waste of time) and not line.is_import # there are no standalone comments in the body - and not body.contains_standalone_comments(0) + and not rhs.body.contains_standalone_comments(0) # and we can actually remove the parens - and can_omit_invisible_parens(body, line_length) + and can_omit_invisible_parens(rhs.body, line_length) ): - omit = {*{id(b) for b in closing_brackets}, *omit} + omit = {id(rhs.closing_bracket), *omit} try: - yield from right_hand_split(line, line_length, features=features, omit=omit) - return + # The _RHSResult Omitting Optional Parens. + rhs_oop = _first_right_hand_split(line, omit=omit) + if not ( + line.mode.is_pyink + # the split is right after `=` + and len(rhs.head.leaves) >= 2 + and rhs.head.leaves[-2].type == token.EQUAL + # the left side of assignement contains brackets + and any(leaf.type in BRACKETS for leaf in rhs.head.leaves[:-1]) + # the left side of assignment is short enough (the -1 is for the ending + # optional paren) + and is_line_short_enough(rhs.head, line_length=line_length - 1) + # the left side of assignment won't explode further because of magic + # trailing comma + and rhs.head.magic_trailing_comma is None + # the split by omitting optional parens isn't preferred by some other + # reason + and not _prefer_split_rhs_oop(rhs_oop, line_length=line_length) + ): + yield from _maybe_split_omitting_optional_parens( + rhs_oop, line, line_length, features=features, omit=omit + ) + return except CannotSplit as e: if not ( - can_be_split(body) - or is_line_short_enough(body, line_length=line_length) + can_be_split(rhs.body) + or is_line_short_enough(rhs.body, line_length=line_length) ): raise CannotSplit( "Splitting failed, body is still too long and can't be split." ) from e - elif head.contains_multiline_strings() or tail.contains_multiline_strings(): + elif ( + rhs.head.contains_multiline_strings() + or rhs.tail.contains_multiline_strings() + ): raise CannotSplit( "The current optional pair of parentheses is bound to fail to" " satisfy the splitting algorithm because the head or the tail" @@ -740,13 +802,42 @@ def right_hand_split( " line." ) from e - ensure_visible(opening_brackets[-1]) - ensure_visible(closing_brackets[0]) - for result in (head, body, tail): + ensure_visible(rhs.opening_bracket) + ensure_visible(rhs.closing_bracket) + for result in (rhs.head, rhs.body, rhs.tail): if result: yield result +def _prefer_split_rhs_oop(rhs_oop: _RHSResult, line_length: int) -> bool: + """ + Returns whether we should prefer the result from a split omitting optional parens. + """ + has_closing_bracket_after_assign = False + for leaf in reversed(rhs_oop.head.leaves): + if leaf.type == token.EQUAL: + break + if leaf.type in CLOSING_BRACKETS: + has_closing_bracket_after_assign = True + break + return ( + # contains matching brackets after the `=` (done by checking there is a + # closing bracket) + has_closing_bracket_after_assign + or ( + # the split is actually from inside the optional parens (done by checking + # the first line still contains the `=`) + any(leaf.type == token.EQUAL for leaf in rhs_oop.head.leaves) + # the first line is short enough + and is_line_short_enough(rhs_oop.head, line_length=line_length) + ) + # contains unsplittable type ignore + or rhs_oop.head.contains_unsplittable_type_ignore() + or rhs_oop.body.contains_unsplittable_type_ignore() + or rhs_oop.tail.contains_unsplittable_type_ignore() + ) + + def bracket_split_succeeded_or_raise(head: Line, body: Line, tail: Line) -> None: """Raise :exc:`CannotSplit` if the last left- or right-hand split failed.