From 70d09970c82d21cef43d15f9370623017c7dd11d Mon Sep 17 00:00:00 2001
From: Batuhan Taskaya
Date: Wed, 3 Nov 2021 22:02:56 +0100
Subject: [PATCH 01/10] black/parser: partial support for pattern matching

---
 src/blib2to3/Grammar.txt      |  40 ++++++++++--
 src/blib2to3/pgen2/driver.py  |  81 ++++++++++++++++++++++-
 src/blib2to3/pgen2/grammar.py |   1 +
 src/blib2to3/pgen2/parse.py   | 119 +++++++++++++++++++++++++++++++---
 src/blib2to3/pgen2/pgen.py    |  11 +++-
 5 files changed, 234 insertions(+), 18 deletions(-)

diff --git a/src/blib2to3/Grammar.txt b/src/blib2to3/Grammar.txt
index ac8a067378d..14fdff513d1 100644
--- a/src/blib2to3/Grammar.txt
+++ b/src/blib2to3/Grammar.txt
@@ -105,7 +105,7 @@ global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
 exec_stmt: 'exec' expr ['in' test [',' test]]
 assert_stmt: 'assert' test [',' test]
 
-compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
+compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt
 async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
 if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
 while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
@@ -115,9 +115,8 @@ try_stmt: ('try' ':' suite
            ['else' ':' suite]
            ['finally' ':' suite] |
           'finally' ':' suite))
-with_stmt: 'with' with_item (',' with_item)* ':' suite
-with_item: test ['as' expr]
-with_var: 'as' expr
+with_stmt: 'with' asexpr_test (',' asexpr_test)* ':' suite
+
 # NB compile.c makes sure that the default except clause is last
 except_clause: 'except' [test [(',' | 'as') test]]
 suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
@@ -131,7 +130,15 @@ testlist_safe: old_test [(',' old_test)+ [',']]
 old_test: or_test | old_lambdef
 old_lambdef: 'lambda' [varargslist] ':' old_test
 
-namedexpr_test: test [':=' test]
+namedexpr_test: asexpr_test [':=' asexpr_test]
+
+# This is not actually a real rule: since the parser is very limited in
+# its strategy around match/case rules, we insert a virtual
+# case (<expr> as <expr>) construct as a valid expression. Unless a better
+# approach is devised, the only side effect of this seems to be allowing
+# more input to be parsed (input which would then fail on the AST side).
+asexpr_test: test ['as' test]
+
 test: or_test ['if' or_test 'else' test] | lambdef
 or_test: and_test ('or' and_test)*
 and_test: not_test ('and' not_test)*
@@ -213,3 +220,26 @@ encoding_decl: NAME
 
 yield_expr: 'yield' [yield_arg]
 yield_arg: 'from' test | testlist_star_expr
+
+
+# 3.10 match statement definition
+
+# PS: normally the grammar is much, much more restricted, but at this
+# point, rather than bothering to encode the exact same DSL in an LL(1)
+# parser, we simply accept any expression and let the ast.parse() step
+# of safe mode reject whatever is not valid match-statement
+# grammar.
+
+# The reason it is more restricted is that patterns are a sort of
+# DSL (more advanced than the LHS of our assignments, but still a
+# very limited subset of Python). They are not really expressions,
+# but that hardly matters: if we can parse them, that is enough to
+# reformat them.
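+
+# As an editorial illustration (not part of the upstream grammar), the
+# loose rules below will happily parse a non-pattern such as
+#
+#   match x:
+#       case x + 1:   # not a valid PEP 634 pattern
+#           pass
+#
+# because "x + 1" is an ordinary or_test; it is the later ast.parse()
+# pass of safe mode that rejects it.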
+
+match_stmt: "match" subject_expr ':' NEWLINE INDENT case_block+ DEDENT
+subject_expr: namedexpr_test
+
+# cases
+case_block: "case" patterns [guard] ':' suite
+guard: 'if' namedexpr_test
+patterns: or_test ['as' or_test]
diff --git a/src/blib2to3/pgen2/driver.py b/src/blib2to3/pgen2/driver.py
index af1dc6b8aeb..5edd75b1333 100644
--- a/src/blib2to3/pgen2/driver.py
+++ b/src/blib2to3/pgen2/driver.py
@@ -28,19 +28,92 @@
     List,
     Optional,
     Text,
+    Iterator,
     Tuple,
+    TypeVar,
+    Generic,
     Union,
 )
+from dataclasses import dataclass, field
 
 # Pgen imports
 from . import grammar, parse, token, tokenize, pgen
 from logging import Logger
 from blib2to3.pytree import _Convert, NL
 from blib2to3.pgen2.grammar import Grammar
+from contextlib import contextmanager
 
 Path = Union[str, "os.PathLike[str]"]
 
 
+@dataclass
+class ReleaseRange:
+    start: int
+    end: Optional[int] = None
+    tokens: List[Any] = field(default_factory=list)
+
+    def lock(self) -> None:
+        total_eaten = len(self.tokens)
+        self.end = self.start + total_eaten
+
+
+class TokenProxy:
+    def __init__(self, generator: Any) -> None:
+        self._tokens = generator
+        self._counter = 0
+        self._release_ranges: List[ReleaseRange] = []
+
+    @contextmanager
+    def release(self) -> Iterator["TokenProxy"]:
+        release_range = ReleaseRange(self._counter)
+        self._release_ranges.append(release_range)
+        try:
+            yield self
+        finally:
+            # Lock the last release range to the final position that
+            # has been eaten.
+            release_range.lock()
+
+    def eat(self, point: int) -> Any:
+        eaten_tokens = self._release_ranges[-1].tokens
+        if point < len(eaten_tokens):
+            return eaten_tokens[point]
+        else:
+            while point >= len(eaten_tokens):
+                token = next(self._tokens)
+                eaten_tokens.append(token)
+            return token
+
+    def __iter__(self) -> "TokenProxy":
+        return self
+
+    def __next__(self) -> Any:
+        # If the current position has already been looked up (eaten),
+        # return the cached token; otherwise pull a fresh one from the
+        # underlying token producer.
+        for release_range in self._release_ranges:
+            assert release_range.end is not None
+
+            start, end = release_range.start, release_range.end
+            if start <= self._counter < end:
+                token = release_range.tokens[self._counter - start]
+                break
+        else:
+            token = next(self._tokens)
+        self._counter += 1
+        return token
+
+    def can_advance(self, to: int) -> bool:
+        # Try to eat; fail if we can't. The eat operation is cached, so
+        # there won't be any additional cost for eating here.
+        try:
+            self.eat(to)
+        except StopIteration:
+            return False
+        else:
+            return True
+
+
 class Driver(object):
     def __init__(
         self,
@@ -57,14 +130,18 @@ def __init__(
 
     def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
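+        # Wrap the token stream in a TokenProxy so that the parser can peek
+        # at upcoming tokens (and replay them) while disambiguating soft
+        # keywords such as "match" and "case".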
+        proxy = TokenProxy(tokens)
+
         p = parse.Parser(self.grammar, self.convert)
-        p.setup()
+        p.setup(proxy=proxy)
+
         lineno = 1
         column = 0
         indent_columns = []
         type = value = start = end = line_text = None
         prefix = ""
-        for quintuple in tokens:
+
+        for quintuple in proxy:
             type, value, start, end, line_text = quintuple
             if start != (lineno, column):
                 assert (lineno, column) <= start, ((lineno, column), start)
diff --git a/src/blib2to3/pgen2/grammar.py b/src/blib2to3/pgen2/grammar.py
index 2882cdac89b..f6b0eae6d7e 100644
--- a/src/blib2to3/pgen2/grammar.py
+++ b/src/blib2to3/pgen2/grammar.py
@@ -89,6 +89,7 @@ def __init__(self) -> None:
         self.dfas: Dict[int, DFAS] = {}
         self.labels: List[Label] = [(0, "EMPTY")]
         self.keywords: Dict[str, int] = {}
+        self.soft_keywords: Dict[str, int] = {}
         self.tokens: Dict[int, int] = {}
         self.symbol2label: Dict[str, int] = {}
         self.start = 256
diff --git a/src/blib2to3/pgen2/parse.py b/src/blib2to3/pgen2/parse.py
index 47c8f02b4f5..a48a86279e0 100644
--- a/src/blib2to3/pgen2/parse.py
+++ b/src/blib2to3/pgen2/parse.py
@@ -9,22 +9,31 @@ how this parsing engine works.
 
 """
+import copy
+from contextlib import contextmanager
 
 # Local imports
-from . import token
+from . import grammar, token, tokenize
 from typing import (
+    cast,
+    Any,
     Optional,
     Text,
     Union,
     Tuple,
     Dict,
     List,
+    Iterator,
     Callable,
     Set,
+    TYPE_CHECKING,
 )
 from blib2to3.pgen2.grammar import Grammar
 from blib2to3.pytree import NL, Context, RawNode, Leaf, Node
 
+if TYPE_CHECKING:
+    from blib2to3.pgen2.driver import TokenProxy
+
 Results = Dict[Text, NL]
 Convert = Callable[[Grammar, RawNode], Union[Node, Leaf]]
 
@@ -37,6 +46,61 @@ def lam_sub(grammar: Grammar, node: RawNode) -> NL:
     return Node(type=node[0], children=node[3], context=node[2])
 
 
+class Recorder:
+    def __init__(self, parser: "Parser", ilabels: List[int], context: Context) -> None:
+        self.parser = parser
+        self._ilabels = ilabels
+        self.context = context  # does not really matter here
+
+        self._dead_ilabels: Set[int] = set()
+        self._start_point = copy.deepcopy(self.parser.stack)
+        self._points = {ilabel: copy.deepcopy(self._start_point) for ilabel in ilabels}
+
+    @property
+    def ilabels(self) -> Set[int]:
+        return self._dead_ilabels.symmetric_difference(self._ilabels)
+
+    @contextmanager
+    def switch_to(self, ilabel: int) -> Iterator[None]:
+        self.parser.stack = self._points[ilabel]
+        try:
+            yield
+        except ParseError:
+            self._dead_ilabels.add(ilabel)
+        finally:
+            self.parser.stack = self._start_point
+
+    def add_token(
+        self, tok_type: int, tok_val: Optional[Text], raw: bool = False
+    ) -> None:
+        func: Callable[..., Any]
+        if raw:
+            func = self.parser._addtoken
+        else:
+            func = self.parser.addtoken
+
+        for ilabel in self.ilabels:
+            with self.switch_to(ilabel):
+                args = [tok_type, tok_val, self.context]
+                if raw:
+                    args.insert(0, ilabel)
+                func(*args)
+
+    def determine_route(
+        self, value: Optional[Text] = None, force: bool = False
+    ) -> Optional[int]:
+        alive_ilabels = self.ilabels
+        if len(alive_ilabels) == 0:
+            *_, most_successful_ilabel = self._dead_ilabels
+            raise ParseError("bad input", most_successful_ilabel, value, self.context)
+
+        ilabel, *rest = alive_ilabels
+        if force or not rest:
+            return ilabel
+        else:
+            return None
+
+
 class ParseError(Exception):
     """Exception to signal the parser is stuck."""
 
@@ -114,7 +178,7 @@ def __init__(self, grammar: Grammar, convert: Optional[Convert] = None) -> None:
         self.grammar = grammar
         self.convert = convert or lam_sub
 
-    def setup(self, start: Optional[int] = None) -> None:
+    def setup(self, proxy: "TokenProxy", start:
Optional[int] = None) -> None: """Prepare for parsing. This *must* be called before starting to parse. @@ -137,11 +201,44 @@ def setup(self, start: Optional[int] = None) -> None: self.stack: List[Tuple[DFAS, int, RawNode]] = [stackentry] self.rootnode: Optional[NL] = None self.used_names: Set[str] = set() + self.proxy = proxy def addtoken(self, type: int, value: Optional[Text], context: Context) -> bool: """Add a token; return True iff this is the end of the program.""" # Map from token to label - ilabel = self.classify(type, value, context) + ilabels = self.classify(type, value, context) + assert len(ilabels) >= 1 + + if len(ilabels) == 1: + [ilabel] = ilabels + return self._addtoken(ilabel, type, value, context) + + with self.proxy.release() as proxy: + counter, force = 0, False + recorder = Recorder(self, ilabels, context) + recorder.add_token(type, value, raw=True) + + next_token_value = value + while recorder.determine_route(next_token_value) is None: + if not proxy.can_advance(counter): + force = True + break + + next_token_type, next_token_value, *_ = proxy.eat(counter) + if next_token_type == tokenize.OP: + next_token_type = grammar.opmap[cast(str, next_token_value)] + + recorder.add_token(next_token_type, next_token_value) + counter += 1 + + ilabel = cast(int, recorder.determine_route(next_token_value, force=force)) + assert ilabel is not None + + return self._addtoken(ilabel, type, value, context) + + def _addtoken( + self, ilabel: int, type: int, value: Optional[Text], context: Context + ) -> bool: # Loop until the token is shifted; may raise exceptions while True: dfa, state, node = self.stack[-1] @@ -185,20 +282,26 @@ def addtoken(self, type: int, value: Optional[Text], context: Context) -> bool: # No success finding a transition raise ParseError("bad input", type, value, context) - def classify(self, type: int, value: Optional[Text], context: Context) -> int: + def classify(self, type: int, value: Optional[Text], context: Context) -> List[int]: """Turn a token into a label. 
(Internal)"""
         if type == token.NAME:
             # Keep a listing of all used names
             assert value is not None
             self.used_names.add(value)
             # Check for reserved words
-            ilabel = self.grammar.keywords.get(value)
-            if ilabel is not None:
-                return ilabel
+            if value in self.grammar.keywords:
+                return [self.grammar.keywords[value]]
+            elif value in self.grammar.soft_keywords:
+                assert type in self.grammar.tokens
+                return [
+                    self.grammar.soft_keywords[value],
+                    self.grammar.tokens[type],
+                ]
+
         ilabel = self.grammar.tokens.get(type)
         if ilabel is None:
             raise ParseError("bad token", type, value, context)
-        return ilabel
+        return [ilabel]
 
     def shift(
         self, type: int, value: Optional[Text], newstate: int, context: Context
diff --git a/src/blib2to3/pgen2/pgen.py b/src/blib2to3/pgen2/pgen.py
index 564ebbd1184..631682a77c9 100644
--- a/src/blib2to3/pgen2/pgen.py
+++ b/src/blib2to3/pgen2/pgen.py
@@ -115,12 +115,17 @@ def make_label(self, c: PgenGrammar, label: Text) -> int:
             assert label[0] in ('"', "'"), label
             value = eval(label)
             if value[0].isalpha():
+                if label[0] == '"':
+                    keywords = c.soft_keywords
+                else:
+                    keywords = c.keywords
+
                 # A keyword
-                if value in c.keywords:
-                    return c.keywords[value]
+                if value in keywords:
+                    return keywords[value]
                 else:
                     c.labels.append((token.NAME, value))
-                    c.keywords[value] = ilabel
+                    keywords[value] = ilabel
                     return ilabel
         else:
             # An operator (any non-numeric token)

From f9e019fc58b6c7cb1798bb78034589cc26c26f1e Mon Sep 17 00:00:00 2001
From: Batuhan Taskaya
Date: Thu, 4 Nov 2021 10:31:39 +0100
Subject: [PATCH 02/10] temporarily append python.gram to the list of grammars

---
 src/black/linegen.py   | 6 +++++-
 src/black/parsing.py   | 2 +-
 src/blib2to3/pygram.py | 4 ++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/black/linegen.py b/src/black/linegen.py
index eb53fa0ac56..a5358f36c4c 100644
--- a/src/black/linegen.py
+++ b/src/black/linegen.py
@@ -126,7 +126,7 @@ def visit_stmt(
         """Visit a statement.
 
         This implementation is shared for `if`, `while`, `for`, `try`, `except`,
-        `def`, `with`, `class`, `assert` and assignments.
+        `def`, `with`, `class`, `assert`, `match`, `case` and assignments.
 
         The relevant Python language `keywords` for a given statement will be
         NAME leaves within it. This method puts those on a separate line.
@@ -292,6 +292,10 @@ def __post_init__(self) -> None:
         self.visit_async_funcdef = self.visit_async_stmt
         self.visit_decorated = self.visit_decorators
 
+        # PEP 634
+        self.visit_match_stmt = partial(v, keywords={"match"}, parens=Ø)
+        self.visit_case_stmt = partial(v, keywords={"case"}, parens={"case"})
+
 
 def transform_line(
     line: Line, mode: Mode, features: Collection[Feature] = ()
diff --git a/src/black/parsing.py b/src/black/parsing.py
index 0b8d984cedd..d3d0256922f 100644
--- a/src/black/parsing.py
+++ b/src/black/parsing.py
@@ -78,7 +78,7 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -
     if not src_txt.endswith("\n"):
         src_txt += "\n"
 
-    for grammar in get_grammars(set(target_versions)):
+    for grammar in get_grammars(set(target_versions)) + [pygram.python_grammar]:
         drv = driver.Driver(grammar, pytree.convert)
         try:
             result = drv.parse_string(src_txt, True)
diff --git a/src/blib2to3/pygram.py b/src/blib2to3/pygram.py
index b8362b81473..361463f6e33 100644
--- a/src/blib2to3/pygram.py
+++ b/src/blib2to3/pygram.py
@@ -191,6 +191,10 @@ def initialize(cache_dir: Union[str, "os.PathLike[str]", None] = None) -> None:
         True
     )
 
+    # TODO:
+    # We might need a new grammar that makes match/case soft keywords, so we
+    # won't affect older versions that don't have them.
+
     pattern_grammar = driver.load_packaged_grammar(
         "blib2to3", _PATTERN_GRAMMAR_FILE, cache_dir
     )

From abad13b93aade233b3d8e3086da81d9bd164fe19 Mon Sep 17 00:00:00 2001
From: Richard Si <63936253+ichard26@users.noreply.github.com>
Date: Wed, 3 Nov 2021 19:33:41 -0400
Subject: [PATCH 03/10] A few small fixes + undo testing grammar pin

---
 src/black/parsing.py          | 2 +-
 src/blib2to3/pgen2/grammar.py | 1 +
 src/blib2to3/pygram.py        | 8 ++++++--
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/black/parsing.py b/src/black/parsing.py
index d3d0256922f..0b8d984cedd 100644
--- a/src/black/parsing.py
+++ b/src/black/parsing.py
@@ -78,7 +78,7 @@ def lib2to3_parse(src_txt: str, target_versions: Iterable[TargetVersion] = ()) -
     if not src_txt.endswith("\n"):
         src_txt += "\n"
 
-    for grammar in get_grammars(set(target_versions)) + [pygram.python_grammar]:
+    for grammar in get_grammars(set(target_versions)):
         drv = driver.Driver(grammar, pytree.convert)
         try:
             result = drv.parse_string(src_txt, True)
diff --git a/src/blib2to3/pgen2/grammar.py b/src/blib2to3/pgen2/grammar.py
index f6b0eae6d7e..56851070933 100644
--- a/src/blib2to3/pgen2/grammar.py
+++ b/src/blib2to3/pgen2/grammar.py
@@ -137,6 +137,7 @@ def copy(self: _P) -> _P:
             "number2symbol",
             "dfas",
             "keywords",
+            "soft_keywords",
             "tokens",
             "symbol2label",
         ):
diff --git a/src/blib2to3/pygram.py b/src/blib2to3/pygram.py
index 361463f6e33..705bbc09251 100644
--- a/src/blib2to3/pygram.py
+++ b/src/blib2to3/pygram.py
@@ -39,12 +39,14 @@ class _python_symbols(Symbols):
     arglist: int
     argument: int
     arith_expr: int
+    asexpr_test: int
     assert_stmt: int
     async_funcdef: int
     async_stmt: int
     atom: int
     augassign: int
     break_stmt: int
+    case_block: int
     classdef: int
     comp_for: int
     comp_if: int
@@ -74,6 +76,7 @@ class _python_symbols(Symbols):
     for_stmt: int
     funcdef: int
     global_stmt: int
+    guard: int
     if_stmt: int
     import_as_name: int
     import_as_names: int
@@ -82,6 +85,7 @@ class _python_symbols(Symbols):
     import_stmt: int
     lambdef: int
     listmaker: int
+    match_stmt: int
     namedexpr_test: int
     not_test: int
     old_comp_for: int
@@ -92,6 +96,7 @@ class _python_symbols(Symbols):
     or_test: int
     parameters: int
     pass_stmt: int
+    patterns: int
     power: int
     print_stmt: int
    raise_stmt: int
@@ -101,6 +106,7 @@
class _python_symbols(Symbols): single_input: int sliceop: int small_stmt: int + subject_expr: int star_expr: int stmt: int subscript: int @@ -124,9 +130,7 @@ class _python_symbols(Symbols): vfplist: int vname: int while_stmt: int - with_item: int with_stmt: int - with_var: int xor_expr: int yield_arg: int yield_expr: int From 49ac15a3cd0c16ab9b7034efca4a1424ae00717b Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Tue, 9 Nov 2021 14:17:24 +0300 Subject: [PATCH 04/10] support top-level expression list --- src/blib2to3/Grammar.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/blib2to3/Grammar.txt b/src/blib2to3/Grammar.txt index 14fdff513d1..49680323d8b 100644 --- a/src/blib2to3/Grammar.txt +++ b/src/blib2to3/Grammar.txt @@ -242,4 +242,5 @@ subject_expr: namedexpr_test # cases case_block: "case" patterns [guard] ':' suite guard: 'if' namedexpr_test -patterns: or_test ['as' or_test] +patterns: pattern ['as' pattern] +pattern: (expr|star_expr) (',' (expr|star_expr))* [','] From adbf0eb1e6c20d552dcb8441000beaf1aeac5daf Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Tue, 9 Nov 2021 14:50:11 +0300 Subject: [PATCH 05/10] Explicitly enable 3.10+ support (no automatic feature detection) --- src/black/mode.py | 5 +++++ src/black/parsing.py | 3 +++ src/blib2to3/pygram.py | 11 ++++++++--- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/black/mode.py b/src/black/mode.py index 01ee336366c..b24c9c60ded 100644 --- a/src/black/mode.py +++ b/src/black/mode.py @@ -20,6 +20,7 @@ class TargetVersion(Enum): PY37 = 7 PY38 = 8 PY39 = 9 + PY310 = 10 def is_python2(self) -> bool: return self is TargetVersion.PY27 @@ -39,6 +40,7 @@ class Feature(Enum): ASSIGNMENT_EXPRESSIONS = 8 POS_ONLY_ARGUMENTS = 9 RELAXED_DECORATORS = 10 + PATTERN_MATCHING = 11 FORCE_OPTIONAL_PARENTHESES = 50 # temporary for Python 2 deprecation @@ -108,6 +110,9 @@ class Feature(Enum): Feature.RELAXED_DECORATORS, Feature.POS_ONLY_ARGUMENTS, }, + TargetVersion.PY310: { + Feature.PATTERN_MATCHING, + }, } diff --git a/src/black/parsing.py b/src/black/parsing.py index 0b8d984cedd..fc540ad021d 100644 --- a/src/black/parsing.py +++ b/src/black/parsing.py @@ -59,6 +59,9 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]: # Python 3-compatible code, so only try Python 3 grammar. 
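+    # Grammars are attempted in list order by lib2to3_parse, so the
+    # soft-keyword grammar appended below is tried before the regular
+    # Python 3 grammars.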
grammars = [] + if supports_feature(target_versions, Feature.PATTERN_MATCHING): + # Python 3.10+ + grammars.append(pygram.python_grammar_soft_keywords) # If we have to parse both, try to parse async as a keyword first if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS): # Python 3.7+ diff --git a/src/blib2to3/pygram.py b/src/blib2to3/pygram.py index 705bbc09251..601d57f9840 100644 --- a/src/blib2to3/pygram.py +++ b/src/blib2to3/pygram.py @@ -163,6 +163,7 @@ def initialize(cache_dir: Union[str, "os.PathLike[str]", None] = None) -> None: global python_grammar_no_print_statement global python_grammar_no_print_statement_no_exec_statement global python_grammar_no_print_statement_no_exec_statement_async_keywords + global python_grammar_soft_keywords global python_symbols global pattern_grammar global pattern_symbols @@ -175,6 +176,8 @@ def initialize(cache_dir: Union[str, "os.PathLike[str]", None] = None) -> None: # Python 2 python_grammar = driver.load_packaged_grammar("blib2to3", _GRAMMAR_FILE, cache_dir) + soft_keywords = python_grammar.soft_keywords.copy() + python_grammar.soft_keywords.clear() python_symbols = _python_symbols(python_grammar) @@ -195,9 +198,11 @@ def initialize(cache_dir: Union[str, "os.PathLike[str]", None] = None) -> None: True ) - # TODO: - # We might need a new grammar that makes match/case soft keywords, so we won't affect - # older versions which doesn't have these. + # Python 3.10+ + python_grammar_soft_keywords = ( + python_grammar_no_print_statement_no_exec_statement_async_keywords.copy() + ) + python_grammar_soft_keywords.soft_keywords = soft_keywords pattern_grammar = driver.load_packaged_grammar( "blib2to3", _PATTERN_GRAMMAR_FILE, cache_dir From 46272d18a96a5f72e97956839dbd2d60f6172198 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Tue, 9 Nov 2021 15:28:53 +0300 Subject: [PATCH 06/10] Integrate basic tests --- src/black/linegen.py | 2 +- tests/data/pattern_matching_complex.py | 144 +++++++++++++++++++++++++ tests/data/pattern_matching_simple.py | 92 ++++++++++++++++ tests/test_format.py | 8 ++ 4 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 tests/data/pattern_matching_complex.py create mode 100644 tests/data/pattern_matching_simple.py diff --git a/src/black/linegen.py b/src/black/linegen.py index a5358f36c4c..8cf32c973bb 100644 --- a/src/black/linegen.py +++ b/src/black/linegen.py @@ -294,7 +294,7 @@ def __post_init__(self) -> None: # PEP 634 self.visit_match_stmt = partial(v, keywords={"match"}, parens=Ø) - self.visit_case_stmt = partial(v, keywords={"case"}, parens={"case"}) + self.visit_case_block = partial(v, keywords={"case"}, parens=Ø) def transform_line( diff --git a/tests/data/pattern_matching_complex.py b/tests/data/pattern_matching_complex.py new file mode 100644 index 00000000000..97ee194fd39 --- /dev/null +++ b/tests/data/pattern_matching_complex.py @@ -0,0 +1,144 @@ +# Cases sampled from Lib/test/test_patma.py + +# case black_test_patma_098 +match x: + case -0j: + y = 0 +# case black_test_patma_142 +match x: + case bytes(z): + y = 0 +# case black_test_patma_073 +match x: + case 0 if 0: + y = 0 + case 0 if 1: + y = 1 +# case black_test_patma_006 +match 3: + case 0 | 1 | 2 | 3: + x = True +# case black_test_patma_049 +match x: + case [0, 1] | [1, 0]: + y = 0 +# case black_check_sequence_then_mapping +match x: + case [*_]: + return "seq" + case {}: + return "map" +# case black_test_patma_035 +match x: + case {0: [1, 2, {}]}: + y = 0 + case {0: [1, 2, {}] | True} | {1: [[]]} | {0: [1, 2, {}]} | [] | "X" | {}: + 
y = 1 + case []: + y = 2 +# case black_test_patma_107 +match x: + case 0.25 + 1.75j: + y = 0 +# case black_test_patma_097 +match x: + case -0j: + y = 0 +# case black_test_patma_007 +match 4: + case 0 | 1 | 2 | 3: + x = True +# case black_test_patma_154 +match x: + case 0 if x: + y = 0 +# case black_test_patma_134 +match x: + case {1: 0}: + y = 0 + case {0: 0}: + y = 1 + case {**z}: + y = 2 +# case black_test_patma_185 +match Seq(): + case [*_]: + y = 0 +# case black_test_patma_063 +match x: + case 1: + y = 0 + case 1: + y = 1 +# case black_test_patma_248 +match x: + case {"foo": bar}: + y = bar +# case black_test_patma_019 +match (0, 1, 2): + case [0, 1, *x, 2]: + y = 0 +# case black_test_patma_052 +match x: + case [0]: + y = 0 + case [1, 0] if (x := x[:0]): + y = 1 + case [1, 0]: + y = 2 +# case black_test_patma_191 +match w: + case [x, y, *_]: + z = 0 +# case black_test_patma_110 +match x: + case -0.25 - 1.75j: + y = 0 +# case black_test_patma_151 +match (x,): + case [y]: + z = 0 +# case black_test_patma_114 +match x: + case A.B.C.D: + y = 0 +# case black_test_patma_232 +match x: + case None: + y = 0 +# case black_test_patma_058 +match x: + case 0: + y = 0 +# case black_test_patma_233 +match x: + case False: + y = 0 +# case black_test_patma_078 +match x: + case []: + y = 0 + case [""]: + y = 1 + case "": + y = 2 +# case black_test_patma_156 +match x: + case z: + y = 0 +# case black_test_patma_189 +match w: + case [x, y, *rest]: + z = 0 +# case black_test_patma_042 +match x: + case (0 as z) | (1 as z) | (2 as z) if z == x % 2: + y = 0 +# case black_test_patma_034 +match x: + case {0: [1, 2, {}]}: + y = 0 + case {0: [1, 2, {}] | False} | {1: [[]]} | {0: [1, 2, {}]} | [] | "X" | {}: + y = 1 + case []: + y = 2 diff --git a/tests/data/pattern_matching_simple.py b/tests/data/pattern_matching_simple.py new file mode 100644 index 00000000000..5ed62415a4b --- /dev/null +++ b/tests/data/pattern_matching_simple.py @@ -0,0 +1,92 @@ +# Cases sampled from PEP 636 examples + +match command.split(): + case [action, obj]: + ... # interpret action, obj + +match command.split(): + case [action]: + ... # interpret single-verb action + case [action, obj]: + ... # interpret action, obj + +match command.split(): + case ["quit"]: + print("Goodbye!") + quit_game() + case ["look"]: + current_room.describe() + case ["get", obj]: + character.get(obj, current_room) + case ["go", direction]: + current_room = current_room.neighbor(direction) + # The rest of your commands go here + +match command.split(): + case ["drop", *objects]: + for obj in objects: + character.drop(obj, current_room) + # The rest of your commands go here + +match command.split(): + case ["quit"]: + pass + case ["go", direction]: + print("Going:", direction) + case ["drop", *objects]: + print("Dropping: ", *objects) + case _: + print(f"Sorry, I couldn't understand {command!r}") + +match command.split(): + case ["north"] | ["go", "north"]: + current_room = current_room.neighbor("north") + case ["get", obj] | ["pick", "up", obj] | ["pick", obj, "up"]: + ... # Code for picking up the given object + +match command.split(): + case ["go", ("north" | "south" | "east" | "west")]: + current_room = current_room.neighbor(...) + # how do I know which direction to go? 
+
+match command.split():
+    case ["go", ("north" | "south" | "east" | "west") as direction]:
+        current_room = current_room.neighbor(direction)
+
+match command.split():
+    case ["go", direction] if direction in current_room.exits:
+        current_room = current_room.neighbor(direction)
+    case ["go", _]:
+        print("Sorry, you can't go that way")
+
+match event.get():
+    case Click(position=(x, y)):
+        handle_click_at(x, y)
+    case KeyPress(key_name="Q") | Quit():
+        game.quit()
+    case KeyPress(key_name="up arrow"):
+        game.go_north()
+    case KeyPress():
+        pass  # Ignore other keystrokes
+    case other_event:
+        raise ValueError(f"Unrecognized event: {other_event}")
+
+match event.get():
+    case Click((x, y), button=Button.LEFT):  # This is a left click
+        handle_click_at(x, y)
+    case Click():
+        pass  # ignore other clicks
+
+
+def where_is(point):
+    match point:
+        case Point(x=0, y=0):
+            print("Origin")
+        case Point(x=0, y=y):
+            print(f"Y={y}")
+        case Point(x=x, y=0):
+            print(f"X={x}")
+        case Point():
+            print("Somewhere else")
+        case _:
+            print("Not a point")
diff --git a/tests/test_format.py b/tests/test_format.py
index 649c1572bee..1d289950726 100644
--- a/tests/test_format.py
+++ b/tests/test_format.py
@@ -70,6 +70,7 @@
     "percent_precedence",
 ]
 
+PY310_CASES = ["pattern_matching_simple", "pattern_matching_complex"]
 
 SOURCES = [
     "src/black/__init__.py",
@@ -187,6 +188,13 @@ def test_pep_570() -> None:
     assert_format(source, expected, minimum_version=(3, 8))
 
 
+@pytest.mark.parametrize("filename", PY310_CASES)
+def test_python_310(filename: str) -> None:
+    source, expected = read_data(filename)
+    mode = black.Mode(target_versions={black.TargetVersion.PY310})
+    assert_format(source, expected, mode, minimum_version=(3, 10))
+
+
 def test_docstring_no_string_normalization() -> None:
     """Like test_docstring but with string normalization off."""
     source, expected = read_data("docstring_no_string_normalization")

From e0b6ffc5b4a1ecb328540a9a64b231b8557d405b Mon Sep 17 00:00:00 2001
From: Batuhan Taskaya
Date: Tue, 9 Nov 2021 15:41:02 +0300
Subject: [PATCH 07/10] add some inline explanations

---
 src/blib2to3/pgen2/parse.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/blib2to3/pgen2/parse.py b/src/blib2to3/pgen2/parse.py
index a48a86279e0..dc405264bad 100644
--- a/src/blib2to3/pgen2/parse.py
+++ b/src/blib2to3/pgen2/parse.py
@@ -209,10 +209,21 @@ def addtoken(self, type: int, value: Optional[Text], context: Context) -> bool:
         ilabels = self.classify(type, value, context)
         assert len(ilabels) >= 1
 
+        # If there is only one state we can advance to, take it
+        # directly, as is.
         if len(ilabels) == 1:
             [ilabel] = ilabels
             return self._addtoken(ilabel, type, value, context)
 
+        # If there are multiple states we could advance to (this only
+        # happens with soft keywords), then we will try all of them in
+        # parallel, and as soon as one state can reach further than the
+        # rest, we'll choose that one. This is a pretty hacky and
+        # hopefully temporary algorithm.
+        #
+        # For a more detailed explanation, check out this post:
+        # https://tree.science/what-the-backtracking.html
+
         with self.proxy.release() as proxy:
             counter, force = 0, False
             recorder = Recorder(self, ilabels, context)
@@ -283,7 +294,10 @@ def _addtoken(
             raise ParseError("bad input", type, value, context)
 
     def classify(self, type: int, value: Optional[Text], context: Context) -> List[int]:
-        """Turn a token into a label. (Internal)"""
+        """Turn a token into a label.
(Internal) + + Depending on whether the value is a soft-keyword or not, + this function may return multiple labels to choose from.""" if type == token.NAME: # Keep a listing of all used names assert value is not None From bd68b81f3f146a5a3065ba2367633d70e594a6a7 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Sat, 13 Nov 2021 11:57:50 +0300 Subject: [PATCH 08/10] add tests about parenthesized context managers --- tests/data/parenthesized_context_managers.py | 21 ++++++++++++++++++++ tests/test_format.py | 6 +++++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 tests/data/parenthesized_context_managers.py diff --git a/tests/data/parenthesized_context_managers.py b/tests/data/parenthesized_context_managers.py new file mode 100644 index 00000000000..ccf1f94883e --- /dev/null +++ b/tests/data/parenthesized_context_managers.py @@ -0,0 +1,21 @@ +with (CtxManager() as example): + ... + +with (CtxManager1(), CtxManager2()): + ... + +with (CtxManager1() as example, CtxManager2()): + ... + +with (CtxManager1(), CtxManager2() as example): + ... + +with (CtxManager1() as example1, CtxManager2() as example2): + ... + +with ( + CtxManager1() as example1, + CtxManager2() as example2, + CtxManager3() as example3, +): + ... diff --git a/tests/test_format.py b/tests/test_format.py index 1d289950726..4359deea92b 100644 --- a/tests/test_format.py +++ b/tests/test_format.py @@ -70,7 +70,11 @@ "percent_precedence", ] -PY310_CASES = ["pattern_matching_simple", "pattern_matching_complex"] +PY310_CASES = [ + "pattern_matching_simple", + "pattern_matching_complex", + "parenthesized_context_managers", +] SOURCES = [ "src/black/__init__.py", From 49096af7e944cbd9df255a1b83b2d440ff4ea584 Mon Sep 17 00:00:00 2001 From: Batuhan Taskaya Date: Sat, 13 Nov 2021 12:01:00 +0300 Subject: [PATCH 09/10] Add changelog; fix two minor blib2to3 issues Co-authored-by: Richard Si <63936253+ichard26@users.noreply.github.com> --- CHANGES.md | 2 ++ src/blib2to3/pygram.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index 4b8dc57388c..661297d0f52 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,6 +6,8 @@ - Warn about Python 2 deprecation in more cases by improving Python 2 only syntax detection (#2592) +- Add partial support for the match statement. As it's experimental, it's only enabled + when `--target-version py310` is explicitly specified (#2586) ## 21.10b0 diff --git a/src/blib2to3/pygram.py b/src/blib2to3/pygram.py index 601d57f9840..aa20b8104ae 100644 --- a/src/blib2to3/pygram.py +++ b/src/blib2to3/pygram.py @@ -96,6 +96,7 @@ class _python_symbols(Symbols): or_test: int parameters: int pass_stmt: int + pattern: int patterns: int power: int print_stmt: int @@ -153,6 +154,7 @@ class _pattern_symbols(Symbols): python_grammar_no_print_statement_no_exec_statement_async_keywords: Grammar python_grammar_no_exec_statement: Grammar pattern_grammar: Grammar +python_grammar_soft_keywords: Grammar python_symbols: _python_symbols pattern_symbols: _pattern_symbols From 3565e9f887124963fdaf9ec9cba43b75875f74b6 Mon Sep 17 00:00:00 2001 From: Richard Si <63936253+ichard26@users.noreply.github.com> Date: Sat, 13 Nov 2021 11:53:29 -0500 Subject: [PATCH 10/10] Parenthesized with deserves a changelog entry too --- CHANGES.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.md b/CHANGES.md index 661297d0f52..b2e8f7439b7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,6 +8,7 @@ detection (#2592) - Add partial support for the match statement. 
As it's experimental, it's only enabled when `--target-version py310` is explicitly specified (#2586) +- Add support for parenthesized with (#2586) ## 21.10b0
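
--
Editorial appendix: a compact sketch of the soft-keyword disambiguation
strategy that the Recorder/TokenProxy machinery in these patches implements.
This is illustrative code, not part of the patch series; the toy predicates
below are invented stand-ins for the full DFA stacks that the real parser
advances in parallel.

    from typing import Callable, List, Sequence

    def determine_route(
        candidates: List[Callable[[str], bool]],
        lookahead: Sequence[str],
    ) -> int:
        """Feed upcoming tokens to every candidate interpretation in
        parallel and commit to the one that survives the longest."""
        alive = set(range(len(candidates)))
        dead: List[int] = []
        for tok in lookahead:
            for i in sorted(alive):
                if not candidates[i](tok):  # this route hits a "parse error"
                    alive.discard(i)
                    dead.append(i)
            if len(alive) == 1:
                return alive.pop()  # one clear survivor: commit to it
            if not alive:
                raise SyntaxError(f"bad input (last dead route: {dead[-1]})")
        return min(alive)  # ran out of lookahead: force a choice

    # "match" as a statement keyword vs. "match" as a plain name, modelled
    # as two (made-up) predicates over the token that follows it:
    as_keyword = lambda tok: tok not in {"=", ")", ","}
    as_name = lambda tok: tok in {"=", "(", ")", ",", "."}
    print(determine_route([as_keyword, as_name], ["="]))  # -> 1: plain name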