black/parser: partial support for pattern matching (#2586)
Partial implementation of #2242. Only works when `-t py310` is explicitly specified.

Co-authored-by: Richard Si <63936253+ichard26@users.noreply.github.com>
2 people authored and JelleZijlstra committed Nov 16, 2021
1 parent 2e1b951 commit 3c2ea8a
Showing 14 changed files with 553 additions and 22 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
@@ -6,6 +6,9 @@

- Warn about Python 2 deprecation in more cases by improving Python 2 only syntax
detection (#2592)
- Add partial support for the match statement. As it's experimental, it's only enabled
when `--target-version py310` is explicitly specified (#2586)
- Add support for parenthesized with (#2586)
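
For illustration, a snippet like the following is now reformatted by Black when the target is given explicitly (a hypothetical example; `command`, `move`, and `describe` are made-up names):

```python
# Hypothetical input, formatted via: black --target-version py310 example.py
match command.split():
    case ["go", direction]:
        move(direction)  # made-up helper
    case ["look"]:
        describe()  # made-up helper
    case _:
        print("unknown command")
```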

## 21.10b0

6 changes: 5 additions & 1 deletion src/black/linegen.py
@@ -126,7 +126,7 @@ def visit_stmt(
"""Visit a statement.
This implementation is shared for `if`, `while`, `for`, `try`, `except`,
`def`, `with`, `class`, `assert` and assignments.
`def`, `with`, `class`, `assert`, `match`, `case` and assignments.
The relevant Python language `keywords` for a given statement will be
NAME leaves within it. This method puts those on a separate line.
@@ -292,6 +292,10 @@ def __post_init__(self) -> None:
self.visit_async_funcdef = self.visit_async_stmt
self.visit_decorated = self.visit_decorators

# PEP 634
self.visit_match_stmt = partial(v, keywords={"match"}, parens=Ø)
self.visit_case_block = partial(v, keywords={"case"}, parens=Ø)
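
Here `v` is the generic `visit_stmt` shown earlier in this file, bound with `functools.partial`, and `Ø` is Black's name for an empty-set constant. A minimal, self-contained sketch of this dispatch pattern (simplified, not Black's actual class):

```python
# Simplified sketch of the partial-based visitor dispatch; not Black's real class.
from functools import partial

Ø: set = set()  # Black defines an empty-set constant under this name in linegen.py

class StatementVisitor:
    def __init__(self) -> None:
        v = self.visit_stmt
        # Bind the generic visitor once per statement kind instead of subclassing:
        self.visit_match_stmt = partial(v, keywords={"match"}, parens=Ø)
        self.visit_case_block = partial(v, keywords={"case"}, parens=Ø)

    def visit_stmt(self, node: str, keywords: set, parens: set) -> None:
        print(f"{node}: splitting on keywords {keywords}")

StatementVisitor().visit_match_stmt("match_stmt")  # splitting on keywords {'match'}
```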


def transform_line(
line: Line, mode: Mode, features: Collection[Feature] = ()
5 changes: 5 additions & 0 deletions src/black/mode.py
@@ -20,6 +20,7 @@ class TargetVersion(Enum):
PY37 = 7
PY38 = 8
PY39 = 9
PY310 = 10

def is_python2(self) -> bool:
return self is TargetVersion.PY27
@@ -39,6 +40,7 @@ class Feature(Enum):
ASSIGNMENT_EXPRESSIONS = 8
POS_ONLY_ARGUMENTS = 9
RELAXED_DECORATORS = 10
PATTERN_MATCHING = 11
FORCE_OPTIONAL_PARENTHESES = 50

# temporary for Python 2 deprecation
@@ -108,6 +110,9 @@ class Feature(Enum):
Feature.RELAXED_DECORATORS,
Feature.POS_ONLY_ARGUMENTS,
},
TargetVersion.PY310: {
Feature.PATTERN_MATCHING,
},
}
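
This table drives feature and grammar selection: a feature counts as supported only when every requested target version provides it, which is why mixing `py39` and `py310` targets turns pattern matching back off. A standalone sketch of those semantics (Black's `supports_feature` in `src/black/mode.py` behaves this way):

```python
# Standalone sketch; mirrors the "all targets must support it" rule.
from enum import Enum

class TargetVersion(Enum):
    PY39 = 9
    PY310 = 10

class Feature(Enum):
    PATTERN_MATCHING = 11

VERSION_TO_FEATURES = {
    TargetVersion.PY39: set(),
    TargetVersion.PY310: {Feature.PATTERN_MATCHING},
}

def supports_feature(target_versions, feature):
    return all(feature in VERSION_TO_FEATURES[v] for v in target_versions)

print(supports_feature({TargetVersion.PY310}, Feature.PATTERN_MATCHING))  # True
print(supports_feature({TargetVersion.PY39, TargetVersion.PY310},
                       Feature.PATTERN_MATCHING))  # False
```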


3 changes: 3 additions & 0 deletions src/black/parsing.py
@@ -59,6 +59,9 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:

# Python 3-compatible code, so only try Python 3 grammar.
grammars = []
if supports_feature(target_versions, Feature.PATTERN_MATCHING):
# Python 3.10+
grammars.append(pygram.python_grammar_soft_keywords)
# If we have to parse both, try to parse async as a keyword first
if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
# Python 3.7+
41 changes: 36 additions & 5 deletions src/blib2to3/Grammar.txt
@@ -105,7 +105,7 @@ global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt
async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
@@ -115,9 +115,8 @@ try_stmt: ('try' ':' suite
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
with_var: 'as' expr
with_stmt: 'with' asexpr_test (',' asexpr_test)* ':' suite

# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [(',' | 'as') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
@@ -131,7 +130,15 @@ testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test

namedexpr_test: test [':=' test]
namedexpr_test: asexpr_test [':=' asexpr_test]

# This is not actually a real rule. Since the parser is very limited in
# how it can handle match/case constructs, we insert a virtual
# (<expr> as <expr>) form as a valid expression. Until a better approach
# is found, the only side effect of this seems to be allowing more code
# to be parsed (code which would then fail in ast.parse()).
asexpr_test: test ['as' test]
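
Folding `as` into the general `asexpr_test` rule is also what enables the parenthesized-with support mentioned in CHANGES.md. Both forms below now parse (a sketch; executing the parenthesized form requires Python 3.10, though CPython 3.9's PEG parser happens to accept it too):

```python
from contextlib import nullcontext

# Classic form:
with nullcontext(1) as a, nullcontext(2) as b:
    print(a, b)

# Parenthesized form, newly parseable by blib2to3 via asexpr_test:
with (nullcontext(1) as a, nullcontext(2) as b):
    print(a, b)
```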

test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
@@ -213,3 +220,27 @@ encoding_decl: NAME

yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr


# 3.10 match statement definition

# NB: the real grammar is much more restricted, but rather than trying
# to encode the exact same DSL in an LL(1) parser, we simply accept an
# expression here and let the ast.parse() step of safe mode reject
# invalid grammar.

# The reason the real grammar is more restricted is that patterns are a
# sort of DSL (more advanced than the LHS of assignments, but still a
# very limited Python subset). They are not really expressions, but that
# does not matter: if we can parse them, that is enough to reformat them.

match_stmt: "match" subject_expr ':' NEWLINE INDENT case_block+ DEDENT
subject_expr: namedexpr_test

# cases
case_block: "case" patterns [guard] ':' suite
guard: 'if' namedexpr_test
patterns: pattern ['as' pattern]
pattern: (expr|star_expr) (',' (expr|star_expr))* [',']
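
Because `pattern` is just an ordinary expression here, this grammar accepts a superset of PEP 634; the `ast.parse()` pass in safe mode is what rejects the excess. A sketch (requires Python 3.10+ to run):

```python
import ast

# A real PEP 634 pattern: accepted by the permissive grammar above and by CPython.
ast.parse("match p:\n case Point(x=0, y=0):\n  print('origin')\n")

# An arbitrary expression: the rules above would happily parse it as a
# "pattern", but ast.parse(), which Black's safe mode relies on, rejects it.
try:
    ast.parse("match p:\n case f(x) + 1:\n  pass\n")
except SyntaxError as exc:
    print("rejected:", exc.msg)
```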
81 changes: 79 additions & 2 deletions src/blib2to3/pgen2/driver.py
@@ -28,19 +28,92 @@
List,
Optional,
Text,
Iterator,
Tuple,
TypeVar,
Generic,
Union,
)
from dataclasses import dataclass, field

# Pgen imports
from . import grammar, parse, token, tokenize, pgen
from logging import Logger
from blib2to3.pytree import _Convert, NL
from blib2to3.pgen2.grammar import Grammar
from contextlib import contextmanager

Path = Union[str, "os.PathLike[str]"]


@dataclass
class ReleaseRange:
start: int
end: Optional[int] = None
tokens: List[Any] = field(default_factory=list)

def lock(self) -> None:
total_eaten = len(self.tokens)
self.end = self.start + total_eaten


class TokenProxy:
def __init__(self, generator: Any) -> None:
self._tokens = generator
self._counter = 0
self._release_ranges: List[ReleaseRange] = []

@contextmanager
def release(self) -> Iterator["TokenProxy"]:
release_range = ReleaseRange(self._counter)
self._release_ranges.append(release_range)
try:
yield self
finally:
# Lock the last release range to the final position that
# has been eaten.
release_range.lock()

def eat(self, point: int) -> Any:
eaten_tokens = self._release_ranges[-1].tokens
if point < len(eaten_tokens):
return eaten_tokens[point]
else:
while point >= len(eaten_tokens):
token = next(self._tokens)
eaten_tokens.append(token)
return token

def __iter__(self) -> "TokenProxy":
return self

def __next__(self) -> Any:
# If the current position has already been looked up, return the
# cached token; otherwise pull the next one from the underlying
# token producer.
for release_range in self._release_ranges:
assert release_range.end is not None

start, end = release_range.start, release_range.end
if start <= self._counter < end:
token = release_range.tokens[self._counter - start]
break
else:
token = next(self._tokens)
self._counter += 1
return token

def can_advance(self, to: int) -> bool:
# Try to eat; fail if we can't. The eat operation is cached, so
# there won't be any additional cost of eating here.
try:
self.eat(to)
except StopIteration:
return False
else:
return True
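
The proxy exists so the parser can peek ahead before committing (for example, to decide whether a soft keyword like `match` starts a statement or is an ordinary name) and then replay the same tokens. A usage sketch driving the class above with a plain iterator in place of real tokenizer output:

```python
# Usage sketch for TokenProxy; a plain iterator stands in for tokenize output.
proxy = TokenProxy(iter(["match", "x", ":", "NEWLINE"]))

with proxy.release():
    # Speculative lookahead: nothing is consumed from the proxy's main cursor.
    assert proxy.can_advance(1)
    assert proxy.eat(0) == "match"
    assert proxy.eat(1) == "x"

# Normal iteration afterwards replays the cached tokens before pulling new ones.
assert next(proxy) == "match"
assert next(proxy) == "x"
```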


class Driver(object):
def __init__(
self,
@@ -57,14 +130,18 @@ def __init__(
def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
"""Parse a series of tokens and return the syntax tree."""
# XXX Move the prefix computation into a wrapper around tokenize.
proxy = TokenProxy(tokens)

p = parse.Parser(self.grammar, self.convert)
p.setup()
p.setup(proxy=proxy)

lineno = 1
column = 0
indent_columns = []
type = value = start = end = line_text = None
prefix = ""
for quintuple in tokens:

for quintuple in proxy:
type, value, start, end, line_text = quintuple
if start != (lineno, column):
assert (lineno, column) <= start, ((lineno, column), start)
2 changes: 2 additions & 0 deletions src/blib2to3/pgen2/grammar.py
@@ -89,6 +89,7 @@ def __init__(self) -> None:
self.dfas: Dict[int, DFAS] = {}
self.labels: List[Label] = [(0, "EMPTY")]
self.keywords: Dict[str, int] = {}
self.soft_keywords: Dict[str, int] = {}
self.tokens: Dict[int, int] = {}
self.symbol2label: Dict[str, int] = {}
self.start = 256
@@ -136,6 +137,7 @@ def copy(self: _P) -> _P:
"number2symbol",
"dfas",
"keywords",
"soft_keywords",
"tokens",
"symbol2label",
):
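
The separate `soft_keywords` table matters because, unlike entries in `keywords`, these names act as keywords only in specific positions and stay valid identifiers everywhere else (runnable on Python 3.10+):

```python
# "match" remains an ordinary name in 3.10; it is a keyword only where a
# match statement can begin. Both uses coexist below.
match = [1, 2, 3]
print(match.count(1))  # plain identifier usage

match match:  # the first "match" here is the soft keyword
    case [1, *rest]:
        print("starts with 1, rest =", rest)
```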
