
black/parser: partial support for pattern matching #2586

Merged
merged 10 commits on Nov 14, 2021
3 changes: 3 additions & 0 deletions CHANGES.md
@@ -6,6 +6,9 @@

- Warn about Python 2 deprecation in more cases by improving Python 2 only syntax
detection (#2592)
- Add partial support for the match statement. As it's experimental, it's only enabled
when `--target-version py310` is explicitly specified (#2586)
- Add support for parenthesized with (#2586)

## 21.10b0

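For context, here is a hedged sketch of the two constructs these changelog entries cover. The file contents are invented for illustration (handle is a made-up helper); the CLI flag is the one the entry itself names.

# example.py -- requires Python 3.10; formatted only with:
#   black --target-version py310 example.py
def handle(*parts: str) -> None:
    print("handling:", parts)

command = "go north"
match command.split():
    case [action]:
        handle(action)
    case [action, obj]:
        handle(action, obj)
    case _:
        raise ValueError(f"unknown command: {command!r}")

# Parenthesized with, the other construct mentioned above:
with (open("a.txt", "w") as f, open("b.txt", "w") as g):
    f.write("a")
    g.write("b")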
6 changes: 5 additions & 1 deletion src/black/linegen.py
@@ -126,7 +126,7 @@ def visit_stmt(
"""Visit a statement.

This implementation is shared for `if`, `while`, `for`, `try`, `except`,
`def`, `with`, `class`, `assert` and assignments.
`def`, `with`, `class`, `assert`, `match`, `case` and assignments.

The relevant Python language `keywords` for a given statement will be
NAME leaves within it. This method puts those on a separate line.
@@ -292,6 +292,10 @@ def __post_init__(self) -> None:
self.visit_async_funcdef = self.visit_async_stmt
self.visit_decorated = self.visit_decorators

# PEP 634
self.visit_match_stmt = partial(v, keywords={"match"}, parens=Ø)
self.visit_case_block = partial(v, keywords={"case"}, parens=Ø)


def transform_line(
line: Line, mode: Mode, features: Collection[Feature] = ()
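For readers unfamiliar with black's visitor machinery: visit_stmt is shared across statement types, and __post_init__ binds it with statement-specific keywords via functools.partial (Ø is black's name for an empty set). Below is a minimal, self-contained sketch of that dispatch pattern; MiniVisitor and its dict-based nodes are invented for illustration and are not black's actual classes.

from functools import partial
from typing import Iterator, Set

class MiniVisitor:
    """Illustrative only: dispatch node types to visit_* callables."""

    def __init__(self) -> None:
        # Mirror LineGenerator.__post_init__: bind the shared statement
        # visitor with statement-specific keywords.
        v = self.visit_stmt
        self.visit_match_stmt = partial(v, keywords={"match"}, parens=set())
        self.visit_case_block = partial(v, keywords={"case"}, parens=set())

    def visit(self, node: dict) -> Iterator[str]:
        visitor = getattr(self, f"visit_{node['type']}", self.visit_default)
        yield from visitor(node)

    def visit_default(self, node: dict) -> Iterator[str]:
        yield f"(default) {node['type']}"

    def visit_stmt(self, node: dict, keywords: Set[str], parens: Set[str]) -> Iterator[str]:
        # black's real visit_stmt puts each keyword NAME leaf on its own
        # line; here we only report which keywords apply.
        yield f"{node['type']}: keywords={sorted(keywords)}"

mini = MiniVisitor()
print(list(mini.visit({"type": "match_stmt"})))  # ["match_stmt: keywords=['match']"]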
5 changes: 5 additions & 0 deletions src/black/mode.py
@@ -20,6 +20,7 @@ class TargetVersion(Enum):
PY37 = 7
PY38 = 8
PY39 = 9
PY310 = 10

def is_python2(self) -> bool:
return self is TargetVersion.PY27
@@ -39,6 +40,7 @@ class Feature(Enum):
ASSIGNMENT_EXPRESSIONS = 8
POS_ONLY_ARGUMENTS = 9
RELAXED_DECORATORS = 10
PATTERN_MATCHING = 11
FORCE_OPTIONAL_PARENTHESES = 50

# temporary for Python 2 deprecation
@@ -108,6 +110,9 @@ class Feature(Enum):
Feature.RELAXED_DECORATORS,
Feature.POS_ONLY_ARGUMENTS,
},
TargetVersion.PY310: {
Feature.PATTERN_MATCHING,
},
}


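This mapping is consulted by supports_feature, the helper the parsing.py change below calls. Its semantics: a feature is usable only when every requested target version supports it, which is why mixing py38 with py310 disables pattern matching. A sketch mirroring that set-membership check (supports_feature itself already exists in src/black/mode.py; the reimplementation here is just to show the logic):

from typing import Set
from black.mode import Feature, TargetVersion, VERSION_TO_FEATURES

def supports_feature(target_versions: Set[TargetVersion], feat: Feature) -> bool:
    # Usable only if *every* requested target version has the feature.
    return all(feat in VERSION_TO_FEATURES[version] for version in target_versions)

print(supports_feature({TargetVersion.PY310}, Feature.PATTERN_MATCHING))  # True
print(supports_feature({TargetVersion.PY38, TargetVersion.PY310},
                       Feature.PATTERN_MATCHING))                         # False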
3 changes: 3 additions & 0 deletions src/black/parsing.py
@@ -59,6 +59,9 @@ def get_grammars(target_versions: Set[TargetVersion]) -> List[Grammar]:

# Python 3-compatible code, so only try Python 3 grammar.
grammars = []
if supports_feature(target_versions, Feature.PATTERN_MATCHING):
# Python 3.10+
grammars.append(pygram.python_grammar_soft_keywords)
# If we have to parse both, try to parse async as a keyword first
if not supports_feature(target_versions, Feature.ASYNC_IDENTIFIERS):
# Python 3.7+
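The ordering matters: grammars are tried in list order, so appending the soft-keyword grammar first means "match" and "case" are attempted as statement keywords before falling back to treating them as plain names. A hedged usage sketch, using only the names visible in this diff:

from black.mode import TargetVersion
from black.parsing import get_grammars

# Only py310 targeted: the soft-keyword grammar is appended first and
# therefore tried before the regular Python 3 grammars.
grammars = get_grammars({TargetVersion.PY310})

# Mixed targets: PY38 lacks PATTERN_MATCHING, so supports_feature is
# False and the soft-keyword grammar is skipped entirely.
grammars = get_grammars({TargetVersion.PY38, TargetVersion.PY310})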
41 changes: 36 additions & 5 deletions src/blib2to3/Grammar.txt
@@ -105,7 +105,7 @@ global_stmt: ('global' | 'nonlocal') NAME (',' NAME)*
exec_stmt: 'exec' expr ['in' test [',' test]]
assert_stmt: 'assert' test [',' test]

compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt
compound_stmt: if_stmt | while_stmt | for_stmt | try_stmt | with_stmt | funcdef | classdef | decorated | async_stmt | match_stmt
async_stmt: ASYNC (funcdef | with_stmt | for_stmt)
if_stmt: 'if' namedexpr_test ':' suite ('elif' namedexpr_test ':' suite)* ['else' ':' suite]
while_stmt: 'while' namedexpr_test ':' suite ['else' ':' suite]
@@ -115,9 +115,8 @@ try_stmt: ('try' ':' suite
['else' ':' suite]
['finally' ':' suite] |
'finally' ':' suite))
with_stmt: 'with' with_item (',' with_item)* ':' suite
with_item: test ['as' expr]
with_var: 'as' expr
with_stmt: 'with' asexpr_test (',' asexpr_test)* ':' suite

# NB compile.c makes sure that the default except clause is last
except_clause: 'except' [test [(',' | 'as') test]]
suite: simple_stmt | NEWLINE INDENT stmt+ DEDENT
@@ -131,7 +130,15 @@ testlist_safe: old_test [(',' old_test)+ [',']]
old_test: or_test | old_lambdef
old_lambdef: 'lambda' [varargslist] ':' old_test

namedexpr_test: test [':=' test]
namedexpr_test: asexpr_test [':=' asexpr_test]

# This is not a real grammar rule. Since the parser's strategy for
# match/case rules is very limited, we insert a virtual
# (<expr> as <expr>) as a valid expression. Until a better approach
# is found, the only side effect of this seems to be allowing more
# input to be parsed (input which would then fail on the ast).
asexpr_test: test ['as' test]

test: or_test ['if' or_test 'else' test] | lambdef
or_test: and_test ('or' and_test)*
and_test: not_test ('and' not_test)*
@@ -213,3 +220,27 @@ encoding_decl: NAME

yield_expr: 'yield' [yield_arg]
yield_arg: 'from' test | testlist_star_expr


# 3.10 match statement definition

# PS: normally the grammar is much more restricted, but rather than
# try to encode the exact same DSL in an LL(1) parser right now, we
# simply accept an expression and let the ast.parse() step of safe
# mode reject invalid grammar.

# The reason the real grammar is more restricted is that patterns are
# a sort of DSL (more advanced than the LHS of our assignments, but
# still a very limited Python subset). They are not really
# expressions, but parsing them is enough to let us reformat them.

match_stmt: "match" subject_expr ':' NEWLINE INDENT case_block+ DEDENT
subject_expr: namedexpr_test

# cases
case_block: "case" patterns [guard] ':' suite
guard: 'if' namedexpr_test
patterns: pattern ['as' pattern]
pattern: (expr|star_expr) (',' (expr|star_expr))* [',']
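
The comments above describe the division of labor: blib2to3 over-accepts, and safe mode's ast.parse() round-trip rejects what CPython's real pattern grammar forbids. A hedged illustration of that boundary (requires a Python 3.10+ interpreter to run; the snippets are invented):

import ast

# A real pattern: accepted by this loose grammar and by CPython's ast.
ast.parse("match p:\n    case (0, y):\n        pass\n")

# An artifact of the loose `pattern: (expr|star_expr) ...` rule above:
# blib2to3 would happily parse `p + 1` as a pattern, but ast.parse
# rejects it, which is exactly what safe mode relies on.
try:
    ast.parse("match p:\n    case p + 1:\n        pass\n")
except SyntaxError as exc:
    print("rejected by ast.parse:", exc.msg)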
81 changes: 79 additions & 2 deletions src/blib2to3/pgen2/driver.py
@@ -28,19 +28,92 @@
List,
Optional,
Text,
Iterator,
Tuple,
TypeVar,
Generic,
Union,
)
from dataclasses import dataclass, field

# Pgen imports
from . import grammar, parse, token, tokenize, pgen
from logging import Logger
from blib2to3.pytree import _Convert, NL
from blib2to3.pgen2.grammar import Grammar
from contextlib import contextmanager

Path = Union[str, "os.PathLike[str]"]


@dataclass
class ReleaseRange:
start: int
end: Optional[int] = None
tokens: List[Any] = field(default_factory=list)

def lock(self) -> None:
total_eaten = len(self.tokens)
self.end = self.start + total_eaten


class TokenProxy:
"""A proxy over a token generator that caches lookahead so tokens can be replayed."""
def __init__(self, generator: Any) -> None:
self._tokens = generator
self._counter = 0
self._release_ranges: List[ReleaseRange] = []

@contextmanager
def release(self) -> Iterator["TokenProxy"]:
release_range = ReleaseRange(self._counter)
self._release_ranges.append(release_range)
try:
yield self
finally:
# Lock the last release range to the final position that
# has been eaten.
release_range.lock()

def eat(self, point: int) -> Any:
eaten_tokens = self._release_ranges[-1].tokens
if point < len(eaten_tokens):
return eaten_tokens[point]
else:
while point >= len(eaten_tokens):
token = next(self._tokens)
eaten_tokens.append(token)
return token

def __iter__(self) -> "TokenProxy":
return self

def __next__(self) -> Any:
# If the current position was already looked ahead (cached),
# return the eaten token; if not, advance the underlying
# token producer.
for release_range in self._release_ranges:
assert release_range.end is not None

start, end = release_range.start, release_range.end
if start <= self._counter < end:
token = release_range.tokens[self._counter - start]
break
else:
token = next(self._tokens)
self._counter += 1
return token

def can_advance(self, to: int) -> bool:
# Try to eat; fail if we can't. The eat operation is cached,
# so there won't be any additional cost of eating here.
try:
self.eat(to)
except StopIteration:
return False
else:
return True


class Driver(object):
def __init__(
self,
@@ -57,14 +130,18 @@ def __init__(
def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
"""Parse a series of tokens and return the syntax tree."""
# XXX Move the prefix computation into a wrapper around tokenize.
proxy = TokenProxy(tokens)

p = parse.Parser(self.grammar, self.convert)
p.setup()
p.setup(proxy=proxy)

lineno = 1
column = 0
indent_columns = []
type = value = start = end = line_text = None
prefix = ""
for quintuple in tokens:

for quintuple in proxy:
type, value, start, end, line_text = quintuple
if start != (lineno, column):
assert (lineno, column) <= start, ((lineno, column), start)
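TokenProxy is what lets the parser peek ahead speculatively when deciding whether a soft keyword like "match" starts a statement: tokens consumed inside a release() block are cached and replayed on normal iteration. A standalone usage sketch, assuming TokenProxy is importable from blib2to3.pgen2.driver as added in this diff; the token stream here is a plain list, not blib2to3's tokenizer quintuples:

from blib2to3.pgen2.driver import TokenProxy

proxy = TokenProxy(iter(["match", "x", ":", "NEWLINE"]))

with proxy.release():
    # Speculative lookahead: everything eaten here is cached in the
    # active release range.
    assert proxy.can_advance(2)
    assert proxy.eat(0) == "match"

# Normal iteration replays the cached tokens before pulling fresh
# ones from the underlying generator.
assert next(proxy) == "match"
assert next(proxy) == "x"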
2 changes: 2 additions & 0 deletions src/blib2to3/pgen2/grammar.py
@@ -89,6 +89,7 @@ def __init__(self) -> None:
self.dfas: Dict[int, DFAS] = {}
self.labels: List[Label] = [(0, "EMPTY")]
self.keywords: Dict[str, int] = {}
self.soft_keywords: Dict[str, int] = {}
self.tokens: Dict[int, int] = {}
self.symbol2label: Dict[str, int] = {}
self.start = 256
@@ -136,6 +137,7 @@ def copy(self: _P) -> _P:
"number2symbol",
"dfas",
"keywords",
"soft_keywords",
"tokens",
"symbol2label",
):
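The new soft_keywords map mirrors keywords but holds identifiers such as "match" and "case" that only act as keywords in specific positions. The following is a purely illustrative sketch of how a parser might consult both maps; it is not blib2to3's actual classification logic (which lives in pgen2/parse.py), and classify/allow_soft are invented names:

from typing import Optional
from blib2to3.pgen2.grammar import Grammar

def classify(grammar: Grammar, token_type: int, value: str,
             allow_soft: bool) -> Optional[int]:
    # Hard keywords always resolve to their keyword label.
    if value in grammar.keywords:
        return grammar.keywords[value]
    # Soft keywords resolve to a keyword label only when the current
    # parsing context allows it; otherwise they stay plain NAME tokens.
    if allow_soft and value in grammar.soft_keywords:
        return grammar.soft_keywords[value]
    return grammar.tokens.get(token_type)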