From 1a08d2a94aae13253f3c5b0abed8829472c1c49d Mon Sep 17 00:00:00 2001 From: nrser Date: Tue, 21 Feb 2023 23:28:20 -0800 Subject: [PATCH] Use blib2to3 parser to support match statement The built-in lib2to3 does not support pattern matching (Python 3.10+): https://docs.python.org/3.11/library/2to3.html#module-lib2to3 The [black][] project managed to get some level of parsing support for `match` out of their modified version `blib2to3`, see: 1. https://github.com/psf/black/issues/2242 2. https://github.com/psf/black/pull/2586 [black]: https://github.com/psf/black This change adds `black` as a dependency and switches to using `blib2to3` to parse. Tests pass, but that's all that's been attempted thus far. --- docspec-python/pyproject.toml | 3 +- docspec-python/src/docspec_python/parser.py | 41 ++++++++++++++++----- docspec-python/test/test_parser.py | 26 +++++++++++++ 3 files changed, 59 insertions(+), 11 deletions(-) diff --git a/docspec-python/pyproject.toml b/docspec-python/pyproject.toml index e8544df..f7e578e 100644 --- a/docspec-python/pyproject.toml +++ b/docspec-python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "docspec-python" -version = "2.0.2" +version = "2.0.2+blib2to3" description = "A parser based on lib2to3 producing docspec data from Python source code." 
authors = ["Niklas Rosenstein "] license = "MIT" @@ -12,6 +12,7 @@ packages = [{ include = "docspec_python", from="src" }] python = "^3.7" docspec = "^2.0.2" "nr.util" = ">=0.7.0" +black = "^23.1.0" [tool.poetry.dev-dependencies] mypy = "*" diff --git a/docspec-python/src/docspec_python/parser.py b/docspec-python/src/docspec_python/parser.py index 839e9eb..e362345 100644 --- a/docspec-python/src/docspec_python/parser.py +++ b/docspec-python/src/docspec_python/parser.py @@ -42,11 +42,11 @@ Location, Module, _ModuleMembers) -from lib2to3.refactor import RefactoringTool # type: ignore -from lib2to3.pgen2 import token -from lib2to3.pgen2.parse import ParseError -from lib2to3.pygram import python_symbols as syms -from lib2to3.pytree import Leaf, Node +from black.parsing import lib2to3_parse +from blib2to3.pgen2 import token +from blib2to3.pgen2.parse import ParseError +from blib2to3.pygram import python_symbols as syms +from blib2to3.pytree import Leaf, Node def dedent_docstring(s): @@ -65,6 +65,13 @@ def find(predicate, iterable): @dataclasses.dataclass class ParserOptions: + # NOTE (@nrser) This is no longer used. It was passed to + # `lib2to3.refactor.RefactoringTool`, but that's been swapped out for + # `black.parsing.lib2to3_parse`, which does not take the same options. + # + # It looks like it supported Python 2.x code, and I don't see anything + # before 3.3 in `black.mode.TargetVersion`, so 2.x might be completely off + # the table when using the Black parser. print_function: bool = True treat_singleline_comment_blocks_as_docstrings: bool = False @@ -79,13 +86,11 @@ def parse_to_ast(self, code, filename): Parses the string *code* to an AST with #lib2to3. """ - options = {'print_function': self.options.print_function} - try: # NOTE (@NiklasRosenstein): Adding newline at the end, a ParseError # could be raised without a trailing newline (tested in CPython 3.6 # and 3.7). 
- return RefactoringTool([], options).refactor_string(code + '\n', filename) + return lib2to3_parse(code + '\n') except ParseError as exc: raise ParseError(exc.msg, exc.type, exc.value, tuple(exc.context) + (filename,)) @@ -243,8 +248,24 @@ def parse_statement(self, parent, stmt): def parse_decorator(self, node): assert node.children[0].value == '@' - name = self.name_to_string(node.children[1]) - call_expr = self.nodes_to_string(node.children[2:]).strip() + + # NOTE (@nrser) I have no idea why `blib2to3` parses some decorators with a 'power' + # node (which _seems_ to refer to the exponent operator `**`), but it + # does. + # + # The hint I eventually found was: + # + # https://github.com/psf/black/blob/b0d1fba7ac3be53c71fb0d3211d911e629f8aecb/src/black/nodes.py#L657 + # + # Anyways, this works around that curiosity. + if node.children[1].type == syms.power: + name = self.name_to_string(node.children[1].children[0]) + call_expr = self.nodes_to_string(node.children[1].children[1:]).strip() + + else: + name = self.name_to_string(node.children[1]) + call_expr = self.nodes_to_string(node.children[2:]).strip() + return Decoration(location=self.location_from(node), name=name, args=call_expr or None) def parse_funcdef(self, parent, node, is_async, decorations): diff --git a/docspec-python/test/test_parser.py b/docspec-python/test/test_parser.py index 01ed949..ecedc07 100644 --- a/docspec-python/test/test_parser.py +++ b/docspec-python/test/test_parser.py @@ -472,3 +472,29 @@ def build_docker_image( return_type="Task" ), ] + +@docspec_test() +def test_funcdef_with_match_statement(): + """ + def f(x): + match x: + case str(s): + return "string" + case Path() as p: + return "path" + case int(n) | float(n): + return "number" + case _: + return "idk" + """ + + return [ + mkfunc( + "f", + None, + 0, + [ + Argument(loc, "x", Argument.Type.POSITIONAL, None), + ], + ), + ]