Make most of blib2to3 directly typed and mypyc-compatible
This used a combination of retype and pytype's merge-pyi to do the
initial merges of the stubs, which then required manual tweaking to
actually typecheck and work with mypyc.

Co-authored-by: Sanjit Kalapatapu <sanjitkal@gmail.com>
Co-authored-by: Michael J. Sullivan <sully@msully.net>
msullivan and SanjitKal committed Oct 30, 2019
1 parent 31f4105 commit 443f6d8
Showing 25 changed files with 682 additions and 762 deletions.
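
The recurring pattern across the files below is mechanical: each deleted .pyi stub's signatures are folded into the corresponding .py module as inline annotations. A minimal before/after sketch of the idea (the stub line is illustrative, not quoted from the deleted driver.pyi):

    # Before: the signature lived only in a stub file (driver.pyi, deleted below):
    #     def parse_string(self, text: Text, debug: bool = ...) -> NL: ...
    # After: the annotation is written directly in driver.py itself:
    from typing import Text
    from blib2to3.pytree import NL

    class Driver:
        def parse_string(self, text: Text, debug: bool = False) -> NL:
            """Parse a string and return the syntax tree."""
            ...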
2 changes: 1 addition & 1 deletion .appveyor.yml
@@ -7,7 +7,7 @@ build: off

test_script:
- C:\Python36\python.exe tests/test_black.py
- C:\Python36\python.exe -m mypy black.py blackd.py tests/test_black.py
- C:\Python36\python.exe -m mypy black.py blackd.py tests/test_black.py blib2to3

after_test:
- C:\Python36\python.exe -m pip install pyinstaller
1 change: 1 addition & 0 deletions .gitignore
@@ -12,3 +12,4 @@ pip-wheel-metadata/
_black_version.py
.idea
.eggs
.dmypy.json
2 changes: 1 addition & 1 deletion black.py
@@ -865,7 +865,7 @@ def visit(self, node: LN) -> Iterator[T]:
        if node.type < 256:
            name = token.tok_name[node.type]
        else:
            name = type_repr(node.type)
            name = str(type_repr(node.type))
        yield from getattr(self, f"visit_{name}", self.visit_default)(node)

    def visit_default(self, node: LN) -> Iterator[T]:
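Background on this one-liner: lib2to3's type_repr() falls back to returning the raw numeric type when no symbol name is registered, so its return type is a union of str and int; the str() call keeps name a plain str before it is interpolated into the visit_{name} lookup. A sketch of the dispatch, based on that fallback behavior:

    name = str(type_repr(node.type))  # Union[Text, int] -> str
    visitor = getattr(self, f"visit_{name}", self.visit_default)
    yield from visitor(node)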
1 change: 0 additions & 1 deletion blib2to3/__init__.pyi

This file was deleted.

10 changes: 0 additions & 10 deletions blib2to3/pgen2/__init__.pyi

This file was deleted.

2 changes: 2 additions & 0 deletions blib2to3/pgen2/conv.py
@@ -1,6 +1,8 @@
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

# mypy: ignore-errors

"""Convert graminit.[ch] spit out by pgen to Python code.
Pgen is the Python parser generator. It is useful to quickly create a
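The added # mypy: ignore-errors line is mypy's inline per-module configuration comment: the file is still parsed (and can still be compiled), but no type errors are reported for it. A minimal sketch of the directive in an otherwise untyped module (module contents illustrative):

    # mypy: ignore-errors

    def convert(path):  # unannotated; mypy reports nothing for this file
        return open(path).read()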
65 changes: 50 additions & 15 deletions blib2to3/pgen2/driver.py
@@ -22,20 +22,42 @@
import logging
import pkgutil
import sys
from typing import (
    Any,
    Callable,
    IO,
    Iterable,
    List,
    Optional,
    Text,
    Tuple,
    Union,
    Sequence,
)

# Pgen imports
from . import grammar, parse, token, tokenize, pgen
from logging import Logger
from blib2to3.pytree import _Convert, NL
from blib2to3.pgen2.grammar import Grammar

Path = Union[str, "os.PathLike[str]"]


class Driver(object):
    def __init__(self, grammar, convert=None, logger=None):
    def __init__(
        self,
        grammar: Grammar,
        convert: Optional[_Convert] = None,
        logger: Optional[Logger] = None,
    ) -> None:
        self.grammar = grammar
        if logger is None:
            logger = logging.getLogger(__name__)
        self.logger = logger
        self.convert = convert

    def parse_tokens(self, tokens, debug=False):
    def parse_tokens(self, tokens: Iterable[Any], debug: bool = False) -> NL:
        """Parse a series of tokens and return the syntax tree."""
        # XXX Move the prefix computation into a wrapper around tokenize.
        p = parse.Parser(self.grammar, self.convert)
@@ -91,32 +113,36 @@ def parse_tokens(self, tokens, debug=False):
                    column = 0
        else:
            # We never broke out -- EOF is too soon (how can this happen???)
            assert start is not None
            raise parse.ParseError("incomplete input", type, value, (prefix, start))
        assert p.rootnode is not None
        return p.rootnode

    def parse_stream_raw(self, stream, debug=False):
    def parse_stream_raw(self, stream: IO[Text], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        tokens = tokenize.generate_tokens(stream.readline, grammar=self.grammar)
        return self.parse_tokens(tokens, debug)

    def parse_stream(self, stream, debug=False):
    def parse_stream(self, stream: IO[Text], debug: bool = False) -> NL:
        """Parse a stream and return the syntax tree."""
        return self.parse_stream_raw(stream, debug)

    def parse_file(self, filename, encoding=None, debug=False):
    def parse_file(
        self, filename: Path, encoding: Optional[Text] = None, debug: bool = False,
    ) -> NL:
        """Parse a file and return the syntax tree."""
        with io.open(filename, "r", encoding=encoding) as stream:
            return self.parse_stream(stream, debug)

    def parse_string(self, text, debug=False):
    def parse_string(self, text: Text, debug: bool = False) -> NL:
        """Parse a string and return the syntax tree."""
        tokens = tokenize.generate_tokens(
            io.StringIO(text).readline, grammar=self.grammar
        )
        return self.parse_tokens(tokens, debug)

    def _partially_consume_prefix(self, prefix, column):
        lines = []
    def _partially_consume_prefix(self, prefix: Text, column: int) -> Tuple[Text, Text]:
        lines: List[str] = []
        current_line = ""
        current_column = 0
        wait_for_nl = False
@@ -143,7 +169,7 @@ def _partially_consume_prefix(self, prefix, column):
        return "".join(lines), current_line


def _generate_pickle_name(gt, cache_dir=None):
def _generate_pickle_name(gt: Path, cache_dir: Optional[Path] = None) -> Text:
    head, tail = os.path.splitext(gt)
    if tail == ".txt":
        tail = ""
@@ -154,14 +180,20 @@ def _generate_pickle_name(gt, cache_dir=None):
    return name


def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None):
def load_grammar(
    gt: Text = "Grammar.txt",
    gp: Optional[Text] = None,
    save: bool = True,
    force: bool = False,
    logger: Optional[Logger] = None,
) -> Grammar:
    """Load the grammar (maybe from a pickle)."""
    if logger is None:
        logger = logging.getLogger(__name__)
    gp = _generate_pickle_name(gt) if gp is None else gp
    if force or not _newer(gp, gt):
        logger.info("Generating grammar tables from %s", gt)
        g = pgen.generate_grammar(gt)
        g: grammar.Grammar = pgen.generate_grammar(gt)
        if save:
            logger.info("Writing grammar tables to %s", gp)
            try:
@@ -174,7 +206,7 @@ def load_grammar(gt="Grammar.txt", gp=None, save=True, force=False, logger=None)
    return g


def _newer(a, b):
def _newer(a: Text, b: Text) -> bool:
    """Inquire whether file a was written since file b."""
    if not os.path.exists(a):
        return False
@@ -183,7 +215,9 @@ def _newer(a, b):
    return os.path.getmtime(a) >= os.path.getmtime(b)


def load_packaged_grammar(package, grammar_source, cache_dir=None):
def load_packaged_grammar(
    package: str, grammar_source: Text, cache_dir: Optional[Path] = None
) -> grammar.Grammar:
    """Normally, loads a pickled grammar by doing
        pkgutil.get_data(package, pickled_grammar)
    where *pickled_grammar* is computed from *grammar_source* by adding the
@@ -199,18 +233,19 @@ def load_packaged_grammar(package, grammar_source, cache_dir=None):
        return load_grammar(grammar_source, gp=gp)
    pickled_name = _generate_pickle_name(os.path.basename(grammar_source), cache_dir)
    data = pkgutil.get_data(package, pickled_name)
    assert data is not None
    g = grammar.Grammar()
    g.loads(data)
    return g


def main(*args):
def main(*args: Text) -> bool:
    """Main program, when run as a script: produce grammar pickle files.
    Calls load_grammar for each argument, a path to a grammar text file.
    """
    if not args:
        args = sys.argv[1:]
        args = tuple(sys.argv[1:])
    logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
    for gt in args:
        load_grammar(gt, save=True, force=True)
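With the driver fully annotated, a typed round trip through the public entry points looks like the following sketch (assuming blib2to3 is importable and its Grammar.txt is present in the working directory; this usage is not part of the diff):

    from blib2to3 import pytree
    from blib2to3.pgen2 import driver

    d = driver.Driver(driver.load_grammar("Grammar.txt"), convert=pytree.convert)
    tree = d.parse_string("x = 1\n")  # returns NL, i.e. a Node or Leaf
    print(tree)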
24 changes: 0 additions & 24 deletions blib2to3/pgen2/driver.pyi

This file was deleted.

54 changes: 37 additions & 17 deletions blib2to3/pgen2/grammar.py
@@ -16,10 +16,17 @@
import os
import pickle
import tempfile
from typing import Any, Dict, List, Optional, Text, Tuple, TypeVar, Union

# Local imports
from . import token

_P = TypeVar("_P", bound="Grammar")
Label = Tuple[int, Optional[Text]]
DFA = List[List[Tuple[int, int]]]
DFAS = Tuple[DFA, Dict[int, int]]
Path = Union[str, "os.PathLike[str]"]


class Grammar(object):
    """Pgen parsing tables conversion class.
@@ -75,38 +82,51 @@ class Grammar(object):
    """

    def __init__(self):
        self.symbol2number = {}
        self.number2symbol = {}
        self.states = []
        self.dfas = {}
        self.labels = [(0, "EMPTY")]
        self.keywords = {}
        self.tokens = {}
        self.symbol2label = {}
    def __init__(self) -> None:
        self.symbol2number: Dict[str, int] = {}
        self.number2symbol: Dict[int, str] = {}
        self.states: List[DFA] = []
        self.dfas: Dict[int, DFAS] = {}
        self.labels: List[Label] = [(0, "EMPTY")]
        self.keywords: Dict[str, int] = {}
        self.tokens: Dict[int, int] = {}
        self.symbol2label: Dict[str, int] = {}
        self.start = 256
        # Python 3.7+ parses async as a keyword, not an identifier
        self.async_keywords = False

    def dump(self, filename):
    def dump(self, filename: Path) -> None:
        """Dump the grammar tables to a pickle file."""

        # mypyc generates objects that don't have a __dict__, but they
        # do have __getstate__ methods that will return an equivalent
        # dictionary
        if hasattr(self, "__dict__"):
            d = self.__dict__
        else:
            d = self.__getstate__()  # type: ignore

        with tempfile.NamedTemporaryFile(
            dir=os.path.dirname(filename), delete=False
        ) as f:
            pickle.dump(self.__dict__, f, pickle.HIGHEST_PROTOCOL)
            pickle.dump(d, f, pickle.HIGHEST_PROTOCOL)
        os.replace(f.name, filename)

    def load(self, filename):
    def _update(self, attrs: Dict[str, Any]) -> None:
        for k, v in attrs.items():
            setattr(self, k, v)

    def load(self, filename: Path) -> None:
        """Load the grammar tables from a pickle file."""
        with open(filename, "rb") as f:
            d = pickle.load(f)
        self.__dict__.update(d)
        self._update(d)

    def loads(self, pkl):
    def loads(self, pkl: bytes) -> None:
        """Load the grammar tables from a pickle bytes object."""
        self.__dict__.update(pickle.loads(pkl))
        self._update(pickle.loads(pkl))

    def copy(self):
    def copy(self: _P) -> _P:
        """
        Copy the grammar.
        """
@@ -126,7 +146,7 @@ def copy(self):
        new.async_keywords = self.async_keywords
        return new

    def report(self):
    def report(self) -> None:
        """Dump the grammar tables to standard output, for debugging."""
        from pprint import pprint

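The __getstate__ fallback in dump() and the setattr()-based _update() address the same constraint: mypyc-compiled instances have no mutable __dict__, so neither pickling direction may touch it. A round-trip sketch of the new pattern (file name illustrative):

    g = Grammar()
    g.dump("grammar.pickle")   # pickles __dict__, or __getstate__() under mypyc
    g2 = Grammar()
    g2.load("grammar.pickle")  # restores attributes one at a time via setattr()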
30 changes: 0 additions & 30 deletions blib2to3/pgen2/grammar.pyi

This file was deleted.

13 changes: 8 additions & 5 deletions blib2to3/pgen2/literals.py
@@ -3,9 +3,12 @@

"""Safely evaluate Python string literals without using eval()."""

import regex as re
import re

simple_escapes = {
from typing import Dict, Match, Text


simple_escapes: Dict[Text, Text] = {
    "a": "\a",
    "b": "\b",
    "f": "\f",
@@ -19,7 +22,7 @@
}


def escape(m):
def escape(m: Match[Text]) -> Text:
    all, tail = m.group(0, 1)
    assert all.startswith("\\")
    esc = simple_escapes.get(tail)
@@ -41,7 +44,7 @@ def escape(m):
    return chr(i)


def evalString(s):
def evalString(s: Text) -> Text:
    assert s.startswith("'") or s.startswith('"'), repr(s[:1])
    q = s[0]
    if s[:3] == q * 3:
@@ -52,7 +55,7 @@ def evalString(s):
    return re.sub(r"\\(\'|\"|\\|[abfnrtv]|x.{0,2}|[0-7]{1,3})", escape, s)


def test():
def test() -> None:
    for i in range(256):
        c = chr(i)
        s = repr(c)
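A quick sanity check of the annotated escape machinery (input chosen for illustration): evalString() strips the quotes, then escape() rewrites each backslash sequence via simple_escapes or the hex/octal branches.

    from blib2to3.pgen2.literals import evalString

    assert evalString(r"'a\x41\n'") == "aA\n"  # \x41 -> "A", \n -> newline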
