Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reduce modulegraph stack depth and implement 'yield from' #5698

Merged
merged 4 commits into from Apr 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -110,3 +110,4 @@ venv.bak/

# VSCode projects
*.code-workspace
*.vscode
120 changes: 51 additions & 69 deletions PyInstaller/lib/modulegraph/modulegraph.py
Expand Up @@ -1421,7 +1421,8 @@ def run_script(self, pathname, caller=None):
co = compile(co_ast, pathname, 'exec', 0, True)
m = self.createNode(Script, pathname)
self._updateReference(caller, m, None)
self._scan_code(m, co, co_ast)
n = self._scan_code(m, co, co_ast)
self._process_imports(n)
m.code = co
if self.replace_paths:
m.code = self._replace_paths_in_code(m.code)
Expand Down Expand Up @@ -1495,7 +1496,28 @@ def import_hook(
source_package = self._determine_parent(source_module)
target_package, target_module_partname = self._find_head_package(
source_package, target_module_partname, level)
target_module = self._load_tail(target_package, target_module_partname)

self.msgin(4, "load_tail", target_package, target_module_partname)

submodule = target_package
while target_module_partname:
i = target_module_partname.find('.')
if i < 0:
i = len(target_module_partname)
head, target_module_partname = target_module_partname[
:i], target_module_partname[i+1:]
mname = "%s.%s" % (submodule.identifier, head)
submodule = self._safe_import_module(head, mname, submodule)

if submodule is None:
# FIXME: Why do we no longer return a MissingModule instance?
# result = self.createNode(MissingModule, mname)
self.msgout(4, "raise ImportError: No module named", mname)
raise ImportError("No module named " + repr(mname))

self.msgout(4, "load_tail ->", submodule)

target_module = submodule
target_modules = [target_module]

# If this is a "from"-style import *AND* this target module is
Expand Down Expand Up @@ -1681,50 +1703,6 @@ def _find_head_package(
raise ImportError("No module named " + target_package_name)


def _load_tail(self, package, submodule_name):
"""
Import the submodule with the passed name and all parent packages of
this module from the previously imported parent package corresponding
to the passed graph node.

Parameters
----------
package : Package
Graph node of the previously imported package containing this
submodule.
submodule_name : str
Name of the submodule to be imported in either qualified (e.g.,
`email.mime.base`) or unqualified (e.g., `base`) form.

Returns
----------
Node
Graph node created for this submodule.

Raises
----------
ImportError
If this submodule is unimportable.
"""
self.msgin(4, "load_tail", package, submodule_name)

submodule = package
while submodule_name:
i = submodule_name.find('.')
if i < 0:
i = len(submodule_name)
head, submodule_name = submodule_name[:i], submodule_name[i+1:]
mname = "%s.%s" % (submodule.identifier, head)
submodule = self._safe_import_module(head, mname, submodule)

if submodule is None:
# FIXME: Why do we no longer return a MissingModule instance?
# result = self.createNode(MissingModule, mname)
self.msgout(4, "raise ImportError: No module named", mname)
raise ImportError("No module named " + repr(mname))

self.msgout(4, "load_tail ->", submodule)
return submodule


#FIXME: Refactor from a generator yielding graph nodes into a non-generator
Expand Down Expand Up @@ -2064,7 +2042,26 @@ def _safe_import_module(
self.msgout(3, "safe_import_module -> None (%r)" % exc)
return None

module = self._load_module(module_name, pathname, loader)
(module, co) = self._load_module(module_name, pathname, loader)
if co is not None:
try:
if isinstance(co, ast.AST):
co_ast = co
co = compile(co_ast, pathname, 'exec', 0, True)
else:
co_ast = None
n = self._scan_code(module, co, co_ast)
self._process_imports(n)

if self.replace_paths:
co = self._replace_paths_in_code(co)
module.code = co
except SyntaxError:
self.msg(
1, "safe_import_module: SyntaxError in ", pathname,
)
cls = InvalidSourceModule
module = self.createNode(cls, module_name)

# If this is a submodule rather than top-level module...
if parent_module is not None:
Expand Down Expand Up @@ -2127,7 +2124,7 @@ def _load_module(self, fqname, pathname, loader):
fqname, [])

if isinstance(m, NamespacePackage):
return m
return (m, None)

co = None
if loader is BUILTIN_MODULE:
Expand Down Expand Up @@ -2166,26 +2163,9 @@ def _load_module(self, fqname, pathname, loader):

m = self.createNode(cls, fqname)
m.filename = pathname
if co is not None:
try:
if isinstance(co, ast.AST):
co_ast = co
co = compile(co_ast, pathname, 'exec', 0, True)
else:
co_ast = None
self._scan_code(m, co, co_ast)

if self.replace_paths:
co = self._replace_paths_in_code(co)
m.code = co
except SyntaxError:
self.msg(1, "load_module: SyntaxError in ", pathname)
cls = InvalidSourceModule
m = self.createNode(cls, fqname)

self.msgout(2, "load_module ->", m)
return m

return (m, co)

def _safe_import_hook(
self, target_module_partname, source_module, target_attr_names,
Expand Down Expand Up @@ -2629,6 +2609,10 @@ def _scan_code(
Optional abstract syntax tree (AST) of this module if any or `None`
otherwise. Defaults to `None`, in which case the passed
`module_code_object` is parsed instead.
Returns
----------
module : Node
Graph node of the module to be parsed.
"""

# For safety, guard against multiple scans of the same module by
Expand Down Expand Up @@ -2657,9 +2641,7 @@ def _scan_code(
self._scan_bytecode(
module, module_code_object, is_scanning_imports=True)

# Add all imports parsed above to this graph.
self._process_imports(module)

return module

def _scan_ast(self, module, module_code_object_ast):
"""
Expand Down
8 changes: 2 additions & 6 deletions PyInstaller/lib/modulegraph/util.py
Expand Up @@ -136,16 +136,12 @@ def iterate_instructions(code_object):
Yields `dis.Instruction`. After each code-block (`co_code`), `None` is
yielded to mark the end of the block and to interrupt the steam.
"""
# TODO: Implement "yield from" for python 3

for instruction in get_instructions(code_object):
yield instruction
yield from get_instructions(code_object)

yield None

# For each constant in this code object that is itself a code object,
# parse this constant in the same manner.
for constant in code_object.co_consts:
if inspect.iscode(constant):
for instruction in iterate_instructions(constant):
yield instruction
yield from iterate_instructions(constant)
1 change: 1 addition & 0 deletions news/5698.bugfix.rst
@@ -0,0 +1 @@
Improve performance and reduce stack usage of module scanning.
65 changes: 0 additions & 65 deletions tests/unit/test_modulegraph/test_modulegraph.py
Expand Up @@ -699,71 +699,6 @@ def test_determine_parent(self):
def test_find_head_package(self):
self.fail("find_head_package")

def test_load_tail(self):
# XXX: This test is dodgy!

class MockedModuleGraph(modulegraph.ModuleGraph):
def _safe_import_module(self, partname, fqname, parent):
record.append((partname, fqname, parent))
if partname == 'raises' or '.raises.' in fqname:
# FIXME: original _load_tail returned a MissingModule if
# _import_module did return None. PyInstaller changed this
# in cae47e4f5b51a94ac3ceb5d093283ba0cc895589
return self.createNode(modulegraph.MissingModule, fqname)
return modulegraph.Node(fqname)

graph = MockedModuleGraph()

record = []
root = modulegraph.Node('root')
m = graph._load_tail(root, '')
self.assertTrue(m is root)
self.assertEqual(record, [
])

record = []
root = modulegraph.Node('root')
m = graph._load_tail(root, 'sub')
self.assertFalse(m is root)
self.assertEqual(record, [
('sub', 'root.sub', root),
])

record = []
root = modulegraph.Node('root')
m = graph._load_tail(root, 'sub.sub1')
self.assertFalse(m is root)
node = modulegraph.Node('root.sub')
self.assertEqual(record, [
('sub', 'root.sub', root),
('sub1', 'root.sub.sub1', node),
])

record = []
root = modulegraph.Node('root')
m = graph._load_tail(root, 'sub.sub1.sub2')
self.assertFalse(m is root)
node = modulegraph.Node('root.sub')
node2 = modulegraph.Node('root.sub.sub1')
self.assertEqual(record, [
('sub', 'root.sub', root),
('sub1', 'root.sub.sub1', node),
('sub2', 'root.sub.sub1.sub2', node2),
])

n = graph._load_tail(root, 'raises')
self.assertIsInstance(n, modulegraph.MissingModule)
self.assertEqual(n.identifier, 'root.raises')

n = graph._load_tail(root, 'sub.raises')
self.assertIsInstance(n, modulegraph.MissingModule)
self.assertEqual(n.identifier, 'root.sub.raises')

n = graph._load_tail(root, 'sub.raises.sub')
self.assertIsInstance(n, modulegraph.MissingModule)
self.assertEqual(n.identifier, 'root.sub.raises.sub')



@expectedFailure
def test_ensure_fromlist(self):
Expand Down