From 673820e3df5e7d78b0a3a121562a2b77948cc619 Mon Sep 17 00:00:00 2001 From: xoviat <49173759+xoviat@users.noreply.github.com> Date: Thu, 8 Apr 2021 09:12:09 -0500 Subject: [PATCH] reduce modulegraph stack depth and implement 'yield from' (#5698) --- .gitignore | 1 + PyInstaller/lib/modulegraph/modulegraph.py | 120 ++++++++---------- PyInstaller/lib/modulegraph/util.py | 8 +- news/5698.bugfix.rst | 1 + .../unit/test_modulegraph/test_modulegraph.py | 65 ---------- 5 files changed, 55 insertions(+), 140 deletions(-) create mode 100644 news/5698.bugfix.rst diff --git a/.gitignore b/.gitignore index 9e02e59a9c..2760fa2c42 100644 --- a/.gitignore +++ b/.gitignore @@ -110,3 +110,4 @@ venv.bak/ # VSCode projects *.code-workspace +*.vscode diff --git a/PyInstaller/lib/modulegraph/modulegraph.py b/PyInstaller/lib/modulegraph/modulegraph.py index 05b96491f3..788de51f1a 100644 --- a/PyInstaller/lib/modulegraph/modulegraph.py +++ b/PyInstaller/lib/modulegraph/modulegraph.py @@ -1421,7 +1421,8 @@ def run_script(self, pathname, caller=None): co = compile(co_ast, pathname, 'exec', 0, True) m = self.createNode(Script, pathname) self._updateReference(caller, m, None) - self._scan_code(m, co, co_ast) + n = self._scan_code(m, co, co_ast) + self._process_imports(n) m.code = co if self.replace_paths: m.code = self._replace_paths_in_code(m.code) @@ -1495,7 +1496,28 @@ def import_hook( source_package = self._determine_parent(source_module) target_package, target_module_partname = self._find_head_package( source_package, target_module_partname, level) - target_module = self._load_tail(target_package, target_module_partname) + + self.msgin(4, "load_tail", target_package, target_module_partname) + + submodule = target_package + while target_module_partname: + i = target_module_partname.find('.') + if i < 0: + i = len(target_module_partname) + head, target_module_partname = target_module_partname[ + :i], target_module_partname[i+1:] + mname = "%s.%s" % (submodule.identifier, head) + submodule = self._safe_import_module(head, mname, submodule) + + if submodule is None: + # FIXME: Why do we no longer return a MissingModule instance? + # result = self.createNode(MissingModule, mname) + self.msgout(4, "raise ImportError: No module named", mname) + raise ImportError("No module named " + repr(mname)) + + self.msgout(4, "load_tail ->", submodule) + + target_module = submodule target_modules = [target_module] # If this is a "from"-style import *AND* this target module is @@ -1681,50 +1703,6 @@ def _find_head_package( raise ImportError("No module named " + target_package_name) - def _load_tail(self, package, submodule_name): - """ - Import the submodule with the passed name and all parent packages of - this module from the previously imported parent package corresponding - to the passed graph node. - - Parameters - ---------- - package : Package - Graph node of the previously imported package containing this - submodule. - submodule_name : str - Name of the submodule to be imported in either qualified (e.g., - `email.mime.base`) or unqualified (e.g., `base`) form. - - Returns - ---------- - Node - Graph node created for this submodule. - - Raises - ---------- - ImportError - If this submodule is unimportable. - """ - self.msgin(4, "load_tail", package, submodule_name) - - submodule = package - while submodule_name: - i = submodule_name.find('.') - if i < 0: - i = len(submodule_name) - head, submodule_name = submodule_name[:i], submodule_name[i+1:] - mname = "%s.%s" % (submodule.identifier, head) - submodule = self._safe_import_module(head, mname, submodule) - - if submodule is None: - # FIXME: Why do we no longer return a MissingModule instance? - # result = self.createNode(MissingModule, mname) - self.msgout(4, "raise ImportError: No module named", mname) - raise ImportError("No module named " + repr(mname)) - - self.msgout(4, "load_tail ->", submodule) - return submodule #FIXME: Refactor from a generator yielding graph nodes into a non-generator @@ -2064,7 +2042,26 @@ def _safe_import_module( self.msgout(3, "safe_import_module -> None (%r)" % exc) return None - module = self._load_module(module_name, pathname, loader) + (module, co) = self._load_module(module_name, pathname, loader) + if co is not None: + try: + if isinstance(co, ast.AST): + co_ast = co + co = compile(co_ast, pathname, 'exec', 0, True) + else: + co_ast = None + n = self._scan_code(module, co, co_ast) + self._process_imports(n) + + if self.replace_paths: + co = self._replace_paths_in_code(co) + module.code = co + except SyntaxError: + self.msg( + 1, "safe_import_module: SyntaxError in ", pathname, + ) + cls = InvalidSourceModule + module = self.createNode(cls, module_name) # If this is a submodule rather than top-level module... if parent_module is not None: @@ -2127,7 +2124,7 @@ def _load_module(self, fqname, pathname, loader): fqname, []) if isinstance(m, NamespacePackage): - return m + return (m, None) co = None if loader is BUILTIN_MODULE: @@ -2166,26 +2163,9 @@ def _load_module(self, fqname, pathname, loader): m = self.createNode(cls, fqname) m.filename = pathname - if co is not None: - try: - if isinstance(co, ast.AST): - co_ast = co - co = compile(co_ast, pathname, 'exec', 0, True) - else: - co_ast = None - self._scan_code(m, co, co_ast) - - if self.replace_paths: - co = self._replace_paths_in_code(co) - m.code = co - except SyntaxError: - self.msg(1, "load_module: SyntaxError in ", pathname) - cls = InvalidSourceModule - m = self.createNode(cls, fqname) self.msgout(2, "load_module ->", m) - return m - + return (m, co) def _safe_import_hook( self, target_module_partname, source_module, target_attr_names, @@ -2629,6 +2609,10 @@ def _scan_code( Optional abstract syntax tree (AST) of this module if any or `None` otherwise. Defaults to `None`, in which case the passed `module_code_object` is parsed instead. + Returns + ---------- + module : Node + Graph node of the module to be parsed. """ # For safety, guard against multiple scans of the same module by @@ -2657,9 +2641,7 @@ def _scan_code( self._scan_bytecode( module, module_code_object, is_scanning_imports=True) - # Add all imports parsed above to this graph. - self._process_imports(module) - + return module def _scan_ast(self, module, module_code_object_ast): """ diff --git a/PyInstaller/lib/modulegraph/util.py b/PyInstaller/lib/modulegraph/util.py index fe087777f6..7c742da7b4 100644 --- a/PyInstaller/lib/modulegraph/util.py +++ b/PyInstaller/lib/modulegraph/util.py @@ -136,10 +136,7 @@ def iterate_instructions(code_object): Yields `dis.Instruction`. After each code-block (`co_code`), `None` is yielded to mark the end of the block and to interrupt the steam. """ - # TODO: Implement "yield from" for python 3 - - for instruction in get_instructions(code_object): - yield instruction + yield from get_instructions(code_object) yield None @@ -147,5 +144,4 @@ def iterate_instructions(code_object): # parse this constant in the same manner. for constant in code_object.co_consts: if inspect.iscode(constant): - for instruction in iterate_instructions(constant): - yield instruction + yield from iterate_instructions(constant) diff --git a/news/5698.bugfix.rst b/news/5698.bugfix.rst new file mode 100644 index 0000000000..3b5805a37e --- /dev/null +++ b/news/5698.bugfix.rst @@ -0,0 +1 @@ +Improve performance and reduce stack usage of module scanning. \ No newline at end of file diff --git a/tests/unit/test_modulegraph/test_modulegraph.py b/tests/unit/test_modulegraph/test_modulegraph.py index 62e0843e9a..1877cb891a 100644 --- a/tests/unit/test_modulegraph/test_modulegraph.py +++ b/tests/unit/test_modulegraph/test_modulegraph.py @@ -699,71 +699,6 @@ def test_determine_parent(self): def test_find_head_package(self): self.fail("find_head_package") - def test_load_tail(self): - # XXX: This test is dodgy! - - class MockedModuleGraph(modulegraph.ModuleGraph): - def _safe_import_module(self, partname, fqname, parent): - record.append((partname, fqname, parent)) - if partname == 'raises' or '.raises.' in fqname: - # FIXME: original _load_tail returned a MissingModule if - # _import_module did return None. PyInstaller changed this - # in cae47e4f5b51a94ac3ceb5d093283ba0cc895589 - return self.createNode(modulegraph.MissingModule, fqname) - return modulegraph.Node(fqname) - - graph = MockedModuleGraph() - - record = [] - root = modulegraph.Node('root') - m = graph._load_tail(root, '') - self.assertTrue(m is root) - self.assertEqual(record, [ - ]) - - record = [] - root = modulegraph.Node('root') - m = graph._load_tail(root, 'sub') - self.assertFalse(m is root) - self.assertEqual(record, [ - ('sub', 'root.sub', root), - ]) - - record = [] - root = modulegraph.Node('root') - m = graph._load_tail(root, 'sub.sub1') - self.assertFalse(m is root) - node = modulegraph.Node('root.sub') - self.assertEqual(record, [ - ('sub', 'root.sub', root), - ('sub1', 'root.sub.sub1', node), - ]) - - record = [] - root = modulegraph.Node('root') - m = graph._load_tail(root, 'sub.sub1.sub2') - self.assertFalse(m is root) - node = modulegraph.Node('root.sub') - node2 = modulegraph.Node('root.sub.sub1') - self.assertEqual(record, [ - ('sub', 'root.sub', root), - ('sub1', 'root.sub.sub1', node), - ('sub2', 'root.sub.sub1.sub2', node2), - ]) - - n = graph._load_tail(root, 'raises') - self.assertIsInstance(n, modulegraph.MissingModule) - self.assertEqual(n.identifier, 'root.raises') - - n = graph._load_tail(root, 'sub.raises') - self.assertIsInstance(n, modulegraph.MissingModule) - self.assertEqual(n.identifier, 'root.sub.raises') - - n = graph._load_tail(root, 'sub.raises.sub') - self.assertIsInstance(n, modulegraph.MissingModule) - self.assertEqual(n.identifier, 'root.sub.raises.sub') - - @expectedFailure def test_ensure_fromlist(self):