/
__init__.py
1382 lines (1123 loc) · 52.5 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#-----------------------------------------------------------------------------
# Copyright (c) 2005-2022, PyInstaller Development Team.
#
# Distributed under the terms of the GNU General Public License (version 2
# or later) with exception for distributing the bootloader.
#
# The full license is in the file COPYING.txt, distributed with this software.
#
# SPDX-License-Identifier: (GPL-2.0-or-later WITH Bootloader-exception)
#-----------------------------------------------------------------------------
from __future__ import annotations
import copy
import os
import sys
import textwrap
import fnmatch
from pathlib import Path
from collections import deque
from typing import Callable, Dict, Iterable
import pkg_resources
from PyInstaller import HOMEPATH, compat
from PyInstaller import log as logging
from PyInstaller.depend.imphookapi import PostGraphAPI
from PyInstaller.exceptions import ExecCommandFailed
from PyInstaller.utils.hooks.win32 import \
get_pywin32_module_file_attribute # noqa: F401
from PyInstaller import isolated
# Module-level logger used by the hook utility functions in this file.
logger = logging.getLogger(__name__)

# These extensions represent Python executables and should therefore be ignored when collecting data files.
# NOTE: .dylib files are not Python executables and should not be in this list.
PY_IGNORE_EXTENSIONS = set(compat.ALL_SUFFIXES)

# Some hooks need to save some values. This is the dict that can be used for that.
#
# When running tests, this variable should be reset before every test.
#
# For example, the 'wx' module needs the variable 'wxpubsub'. This tells PyInstaller which protocol of the wx module
# should be bundled.
hook_variables = {}
def __exec_python_cmd(cmd, env=None, capture_stdout=True):
    """
    Executes an externally spawned Python interpreter. If capture_stdout is set to True, returns anything that was
    emitted in the standard output as a single string. Otherwise, returns the exit code.

    Parameters
    ----------
    cmd : list
        Arguments passed to the spawned interpreter (for example ``['-c', statement]`` or ``[script, arg, ...]``).
    env : dict or iterable of (name, value) pairs, optional
        Additional environment variables, applied on top of a snapshot of ``os.environ``.
    capture_stdout : bool
        Selects between returning the stripped standard output (True) and the exit code (False).
    """
    # 'PyInstaller.config' cannot be imported as other top-level modules.
    from PyInstaller.config import CONF

    # Take a plain-dict snapshot of the current environment. A (deep) copy of `os.environ` is still an `os._Environ`
    # instance, whose item assignment calls `putenv()`; modifying such a copy would leak the changes made below
    # into the environment of the running process.
    pp_env = dict(os.environ)

    if env is not None:
        # Environment variable names are case-insensitive on Windows, where `os.environ` stores them upper-cased.
        # Normalize the caller-supplied names the same way, so that they override the inherited values instead of
        # ending up as duplicate entries.
        for key, value in dict(env).items():
            pp_env[key.upper() if os.name == 'nt' else key] = value

    # Prepend PYTHONPATH with pathex.
    # Some functions use some PyInstaller code in subprocess, so add PyInstaller HOMEPATH to sys.path as well.
    pp = os.pathsep.join(CONF['pathex'] + [HOMEPATH])

    # PYTHONPATH might be already defined in the 'env' argument or in the original 'os.environ'. Prepend it.
    if 'PYTHONPATH' in pp_env:
        pp = os.pathsep.join([pp_env.get('PYTHONPATH'), pp])
    pp_env['PYTHONPATH'] = pp

    if capture_stdout:
        txt = compat.exec_python(*cmd, env=pp_env)
        return txt.strip()
    else:
        return compat.exec_python_rc(*cmd, env=pp_env)
def __exec_statement(statement, capture_stdout=True):
    """
    Run the given (possibly indented) Python source via ``python -c`` in an externally spawned interpreter.

    The source is de-indented with :func:`textwrap.dedent` first. The result is whatever `__exec_python_cmd`
    returns for the given ``capture_stdout`` setting.
    """
    dedented_source = textwrap.dedent(statement)
    return __exec_python_cmd(['-c', dedented_source], capture_stdout=capture_stdout)
def exec_statement(statement: str):
    """
    Run a single Python statement in an externally-spawned interpreter and return what it wrote to the standard
    output, as a string.

    Examples::

        tk_version = exec_statement("from _tkinter import TK_VERSION; print(TK_VERSION)")

        mpl_data_dir = exec_statement("import matplotlib; print(matplotlib.get_data_path())")
        datas = [ (mpl_data_dir, "") ]

    Notes:
        As of v5.0, usage of this function is discouraged in favour of the
        new :mod:`PyInstaller.isolated` module.
    """
    # The statement is de-indented so that callers may pass indented triple-quoted strings.
    interpreter_args = ['-c', textwrap.dedent(statement)]
    return __exec_python_cmd(interpreter_args, capture_stdout=True)
def exec_statement_rc(statement: str):
    """
    Run a Python statement in an externally spawned interpreter and return the exit code.
    """
    # The statement is de-indented so that callers may pass indented triple-quoted strings.
    interpreter_args = ['-c', textwrap.dedent(statement)]
    return __exec_python_cmd(interpreter_args, capture_stdout=False)
def __exec_script(script_filename, *args, env=None, capture_stdout=True):
    """
    Run a Python script in an externally spawned interpreter. With capture_stdout set to True, the text emitted to
    the standard output is returned as a single string; otherwise, the exit code is returned.

    To prevent misuse, the script passed to utils.hooks.exec_script must be located in the
    `PyInstaller/utils/hooks/subproc` directory. Only the base name of ``script_filename`` is used; any directory
    components are discarded and the name is resolved against that directory.

    Raises
    ----------
    SystemError
        If no such script exists in the `subproc` directory.
    """
    subproc_dir = os.path.join(os.path.dirname(__file__), 'subproc')
    script_path = os.path.join(subproc_dir, os.path.basename(script_filename))

    if not os.path.exists(script_path):
        raise SystemError(
            "To prevent misuse, the script passed to PyInstaller.utils.hooks.exec_script must be located in the "
            "`PyInstaller/utils/hooks/subproc` directory."
        )

    # The script path is followed by the user-supplied arguments.
    return __exec_python_cmd([script_path, *args], env=env, capture_stdout=capture_stdout)
def exec_script(script_filename: str | bytes | os.PathLike, *args: str, env: Dict | Iterable | None = None):
    """
    Run a Python script in an externally spawned interpreter and return everything it emitted to the standard output
    as a single string.

    To prevent misuse, the script passed to utils.hooks.exec_script must be located in the
    `PyInstaller/utils/hooks/subproc` directory.
    """
    captured_output = __exec_script(script_filename, *args, env=env, capture_stdout=True)
    return captured_output
def exec_script_rc(script_filename: str | bytes | os.PathLike, *args: str, env: Dict | Iterable | None = None):
    """
    Run a Python script in an externally spawned interpreter and return the exit code.

    To prevent misuse, the script passed to utils.hooks.exec_script must be located in the
    `PyInstaller/utils/hooks/subproc` directory.
    """
    exit_code = __exec_script(script_filename, *args, env=env, capture_stdout=False)
    return exit_code
def eval_statement(statement: str):
    """
    Run a single Python statement in an externally-spawned interpreter and :func:`eval` whatever it printed.

    If the statement produced no output, an empty string is returned instead.

    Example::

        databases = eval_statement('''
            import sqlalchemy.databases
            print(sqlalchemy.databases.__all__)
            ''')
        for db in databases:
            hiddenimports.append("sqlalchemy.databases." + db)

    Notes:
        As of v5.0, usage of this function is discouraged in favour of the
        new :mod:`PyInstaller.isolated` module.
    """
    output = exec_statement(statement).strip()
    # An empty string is "not true" but still iterable, so callers can loop over the result unconditionally.
    # NOTE(review): eval() executes whatever the subprocess printed; the statement must come from a trusted source.
    return eval(output) if output else ''
def eval_script(script_filename: str | bytes | os.PathLike, *args: str, env: Dict | Iterable | None = None):
    """
    Run a script via :func:`exec_script` and :func:`eval` whatever it printed to the standard output.

    If the script produced no output, an empty string is returned instead.
    """
    output = exec_script(script_filename, *args, env=env).strip()
    # An empty string is "not true" but still iterable, so callers can loop over the result unconditionally.
    # NOTE(review): eval() executes whatever the subprocess printed; the script must come from a trusted source.
    return eval(output) if output else ''
@isolated.decorate
def get_pyextension_imports(module_name: str):
    """
    Return the list of modules that get loaded when the given binary (C/C++) Python extension is imported.

    Python extension files end with .so (Unix) or .pyd (Windows), and it is almost impossible to analyze a binary
    extension and its dependencies statically. The module also cannot be imported directly in the main process.
    Instead, the import is attempted in a subprocess, and the difference in the contents of sys.modules before and
    after the import is observed.

    The result is suitable for 'hiddenimports' in PyInstaller hook files. The module itself is not part of the
    returned list, and the order of the returned names is unspecified.
    """
    # Imports are local because of the `isolated.decorate` decorator applied to this function.
    import importlib
    import sys

    loaded_before = set(sys.modules)

    # Importing the module updates sys.modules.
    importlib.import_module(module_name)

    # Whatever appeared in sys.modules in the meantime was pulled in by the import - except for the module itself.
    newly_loaded = set(sys.modules) - loaded_before
    newly_loaded.discard(module_name)
    return list(newly_loaded)
def get_homebrew_path(formula: str = ''):
    """
    Return the homebrew path to the requested formula, or the global prefix when called with no argument.

    The path is obtained by running ``brew --prefix [formula]``.

    Returns the path as a string or None if not found (homebrew is not available, the command failed, or it produced
    no output).
    """
    import subprocess

    brewcmd = ['brew', '--prefix']
    if formula:
        brewcmd.append(formula)
        dbgstr = 'homebrew formula "%s"' % formula
    else:
        dbgstr = 'homebrew prefix'

    try:
        output = subprocess.check_output(brewcmd)
    except OSError:
        # The 'brew' executable could not be launched.
        logger.debug('Detected homebrew not installed')
        return None
    except subprocess.CalledProcessError:
        # 'brew' ran, but exited with a non-zero exit code.
        logger.debug('%s not installed', dbgstr)
        return None

    # Decode before logging, so that the message shows the actual path instead of the repr of a bytes object.
    path = output.strip().decode('utf8')  # Mac OS filenames are UTF-8
    logger.debug('Found %s at "%s"', dbgstr, path)
    return path or None
def remove_prefix(string: str, prefix: str):
    """
    Strip the given prefix from the string, provided that the string actually starts with it; if it does not, the
    string is returned unchanged.
    """
    if not string.startswith(prefix):
        return string
    return string[len(prefix):]
def remove_suffix(string: str, suffix: str):
    """
    Strip the given suffix from the string, provided that the string actually ends with it; if it does not, the
    string is returned unchanged.
    """
    # An empty suffix is handled explicitly; every string "ends" with it, but there is nothing to strip.
    if not suffix or not string.endswith(suffix):
        return string
    return string[:len(string) - len(suffix)]
# TODO: Do we really need a helper for this? This is pretty trivially obvious.
def remove_file_extension(filename: str):
    """
    Return the filename without its extension.

    For Python C modules, the whole extension suffix (e.g., '.cpython-34m.so') is removed. The suffixes listed in
    ``compat.EXTENSION_SUFFIXES`` are tried in order, and the first one that the filename ends with is stripped.
    """
    matching_suffix = next((suffix for suffix in compat.EXTENSION_SUFFIXES if filename.endswith(suffix)), None)
    if matching_suffix is not None:
        return filename[:filename.rfind(matching_suffix)]

    # No extension-module suffix matched; fall back to ordinary 'splitext'.
    root, _extension = os.path.splitext(filename)
    return root
@isolated.decorate
def can_import_module(module_name: str):
    """
    Check whether the specified module can be imported.

    Intended as a silent module availability check, as it does not print ModuleNotFoundError traceback to stderr when
    the module is unavailable. Any exception raised by the import attempt counts as "cannot be imported".

    Parameters
    ----------
    module_name : str
        Fully-qualified name of the module.

    Returns
    ----------
    bool
        Boolean indicating whether the module can be imported or not.
    """
    try:
        __import__(module_name)
    except Exception:
        return False
    else:
        return True
# TODO: Replace most calls to exec_statement() with calls to this function.
def get_module_attribute(module_name: str, attr_name: str):
    """
    Retrieve the value of the named attribute from the named module, or raise `AttributeError` if that is not
    possible.

    Since modules cannot be directly imported during analysis, the module is imported and the attribute is looked up
    in an isolated subprocess.

    Parameters
    ----------
    module_name : str
        Fully-qualified name of this module.
    attr_name : str
        Name of the attribute in this module to be retrieved.

    Returns
    ----------
    object
        Value of this attribute, as returned from the isolated subprocess.

    Raises
    ----------
    AttributeError
        If the attribute could not be retrieved for any reason (including failure to import the module).
    """
    def _fetch_attribute(module_name, attr_name):
        # Runs in the isolated subprocess, hence the local import.
        import importlib
        return getattr(importlib.import_module(module_name), attr_name)

    # All kinds of errors are reported as AttributeError, to preserve old behavior.
    try:
        return isolated.call(_fetch_attribute, module_name, attr_name)
    except Exception as e:
        raise AttributeError(f"Failed to retrieve attribute {attr_name} from module {module_name}") from e
def get_module_file_attribute(package: str):
    """
    Get the absolute path to the specified module or package.

    Modules and packages *must not* be directly imported in the main process during the analysis. Therefore, to
    avoid leaking the imports, this function uses an isolated subprocess when it needs to import the module and
    obtain its ``__file__`` attribute.

    Parameters
    ----------
    package : str
        Fully-qualified name of module or package.

    Returns
    ----------
    str
        Absolute path of this module.

    Raises
    ----------
    ImportError
        If the file name could not be determined, neither from the module's loader nor by importing the module.
    """
    # First, try to obtain the loader from the module spec, and the filename from the loader. It is the fastest way,
    # but does not work on certain modules in pywin32 that replace all module attributes with those of the .dll. In
    # addition, we need to avoid it for submodules/subpackages, because it ends up importing their parent package,
    # which would cause an import leak during the analysis.
    #
    # NOTE: `importlib.util.find_spec` is used in place of `pkgutil.find_loader`, which is deprecated as of python
    # 3.12 and removed in python 3.14. The exceptions caught here include the ones that `pkgutil.find_loader` used
    # to translate into ImportError.
    filename: str | None = None
    if '.' not in package:
        try:
            import importlib.util
            loader = importlib.util.find_spec(package).loader
            filename = loader.get_filename(package)
            # Apparently in the past, ``None`` could be returned for built-in ``datetime`` module. Just in case this
            # is still possible, return only if filename is valid.
            if filename:
                return filename
        except (AttributeError, ImportError, TypeError, ValueError):
            pass

    # Second attempt: try to obtain module/package's __file__ attribute in an isolated subprocess.
    @isolated.decorate
    def _get_module_file_attribute(package):
        # First try to use the loader from the module spec; it returns the filename even if the module or package
        # cannot be imported (e.g., C-extension module with missing dependencies).
        try:
            import importlib.util
            loader = importlib.util.find_spec(package).loader
            filename = loader.get_filename(package)
            # Safe-guard against ``None`` being returned (see comment in the non-isolated codepath).
            if filename:
                return filename
        except (AttributeError, ImportError, TypeError, ValueError):
            pass

        # Fall back to import attempt
        import importlib
        p = importlib.import_module(package)
        return p.__file__

    # The old behavior was to return ImportError (and that is what the test are also expecting...).
    try:
        filename = _get_module_file_attribute(package)
    except Exception as e:
        raise ImportError(f"Failed to obtain the __file__ attribute of package/module {package}!") from e

    return filename
def is_module_satisfies(
    requirements: Iterable | pkg_resources.Requirement,
    version: str | pkg_resources.Distribution | None = None,
    version_attr: str = "__version__",
):
    """
    Test if a :pep:`0440` requirement is installed.

    Parameters
    ----------
    requirements : str
        Requirements in `pkg_resources.Requirements.parse()` format.
    version : str
        Optional PEP 0440-compliant version (e.g., `3.14-rc5`) to be used _instead_ of the current version of this
        module. If non-`None`, all `setuptools` distributions for this module are ignored, and this version is
        compared against the version embedded in the passed requirements. The module name embedded in the passed
        requirements is ignored in this case, which permits arbitrary versions to be compared in a robust manner.
        See examples below.
    version_attr : str
        Optional name of the version attribute defined by this module, defaulting to `__version__`. It is consulted
        only if the `version` parameter is `None` (it usually is) _and_ no `setuptools` distribution exists for this
        module (one usually does).

    Returns
    ----------
    bool
        Boolean result of the desired validation.

    Raises
    ----------
    AttributeError
        If no `setuptools` distribution exists for this module _and_ this module defines no attribute whose name is
        the passed `version_attr` parameter.
    ValueError
        If the passed specification does _not_ comply with `pkg_resources.Requirements`_ syntax.

    Examples
    --------

    ::

        # Assume PIL 2.9.0, Sphinx 1.3.1, and SQLAlchemy 0.6 are all installed.
        >>> from PyInstaller.utils.hooks import is_module_satisfies
        >>> is_module_satisfies('sphinx >= 1.3.1')
        True
        >>> is_module_satisfies('sqlalchemy != 0.6')
        False
        >>> is_module_satisfies('sphinx >= 1.3.1; sqlalchemy != 0.6')
        False

        # Compare two arbitrary versions. In this case, the module name "sqlalchemy" is simply ignored.
        >>> is_module_satisfies('sqlalchemy != 0.6', version='0.5')
        True

        # Since the "pillow" project providing PIL publishes its version via the custom "PILLOW_VERSION" attribute
        # (rather than the standard "__version__" attribute), an attribute name is passed as a fallback to validate
        # PIL when not installed by setuptools. As PIL is usually installed by setuptools, this optional parameter
        # is usually ignored.
        >>> is_module_satisfies('PIL == 2.9.0', version_attr='PILLOW_VERSION')
        True

    .. seealso::

        `pkg_resources.Requirements`_ for the syntax details.

    .. _`pkg_resources.Requirements`:
            https://pythonhosted.org/setuptools/pkg_resources.html#id12
    """
    version_was_given = version is not None

    # Without an explicit version, first ask setuptools. This approach also supports non-version validation (e.g.,
    # of "["- and "]"-delimited extras) and is hence preferable.
    if not version_was_given:
        try:
            pkg_resources.get_distribution(requirements)
        except pkg_resources.DistributionNotFound:
            # No such distribution exists; fall back to the logic below.
            pass
        except (pkg_resources.UnknownExtra, pkg_resources.VersionConflict):
            # All existing distributions violate these requirements.
            return False
        else:
            # An existing distribution satisfies these requirements.
            return True

    # At this point, either a version was explicitly passed or no setuptools distribution exists for this module.
    # Parse a setuptools "Requirements" object from the requirements string.
    requirements_parsed = pkg_resources.Requirement.parse(requirements)

    # Without an explicit version, query the module itself for its version attribute.
    if not version_was_given:
        module_name = requirements_parsed.project_name
        if can_import_module(module_name):
            version = get_module_attribute(module_name, version_attr)

    # No version means that the module does not exist in the system; otherwise, compare the version against the one
    # parsed from the requirements.
    return version in requirements_parsed if version else False
def is_package(module_name: str):
    """
    Check whether the given name refers to a package (i.e., something that may contain other modules) rather than
    to a plain module, without importing anything in the main process.

    :param module_name: Module name to check.
    :return: True if the module is a package, False otherwise (including when the name cannot be resolved).
    """
    def _has_search_locations(module_name: str):
        """
        Decide whether the given name represents a package, based on its spec. If the name represents a top-level
        module or a package, it is not imported. If the name represents a sub-module or a sub-package, its parent is
        imported; in such cases, this function should be called from an isolated subprocess.
        """
        try:
            import importlib.util
            module_spec = importlib.util.find_spec(module_name)
            return bool(module_spec.submodule_search_locations)
        except Exception:
            return False

    # Sub-modules and sub-packages must be checked in isolation, to prevent import leaks in the main process. For
    # top-level names, the check can be safely performed in the main process.
    is_nested = '.' in module_name
    if is_nested:
        return isolated.call(_has_search_locations, module_name)
    return _has_search_locations(module_name)
def get_all_package_paths(package: str):
    """
    Return all paths associated with the given package name. Packages typically have a single location path, but
    PEP 420 namespace packages may be split across multiple locations. An empty list is returned if the specified
    package is not found or is not a package.
    """
    def _list_search_locations(package: str):
        """
        Retrieve package path(s), as advertised by the submodule_search_locations attribute of the spec obtained via
        importlib.util.find_spec(package). If the name represents a top-level package, the package is not imported.
        If the name represents a sub-module or a sub-package, its parent is imported; in such cases, this function
        should be called from an isolated subprocess. Returns an empty list if the specified package is not found or
        is not a package.
        """
        try:
            import importlib.util
            package_spec = importlib.util.find_spec(package)
            locations = package_spec.submodule_search_locations if package_spec else None
            return [str(location) for location in locations] if locations else []
        except Exception:
            return []

    # Sub-modules and sub-packages must be resolved in isolation, to prevent import leaks in the main process. For
    # top-level names, the lookup can be safely performed in the main process.
    if '.' in package:
        return isolated.call(_list_search_locations, package)
    return _list_search_locations(package)
def package_base_path(package_path: str, package: str):
    """
    Return the package base path (the directory in which the top-level package is located) for the given package
    location path and package name. For example, given the path ``/abs/path/to/python/libs/pkg/subpkg`` and package
    name ``pkg.subpkg``, the base path is ``/abs/path/to/python/libs``.

    Only the package's relative directory (``pkg/subpkg`` in the example above) is stripped from the end of the
    path; the path separator that precedes it is left in place.
    """
    # Translate the dotted package name into a relative directory path.
    relative_package_dir = package.replace('.', os.sep)
    return remove_suffix(package_path, relative_package_dir)
def get_package_paths(package: str):
    """
    Return a tuple of the base directory in which the given package is stored on this machine, and the path to the
    package itself. For example, if pkg.subpkg lives in /abs/path/to/python/libs, then this function returns
    ``(/abs/path/to/python/libs, /abs/path/to/python/libs/pkg/subpkg)``.

    NOTE: due to backwards compatibility, this function returns only one package path along with its base directory.
    In case of PEP 420 namespace package with multiple location, only first location is returned. To obtain all
    package paths, use the ``get_all_package_paths`` function and obtain corresponding base directories using the
    ``package_base_path`` helper.

    Raises
    ----------
    ValueError
        If the package does not exist or is not a package.
    """
    locations = get_all_package_paths(package)

    if not locations:
        raise ValueError(f"Package '{package}' does not exist or is not a package!")

    if len(locations) > 1:
        logger.warning(
            "get_package_paths - package %s has multiple paths (%r); returning only first one!", package, locations
        )

    first_location = locations[0]
    return package_base_path(first_location, package), first_location
def collect_submodules(
    package: str,
    filter: Callable = lambda name: True,
    on_error: str = "warn once",
):
    """
    List all submodules of a given package.

    Arguments:
        package:
            An ``import``-able package.
        filter:
            Filter the submodules found: A callable that takes a submodule name and returns True if it should be
            included.
        on_error:
            The action to take when a submodule fails to import. May be any of:

            - raise: Errors are reraised and terminate the build.
            - warn: Errors are downgraded to warnings.
            - warn once: The first error issues a warning but all
              subsequent errors are ignored to minimise *stderr pollution*. This
              is the default.
            - ignore: Skip all errors. Don't warn about anything.
    Returns:
        All submodules to be assigned to ``hiddenimports`` in a hook, as a sorted list of names.

    This function is intended to be used by hook scripts, not by main PyInstaller code.

    Examples::

        # Collect all submodules of Sphinx that don't contain the word ``test``.
        hiddenimports = collect_submodules(
            "Sphinx", filter=lambda name: 'test' not in name)

    .. versionchanged:: 4.5
        Add the **on_error** parameter.

    """
    # Only strings are accepted as package names.
    if not isinstance(package, str):
        raise TypeError('package must be a str')

    allowed_actions = ("ignore", "warn once", "warn", "raise")
    if on_error not in allowed_actions:
        raise ValueError(
            f"Invalid on-error action '{on_error}': Must be one of ('ignore', 'warn once', 'warn', 'raise')"
        )

    logger.debug('Collecting submodules for %s', package)

    # A plain module has no submodules to collect.
    if not is_package(package):
        logger.debug('collect_submodules - %s is not a package.', package)
        # If the module is importable, return its name in the list, in order to keep behavior consistent with the
        # one we have for packages (i.e., the package itself is included in the list of returned names).
        return [package] if can_import_module(package) else []

    collected = []
    # Stack of (sub)packages that still need to be scanned; the most recently added entry is scanned first.
    pending = [package]

    # A single isolated interpreter is used for scanning all (sub)packages.
    with isolated.Python() as isolated_python:
        while pending:
            current = pending.pop()
            # The on-error action is passed back from the scan, as it may have been changed (e.g., from "warn once"
            # to "ignore" once the first warning has been issued).
            modules, subpackages, on_error = isolated_python.call(_collect_submodules, current, on_error)

            # Keep the modules accepted by the filter...
            collected.extend(module for module in modules if filter(module))
            # ... and schedule the sub-packages accepted by the filter for subsequent scan.
            pending.extend(subpackage for subpackage in subpackages if filter(subpackage))

    collected.sort()

    logger.debug("collect_submodules - found submodules: %s", collected)
    return collected
# This function is called in an isolated sub-process via `isolated.Python.call`.
def _collect_submodules(name, on_error):
    """
    Scan a single (sub)package: import it and enumerate its direct children.

    This function is called in an isolated sub-process via `isolated.Python.call`, hence the local imports.

    Returns a tuple ``(modules, subpackages, on_error)``: the names of collected modules (including the scanned
    package itself, if it was imported and has a non-empty ``__path__``), the names of its direct sub-packages, and
    the (possibly updated) on-error action to be used for subsequent scans.
    """
    import pkgutil
    import sys
    from traceback import format_exception_only

    from PyInstaller.utils.hooks import logger

    logger.debug("collect_submodules - scanning (sub)package %s", name)

    modules = []
    subpackages = []

    # Resolve package location(s)
    try:
        __import__(name)
    except Exception as ex:
        # Catch all errors and either raise, warn, or ignore them as determined by the *on_error* parameter.
        if on_error == "raise":
            raise ImportError(f"Unable to load subpackage '{name}'.") from ex
        elif on_error in ("warn", "warn once"):
            from PyInstaller.log import logger
            ex = "".join(format_exception_only(type(ex), ex)).strip()
            logger.warning(f"Failed to collect submodules for '{name}' because importing '{name}' raised: {ex}")
            # After the first warning, "warn once" turns into "ignore" for all subsequent scans.
            if on_error == "warn once":
                on_error = "ignore"
            return modules, subpackages, on_error

    # Do not attempt to recurse into package if it did not make it into sys.modules.
    if name not in sys.modules:
        return modules, subpackages, on_error

    # Or if it does not have __path__ attribute (or the attribute is empty).
    paths = getattr(sys.modules[name], '__path__', None) or []
    if not paths:
        return modules, subpackages, on_error

    # Package was successfully imported - include it in the list of modules.
    modules.append(name)

    # Iterate package contents; the yielded names are fully-qualified due to the passed prefix.
    logger.debug("collect_submodules - scanning (sub)package %s in location(s): %s", name, paths)
    for _importer, child_name, child_is_package in pkgutil.iter_modules(paths, name + '.'):
        if child_is_package:
            subpackages.append(child_name)
        else:
            modules.append(child_name)

    return modules, subpackages, on_error
def is_module_or_submodule(name: str, mod_or_submod: str):
    """
    Return ``True`` if the given ``name`` is ``mod_or_submod`` itself or one of its submodules. This helper function
    is designed for use in the ``filter`` argument of :func:`collect_submodules`.

    Examples:

        The following excludes ``foo.test`` and ``foo.test.one`` but not ``foo.testifier``. ::

            collect_submodules('foo', lambda name: not is_module_or_submodule(name, 'foo.test'))
    """
    if name == mod_or_submod:
        return True
    # The trailing dot ensures that only true submodules match, and not modules that merely share the name prefix.
    return name.startswith(mod_or_submod + '.')
# Patterns of dynamic library filenames that might be bundled with some installed Python packages. These are used by
# `collect_dynamic_libs` as glob patterns when recursively searching the package directories.
PY_DYLIB_PATTERNS = [
    '*.dll',
    '*.dylib',
    'lib*.so',
]
def collect_dynamic_libs(package: str, destdir: object | None = None):
    """
    This function produces a list of (source, dest) of dynamic library files that reside in package. Its output can be
    directly assigned to ``binaries`` in a hook script. The package parameter must be a string which names the package.

    :param package: Name of the package to search for the files matching ``PY_DYLIB_PATTERNS``.
    :param destdir: Relative path to ./dist/APPNAME where the libraries should be put. If not given, the directory
        hierarchy of the libraries, relative to the package's base directory, is preserved.
    :return: List of ``(source, dest)`` tuples of strings; an empty list if ``package`` is not a package.
    :raises TypeError: If ``package`` is not a string.
    """
    # Accept only strings as packages. The check is done before anything is logged, so that an invalid argument is
    # always reported via this error.
    if not isinstance(package, str):
        raise TypeError('package must be a str')

    logger.debug('Collecting dynamic libraries for %s', package)

    # Skip a module which is not a package.
    if not is_package(package):
        logger.warning(
            "collect_dynamic_libs - skipping library collection for module '%s' as it is not a package.", package
        )
        return []

    dylibs = []
    # A package may have multiple locations (see `get_all_package_paths`); search all of them.
    for pkg_dir in get_all_package_paths(package):
        pkg_base = package_base_path(pkg_dir, package)
        # Recursively glob for all file patterns in the package directory
        for pattern in PY_DYLIB_PATTERNS:
            for source in Path(pkg_dir).rglob(pattern):
                # Produce the tuple ('/abs/path/to/source/mod/submod/file.pyd', 'mod/submod')
                if destdir:
                    # Put libraries in the specified target directory.
                    dest = destdir
                else:
                    # Preserve original directory hierarchy.
                    dest = source.parent.relative_to(pkg_base)
                logger.debug(' %s, %s', source, dest)
                dylibs.append((str(source), str(dest)))
    return dylibs
def collect_data_files(
package: str,
include_py_files: bool = False,
subdir: str | bytes | os.PathLike | None = None,
excludes: Iterable | None = None,
includes: Iterable | None = None,
):
r"""
This function produces a list of ``(source, dest)`` non-Python (i.e., data) files that reside in ``package``.
Its output can be directly assigned to ``datas`` in a hook script; for example, see ``hook-sphinx.py``.
Parameters:
- The ``package`` parameter is a string which names the package.
- By default, all Python executable files (those ending in ``.py``, ``.pyc``, and so on) will NOT be collected;
setting the ``include_py_files`` argument to ``True`` collects these files as well. This is typically used with
Python functions (such as those in ``pkgutil``) that search a given directory for Python executable files and
load them as extensions or plugins.
- The ``subdir`` argument gives a subdirectory relative to ``package`` to search, which is helpful when submodules
are imported at run-time from a directory lacking ``__init__.py``.
- The ``excludes`` argument contains a sequence of strings or Paths. These provide a list of
`globs <https://docs.python.org/3/library/pathlib.html#pathlib.Path.glob>`_
to exclude from the collected data files; if a directory matches the provided glob, all files it contains will
be excluded as well. All elements must be relative paths, which are relative to the provided package's path
(/ ``subdir`` if provided).
Therefore, ``*.txt`` will exclude only ``.txt`` files in ``package``\ 's path, while ``**/*.txt`` will exclude
all ``.txt`` files in ``package``\ 's path and all its subdirectories. Likewise, ``**/__pycache__`` will exclude
all files contained in any subdirectory named ``__pycache__``.
- The ``includes`` function like ``excludes``, but only include matching paths. ``excludes`` override
``includes``: a file or directory in both lists will be excluded.
This function does not work on zipped Python eggs.
This function is intended to be used by hook scripts, not by main PyInstaller code.
"""
logger.debug('Collecting data files for %s' % package)
# Accept only strings as packages.
if not isinstance(package, str):
raise TypeError('package must be a str')
# Skip a module which is not a package.
if not is_package(package):
logger.warning("collect_data_files - skipping data collection for module '%s' as it is not a package.", package)
return []
# Make sure the excludes are a list; this also makes a copy, so we don't modify the original.
excludes = list(excludes) if excludes else []
# These excludes may contain directories which need to be searched.
excludes_len = len(excludes)
# Including py files means don't exclude them. This pattern will search any directories for containing files, so
# do not modify ``excludes_len``.
if not include_py_files:
excludes += ['**/*' + s for s in compat.ALL_SUFFIXES]
# Exclude .pyo files if include_py_files is False.
if not include_py_files and ".pyo" not in compat.ALL_SUFFIXES:
excludes.append('**/*.pyo')
# If not specified, include all files. Follow the same process as the excludes.
includes = list(includes) if includes else ["**/*"]
includes_len = len(includes)
# A helper function to glob the in/ex "cludes", adding a wildcard to refer to all files under a subdirectory if a
# subdirectory is matched by the first ``clude_len`` patterns. Otherwise, it in/excludes the matched file.
# **This modifies** ``cludes``.
def clude_walker(
# Package directory to scan
pkg_dir,
# A list of paths relative to ``pkg_dir`` to in/exclude.
cludes,
# The number of ``cludes`` for which matching directories should be searched for all files under them.
clude_len,
# True if the list is includes, False for excludes.
is_include
):
for i, c in enumerate(cludes):
for g in Path(pkg_dir).glob(c):
if g.is_dir():
# Only files are sources. Subdirectories are not.
if i < clude_len:
# In/exclude all files under a matching subdirectory.
cludes.append(str((g / "**/*").relative_to(pkg_dir)))
else:
# In/exclude a matching file.
sources.add(g) if is_include else sources.discard(g)
# Obtain all paths for the specified package, and process each path independently.
datas = []
pkg_dirs = get_all_package_paths(package)
for pkg_dir in pkg_dirs:
sources = set() # Reset sources set
pkg_base = package_base_path(pkg_dir, package)
if subdir:
pkg_dir = os.path.join(pkg_dir, subdir)
# Process the package path with clude walker
clude_walker(pkg_dir, includes, includes_len, True)
clude_walker(pkg_dir, excludes, excludes_len, False)
# Transform the sources into tuples for ``datas``.
datas += [(str(s), str(s.parent.relative_to(pkg_base))) for s in sources]
logger.debug("collect_data_files - Found files: %s", datas)
return datas
def collect_system_data_files(
path: str, destdir: str | bytes | os.PathLike | None = None, include_py_files: bool = False
):
"""
This function produces a list of (source, dest) non-Python (i.e., data) files that reside somewhere on the system.
Its output can be directly assigned to ``datas`` in a hook script.
This function is intended to be used by hook scripts, not by main PyInstaller code.
"""
# Accept only strings as paths.
if not isinstance(path, str):
raise TypeError('path must be a str')
# Walk through all file in the given package, looking for data files.
datas = []
for dirpath, dirnames, files in os.walk(path):
for f in files:
extension = os.path.splitext(f)[1]
if include_py_files or (extension not in PY_IGNORE_EXTENSIONS):
# Produce the tuple: (/abs/path/to/source/mod/submod/file.dat, mod/submod/destdir)
source = os.path.join(dirpath, f)
dest = str(Path(dirpath).relative_to(path))
if destdir is not None:
dest = os.path.join(destdir, dest)
datas.append((source, dest))
return datas
def copy_metadata(package_name: str, recursive: bool = False):
"""
Collect distribution metadata so that ``pkg_resources.get_distribution()`` can find it.
This function returns a list to be assigned to the ``datas`` global variable. This list instructs PyInstaller to
copy the metadata for the given package to the frozen application's data directory.
Parameters
----------
package_name : str
Specifies the name of the package for which metadata should be copied.
recursive : bool
If true, collect metadata for the package's dependencies too. This enables use of
``pkg_resources.require('package')`` inside the frozen application.
Returns
-------
list
This should be assigned to ``datas``.
Examples
--------
>>> from PyInstaller.utils.hooks import copy_metadata
>>> copy_metadata('sphinx')
[('c:\\python27\\lib\\site-packages\\Sphinx-1.3.2.dist-info',
'Sphinx-1.3.2.dist-info')]
Some packages rely on metadata files accessed through the ``pkg_resources`` module. Normally PyInstaller does not
include these metadata files. If a package fails without them, you can use this function in a hook file to easily
add them to the frozen bundle. The tuples in the returned list have two strings. The first is the full pathname to a
folder in this system. The second is the folder name only. When these tuples are added to ``datas``\\ , the folder
will be bundled at the top level.
.. versionchanged:: 4.3.1
Prevent ``dist-info`` metadata folders being renamed to ``egg-info`` which broke ``pkg_resources.require`` with
*extras* (see :issue:`#3033`).
.. versionchanged:: 4.4.0
Add the **recursive** option.
"""
from collections import deque
todo = deque([package_name])
done = set()
out = []
while todo:
package_name = todo.pop()
if package_name in done:
continue
dist = pkg_resources.get_distribution(package_name)
if dist.egg_info is not None:
# If available, dist.egg_info points to the source .egg-info or .dist-info directory.
dest = _copy_metadata_dest(dist.egg_info, dist.project_name)
out.append((dist.egg_info, dest))
else:
# When .egg-info is not a directory but a single file, dist.egg_info is None, and we need to resolve the
# path ourselves. This format is common on Ubuntu/Debian with their deb-packaged python packages.