From 343da119685f622da2d1658ef7b3e2516a01817f Mon Sep 17 00:00:00 2001 From: Samuel Hinton Date: Sun, 1 Aug 2021 10:03:07 +0100 Subject: [PATCH] Add ability to register modules to be deeply serialized (#417) Co-authored-by: Pierre Glaser Co-authored-by: Samuel Hinton Co-authored-by: Olivier Grisel --- CHANGES.md | 6 +- README.md | 53 ++++ cloudpickle/cloudpickle.py | 116 ++++++- cloudpickle/cloudpickle_fast.py | 12 +- tests/cloudpickle_test.py | 296 +++++++++++++++++- .../_cloudpickle_testpkg/__init__.py | 8 + .../_cloudpickle_testpkg/mod.py | 8 + tests/mock_local_folder/mod.py | 20 ++ tests/mock_local_folder/subfolder/submod.py | 13 + 9 files changed, 503 insertions(+), 29 deletions(-) create mode 100644 tests/mock_local_folder/mod.py create mode 100644 tests/mock_local_folder/subfolder/submod.py diff --git a/CHANGES.md b/CHANGES.md index 17e621c19..e7ed612f3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -6,9 +6,13 @@ dev - Python 3.5 is no longer supported. +- Support for registering modules to be serialised by value. This will + allow for code defined in local modules to be serialised and executed + remotely without those local modules installed on the remote machine. + ([PR #417](https://github.com/cloudpipe/cloudpickle/pull/417)) + - Fix a side effect altering dynamic modules at pickling time. 
([PR #426](https://github.com/cloudpipe/cloudpickle/pull/426)) - - Support for pickling type annotations on Python 3.10 as per [PEP 563]( https://www.python.org/dev/peps/pep-0563/) ([PR #400](https://github.com/cloudpipe/cloudpickle/pull/400)) diff --git a/README.md b/README.md index 5229d57c4..731fe0abc 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,59 @@ Pickling a function interactively defined in a Python shell session 85 ``` + +Overriding pickle's serialization mechanism for importable constructs: +---------------------------------------------------------------------- + +An important difference between `cloudpickle` and `pickle` is that +`cloudpickle` can serialize a function or class **by value**, whereas `pickle` +can only serialize it **by reference**. Serialization by reference treats +functions and classes as attributes of modules, and pickles them through +instructions that trigger the import of their module at load time. +Serialization by reference is thus limited in that it assumes that the module +containing the function or class is available/importable in the unpickling +environment. This assumption breaks when pickling constructs defined in an +interactive session, a case that is automatically detected by `cloudpickle`, +that pickles such constructs **by value**. + +Another case where the importability assumption is expected to break is when +developing a module in a distributed execution environment: the worker +processes may not have access to the said module, for example if they live on a +different machine than the process in which the module is being developed. +By itself, `cloudpickle` cannot detect such "locally importable" modules and +switch to serialization by value; instead, it relies on its default mode, +which is serialization by reference. 
However, since `cloudpickle 1.7.0`, one +can explicitly specify modules for which serialization by value should be used, +using the `register_pickle_by_value(module)`/`unregister_pickle_by_value(module)` API: + +```python +>>> import cloudpickle +>>> import my_module +>>> cloudpickle.register_pickle_by_value(my_module) +>>> cloudpickle.dumps(my_module.my_function) # my_function is pickled by value +>>> cloudpickle.unregister_pickle_by_value(my_module) +>>> cloudpickle.dumps(my_module.my_function) # my_function is pickled by reference +``` + +Using this API, there is no need to re-install the new version of the module on +all the worker nodes nor to restart the workers: restarting the client Python +process with the new source code is enough. + +Note that this feature is still **experimental**, and may fail in the following +situations: + +- If the body of a function/class pickled by value contains an `import` statement: + ```python + >>> def f(): + >>> ... from another_module import g + >>> ... # calling f in the unpickling environment may fail if another_module + >>> ... # is unavailable + >>> ... return g() + 1 + ``` + +- If a function pickled by reference uses a function pickled by value during its execution. + + Running the tests ----------------- diff --git a/cloudpickle/cloudpickle.py b/cloudpickle/cloudpickle.py index 763e9d6f7..347b38695 100644 --- a/cloudpickle/cloudpickle.py +++ b/cloudpickle/cloudpickle.py @@ -88,6 +88,9 @@ def g(): # communication speed over compatibility: DEFAULT_PROTOCOL = pickle.HIGHEST_PROTOCOL +# Names of modules whose resources should be treated as dynamic. +_PICKLE_BY_VALUE_MODULES = set() + # Track the provenance of reconstructed dynamic classes to make it possible to # reconstruct instances from the matching singleton class definition when # appropriate and preserve the usual "isinstance" semantics of Python objects. 
@@ -124,6 +127,77 @@ def _lookup_class_or_track(class_tracker_id, class_def): return class_def +def register_pickle_by_value(module): + """Register a module to make its functions and classes picklable by value. + + By default, functions and classes that are attributes of an importable + module are to be pickled by reference, that is relying on re-importing + the attribute from the module at load time. + + If `register_pickle_by_value(module)` is called, all its functions and + classes are subsequently to be pickled by value, meaning that they can + be loaded in Python processes where the module is not importable. + + This is especially useful when developing a module in a distributed + execution environment: restarting the client Python process with the new + source code is enough: there is no need to re-install the new version + of the module on all the worker nodes nor to restart the workers. + + Note: this feature is considered experimental. See the cloudpickle + README.md file for more details and limitations. + """ + if not isinstance(module, types.ModuleType): + raise ValueError( + f"Input should be a module object, got {str(module)} instead" + ) + # In the future, cloudpickle may need a way to access any module registered + # for pickling by value in order to introspect relative imports inside + # functions pickled by value. (see + # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633). + # This access can be ensured by checking that module is present in + # sys.modules at registering time and assuming that it will still be in + # there when accessed during pickling. Another alternative would be to + # store a weakref to the module. Even though cloudpickle does not implement + # this introspection yet, in order to avoid a possible breaking change + # later, we still enforce the presence of module inside sys.modules. 
+ if module.__name__ not in sys.modules: + raise ValueError( + f"{module} was not imported correctly, have you used an " + f"`import` statement to access it?" + ) + _PICKLE_BY_VALUE_MODULES.add(module.__name__) + + +def unregister_pickle_by_value(module): + """Unregister that the input module should be pickled by value.""" + if not isinstance(module, types.ModuleType): + raise ValueError( + f"Input should be a module object, got {str(module)} instead" + ) + if module.__name__ not in _PICKLE_BY_VALUE_MODULES: + raise ValueError(f"{module} is not registered for pickle by value") + else: + _PICKLE_BY_VALUE_MODULES.remove(module.__name__) + + +def list_registry_pickle_by_value(): + return _PICKLE_BY_VALUE_MODULES.copy() + + +def _is_registered_pickle_by_value(module): + module_name = module.__name__ + if module_name in _PICKLE_BY_VALUE_MODULES: + return True + while True: + parent_name = module_name.rsplit(".", 1)[0] + if parent_name == module_name: + break + if parent_name in _PICKLE_BY_VALUE_MODULES: + return True + module_name = parent_name + return False + + def _whichmodule(obj, name): """Find the module an object belongs to. @@ -170,18 +244,35 @@ def _whichmodule(obj, name): return None -def _is_importable(obj, name=None): - """Dispatcher utility to test the importability of various constructs.""" - if isinstance(obj, types.FunctionType): - return _lookup_module_and_qualname(obj, name=name) is not None - elif issubclass(type(obj), type): - return _lookup_module_and_qualname(obj, name=name) is not None +def _should_pickle_by_reference(obj, name=None): + """Test whether a function or a class should be pickled by reference + + Pickling by reference means that the object (typically a function or a + class) is an attribute of a module that is assumed to be importable in the + target Python environment. Loading will therefore rely on importing the + module and then calling `getattr` on it to access the function or class. 
+ + Pickling by reference is the only option to pickle functions and classes + in the standard library. In cloudpickle the alternative option is to + pickle by value (for instance for interactively or locally defined + functions and classes or for attributes of modules that have been + explicitly registered to be pickled by value). + """ + if isinstance(obj, types.FunctionType) or issubclass(type(obj), type): + module_and_name = _lookup_module_and_qualname(obj, name=name) + if module_and_name is None: + return False + module, name = module_and_name + return not _is_registered_pickle_by_value(module) + elif isinstance(obj, types.ModuleType): # We assume that sys.modules is primarily used as a cache mechanism for # the Python import machinery. Checking if a module has been added in - # is sys.modules therefore a cheap and simple heuristic to tell us whether - # we can assume that a given module could be imported by name in - # another Python process. + # sys.modules is therefore a cheap and simple heuristic to tell us + # whether we can assume that a given module could be imported by name + # in another Python process. + if _is_registered_pickle_by_value(obj): + return False return obj.__name__ in sys.modules else: raise TypeError( @@ -839,10 +930,15 @@ def _decompose_typevar(obj): def _typevar_reduce(obj): - # TypeVar instances have no __qualname__ hence we pass the name explicitly. 
+ # TypeVar instances require the module information hence why we + # are not using the _should_pickle_by_reference directly module_and_name = _lookup_module_and_qualname(obj, name=obj.__name__) + if module_and_name is None: return (_make_typevar, _decompose_typevar(obj)) + elif _is_registered_pickle_by_value(module_and_name[0]): + return (_make_typevar, _decompose_typevar(obj)) + return (getattr, module_and_name) diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py index 10ceef1bd..6db059eb8 100644 --- a/cloudpickle/cloudpickle_fast.py +++ b/cloudpickle/cloudpickle_fast.py @@ -28,7 +28,7 @@ from .compat import pickle, Pickler from .cloudpickle import ( _extract_code_globals, _BUILTIN_TYPE_NAMES, DEFAULT_PROTOCOL, - _find_imported_submodules, _get_cell_contents, _is_importable, + _find_imported_submodules, _get_cell_contents, _should_pickle_by_reference, _builtin_type, _get_or_create_tracker_id, _make_skeleton_class, _make_skeleton_enum, _extract_class_dict, dynamic_subimport, subimport, _typevar_reduce, _get_bases, _make_cell, _make_empty_cell, CellType, @@ -352,7 +352,7 @@ def _memoryview_reduce(obj): def _module_reduce(obj): - if _is_importable(obj): + if _should_pickle_by_reference(obj): return subimport, (obj.__name__,) else: # Some external libraries can populate the "__builtins__" entry of a @@ -414,7 +414,7 @@ def _class_reduce(obj): return type, (NotImplemented,) elif obj in _BUILTIN_TYPE_NAMES: return _builtin_type, (_BUILTIN_TYPE_NAMES[obj],) - elif not _is_importable(obj): + elif not _should_pickle_by_reference(obj): return _dynamic_class_reduce(obj) return NotImplemented @@ -559,7 +559,7 @@ def _function_reduce(self, obj): As opposed to cloudpickle.py, There no special handling for builtin pypy functions because cloudpickle_fast is CPython-specific. 
""" - if _is_importable(obj): + if _should_pickle_by_reference(obj): return NotImplemented else: return self._dynamic_function_reduce(obj) @@ -763,7 +763,7 @@ def save_global(self, obj, name=None, pack=struct.pack): ) elif name is not None: Pickler.save_global(self, obj, name=name) - elif not _is_importable(obj, name=name): + elif not _should_pickle_by_reference(obj, name=name): self._save_reduce_pickle5(*_dynamic_class_reduce(obj), obj=obj) else: Pickler.save_global(self, obj, name=name) @@ -775,7 +775,7 @@ def save_function(self, obj, name=None): Determines what kind of function obj is (e.g. lambda, defined at interactive prompt, etc) and handles the pickling appropriately. """ - if _is_importable(obj, name=name): + if _should_pickle_by_reference(obj, name=name): return Pickler.save_global(self, obj, name=name) elif PYPY and isinstance(obj.__code__, builtin_code_type): return self.save_pypy_builtin_func(obj) diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py index baca23cc7..d2acfb711 100644 --- a/tests/cloudpickle_test.py +++ b/tests/cloudpickle_test.py @@ -13,6 +13,7 @@ import pickletools import platform import random +import re import shutil import subprocess import sys @@ -45,7 +46,10 @@ import cloudpickle from cloudpickle.compat import pickle -from cloudpickle.cloudpickle import _is_importable +from cloudpickle import register_pickle_by_value +from cloudpickle import unregister_pickle_by_value +from cloudpickle import list_registry_pickle_by_value +from cloudpickle.cloudpickle import _should_pickle_by_reference from cloudpickle.cloudpickle import _make_empty_cell, cell_set from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule from cloudpickle.cloudpickle import _lookup_module_and_qualname @@ -83,6 +87,14 @@ def _escape(raw_filepath): return raw_filepath.replace("\\", r"\\\\") +def _maybe_remove(list_, item): + try: + list_.remove(item) + except ValueError: + pass + return list_ + + def test_extract_class_dict(): class A(int): 
"""A docstring""" @@ -732,24 +744,24 @@ def test_module_importability(self): import distutils import distutils.ccompiler - assert _is_importable(pickle) - assert _is_importable(os.path) # fake (aliased) module - assert _is_importable(distutils) # package - assert _is_importable(distutils.ccompiler) # module in package + assert _should_pickle_by_reference(pickle) + assert _should_pickle_by_reference(os.path) # fake (aliased) module + assert _should_pickle_by_reference(distutils) # package + assert _should_pickle_by_reference(distutils.ccompiler) # module in package dynamic_module = types.ModuleType('dynamic_module') - assert not _is_importable(dynamic_module) + assert not _should_pickle_by_reference(dynamic_module) if platform.python_implementation() == 'PyPy': import _codecs - assert _is_importable(_codecs) + assert _should_pickle_by_reference(_codecs) # #354: Check that modules created dynamically during the import of # their parent modules are considered importable by cloudpickle. # See the mod_with_dynamic_submodule documentation for more # details of this use case. 
import _cloudpickle_testpkg.mod.dynamic_submodule as m - assert _is_importable(m) + assert _should_pickle_by_reference(m) assert pickle_depickle(m, protocol=self.protocol) is m # Check for similar behavior for a module that cannot be imported by @@ -757,14 +769,14 @@ def test_module_importability(self): from _cloudpickle_testpkg.mod import dynamic_submodule_two as m2 # Note: import _cloudpickle_testpkg.mod.dynamic_submodule_two as m2 # works only for Python 3.7+ - assert _is_importable(m2) + assert _should_pickle_by_reference(m2) assert pickle_depickle(m2, protocol=self.protocol) is m2 # Submodule_three is a dynamic module only importable via module lookup with pytest.raises(ImportError): import _cloudpickle_testpkg.mod.submodule_three # noqa from _cloudpickle_testpkg.mod import submodule_three as m3 - assert not _is_importable(m3) + assert not _should_pickle_by_reference(m3) # This module cannot be pickled using attribute lookup (as it does not # have a `__module__` attribute like classes and functions. @@ -776,12 +788,12 @@ def test_module_importability(self): # Do the same for an importable dynamic submodule inside a dynamic # module inside a file-backed module. 
import _cloudpickle_testpkg.mod.dynamic_submodule.dynamic_subsubmodule as sm # noqa - assert _is_importable(sm) + assert _should_pickle_by_reference(sm) assert pickle_depickle(sm, protocol=self.protocol) is sm expected = "cannot check importability of object instances" with pytest.raises(TypeError, match=expected): - _is_importable(object()) + _should_pickle_by_reference(object()) def test_Ellipsis(self): self.assertEqual(Ellipsis, @@ -2351,6 +2363,227 @@ def __type__(self): o = MyClass() pickle_depickle(o, protocol=self.protocol) + def test_pickle_constructs_from_module_registered_for_pickling_by_value(self): # noqa + _prev_sys_path = sys.path.copy() + try: + # We simulate an interactive session that: + # - we start from the /path/to/cloudpickle/tests directory, where a + # local .py file (mock_local_file) is located. + # - uses constructs from mock_local_file in remote workers that do + # not have access to this file. This situation is + # the justification behind the + # (un)register_pickle_by_value(module) api that cloudpickle + # exposes. + _mock_interactive_session_cwd = os.path.dirname(__file__) + + # First, remove sys.path entries that could point to + # /path/to/cloudpickle/tests and be inherited by the worker + _maybe_remove(sys.path, '') + _maybe_remove(sys.path, _mock_interactive_session_cwd) + + # Add the desired session working directory + sys.path.insert(0, _mock_interactive_session_cwd) + + with subprocess_worker(protocol=self.protocol) as w: + # Make the module unavailable in the remote worker + w.run( + lambda p: sys.path.remove(p), _mock_interactive_session_cwd + ) + # Import the actual file after starting the module since the + # worker is started using fork on Linux, which will inherit + # the parent sys.modules. On Python>3.6, the worker can be + # started using spawn using mp_context in ProcessPoolExecutor. + # TODO Once Python 3.6 reaches end of life, rely on mp_context + # instead. 
+ import mock_local_folder.mod as mod + # The constructs whose pickling mechanism is changed using + # register_pickle_by_value are functions, classes, TypeVar and + # modules. + from mock_local_folder.mod import ( + local_function, LocalT, LocalClass + ) + + # Make sure the module/constructs are unimportable in the + # worker. + with pytest.raises(ImportError): + w.run(lambda: __import__("mock_local_folder.mod")) + with pytest.raises(ImportError): + w.run( + lambda: __import__("mock_local_folder.subfolder.mod") + ) + + for o in [mod, local_function, LocalT, LocalClass]: + with pytest.raises(ImportError): + w.run(lambda: o) + + register_pickle_by_value(mod) + # function + assert w.run(lambda: local_function()) == local_function() + # typevar + assert w.run(lambda: LocalT.__name__) == LocalT.__name__ + # classes + assert ( + w.run(lambda: LocalClass().method()) + == LocalClass().method() + ) + # modules + assert ( + w.run(lambda: mod.local_function()) == local_function() + ) + + # Constructs from modules inside subfolders should be pickled + # by value if a namespace module pointing to some parent folder + # was registered for pickling by value. A "mock_local_folder" + # namespace module falls into that category, but a + # "mock_local_folder.mod" one does not. 
+ from mock_local_folder.subfolder.submod import ( + LocalSubmodClass, LocalSubmodT, local_submod_function + ) + # Shorter aliases to comply with line-length limits + _t, _func, _class = ( + LocalSubmodT, local_submod_function, LocalSubmodClass + ) + with pytest.raises(ImportError): + w.run( + lambda: __import__("mock_local_folder.subfolder.mod") + ) + with pytest.raises(ImportError): + w.run(lambda: local_submod_function) + + unregister_pickle_by_value(mod) + + with pytest.raises(ImportError): + w.run(lambda: local_function) + + with pytest.raises(ImportError): + w.run(lambda: __import__("mock_local_folder.mod")) + + # Test the namespace folder case + import mock_local_folder + register_pickle_by_value(mock_local_folder) + assert w.run(lambda: local_function()) == local_function() + assert w.run(lambda: _func()) == _func() + unregister_pickle_by_value(mock_local_folder) + + with pytest.raises(ImportError): + w.run(lambda: local_function) + with pytest.raises(ImportError): + w.run(lambda: local_submod_function) + + # Test the case of registering a single module inside a + # subfolder. 
+ import mock_local_folder.subfolder.submod + register_pickle_by_value(mock_local_folder.subfolder.submod) + assert w.run(lambda: _func()) == _func() + assert w.run(lambda: _t.__name__) == _t.__name__ + assert w.run(lambda: _class().method()) == _class().method() + + # Registering a module from a subfolder for pickling by value + # should not make constructs from modules from the parent + # folder pickleable + with pytest.raises(ImportError): + w.run(lambda: local_function) + with pytest.raises(ImportError): + w.run(lambda: __import__("mock_local_folder.mod")) + + unregister_pickle_by_value( + mock_local_folder.subfolder.submod + ) + with pytest.raises(ImportError): + w.run(lambda: local_submod_function) + + # Test the subfolder namespace module case + import mock_local_folder.subfolder + register_pickle_by_value(mock_local_folder.subfolder) + assert w.run(lambda: _func()) == _func() + assert w.run(lambda: _t.__name__) == _t.__name__ + assert w.run(lambda: _class().method()) == _class().method() + + unregister_pickle_by_value(mock_local_folder.subfolder) + finally: + _fname = "mock_local_folder" + sys.path = _prev_sys_path + for m in [_fname, f"{_fname}.mod", f"{_fname}.subfolder", + f"{_fname}.subfolder.submod"]: + mod = sys.modules.pop(m, None) + if mod and mod.__name__ in list_registry_pickle_by_value(): + unregister_pickle_by_value(mod) + + def test_pickle_constructs_from_installed_packages_registered_for_pickling_by_value( # noqa + self + ): + for package_or_module in ["package", "module"]: + if package_or_module == "package": + import _cloudpickle_testpkg as m + f = m.package_function_with_global + _original_global = m.global_variable + elif package_or_module == "module": + import _cloudpickle_testpkg.mod as m + f = m.module_function_with_global + _original_global = m.global_variable + try: + with subprocess_worker(protocol=self.protocol) as w: + assert w.run(lambda: f()) == _original_global + + # Test that f is pickled by value by modifying a global + # 
variable that f uses, and making sure that this + # modification shows up when calling the function remotely + register_pickle_by_value(m) + assert w.run(lambda: f()) == _original_global + m.global_variable = "modified global" + assert m.global_variable != _original_global + assert w.run(lambda: f()) == "modified global" + unregister_pickle_by_value(m) + finally: + m.global_variable = _original_global + if m.__name__ in list_registry_pickle_by_value(): + unregister_pickle_by_value(m) + + def test_pickle_various_versions_of_the_same_function_with_different_pickling_method( # noqa + self + ): + # Make sure that different versions of the same function (possibly + # pickled in a different way - by value and/or by reference) can + # peacefully co-exist (e.g. without globals interaction) in a remote + # worker. + import _cloudpickle_testpkg + from _cloudpickle_testpkg import package_function_with_global as f + _original_global = _cloudpickle_testpkg.global_variable + + def _create_registry(): + _main = __import__("sys").modules["__main__"] + _main._cloudpickle_registry = {} + # global _cloudpickle_registry + + def _add_to_registry(v, k): + _main = __import__("sys").modules["__main__"] + _main._cloudpickle_registry[k] = v + + def _call_from_registry(k): + _main = __import__("sys").modules["__main__"] + return _main._cloudpickle_registry[k]() + + try: + with subprocess_worker(protocol=self.protocol) as w: + w.run(_create_registry) + w.run(_add_to_registry, f, "f_by_ref") + + register_pickle_by_value(_cloudpickle_testpkg) + _cloudpickle_testpkg.global_variable = "modified global" + w.run(_add_to_registry, f, "f_by_val") + assert ( + w.run(_call_from_registry, "f_by_ref") == _original_global + ) + assert ( + w.run(_call_from_registry, "f_by_val") == "modified global" + ) + + finally: + _cloudpickle_testpkg.global_variable = _original_global + + if "_cloudpickle_testpkg" in list_registry_pickle_by_value(): + unregister_pickle_by_value(_cloudpickle_testpkg) + 
@pytest.mark.skipif( sys.version_info < (3, 7), reason="Determinism can only be guaranteed for Python 3.7+" @@ -2408,6 +2641,45 @@ def test_lookup_module_and_qualname_stdlib_typevar(): assert name == 'AnyStr' +def test_register_pickle_by_value(): + import _cloudpickle_testpkg as pkg + import _cloudpickle_testpkg.mod as mod + + assert list_registry_pickle_by_value() == set() + + register_pickle_by_value(pkg) + assert list_registry_pickle_by_value() == {pkg.__name__} + + register_pickle_by_value(mod) + assert list_registry_pickle_by_value() == {pkg.__name__, mod.__name__} + + unregister_pickle_by_value(mod) + assert list_registry_pickle_by_value() == {pkg.__name__} + + msg = f"Input should be a module object, got {pkg.__name__} instead" + with pytest.raises(ValueError, match=msg): + unregister_pickle_by_value(pkg.__name__) + + unregister_pickle_by_value(pkg) + assert list_registry_pickle_by_value() == set() + + msg = f"{pkg} is not registered for pickle by value" + with pytest.raises(ValueError, match=re.escape(msg)): + unregister_pickle_by_value(pkg) + + msg = f"Input should be a module object, got {pkg.__name__} instead" + with pytest.raises(ValueError, match=msg): + register_pickle_by_value(pkg.__name__) + + dynamic_mod = types.ModuleType('dynamic_mod') + msg = ( + f"{dynamic_mod} was not imported correctly, have you used an " + f"`import` statement to access it?" + ) + with pytest.raises(ValueError, match=re.escape(msg)): + register_pickle_by_value(dynamic_mod) + + def _all_types_to_test(): T = typing.TypeVar('T') diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py index 595243c26..ea7aeb9e9 100644 --- a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/__init__.py @@ -7,6 +7,14 @@ def package_function(): return "hello from a package!" 
+global_variable = "some global variable" + + +def package_function_with_global(): + global global_variable + return global_variable + + class _SingletonClass(object): def __reduce__(self): # This reducer is only valid for the top level "some_singleton" object. diff --git a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py index 02b144c30..e8225a4ca 100644 --- a/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py +++ b/tests/cloudpickle_testpkg/_cloudpickle_testpkg/mod.py @@ -63,3 +63,11 @@ def f(x): def module_function(): return "hello from a module!" + + +global_variable = "some global variable" + + +def module_function_with_global(): + global global_variable + return global_variable diff --git a/tests/mock_local_folder/mod.py b/tests/mock_local_folder/mod.py new file mode 100644 index 000000000..1a1c1da46 --- /dev/null +++ b/tests/mock_local_folder/mod.py @@ -0,0 +1,20 @@ +""" +In the distributed computing setting, this file plays the role of a "local +development" file, e.g. a file that is importable locally, but unimportable in +remote workers. Constructs defined in this file and usually pickled by +reference should instead be flagged to cloudpickle for pickling by value: this +is done using the register_pickle_by_value api exposed by cloudpickle. +""" +import typing + + +def local_function(): + return "hello from a function importable locally!" + + +class LocalClass: + def method(self): + return "hello from a class importable locally" + + +LocalT = typing.TypeVar("LocalT") diff --git a/tests/mock_local_folder/subfolder/submod.py b/tests/mock_local_folder/subfolder/submod.py new file mode 100644 index 000000000..deebc1477 --- /dev/null +++ b/tests/mock_local_folder/subfolder/submod.py @@ -0,0 +1,13 @@ +import typing + + +def local_submod_function(): + return "hello from a file located in a locally-importable subfolder!" 
+ + +class LocalSubmodClass: + def method(self): + return "hello from a class located in a locally-importable subfolder!" + + +LocalSubmodT = typing.TypeVar("LocalSubmodT")