diff --git a/cloudpickle/cloudpickle_fast.py b/cloudpickle/cloudpickle_fast.py
index 46a9540ec..e14e4710d 100644
--- a/cloudpickle/cloudpickle_fast.py
+++ b/cloudpickle/cloudpickle_fast.py
@@ -155,8 +155,10 @@ def _function_getstate(func):
     }
 
     f_globals_ref = _extract_code_globals(func.__code__)
-    f_globals = {k: func.__globals__[k] for k in f_globals_ref if k in
-                 func.__globals__}
+    # Iterate the referenced names in sorted order so that the key order of
+    # f_globals does not depend on the iteration order of f_globals_ref.
+    f_globals = {k: func.__globals__[k] for k in sorted(f_globals_ref)
+                 if k in func.__globals__}
 
     closure_values = (
         list(map(_get_cell_contents, func.__closure__))
diff --git a/tests/cloudpickle_test.py b/tests/cloudpickle_test.py
index 845f27962..7784f5769 100644
--- a/tests/cloudpickle_test.py
+++ b/tests/cloudpickle_test.py
@@ -24,6 +24,7 @@
 import enum
 import typing
 from functools import wraps
+import time
 
 import pytest
 
@@ -49,6 +50,7 @@
 from cloudpickle.cloudpickle import _extract_class_dict, _whichmodule
 from cloudpickle.cloudpickle import _lookup_module_and_qualname
 
+from .testutils import subprocess_pickle_string
 from .testutils import subprocess_pickle_echo
 from .testutils import assert_run_python_script
 from .testutils import subprocess_worker
@@ -57,6 +59,11 @@
 
 
 _TEST_GLOBAL_VARIABLE = "default_value"
+_TEST_GLOBAL_VARIABLE2 = "another_value"
+
+# Build, via exec, a function whose body references 1000 distinct global
+# names (a0 ... a999). %-formatting keeps this module parseable before 3.6.
+exec("def _TEST_BIG_GLOBAL_SPACE():\n return %s"
+     % ", ".join(["a%d" % i for i in range(1000)]))
 
 
 class RaiserOnPickle(object):
@@ -2321,6 +2328,35 @@ def __type__(self):
         o = MyClass()
         pickle_depickle(o, protocol=self.protocol)
 
+    @pytest.mark.skipif(
+        sys.version_info < (3, 6, 0),
+        reason="Dict determinism is a lost cause in Python < 3.6")
+    def test_sorted_globals(self):
+        # The payload must be identical whatever hash seed the child uses.
+        vals = set()
+
+        def func_with_globals():
+            return _TEST_GLOBAL_VARIABLE + _TEST_GLOBAL_VARIABLE2
+
+        for i in range(5):
+            vals.add(
+                subprocess_pickle_string(func_with_globals,
+                                         protocol=self.protocol,
+                                         add_env={"PYTHONHASHSEED": str(i)}))
+        assert len(vals) == 1
+
+    def test_efficient_sorted_globals(self):
+        # Non regression test to demonstrate that large numbers of globals
+        # do not cause slowdown
+        gvars = {"a%d" % i for i in range(1000)}
+        assert cloudpickle.cloudpickle._extract_code_globals(
+            _TEST_BIG_GLOBAL_SPACE.__code__) == gvars
+        # Use a monotonic clock: wall-clock time may jump during the run.
+        tic = time.monotonic()
+        subprocess_pickle_string(_TEST_BIG_GLOBAL_SPACE,
+                                 protocol=self.protocol)
+        assert time.monotonic() - tic < 0.5
+
 
 class Protocol2CloudPickleTest(CloudPickleTest):
 
diff --git a/tests/testutils.py b/tests/testutils.py
index 6acc998d4..9c7d5c47d 100644
--- a/tests/testutils.py
+++ b/tests/testutils.py
@@ -38,24 +38,28 @@ def _make_cwd_env():
     return cloudpickle_repo_folder, env
 
 
-def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT):
-    """Echo function with a child Python process
+def subprocess_pickle_string(input_data, protocol=None, timeout=TIMEOUT,
+                             add_env=None):
+    r"""Retrieve pickle string of an object generated by a child Python process
 
     Pickle the input data into a buffer, send it to a subprocess via
     stdin, expect the subprocess to unpickle, re-pickle that data back
-    and send it back to the parent process via stdout for final unpickling.
+    and send it back to the parent process via stdout.
 
-    >>> subprocess_pickle_echo([1, 'a', None])
-    [1, 'a', None]
+    If ``add_env`` is given, its entries are added to (and override) the
+    environment variables passed to the child process.
+
+    >>> testutils.subprocess_pickle_string([1, 'a', None], protocol=2)
+    b'\x80\x02]q\x00(K\x01X\x01\x00\x00\x00aq\x01Ne.'
 
     """
-    # run then pickle_echo(protocol=protocol) in __main__:
-
     # Protect stderr from any warning, as we will assume an error will happen
     # if it is not empty. A concrete example is pytest using the imp module,
     # which is deprecated in python 3.8
     cmd = [sys.executable, '-W ignore', __file__, "--protocol", str(protocol)]
     cwd, env = _make_cwd_env()
+    if add_env:
+        env.update(add_env)
     proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, cwd=cwd, env=env,
                  bufsize=4096)
     pickle_string = dumps(input_data, protocol=protocol)
@@ -67,7 +71,7 @@ def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT):
             message = "Subprocess returned %d: " % proc.returncode
             message += err.decode('utf-8')
             raise RuntimeError(message)
-        return loads(out)
+        return out
     except TimeoutExpired as e:
         proc.kill()
         out, err = proc.communicate()
@@ -75,6 +79,25 @@ def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT):
         raise RuntimeError(message) from e
 
 
+def subprocess_pickle_echo(input_data, protocol=None, timeout=TIMEOUT,
+                           add_env=None):
+    """Echo function with a child Python process
+
+    Pickle the input data into a buffer, send it to a subprocess via
+    stdin, expect the subprocess to unpickle, re-pickle that data back
+    and send it back to the parent process via stdout for final unpickling.
+
+    >>> subprocess_pickle_echo([1, 'a', None])
+    [1, 'a', None]
+
+    """
+    out = subprocess_pickle_string(input_data,
+                                   protocol=protocol,
+                                   timeout=timeout,
+                                   add_env=add_env)
+    return loads(out)
+
+
 def _read_all_bytes(stream_in, chunk_size=4096):
     all_data = b""
     while True: