Skip to content

Commit

Permalink
Apply suggestions from code review
Browse files Browse the repository at this point in the history
Co-authored-by: Olivier Grisel <olivier.grisel@ensta.org>
  • Loading branch information
tomMoral and ogrisel committed Nov 18, 2023
1 parent e90c088 commit cadb9f2
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 27 deletions.
16 changes: 10 additions & 6 deletions cloudpickle/cloudpickle.py
Original file line number Diff line number Diff line change
Expand Up @@ -1150,12 +1150,16 @@ def _class_setstate(obj, state):
if attrname == "_abc_impl":
registry = attr
else:
# Note: attribute names are automatically interned in cpython. This means that to get
# determinist pickling in subprocess, we need to make sure that the dynamic function names
# are also interned since the Pickler's memoizer relies on physical object
# identity to break cycles in the reference graph of the object being
# serialized.
# https://github.com/python/cpython/blob/main/Objects/object.c#L1060
# Note: setting attribute names on a class automatically triggers their
# interning in CPython:
# https://github.com/python/cpython/blob/v3.12.0/Objects/object.c#L957
#
# This means that to get deterministic pickling for a dynamic class that
# was initially defined in remote Python process, we need to make sure that
# the dynamic function names are also interned.
#
# Indeed the Pickler's memoizer relies on physical object identity to break
# cycles in the reference graph of the object being serialized.
setattr(obj, attrname, attr)
if registry is not None:
for subclass in registry:
Expand Down
46 changes: 28 additions & 18 deletions tests/cloudpickle_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
from .testutils import subprocess_pickle_echo
from .testutils import subprocess_pickle_string
from .testutils import assert_run_python_script
from .testutils import check_determinist_pickle
from .testutils import check_deterministic_pickle


_TEST_GLOBAL_VARIABLE = "default_value"
Expand Down Expand Up @@ -1995,7 +1995,7 @@ def test_method(arg_1, arg_2):
with subprocess_worker(protocol=self.protocol) as w:

A_dump = w.run(get_dynamic_func_pickle)
check_determinist_pickle(A_dump, get_dynamic_func_pickle())
check_deterministic_pickle(A_dump, get_dynamic_func_pickle())

def test_dynamic_class_determinist(self):
# Check that the pickle serialization for a dynamic class is the same
Expand All @@ -2018,11 +2018,14 @@ def test_method(self, arg_1, join):
with subprocess_worker(protocol=self.protocol) as w:

A_dump = w.run(get_dynamic_class_pickle)
check_determinist_pickle(A_dump, get_dynamic_class_pickle())
check_deterministic_pickle(A_dump, get_dynamic_class_pickle())

def test_dynamic_class_determinist_subworker_order(self):
# Check that the pickle produced by the unpickled instance is the same.
# This checks that the order of the class attributes is deterministic.
def test_deterministic_dynamic_class_attr_ordering_for_chained_pickling(self):
# Check that the pickle produced by pickling a reconstructed class definition
# in a remote process matches the pickle produced by pickling the original
# class definition.
# In particular, this test checks that the order of the class attributes is
# deterministic.

with subprocess_worker(protocol=self.protocol) as w:

Expand All @@ -2032,31 +2035,34 @@ class A:
pass

A_dump = w.run(cloudpickle.dumps, A)
check_determinist_pickle(A_dump, cloudpickle.dumps(A))
check_deterministic_pickle(A_dump, cloudpickle.dumps(A))

# If the doc is defined after some class variables, this can cause ordering changes
# due to the way we reconstruct the class with _make_class_skeleton, which create
# the class and thus `__doc__` before populating it.
# If the `__doc__` attribute is defined after some other class attribute,
# this can cause class attribute ordering changes due to the way we reconstruct
# the class definition in `_make_class_skeleton`, which creates the class
# and thus its `__doc__` attribute before populating the class attributes.
class A:
name = "A"
__doc__ = "Updated class definition"

A_dump = w.run(cloudpickle.dumps, A)
check_determinist_pickle(A_dump, cloudpickle.dumps(A))
check_deterministic_pickle(A_dump, cloudpickle.dumps(A))

# If the doc is defined in `__init__`, this can cause ordering changes due to the way
# we reconstruct the class with _make_class_skeleton. Make sure the order is deterministic
# If a `__doc__` is defined on the `__init__` method, this can cause ordering
# changes due to the way we reconstruct the class with `_make_class_skeleton`.
class A:
def __init__(self):
"""Class definition with explicit __init__"""
pass

A_dump = w.run(cloudpickle.dumps, A)
check_determinist_pickle(A_dump, cloudpickle.dumps(A))
check_deterministic_pickle(A_dump, cloudpickle.dumps(A))

def test_dynamic_class_determinist_subworker_str_interning(self):
def test_deterministic_str_interning_for_chained_dynamic_class_pickling(self):
# Check that the pickle produced by the unpickled instance is the same.
# This checks that there is no issue due to string interning.
# This checks that there is no issue related to the string interning of the
# names of attributes of class definitions and names of attributes of the
# `__code__` objects of the methods.

with subprocess_worker(protocol=self.protocol) as w:
# Due to interning of class attributes, check that this does not create issues
Expand Down Expand Up @@ -2086,7 +2092,11 @@ def join(self, arg_1):

A_dump = w.run(cloudpickle.dumps, A)
pytest.xfail(
"This test is expected to fail due to string interning errors."
"This test is expected to fail due to a string interning difference: "
"the first payloads shares the same string instance both for the "
"'join' method name and the literal value in 'arg_1' while this is "
"not the case for the payload generated by the reconstruction of the "
"class definition on the remote Python process."
)
check_determinist_pickle(A_dump, cloudpickle.dumps(A))

Expand All @@ -2109,7 +2119,7 @@ def func2(self,):
pass

A_dump = w.run(cloudpickle.dumps, A)
check_determinist_pickle(A_dump, cloudpickle.dumps(A))
check_deterministic_pickle(A_dump, cloudpickle.dumps(A))

@pytest.mark.skipif(
platform.python_implementation() == "PyPy",
Expand Down
15 changes: 12 additions & 3 deletions tests/testutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,8 +215,8 @@ def assert_run_python_script(source_code, timeout=TIMEOUT):
os.unlink(source_file)


def check_determinist_pickle(a, b):
"""Check that two pickle output are the same.
def check_deterministic_pickle(a, b):
"""Check that two pickle output are bitwise equal.
If it is not the case, print the diff between the disassembled pickle
payloads.
Expand All @@ -235,7 +235,16 @@ def check_determinist_pickle(a, b):
pickletools.dis(pickletools.optimize(b), out)
b_out = out.getvalue()
b_out = "\n".join(ll[11:] for ll in b_out.splitlines())
assert a_out == b_out
full_diff = difflib.context_diff(
a_out.splitlines(keepends=True), b_out.splitlines(keepends=True)
)
full_diff = "".join(full_diff)
if len(full_diff) > 1000:
full_diff = full_diff[:994] + " [...]"
raise AssertionError(
"Pickle payloads are not bitwise equal:\n"
+ full_diff
)


if __name__ == "__main__":
Expand Down

0 comments on commit cadb9f2

Please sign in to comment.