Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixing byte-order consistency/mismatch for cross-endian platform #1181

Merged
merged 12 commits into from Jun 11, 2021
4 changes: 4 additions & 0 deletions CHANGES.rst
Expand Up @@ -4,6 +4,10 @@ Latest changes
Development version
-------------------

- Fix a byte-order inconsistency when deserializing arrays with
  ``joblib.load`` in a cross-endian environment.
  https://github.com/joblib/joblib/pull/1181

- Fix joblib.Memory bug with the ``ignore`` parameter when the cached function
is a decorated function.
https://github.com/joblib/joblib/pull/1165
Expand Down
5 changes: 5 additions & 0 deletions joblib/numpy_pickle.py
Expand Up @@ -20,6 +20,7 @@
from .numpy_pickle_utils import Unpickler, Pickler
from .numpy_pickle_utils import _read_fileobject, _write_fileobject
from .numpy_pickle_utils import _read_bytes, BUFFER_SIZE
from .numpy_pickle_utils import _is_numpy_array_byte_order_mismatch
from .numpy_pickle_compat import load_compatibility
from .numpy_pickle_compat import NDArrayWrapper
# For compatibility with old versions of joblib, we need ZNDArrayWrapper
Expand Down Expand Up @@ -147,6 +148,10 @@ def read_array(self, unpickler):
else:
array.shape = self.shape

# Detect byte order mis-match and swap as needed.
if _is_numpy_array_byte_order_mismatch(array):
array = array.byteswap().newbyteorder('=')

return array

def read_mmap(self, unpickler):
Expand Down
6 changes: 5 additions & 1 deletion joblib/numpy_pickle_compat.py
Expand Up @@ -9,7 +9,7 @@

from .numpy_pickle_utils import _ZFILE_PREFIX
from .numpy_pickle_utils import Unpickler

from .numpy_pickle_utils import _is_numpy_array_byte_order_mismatch

def hex_str(an_int):
"""Convert an int to an hexadecimal string."""
Expand Down Expand Up @@ -105,6 +105,10 @@ def read(self, unpickler):
kwargs["allow_pickle"] = True
array = unpickler.np.load(filename, **kwargs)

# Detect byte order mis-match and swap as needed.
if _is_numpy_array_byte_order_mismatch(array):
array = array.byteswap().newbyteorder('=')

# Reconstruct subclasses. This does not work with old
# versions of numpy
if (hasattr(array, '__array_prepare__') and
Expand Down
14 changes: 14 additions & 0 deletions joblib/numpy_pickle_utils.py
Expand Up @@ -6,6 +6,7 @@

import pickle
import io
import sys
import warnings
import contextlib

Expand Down Expand Up @@ -48,6 +49,19 @@ def _get_prefixes_max_len():
return max(prefixes)


def _is_numpy_array_byte_order_mismatch(array):
"""Check if numpy array is having byte order mis-match"""
return ((sys.byteorder == 'big' and
(array.dtype.byteorder == '<' or
(array.dtype.byteorder == '|' and array.dtype.fields and
all(e[0].byteorder == '<'
for e in array.dtype.fields.values())))) or
(sys.byteorder == 'little' and
(array.dtype.byteorder == '>' or
(array.dtype.byteorder == '|' and array.dtype.fields and
all(e[0].byteorder == '>'
for e in array.dtype.fields.values())))))

###############################################################################
# Cache file utilities
def _detect_compressor(fileobj):
Expand Down
35 changes: 35 additions & 0 deletions joblib/test/test_numpy_pickle.py
Expand Up @@ -5,6 +5,7 @@
import random
import re
import io
import sys
import warnings
import gzip
import zlib
Expand All @@ -30,6 +31,7 @@

from joblib.numpy_pickle_utils import _IO_BUFFER_SIZE
from joblib.numpy_pickle_utils import _detect_compressor
from joblib.numpy_pickle_utils import _is_numpy_array_byte_order_mismatch
from joblib.compressor import (_COMPRESSORS, _LZ4_PREFIX, CompressorWrapper,
LZ4_NOT_INSTALLED_ERROR, BinaryZlibFile)

Expand Down Expand Up @@ -355,6 +357,8 @@ def test_compressed_pickle_dump_and_load(tmpdir):
result_list = numpy_pickle.load(fname)
for result, expected in zip(result_list, expected_list):
if isinstance(expected, np.ndarray):
if _is_numpy_array_byte_order_mismatch(expected):
expected = expected.byteswap().newbyteorder('=')
assert result.dtype == expected.dtype
np.testing.assert_equal(result, expected)
else:
Expand Down Expand Up @@ -394,6 +398,8 @@ def _check_pickle(filename, expected_list):
"pickle file.".format(filename))
for result, expected in zip(result_list, expected_list):
if isinstance(expected, np.ndarray):
if _is_numpy_array_byte_order_mismatch(expected):
expected = expected.byteswap().newbyteorder('=')
assert result.dtype == expected.dtype
np.testing.assert_equal(result, expected)
else:
Expand Down Expand Up @@ -457,6 +463,35 @@ def test_joblib_pickle_across_python_versions():
_check_pickle(fname, expected_list)


@with_numpy
def test_numpy_array_byte_order_mismatch_detection():
    # Sample arrays with an explicit byte order: a structured dtype whose
    # fields share the endianness, plus plain integer and float dtypes.
    arrays_by_endianness = {
        'big': [np.array([(1, 2.0), (3, 4.0)],
                         dtype=[('', '>i8'), ('', '>f8')]),
                np.arange(3, dtype=np.dtype('>i8')),
                np.arange(3, dtype=np.dtype('>f8'))],
        'little': [np.array([(1, 2.0), (3, 4.0)],
                            dtype=[('', '<i8'), ('', '<f8')]),
                   np.arange(3, dtype=np.dtype('<i8')),
                   np.arange(3, dtype=np.dtype('<f8'))],
    }

    # A mismatch is reported exactly when the array's endianness differs
    # from the native byte order of the running platform.
    for endianness, arrays in arrays_by_endianness.items():
        if sys.byteorder == endianness:
            for array in arrays:
                assert _is_numpy_array_byte_order_mismatch(array) is False
        else:
            for array in arrays:
                assert _is_numpy_array_byte_order_mismatch(array) is True


@parametrize('compress_tuple', [('zlib', 3), ('gzip', 3)])
def test_compress_tuple_argument(tmpdir, compress_tuple):
# Verify the tuple is correctly taken into account.
Expand Down