From 452d4cdf8c197db24954a62cd573fb227e7cac73 Mon Sep 17 00:00:00 2001 From: Pradipta Ghosh Date: Wed, 5 May 2021 02:49:49 -0700 Subject: [PATCH 01/12] Fixing byte-order consistency/missmatch for cross-endian platform - Addressing https://github.com/joblib/joblib/issues/1123 --- joblib/numpy_pickle.py | 5 +++++ joblib/numpy_pickle_compat.py | 6 +++++- joblib/numpy_pickle_utils.py | 12 ++++++++++++ joblib/test/test_numpy_pickle.py | 7 +++++++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/joblib/numpy_pickle.py b/joblib/numpy_pickle.py index 93e5537ea..c9f098228 100644 --- a/joblib/numpy_pickle.py +++ b/joblib/numpy_pickle.py @@ -20,6 +20,7 @@ from .numpy_pickle_utils import Unpickler, Pickler from .numpy_pickle_utils import _read_fileobject, _write_fileobject from .numpy_pickle_utils import _read_bytes, BUFFER_SIZE +from .numpy_pickle_utils import _is_numpy_array_byte_order_mismatch from .numpy_pickle_compat import load_compatibility from .numpy_pickle_compat import NDArrayWrapper # For compatibility with old versions of joblib, we need ZNDArrayWrapper @@ -147,6 +148,10 @@ def read_array(self, unpickler): else: array.shape = self.shape + # Detect byte order mis-match and swap as needed. + if _is_numpy_array_byte_order_mismatch(array): + array = array.byteswap().newbyteorder('=') + return array def read_mmap(self, unpickler): diff --git a/joblib/numpy_pickle_compat.py b/joblib/numpy_pickle_compat.py index 6541a066a..32bcc4ed1 100644 --- a/joblib/numpy_pickle_compat.py +++ b/joblib/numpy_pickle_compat.py @@ -9,7 +9,7 @@ from .numpy_pickle_utils import _ZFILE_PREFIX from .numpy_pickle_utils import Unpickler - +from .numpy_pickle_utils import _is_numpy_array_byte_order_mismatch def hex_str(an_int): """Convert an int to an hexadecimal string.""" @@ -105,6 +105,10 @@ def read(self, unpickler): kwargs["allow_pickle"] = True array = unpickler.np.load(filename, **kwargs) + # Detect byte order mis-match and swap as needed. + if _is_numpy_array_byte_order_mismatch(array): + array = array.byteswap().newbyteorder('=') + # Reconstruct subclasses. This does not work with old # versions of numpy if (hasattr(array, '__array_prepare__') and diff --git a/joblib/numpy_pickle_utils.py b/joblib/numpy_pickle_utils.py index a50105547..a34c5bf3f 100644 --- a/joblib/numpy_pickle_utils.py +++ b/joblib/numpy_pickle_utils.py @@ -6,6 +6,7 @@ import pickle import io +import sys import warnings import contextlib @@ -47,6 +48,17 @@ def _get_prefixes_max_len(): prefixes += [len(_ZFILE_PREFIX)] return max(prefixes) +def _is_numpy_array_byte_order_mismatch(array): + """Check if numpy array is having byte order mis-match""" + return \ + (sys.byteorder == 'big' and \ + (array.dtype.byteorder == '<' or \ + (array.dtype.byteorder == '|' and array.dtype.fields and \ + all(e[0].byteorder == '<' for e in array.dtype.fields.values())))) or \ + (sys.byteorder == 'little' and \ + (array.dtype.byteorder == '>' or \ + (array.dtype.byteorder == '|' and array.dtype.fields and \ + all(e[0].byteorder == '>' for e in array.dtype.fields.values())))) ############################################################################### # Cache file utilities diff --git a/joblib/test/test_numpy_pickle.py b/joblib/test/test_numpy_pickle.py index db130b1f4..9a8c6cb4e 100644 --- a/joblib/test/test_numpy_pickle.py +++ b/joblib/test/test_numpy_pickle.py @@ -30,6 +30,7 @@ from joblib.numpy_pickle_utils import _IO_BUFFER_SIZE from joblib.numpy_pickle_utils import _detect_compressor +from joblib.numpy_pickle_utils import _is_numpy_array_byte_order_mismatch from joblib.compressor import (_COMPRESSORS, _LZ4_PREFIX, CompressorWrapper, LZ4_NOT_INSTALLED_ERROR, BinaryZlibFile) @@ -355,6 +356,10 @@ def test_compressed_pickle_dump_and_load(tmpdir): result_list = numpy_pickle.load(fname) for result, expected in zip(result_list, expected_list): if isinstance(expected, np.ndarray): + + if _is_numpy_array_byte_order_mismatch(expected): + expected = expected.byteswap().newbyteorder('=') + assert result.dtype == expected.dtype np.testing.assert_equal(result, expected) else: @@ -394,6 +399,8 @@ def _check_pickle(filename, expected_list): "pickle file.".format(filename)) for result, expected in zip(result_list, expected_list): if isinstance(expected, np.ndarray): + if _is_numpy_array_byte_order_mismatch(expected): + expected = expected.byteswap().newbyteorder('=') assert result.dtype == expected.dtype np.testing.assert_equal(result, expected) else: From d449d1045bc25a39095054065ec87a3e698f8ce1 Mon Sep 17 00:00:00 2001 From: Pradipta Ghosh Date: Wed, 5 May 2021 03:39:44 -0700 Subject: [PATCH 02/12] Fixed linting related failure --- joblib/numpy_pickle.py | 2 +- joblib/numpy_pickle_utils.py | 17 +++++++++-------- joblib/test/test_numpy_pickle.py | 4 +--- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/joblib/numpy_pickle.py b/joblib/numpy_pickle.py index c9f098228..9efad4937 100644 --- a/joblib/numpy_pickle.py +++ b/joblib/numpy_pickle.py @@ -150,7 +150,7 @@ def read_array(self, unpickler): # Detect byte order mis-match and swap as needed. if _is_numpy_array_byte_order_mismatch(array): - array = array.byteswap().newbyteorder('=') + array = array.byteswap().newbyteorder('=') return array diff --git a/joblib/numpy_pickle_utils.py b/joblib/numpy_pickle_utils.py index a34c5bf3f..27d7eb38e 100644 --- a/joblib/numpy_pickle_utils.py +++ b/joblib/numpy_pickle_utils.py @@ -48,17 +48,18 @@ def _get_prefixes_max_len(): prefixes += [len(_ZFILE_PREFIX)] return max(prefixes) + def _is_numpy_array_byte_order_mismatch(array): """Check if numpy array is having byte order mis-match""" return \ - (sys.byteorder == 'big' and \ - (array.dtype.byteorder == '<' or \ - (array.dtype.byteorder == '|' and array.dtype.fields and \ - all(e[0].byteorder == '<' for e in array.dtype.fields.values())))) or \ - (sys.byteorder == 'little' and \ - (array.dtype.byteorder == '>' or \ - (array.dtype.byteorder == '|' and array.dtype.fields and \ - all(e[0].byteorder == '>' for e in array.dtype.fields.values())))) + (sys.byteorder == 'big' and + (array.dtype.byteorder == '<' or + (array.dtype.byteorder == '|' and array.dtype.fields and + all(e[0].byteorder == '<' for e in array.dtype.fields.values())))) or \ + (sys.byteorder == 'little' and + (array.dtype.byteorder == '>' or + (array.dtype.byteorder == '|' and array.dtype.fields and + all(e[0].byteorder == '>' for e in array.dtype.fields.values())))) ############################################################################### # Cache file utilities diff --git a/joblib/test/test_numpy_pickle.py b/joblib/test/test_numpy_pickle.py index 9a8c6cb4e..b418b7264 100644 --- a/joblib/test/test_numpy_pickle.py +++ b/joblib/test/test_numpy_pickle.py @@ -356,10 +356,8 @@ def test_compressed_pickle_dump_and_load(tmpdir): result_list = numpy_pickle.load(fname) for result, expected in zip(result_list, expected_list): if isinstance(expected, np.ndarray): - if _is_numpy_array_byte_order_mismatch(expected): - expected = expected.byteswap().newbyteorder('=') - + expected = expected.byteswap().newbyteorder('=') assert result.dtype == expected.dtype np.testing.assert_equal(result, expected) else: From eedb8a6ea719ca8c68c935b14980568c73316cca Mon Sep 17 00:00:00 2001 From: Pradipta Ghosh Date: Wed, 5 May 2021 04:47:12 -0700 Subject: [PATCH 03/12] fix linting issue --- joblib/numpy_pickle_utils.py | 19 ++++++++++--------- joblib/test/test_numpy_pickle.py | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/joblib/numpy_pickle_utils.py b/joblib/numpy_pickle_utils.py index 27d7eb38e..a0a3bf800 100644 --- a/joblib/numpy_pickle_utils.py +++ b/joblib/numpy_pickle_utils.py @@ -51,15 +51,16 @@ def _get_prefixes_max_len(): def _is_numpy_array_byte_order_mismatch(array): """Check if numpy array is having byte order mis-match""" - return \ - (sys.byteorder == 'big' and - (array.dtype.byteorder == '<' or - (array.dtype.byteorder == '|' and array.dtype.fields and - all(e[0].byteorder == '<' for e in array.dtype.fields.values())))) or \ - (sys.byteorder == 'little' and - (array.dtype.byteorder == '>' or - (array.dtype.byteorder == '|' and array.dtype.fields and - all(e[0].byteorder == '>' for e in array.dtype.fields.values())))) + return ((sys.byteorder == 'big' and + (array.dtype.byteorder == '<' or + (array.dtype.byteorder == '|' and array.dtype.fields and + all(e[0].byteorder == '<' + for e in array.dtype.fields.values())))) or + (sys.byteorder == 'little' and + (array.dtype.byteorder == '>' or + (array.dtype.byteorder == '|' and array.dtype.fields and + all(e[0].byteorder == '>' + for e in array.dtype.fields.values()))))) ############################################################################### # Cache file utilities diff --git a/joblib/test/test_numpy_pickle.py b/joblib/test/test_numpy_pickle.py index b418b7264..496e3e6a9 100644 --- a/joblib/test/test_numpy_pickle.py +++ b/joblib/test/test_numpy_pickle.py @@ -398,7 +398,7 @@ def _check_pickle(filename, expected_list): for result, expected in zip(result_list, expected_list): if isinstance(expected, np.ndarray): if _is_numpy_array_byte_order_mismatch(expected): - expected = expected.byteswap().newbyteorder('=') + expected = expected.byteswap().newbyteorder('=') assert result.dtype == expected.dtype np.testing.assert_equal(result, expected) else: From 01c88e72e2f707bd10af538dde2465c90576b25c Mon Sep 17 00:00:00 2001 From: Pradipta Ghosh Date: Wed, 5 May 2021 05:24:09 -0700 Subject: [PATCH 04/12] fix another linting issue --- joblib/numpy_pickle_utils.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/joblib/numpy_pickle_utils.py b/joblib/numpy_pickle_utils.py index a0a3bf800..998b8393f 100644 --- a/joblib/numpy_pickle_utils.py +++ b/joblib/numpy_pickle_utils.py @@ -51,16 +51,16 @@ def _get_prefixes_max_len(): def _is_numpy_array_byte_order_mismatch(array): """Check if numpy array is having byte order mis-match""" - return ((sys.byteorder == 'big' and - (array.dtype.byteorder == '<' or - (array.dtype.byteorder == '|' and array.dtype.fields and - all(e[0].byteorder == '<' - for e in array.dtype.fields.values())))) or - (sys.byteorder == 'little' and - (array.dtype.byteorder == '>' or - (array.dtype.byteorder == '|' and array.dtype.fields and - all(e[0].byteorder == '>' - for e in array.dtype.fields.values()))))) + return ((sys.byteorder == 'big' + and (array.dtype.byteorder == '<' + or (array.dtype.byteorder == '|' and array.dtype.fields + and all(e[0].byteorder == '<' + for e in array.dtype.fields.values())))) + or (sys.byteorder == 'little' + and (array.dtype.byteorder == '>' + or (array.dtype.byteorder == '|' and array.dtype.fields + and all(e[0].byteorder == '>' + for e in array.dtype.fields.values()))))) ############################################################################### # Cache file utilities From c30274341fe03bbedecb449b4bff60c0183fb05e Mon Sep 17 00:00:00 2001 From: Pradipta Ghosh Date: Wed, 5 May 2021 05:58:02 -0700 Subject: [PATCH 05/12] Fix some more linting issue --- joblib/numpy_pickle_utils.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/joblib/numpy_pickle_utils.py b/joblib/numpy_pickle_utils.py index 998b8393f..9545cddd6 100644 --- a/joblib/numpy_pickle_utils.py +++ b/joblib/numpy_pickle_utils.py @@ -51,16 +51,16 @@ def _get_prefixes_max_len(): def _is_numpy_array_byte_order_mismatch(array): """Check if numpy array is having byte order mis-match""" - return ((sys.byteorder == 'big' - and (array.dtype.byteorder == '<' - or (array.dtype.byteorder == '|' and array.dtype.fields - and all(e[0].byteorder == '<' - for e in array.dtype.fields.values())))) - or (sys.byteorder == 'little' - and (array.dtype.byteorder == '>' - or (array.dtype.byteorder == '|' and array.dtype.fields - and all(e[0].byteorder == '>' - for e in array.dtype.fields.values()))))) + return ((sys.byteorder == 'big' and + (array.dtype.byteorder == '<' or + (array.dtype.byteorder == '|' and array.dtype.fields and + all(e[0].byteorder == '<' + for e in array.dtype.fields.values())))) or + (sys.byteorder == 'little' and + (array.dtype.byteorder == '>' or + (array.dtype.byteorder == '|' and array.dtype.fields and + all(e[0].byteorder == '>' + for e in array.dtype.fields.values()))))) ############################################################################### # Cache file utilities From d9e5adbcc5478159310ebd58b3c070c5feea7e7d Mon Sep 17 00:00:00 2001 From: Pradipta Ghosh Date: Wed, 5 May 2021 06:35:21 -0700 Subject: [PATCH 06/12] Added CHANGES.rst --- CHANGES.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 93144b324..3aac4476e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,10 @@ Latest changes Development version ------------------- +- Fix byte order inconsistency issue during deserialization using joblib.load + in cross-endian environment. + https://github.com/joblib/joblib/pull/1181 + - Fix joblib.Memory bug with the ``ignore`` parameter when the cached function is a decorated function. https://github.com/joblib/joblib/pull/1165 From acc886e0de78bff7a5a0e86e0aa026b5256806e1 Mon Sep 17 00:00:00 2001 From: Pradipta Ghosh Date: Tue, 8 Jun 2021 13:20:34 -0700 Subject: [PATCH 07/12] unit test added for _is_numpy_array_byte_order_mismatch --- joblib/test/test_numpy_pickle.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/joblib/test/test_numpy_pickle.py b/joblib/test/test_numpy_pickle.py index 496e3e6a9..8f5ebb2be 100644 --- a/joblib/test/test_numpy_pickle.py +++ b/joblib/test/test_numpy_pickle.py @@ -5,6 +5,7 @@ import random import re import io +import sys import warnings import gzip import zlib @@ -462,6 +463,33 @@ def test_joblib_pickle_across_python_versions(): _check_pickle(fname, expected_list) +@with_numpy +def test_numpy_array_byte_order_mismatch_detection(): + # List of numpy arrays with big endian byteorder. + be_arrays = [np.array([(1,2.0),(3,4.0)],dtype=[('','>i8'),('','>f8')]), + np.arange(3, dtype=np.dtype('>i8')), + np.arange(3, dtype=np.dtype('>f8'))] + + # Verify the byteorder mismatch is correctly detected. + for array in be_arrays: + if sys.byteorder == 'big': + assert _is_numpy_array_byte_order_mismatch(array) == False + else: + assert _is_numpy_array_byte_order_mismatch(array) == True + + # List of numpy arrays with little endian byteorder. + le_arrays = [np.array([(1,2.0),(3,4.0)],dtype=[('',' Date: Tue, 8 Jun 2021 13:49:33 -0700 Subject: [PATCH 08/12] Fix lint issues --- joblib/test/test_numpy_pickle.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/joblib/test/test_numpy_pickle.py b/joblib/test/test_numpy_pickle.py index 8f5ebb2be..2e9d928a2 100644 --- a/joblib/test/test_numpy_pickle.py +++ b/joblib/test/test_numpy_pickle.py @@ -466,28 +466,28 @@ def test_joblib_pickle_across_python_versions(): @with_numpy def test_numpy_array_byte_order_mismatch_detection(): # List of numpy arrays with big endian byteorder. - be_arrays = [np.array([(1,2.0),(3,4.0)],dtype=[('','>i8'),('','>f8')]), + be_arrays = [np.array([(1, 2.0), (3, 4.0)], dtype=[('', '>i8'), ('', '>f8')]), np.arange(3, dtype=np.dtype('>i8')), np.arange(3, dtype=np.dtype('>f8'))] # Verify the byteorder mismatch is correctly detected. for array in be_arrays: if sys.byteorder == 'big': - assert _is_numpy_array_byte_order_mismatch(array) == False + assert _is_numpy_array_byte_order_mismatch(array) is False else: - assert _is_numpy_array_byte_order_mismatch(array) == True + assert _is_numpy_array_byte_order_mismatch(array) is True # List of numpy arrays with little endian byteorder. - le_arrays = [np.array([(1,2.0),(3,4.0)],dtype=[('',' Date: Tue, 8 Jun 2021 14:00:50 -0700 Subject: [PATCH 09/12] Minor fix --- joblib/test/test_numpy_pickle.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/joblib/test/test_numpy_pickle.py b/joblib/test/test_numpy_pickle.py index 2e9d928a2..f26459160 100644 --- a/joblib/test/test_numpy_pickle.py +++ b/joblib/test/test_numpy_pickle.py @@ -466,19 +466,21 @@ def test_joblib_pickle_across_python_versions(): @with_numpy def test_numpy_array_byte_order_mismatch_detection(): # List of numpy arrays with big endian byteorder. - be_arrays = [np.array([(1, 2.0), (3, 4.0)], dtype=[('', '>i8'), ('', '>f8')]), + be_arrays = [np.array([(1, 2.0), (3, 4.0)], + dtype=[('', '>i8'), ('', '>f8')]), np.arange(3, dtype=np.dtype('>i8')), np.arange(3, dtype=np.dtype('>f8'))] # Verify the byteorder mismatch is correctly detected. for array in be_arrays: if sys.byteorder == 'big': - assert _is_numpy_array_byte_order_mismatch(array) is False + assert _is_numpy_array_byte_order_mismatch(array) is False else: - assert _is_numpy_array_byte_order_mismatch(array) is True + assert _is_numpy_array_byte_order_mismatch(array) is True # List of numpy arrays with little endian byteorder. - le_arrays = [np.array([(1, 2.0), (3, 4.0)], dtype=[('', ' Date: Fri, 11 Jun 2021 11:22:18 +0200 Subject: [PATCH 10/12] Apply suggestions from code review --- joblib/test/test_numpy_pickle.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/joblib/test/test_numpy_pickle.py b/joblib/test/test_numpy_pickle.py index f26459160..0e4104780 100644 --- a/joblib/test/test_numpy_pickle.py +++ b/joblib/test/test_numpy_pickle.py @@ -474,9 +474,9 @@ def test_numpy_array_byte_order_mismatch_detection(): # Verify the byteorder mismatch is correctly detected. for array in be_arrays: if sys.byteorder == 'big': - assert _is_numpy_array_byte_order_mismatch(array) is False + assert not _is_numpy_array_byte_order_mismatch(array) else: - assert _is_numpy_array_byte_order_mismatch(array) is True + assert _is_numpy_array_byte_order_mismatch(array) # List of numpy arrays with little endian byteorder. le_arrays = [np.array([(1, 2.0), (3, 4.0)], @@ -487,9 +487,9 @@ def test_numpy_array_byte_order_mismatch_detection(): # Verify the byteorder mismatch is correctly detected. for array in le_arrays: if sys.byteorder == 'little': - assert _is_numpy_array_byte_order_mismatch(array) is False + assert not _is_numpy_array_byte_order_mismatch(array) else: - assert _is_numpy_array_byte_order_mismatch(array) is True + assert _is_numpy_array_byte_order_mismatch(array) @parametrize('compress_tuple', [('zlib', 3), ('gzip', 3)]) From 950f791224afe3c9dbde28ad0e8359aa1850b84f Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 11 Jun 2021 12:14:15 +0200 Subject: [PATCH 11/12] Factorize code in _ensure_native_byte_order helper function --- joblib/numpy_pickle.py | 7 ++----- joblib/numpy_pickle_compat.py | 5 ++--- joblib/numpy_pickle_utils.py | 11 +++++++++++ joblib/test/test_numpy_pickle.py | 19 +++++++++++++++---- 4 files changed, 30 insertions(+), 12 deletions(-) diff --git a/joblib/numpy_pickle.py b/joblib/numpy_pickle.py index 9efad4937..cc593af22 100644 --- a/joblib/numpy_pickle.py +++ b/joblib/numpy_pickle.py @@ -20,7 +20,7 @@ from .numpy_pickle_utils import Unpickler, Pickler from .numpy_pickle_utils import _read_fileobject, _write_fileobject from .numpy_pickle_utils import _read_bytes, BUFFER_SIZE -from .numpy_pickle_utils import _is_numpy_array_byte_order_mismatch +from .numpy_pickle_utils import _ensure_native_byte_order from .numpy_pickle_compat import load_compatibility from .numpy_pickle_compat import NDArrayWrapper # For compatibility with old versions of joblib, we need ZNDArrayWrapper @@ -149,10 +149,7 @@ def read_array(self, unpickler): array.shape = self.shape # Detect byte order mis-match and swap as needed. - if _is_numpy_array_byte_order_mismatch(array): - array = array.byteswap().newbyteorder('=') - - return array + return _ensure_native_byte_order(array) def read_mmap(self, unpickler): """Read an array using numpy memmap.""" diff --git a/joblib/numpy_pickle_compat.py b/joblib/numpy_pickle_compat.py index 32bcc4ed1..096acbcf0 100644 --- a/joblib/numpy_pickle_compat.py +++ b/joblib/numpy_pickle_compat.py @@ -9,7 +9,7 @@ from .numpy_pickle_utils import _ZFILE_PREFIX from .numpy_pickle_utils import Unpickler -from .numpy_pickle_utils import _is_numpy_array_byte_order_mismatch +from .numpy_pickle_utils import _ensure_native_byte_order def hex_str(an_int): """Convert an int to an hexadecimal string.""" @@ -106,8 +106,7 @@ def read(self, unpickler): array = unpickler.np.load(filename, **kwargs) # Detect byte order mis-match and swap as needed. - if _is_numpy_array_byte_order_mismatch(array): - array = array.byteswap().newbyteorder('=') + array = _ensure_native_byte_order(array) # Reconstruct subclasses. This does not work with old # versions of numpy diff --git a/joblib/numpy_pickle_utils.py b/joblib/numpy_pickle_utils.py index 9545cddd6..02a88ffaf 100644 --- a/joblib/numpy_pickle_utils.py +++ b/joblib/numpy_pickle_utils.py @@ -62,6 +62,17 @@ def _is_numpy_array_byte_order_mismatch(array): all(e[0].byteorder == '>' for e in array.dtype.fields.values()))))) + +def _ensure_native_byte_order(array): + """Use the byte order of the host while preserving values + + Does nothing if array already uses the system byte order. + """ + if _is_numpy_array_byte_order_mismatch(array): + array = array.byteswap().newbyteorder('=') + return array + + ############################################################################### # Cache file utilities def _detect_compressor(fileobj): diff --git a/joblib/test/test_numpy_pickle.py b/joblib/test/test_numpy_pickle.py index 0e4104780..7cdd90f28 100644 --- a/joblib/test/test_numpy_pickle.py +++ b/joblib/test/test_numpy_pickle.py @@ -32,6 +32,7 @@ from joblib.numpy_pickle_utils import _IO_BUFFER_SIZE from joblib.numpy_pickle_utils import _detect_compressor from joblib.numpy_pickle_utils import _is_numpy_array_byte_order_mismatch +from joblib.numpy_pickle_utils import _ensure_native_byte_order from joblib.compressor import (_COMPRESSORS, _LZ4_PREFIX, CompressorWrapper, LZ4_NOT_INSTALLED_ERROR, BinaryZlibFile) @@ -357,8 +358,7 @@ def test_compressed_pickle_dump_and_load(tmpdir): result_list = numpy_pickle.load(fname) for result, expected in zip(result_list, expected_list): if isinstance(expected, np.ndarray): - if _is_numpy_array_byte_order_mismatch(expected): - expected = expected.byteswap().newbyteorder('=') + expected = _ensure_native_byte_order(expected) assert result.dtype == expected.dtype np.testing.assert_equal(result, expected) else: @@ -398,8 +398,7 @@ def _check_pickle(filename, expected_list): "pickle file.".format(filename)) for result, expected in zip(result_list, expected_list): if isinstance(expected, np.ndarray): - if _is_numpy_array_byte_order_mismatch(expected): - expected = expected.byteswap().newbyteorder('=') + expected = _ensure_native_byte_order(expected) assert result.dtype == expected.dtype np.testing.assert_equal(result, expected) else: @@ -477,6 +476,12 @@ def test_numpy_array_byte_order_mismatch_detection(): assert not _is_numpy_array_byte_order_mismatch(array) else: assert _is_numpy_array_byte_order_mismatch(array) + converted = _ensure_native_byte_order(array) + if converted.dtype.fields: + for f in converted.dtype.fields.values(): + f[0].byteorder == '=' + else: + assert converted.dtype.byteorder == "=" # List of numpy arrays with little endian byteorder. le_arrays = [np.array([(1, 2.0), (3, 4.0)], @@ -490,6 +495,12 @@ def test_numpy_array_byte_order_mismatch_detection(): assert not _is_numpy_array_byte_order_mismatch(array) else: assert _is_numpy_array_byte_order_mismatch(array) + converted = _ensure_native_byte_order(array) + if converted.dtype.fields: + for f in converted.dtype.fields.values(): + f[0].byteorder == '=' + else: + assert converted.dtype.byteorder == "=" @parametrize('compress_tuple', [('zlib', 3), ('gzip', 3)]) From f8a2f166d54fe0c2ae15715da5f4a499b7ded7d3 Mon Sep 17 00:00:00 2001 From: Olivier Grisel Date: Fri, 11 Jun 2021 12:16:14 +0200 Subject: [PATCH 12/12] More explicit changelog entry --- CHANGES.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 3aac4476e..ccb276e94 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,7 +5,9 @@ Development version ------------------- - Fix byte order inconsistency issue during deserialization using joblib.load - in cross-endian environment. + in cross-endian environment: the numpy arrays are now always loaded to + use the system byte order, independently of the byte order of the system + that serialized the pickle. https://github.com/joblib/joblib/pull/1181 - Fix joblib.Memory bug with the ``ignore`` parameter when the cached function