From 6674294f82d5eb6563dd0ab0fffb6aebb7d2ca74 Mon Sep 17 00:00:00 2001 From: Julian Taylor Date: Tue, 8 Jul 2014 23:35:59 +0200 Subject: [PATCH] BUG: add missing elementsize alignment check for simd reductions e.g. doubles are only aligned to 4 bytes on i386 so one cannot peel them to 16 byte alignment. Closes gh-4853 --- numpy/core/src/umath/simd.inc.src | 4 +++- numpy/core/tests/test_scalarmath.py | 12 ++++++++++++ numpy/core/tests/test_umath.py | 21 +++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index e274e05969aa..61600dcaf998 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -39,7 +39,9 @@ void PyUFunc_clearfperr(void); ((abs(args[1] - args[0]) >= (vsize)) || ((abs(args[1] - args[0]) == 0)))) #define IS_BLOCKABLE_REDUCE(esize, vsize) \ - (steps[1] == (esize) && abs(args[1] - args[0]) >= (vsize)) + (steps[1] == (esize) && abs(args[1] - args[0]) >= (vsize) && \ + npy_is_aligned(args[1], (esize)) && \ + npy_is_aligned(args[0], (esize))) #define IS_BLOCKABLE_BINARY(esize, vsize) \ (steps[0] == steps[1] && steps[1] == steps[2] && steps[2] == (esize) && \ diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py index a17b4a312fa6..ea18b96f1d2f 100644 --- a/numpy/core/tests/test_scalarmath.py +++ b/numpy/core/tests/test_scalarmath.py @@ -75,6 +75,18 @@ def test_blocked(self): np.add(1, inp2, out=out) assert_almost_equal(out, exp1, err_msg=msg) + def test_lower_align(self): + # check data that is not aligned to element size + # i.e doubles are aligned to 4 bytes on i386 + d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64) + o = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64) + assert_almost_equal(d + d, d * 2) + np.add(d, d, out=o) + np.add(np.ones_like(d), d, out=o) + np.add(d, np.ones_like(d), out=o) + np.add(np.ones_like(d), d) + np.add(d, np.ones_like(d)) + class TestPower(TestCase): def test_small_types(self): diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 9e98aed7b4cf..a98fa4c8ab9c 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -711,6 +711,12 @@ def test_minmax_blocked(self): inp[i] = -1e10 assert_equal(inp.min(), -1e10, err_msg=msg) + def test_lower_align(self): + # check data that is not aligned to element size + # i.e doubles are aligned to 4 bytes on i386 + d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64) + assert_equal(d.max(), d[0]) + assert_equal(d.min(), d[0]) class TestAbsolute(TestCase): @@ -736,6 +742,21 @@ def test_abs_blocked(self): np.abs(inp, out=out) assert_array_equal(out, d, err_msg=msg) + assert_array_equal(-inp, -1*inp, err_msg=msg) + np.negative(inp, out=out) + assert_array_equal(out, -1*inp, err_msg=msg) + + def test_lower_align(self): + # check data that is not aligned to element size + # i.e doubles are aligned to 4 bytes on i386 + d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64) + assert_equal(np.abs(d), d) + assert_equal(np.negative(d), -d) + np.negative(d, out=d) + np.negative(np.ones_like(d), out=d) + np.abs(d, out=d) + np.abs(np.ones_like(d), out=d) + class TestSpecialMethods(TestCase): def test_wrap(self):