BUG: add missing elementsize alignment check for simd reductions

E.g. doubles are only aligned to 4 bytes on i386, so one cannot peel them to 16-byte alignment. Closes gh-4853.
juliantaylor · Aug 4, 2014 · 6674294 · 6674294
1 parent 763aeea
commit 6674294
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 1 deletion.
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
@@ -39,7 +39,9 @@ void PyUFunc_clearfperr(void);
      ((abs(args[1] - args[0]) >= (vsize)) || ((abs(args[1] - args[0]) == 0))))
 
 #define IS_BLOCKABLE_REDUCE(esize, vsize) \
-    (steps[1] == (esize) && abs(args[1] - args[0]) >= (vsize))
+    (steps[1] == (esize) && abs(args[1] - args[0]) >= (vsize) && \
+     npy_is_aligned(args[1], (esize)) && \
+     npy_is_aligned(args[0], (esize)))
 
 #define IS_BLOCKABLE_BINARY(esize, vsize) \
     (steps[0] == steps[1] && steps[1] == steps[2] && steps[2] == (esize) && \

diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py
@@ -75,6 +75,18 @@ def test_blocked(self):
                 np.add(1, inp2, out=out)
                 assert_almost_equal(out, exp1, err_msg=msg)
 
+    def test_lower_align(self):
+        # check data that is not aligned to element size
+        # e.g. doubles are only aligned to 4 bytes on i386
+        d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64)
+        o = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64)
+        assert_almost_equal(d + d, d * 2)
+        np.add(d, d, out=o)
+        np.add(np.ones_like(d), d, out=o)
+        np.add(d, np.ones_like(d), out=o)
+        np.add(np.ones_like(d), d)
+        np.add(d, np.ones_like(d))
+
 
 class TestPower(TestCase):
     def test_small_types(self):

diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
@@ -711,6 +711,12 @@ def test_minmax_blocked(self):
                     inp[i] = -1e10
                     assert_equal(inp.min(), -1e10, err_msg=msg)
 
+    def test_lower_align(self):
+        # check data that is not aligned to element size
+        # e.g. doubles are only aligned to 4 bytes on i386
+        d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64)
+        assert_equal(d.max(), d[0])
+        assert_equal(d.min(), d[0])
 
 
 class TestAbsolute(TestCase):
@@ -736,6 +742,21 @@ def test_abs_blocked(self):
                             np.abs(inp, out=out)
                             assert_array_equal(out, d, err_msg=msg)
 
+                            assert_array_equal(-inp, -1*inp, err_msg=msg)
+                            np.negative(inp, out=out)
+                            assert_array_equal(out, -1*inp, err_msg=msg)
+
+    def test_lower_align(self):
+        # check data that is not aligned to element size
+        # e.g. doubles are only aligned to 4 bytes on i386
+        d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64)
+        assert_equal(np.abs(d), d)
+        assert_equal(np.negative(d), -d)
+        np.negative(d, out=d)
+        np.negative(np.ones_like(d), out=d)
+        np.abs(d, out=d)
+        np.abs(np.ones_like(d), out=d)
+
 
 class TestSpecialMethods(TestCase):
     def test_wrap(self):