From 6674294f82d5eb6563dd0ab0fffb6aebb7d2ca74 Mon Sep 17 00:00:00 2001
From: Julian Taylor <jtaylor.debian@googlemail.com>
Date: Tue, 8 Jul 2014 23:35:59 +0200
Subject: [PATCH] BUG: add missing elementsize alignment check for simd
 reductions

e.g. doubles are only guaranteed 4 byte alignment on i386, so one cannot
always peel them to 16 byte alignment.
Closes gh-4853
---
 numpy/core/src/umath/simd.inc.src   |  4 +++-
 numpy/core/tests/test_scalarmath.py | 12 ++++++++++++
 numpy/core/tests/test_umath.py      | 21 +++++++++++++++++++++
 3 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index e274e05969aa..61600dcaf998 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -39,7 +39,9 @@ void PyUFunc_clearfperr(void);
      ((abs(args[1] - args[0]) >= (vsize)) || ((abs(args[1] - args[0]) == 0))))
 
 #define IS_BLOCKABLE_REDUCE(esize, vsize) \
-    (steps[1] == (esize) && abs(args[1] - args[0]) >= (vsize))
+    (steps[1] == (esize) && abs(args[1] - args[0]) >= (vsize) && \
+     npy_is_aligned(args[1], (esize)) && \
+     npy_is_aligned(args[0], (esize)))
 
 #define IS_BLOCKABLE_BINARY(esize, vsize) \
     (steps[0] == steps[1] && steps[1] == steps[2] && steps[2] == (esize) && \
diff --git a/numpy/core/tests/test_scalarmath.py b/numpy/core/tests/test_scalarmath.py
index a17b4a312fa6..ea18b96f1d2f 100644
--- a/numpy/core/tests/test_scalarmath.py
+++ b/numpy/core/tests/test_scalarmath.py
@@ -75,6 +75,18 @@ def test_blocked(self):
                 np.add(1, inp2, out=out)
                 assert_almost_equal(out, exp1, err_msg=msg)
 
+    def test_lower_align(self):
+        # check data that is not aligned to element size
+        # e.g. doubles are only aligned to 4 bytes on i386
+        d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64)
+        o = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64)
+        assert_almost_equal(d + d, d * 2)
+        np.add(d, d, out=o)
+        np.add(np.ones_like(d), d, out=o)
+        np.add(d, np.ones_like(d), out=o)
+        np.add(np.ones_like(d), d)
+        np.add(d, np.ones_like(d))
+
 
 class TestPower(TestCase):
     def test_small_types(self):
diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py
index 9e98aed7b4cf..a98fa4c8ab9c 100644
--- a/numpy/core/tests/test_umath.py
+++ b/numpy/core/tests/test_umath.py
@@ -711,6 +711,12 @@ def test_minmax_blocked(self):
                     inp[i] = -1e10
                     assert_equal(inp.min(), -1e10, err_msg=msg)
 
+    def test_lower_align(self):
+        # check data that is not aligned to element size
+        # e.g. doubles are only aligned to 4 bytes on i386
+        d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64)
+        assert_equal(d.max(), d[0])
+        assert_equal(d.min(), d[0])
 
 
 class TestAbsolute(TestCase):
@@ -736,6 +742,21 @@ def test_abs_blocked(self):
                             np.abs(inp, out=out)
                             assert_array_equal(out, d, err_msg=msg)
 
+                            assert_array_equal(-inp, -1*inp, err_msg=msg)
+                            np.negative(inp, out=out)
+                            assert_array_equal(out, -1*inp, err_msg=msg)
+
+    def test_lower_align(self):
+        # check data that is not aligned to element size
+        # e.g. doubles are only aligned to 4 bytes on i386
+        d = np.zeros(23 * 8, dtype=np.int8)[4:-4].view(np.float64)
+        assert_equal(np.abs(d), d)
+        assert_equal(np.negative(d), -d)
+        np.negative(d, out=d)
+        np.negative(np.ones_like(d), out=d)
+        np.abs(d, out=d)
+        np.abs(np.ones_like(d), out=d)
+
 
 class TestSpecialMethods(TestCase):
     def test_wrap(self):