diff --git a/numpy/core/src/umath/fast_loop_macros.h b/numpy/core/src/umath/fast_loop_macros.h index cbd1f04aaacd..430033d31474 100644 --- a/numpy/core/src/umath/fast_loop_macros.h +++ b/numpy/core/src/umath/fast_loop_macros.h @@ -378,19 +378,6 @@ abs_ptrdiff(char *a, char *b) #undef abs_ptrdiff -#define IS_BLOCKABLE_BINARY_BOOL(esize, vsize) \ - (steps[0] == (esize) && steps[0] == steps[1] && steps[2] == (1) && \ - npy_is_aligned(args[1], (esize)) && \ - npy_is_aligned(args[0], (esize))) - -#define IS_BLOCKABLE_BINARY_SCALAR1_BOOL(esize, vsize) \ - (steps[0] == 0 && steps[1] == (esize) && steps[2] == (1) && \ - npy_is_aligned(args[1], (esize))) - -#define IS_BLOCKABLE_BINARY_SCALAR2_BOOL(esize, vsize) \ - (steps[0] == (esize) && steps[1] == 0 && steps[2] == (1) && \ - npy_is_aligned(args[0], (esize))) - /* align var to alignment */ #define LOOP_BLOCK_ALIGN_VAR(var, type, alignment)\ npy_intp i, peel = npy_aligned_block_offset(var, sizeof(type),\ diff --git a/numpy/core/src/umath/loops_comparison.dispatch.c.src b/numpy/core/src/umath/loops_comparison.dispatch.c.src index e87db067b07c..4a8eac4b9272 100644 --- a/numpy/core/src/umath/loops_comparison.dispatch.c.src +++ b/numpy/core/src/umath/loops_comparison.dispatch.c.src @@ -312,19 +312,23 @@ static NPY_INLINE void run_binary_simd_@kind@_@sfx@(char **args, npy_intp const *dimensions, npy_intp const *steps) { #if @VECTOR@ - /* argument one scalar */ - if (IS_BLOCKABLE_BINARY_SCALAR1_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) { - simd_binary_scalar1_@kind@_@sfx@(args, dimensions[0]); - return; - } - /* argument two scalar */ - else if (IS_BLOCKABLE_BINARY_SCALAR2_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) { - simd_binary_scalar2_@kind@_@sfx@(args, dimensions[0]); - return; - } - else if (IS_BLOCKABLE_BINARY_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) { - simd_binary_@kind@_@sfx@(args, dimensions[0]); - return; + if (!is_mem_overlap(args[0], steps[0], args[2], steps[2], dimensions[0]) && + !is_mem_overlap(args[1], steps[1], args[2], steps[2], dimensions[0]) + ) { + /* argument one scalar */ + if (IS_BINARY_CONT_S1(@type@, npy_bool)) { + simd_binary_scalar1_@kind@_@sfx@(args, dimensions[0]); + return; + } + /* argument two scalar */ + else if (IS_BINARY_CONT_S2(@type@, npy_bool)) { + simd_binary_scalar2_@kind@_@sfx@(args, dimensions[0]); + return; + } + else if (IS_BINARY_CONT(@type@, npy_bool)) { + simd_binary_@kind@_@sfx@(args, dimensions[0]); + return; + } } #endif diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index bcfed91a7586..10b09f963ab4 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -31,6 +31,13 @@ uf for uf in UFUNCS_UNARY if 'f->f' in uf.types ] +UFUNCS_BINARY = [ + uf for uf in UFUNCS if uf.nin == 2 +] +UFUNCS_BINARY_ACC = [ + uf for uf in UFUNCS_BINARY if hasattr(uf, "accumulate") and uf.nout == 1 +] + def interesting_binop_operands(val1, val2, dtype): """ Helper to create "interesting" operands to cover common code paths: @@ -4273,6 +4280,7 @@ def test_rint_big_int(): # Rint should not change the value assert_equal(val, np.rint(val)) + @pytest.mark.parametrize('ftype', [np.float32, np.float64]) def test_memoverlap_accumulate(ftype): # Reproduces bug https://github.com/numpy/numpy/issues/15597 @@ -4282,6 +4290,38 @@ def test_memoverlap_accumulate(ftype): assert_equal(np.maximum.accumulate(arr), out_max) assert_equal(np.minimum.accumulate(arr), out_min) +@pytest.mark.parametrize("ufunc, dtype", [ + (ufunc, t[0]) + for ufunc in UFUNCS_BINARY_ACC + for t in ufunc.types + if t[-1] == '?' and t[0] not in 'DFGMmO' +]) +def test_memoverlap_accumulate_cmp(ufunc, dtype): + if ufunc.signature: + pytest.skip('For generic signatures only') + for size in (2, 8, 32, 64, 128, 256): + arr = np.array([0, 1, 1]*size, dtype=dtype) + acc = ufunc.accumulate(arr, dtype='?') + acc_u8 = acc.view(np.uint8) + exp = np.array(list(itertools.accumulate(arr, ufunc)), dtype=np.uint8) + assert_equal(exp, acc_u8) + +@pytest.mark.parametrize("ufunc, dtype", [ + (ufunc, t[0]) + for ufunc in UFUNCS_BINARY_ACC + for t in ufunc.types + if t[0] == t[1] and t[0] == t[-1] and t[0] not in 'DFGMmO?' +]) +def test_memoverlap_accumulate_symmetric(ufunc, dtype): + if ufunc.signature: + pytest.skip('For generic signatures only') + with np.errstate(all='ignore'): + for size in (2, 8, 32, 64, 128, 256): + arr = np.array([0, 1, 2]*size).astype(dtype) + acc = ufunc.accumulate(arr, dtype=dtype) + exp = np.array(list(itertools.accumulate(arr, ufunc)), dtype=dtype) + assert_equal(exp, acc) + def test_signaling_nan_exceptions(): with assert_no_warnings(): a = np.ndarray(shape=(), dtype='float32', buffer=b'\x00\xe0\xbf\xff')