Skip to content

Commit

Permalink
Merge pull request #22867 from charris/backport-22851
Browse files Browse the repository at this point in the history
BUG, SIMD: Fix memory overlap in ufunc comparison loops
  • Loading branch information
charris committed Dec 22, 2022
2 parents 9bbf8c1 + 4c70288 commit d4b3bf5
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 26 deletions.
13 changes: 0 additions & 13 deletions numpy/core/src/umath/fast_loop_macros.h
Expand Up @@ -378,19 +378,6 @@ abs_ptrdiff(char *a, char *b)

#undef abs_ptrdiff

#define IS_BLOCKABLE_BINARY_BOOL(esize, vsize) \
(steps[0] == (esize) && steps[0] == steps[1] && steps[2] == (1) && \
npy_is_aligned(args[1], (esize)) && \
npy_is_aligned(args[0], (esize)))

#define IS_BLOCKABLE_BINARY_SCALAR1_BOOL(esize, vsize) \
(steps[0] == 0 && steps[1] == (esize) && steps[2] == (1) && \
npy_is_aligned(args[1], (esize)))

#define IS_BLOCKABLE_BINARY_SCALAR2_BOOL(esize, vsize) \
(steps[0] == (esize) && steps[1] == 0 && steps[2] == (1) && \
npy_is_aligned(args[0], (esize)))

/* align var to alignment */
#define LOOP_BLOCK_ALIGN_VAR(var, type, alignment)\
npy_intp i, peel = npy_aligned_block_offset(var, sizeof(type),\
Expand Down
30 changes: 17 additions & 13 deletions numpy/core/src/umath/loops_comparison.dispatch.c.src
Expand Up @@ -312,19 +312,23 @@ static NPY_INLINE void
run_binary_simd_@kind@_@sfx@(char **args, npy_intp const *dimensions, npy_intp const *steps)
{
#if @VECTOR@
/* argument one scalar */
if (IS_BLOCKABLE_BINARY_SCALAR1_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) {
simd_binary_scalar1_@kind@_@sfx@(args, dimensions[0]);
return;
}
/* argument two scalar */
else if (IS_BLOCKABLE_BINARY_SCALAR2_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) {
simd_binary_scalar2_@kind@_@sfx@(args, dimensions[0]);
return;
}
else if (IS_BLOCKABLE_BINARY_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) {
simd_binary_@kind@_@sfx@(args, dimensions[0]);
return;
if (!is_mem_overlap(args[0], steps[0], args[2], steps[2], dimensions[0]) &&
!is_mem_overlap(args[1], steps[1], args[2], steps[2], dimensions[0])
) {
/* argument one scalar */
if (IS_BINARY_CONT_S1(@type@, npy_bool)) {
simd_binary_scalar1_@kind@_@sfx@(args, dimensions[0]);
return;
}
/* argument two scalar */
else if (IS_BINARY_CONT_S2(@type@, npy_bool)) {
simd_binary_scalar2_@kind@_@sfx@(args, dimensions[0]);
return;
}
else if (IS_BINARY_CONT(@type@, npy_bool)) {
simd_binary_@kind@_@sfx@(args, dimensions[0]);
return;
}
}
#endif

Expand Down
40 changes: 40 additions & 0 deletions numpy/core/tests/test_umath.py
Expand Up @@ -31,6 +31,13 @@
uf for uf in UFUNCS_UNARY if 'f->f' in uf.types
]

UFUNCS_BINARY = [
uf for uf in UFUNCS if uf.nin == 2
]
UFUNCS_BINARY_ACC = [
uf for uf in UFUNCS_BINARY if hasattr(uf, "accumulate") and uf.nout == 1
]

def interesting_binop_operands(val1, val2, dtype):
"""
Helper to create "interesting" operands to cover common code paths:
Expand Down Expand Up @@ -4273,6 +4280,7 @@ def test_rint_big_int():
# Rint should not change the value
assert_equal(val, np.rint(val))


@pytest.mark.parametrize('ftype', [np.float32, np.float64])
def test_memoverlap_accumulate(ftype):
# Reproduces bug https://github.com/numpy/numpy/issues/15597
Expand All @@ -4282,6 +4290,38 @@ def test_memoverlap_accumulate(ftype):
assert_equal(np.maximum.accumulate(arr), out_max)
assert_equal(np.minimum.accumulate(arr), out_min)

@pytest.mark.parametrize("ufunc, dtype", [
(ufunc, t[0])
for ufunc in UFUNCS_BINARY_ACC
for t in ufunc.types
if t[-1] == '?' and t[0] not in 'DFGMmO'
])
def test_memoverlap_accumulate_cmp(ufunc, dtype):
if ufunc.signature:
pytest.skip('For generic signatures only')
for size in (2, 8, 32, 64, 128, 256):
arr = np.array([0, 1, 1]*size, dtype=dtype)
acc = ufunc.accumulate(arr, dtype='?')
acc_u8 = acc.view(np.uint8)
exp = np.array(list(itertools.accumulate(arr, ufunc)), dtype=np.uint8)
assert_equal(exp, acc_u8)

@pytest.mark.parametrize("ufunc, dtype", [
(ufunc, t[0])
for ufunc in UFUNCS_BINARY_ACC
for t in ufunc.types
if t[0] == t[1] and t[0] == t[-1] and t[0] not in 'DFGMmO?'
])
def test_memoverlap_accumulate_symmetric(ufunc, dtype):
if ufunc.signature:
pytest.skip('For generic signatures only')
with np.errstate(all='ignore'):
for size in (2, 8, 32, 64, 128, 256):
arr = np.array([0, 1, 2]*size).astype(dtype)
acc = ufunc.accumulate(arr, dtype=dtype)
exp = np.array(list(itertools.accumulate(arr, ufunc)), dtype=dtype)
assert_equal(exp, acc)

def test_signaling_nan_exceptions():
with assert_no_warnings():
a = np.ndarray(shape=(), dtype='float32', buffer=b'\x00\xe0\xbf\xff')
Expand Down

0 comments on commit d4b3bf5

Please sign in to comment.