Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG, SIMD: Fix memory overlap in ufunc comparison loops #22867

Merged
merged 2 commits into from Dec 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 0 additions & 13 deletions numpy/core/src/umath/fast_loop_macros.h
Expand Up @@ -378,19 +378,6 @@ abs_ptrdiff(char *a, char *b)

#undef abs_ptrdiff

#define IS_BLOCKABLE_BINARY_BOOL(esize, vsize) \
(steps[0] == (esize) && steps[0] == steps[1] && steps[2] == (1) && \
npy_is_aligned(args[1], (esize)) && \
npy_is_aligned(args[0], (esize)))

#define IS_BLOCKABLE_BINARY_SCALAR1_BOOL(esize, vsize) \
(steps[0] == 0 && steps[1] == (esize) && steps[2] == (1) && \
npy_is_aligned(args[1], (esize)))

#define IS_BLOCKABLE_BINARY_SCALAR2_BOOL(esize, vsize) \
(steps[0] == (esize) && steps[1] == 0 && steps[2] == (1) && \
npy_is_aligned(args[0], (esize)))

/* align var to alignment */
#define LOOP_BLOCK_ALIGN_VAR(var, type, alignment)\
npy_intp i, peel = npy_aligned_block_offset(var, sizeof(type),\
Expand Down
30 changes: 17 additions & 13 deletions numpy/core/src/umath/loops_comparison.dispatch.c.src
Expand Up @@ -312,19 +312,23 @@ static NPY_INLINE void
run_binary_simd_@kind@_@sfx@(char **args, npy_intp const *dimensions, npy_intp const *steps)
{
#if @VECTOR@
/* argument one scalar */
if (IS_BLOCKABLE_BINARY_SCALAR1_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) {
simd_binary_scalar1_@kind@_@sfx@(args, dimensions[0]);
return;
}
/* argument two scalar */
else if (IS_BLOCKABLE_BINARY_SCALAR2_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) {
simd_binary_scalar2_@kind@_@sfx@(args, dimensions[0]);
return;
}
else if (IS_BLOCKABLE_BINARY_BOOL(sizeof(@type@), NPY_SIMD_WIDTH)) {
simd_binary_@kind@_@sfx@(args, dimensions[0]);
return;
if (!is_mem_overlap(args[0], steps[0], args[2], steps[2], dimensions[0]) &&
!is_mem_overlap(args[1], steps[1], args[2], steps[2], dimensions[0])
) {
/* argument one scalar */
if (IS_BINARY_CONT_S1(@type@, npy_bool)) {
simd_binary_scalar1_@kind@_@sfx@(args, dimensions[0]);
return;
}
/* argument two scalar */
else if (IS_BINARY_CONT_S2(@type@, npy_bool)) {
simd_binary_scalar2_@kind@_@sfx@(args, dimensions[0]);
return;
}
else if (IS_BINARY_CONT(@type@, npy_bool)) {
simd_binary_@kind@_@sfx@(args, dimensions[0]);
return;
}
}
#endif

Expand Down
40 changes: 40 additions & 0 deletions numpy/core/tests/test_umath.py
Expand Up @@ -31,6 +31,13 @@
uf for uf in UFUNCS_UNARY if 'f->f' in uf.types
]

UFUNCS_BINARY = [
uf for uf in UFUNCS if uf.nin == 2
]
UFUNCS_BINARY_ACC = [
uf for uf in UFUNCS_BINARY if hasattr(uf, "accumulate") and uf.nout == 1
]

def interesting_binop_operands(val1, val2, dtype):
"""
Helper to create "interesting" operands to cover common code paths:
Expand Down Expand Up @@ -4273,6 +4280,7 @@ def test_rint_big_int():
# Rint should not change the value
assert_equal(val, np.rint(val))


@pytest.mark.parametrize('ftype', [np.float32, np.float64])
def test_memoverlap_accumulate(ftype):
# Reproduces bug https://github.com/numpy/numpy/issues/15597
Expand All @@ -4282,6 +4290,38 @@ def test_memoverlap_accumulate(ftype):
assert_equal(np.maximum.accumulate(arr), out_max)
assert_equal(np.minimum.accumulate(arr), out_min)

@pytest.mark.parametrize("ufunc, dtype", [
(ufunc, t[0])
for ufunc in UFUNCS_BINARY_ACC
for t in ufunc.types
if t[-1] == '?' and t[0] not in 'DFGMmO'
])
def test_memoverlap_accumulate_cmp(ufunc, dtype):
if ufunc.signature:
pytest.skip('For generic signatures only')
for size in (2, 8, 32, 64, 128, 256):
arr = np.array([0, 1, 1]*size, dtype=dtype)
acc = ufunc.accumulate(arr, dtype='?')
acc_u8 = acc.view(np.uint8)
exp = np.array(list(itertools.accumulate(arr, ufunc)), dtype=np.uint8)
assert_equal(exp, acc_u8)

@pytest.mark.parametrize("ufunc, dtype", [
(ufunc, t[0])
for ufunc in UFUNCS_BINARY_ACC
for t in ufunc.types
if t[0] == t[1] and t[0] == t[-1] and t[0] not in 'DFGMmO?'
])
def test_memoverlap_accumulate_symmetric(ufunc, dtype):
if ufunc.signature:
pytest.skip('For generic signatures only')
with np.errstate(all='ignore'):
for size in (2, 8, 32, 64, 128, 256):
arr = np.array([0, 1, 2]*size).astype(dtype)
acc = ufunc.accumulate(arr, dtype=dtype)
exp = np.array(list(itertools.accumulate(arr, ufunc)), dtype=dtype)
assert_equal(exp, acc)

def test_signaling_nan_exceptions():
with assert_no_warnings():
a = np.ndarray(shape=(), dtype='float32', buffer=b'\x00\xe0\xbf\xff')
Expand Down