Skip to content

Commit

Permalink
BUG: Better report integer division overflow (backport) (#22230)
Browse files Browse the repository at this point in the history
* BUG, SIMD: Handle overflow errors

Overflows for remainder/divmod/fmod

* If a type's minimum value is divided by -1, an overflow will be raised
  and result will be set to minimum
* Handle overflow and return 0 in case of mod related operations

* TST: New tests for overflow in division operations

* SIMD: Removed cvtozero

Co-authored-by: Rafael Cardoso Fernandes Sousa <rafaelcfsousa@ibm.com>

* TST: Removed eval | Fixed raises cases

* TST: Changed `raise` to `warns`

* Changed `raise` to `warns` and test for `RuntimeWarning`
* Added results check back

* TST: Add additional tests for division-by-zero and integer overflow

This introduces a helper to iterate through "interesting" array cases
that could maybe be used in other places.  Keep the other test intact,
it adds a check for mixed types (which is just casts currently, but
cannot hurt) and is otherwise thorough.

* MAINT: Remove nested NPY_UNLIKELY in division paths

I am not certain the unlikely cases make much sense to begin with,
but they are certainly not helpful within an unlikely block.

* TST: Add unsigned integers to integer divide-by-zero test

* BUG: Added missing `NAME` key to loop generator

* BUG, SIMD: Handle division overflow errors

* If a type's minimum value is divided by -1, an overflow will be raised
  and result will be set to minimum

* TST: Modified tests to reflect new overflow

* Update numpy/core/src/umath/loops_arithmetic.dispatch.c.src

Co-authored-by: Rafael Sousa <90851201+rafaelcfsousa@users.noreply.github.com>

Co-authored-by: Rafael Cardoso Fernandes Sousa <rafaelcfsousa@ibm.com>
Co-authored-by: Sebastian Berg <sebastian@sipsolutions.net>
Co-authored-by: Rafael Sousa <90851201+rafaelcfsousa@users.noreply.github.com>
  • Loading branch information
4 people committed Sep 8, 2022
1 parent 0f0d355 commit 9bf22bb
Show file tree
Hide file tree
Showing 5 changed files with 337 additions and 67 deletions.
49 changes: 32 additions & 17 deletions numpy/core/src/umath/loops_arithmetic.dispatch.c.src
Expand Up @@ -51,13 +51,14 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len)
const npyv_@sfx@x3 divisor = npyv_divisor_@sfx@(scalar);

if (scalar == -1) {
npyv_b@len@ noverflow = npyv_cvt_b@len@_@sfx@(npyv_setall_@sfx@(-1));
npyv_@sfx@ vzero = npyv_zero_@sfx@();
npyv_b@len@ noverflow = npyv_cvt_b@len@_@sfx@(npyv_setall_@sfx@(-1));
const npyv_@sfx@ vzero = npyv_zero_@sfx@();
const npyv_@sfx@ vmin = npyv_setall_@sfx@(NPY_MIN_INT@len@);
for (; len >= vstep; len -= vstep, src += vstep, dst += vstep) {
npyv_@sfx@ a = npyv_load_@sfx@(src);
npyv_b@len@ gt_min = npyv_cmpgt_@sfx@(a, npyv_setall_@sfx@(NPY_MIN_INT@len@));
noverflow = npyv_and_b@len@(noverflow, gt_min);
npyv_@sfx@ neg = npyv_ifsub_@sfx@(gt_min, vzero, a, vzero);
npyv_@sfx@ neg = npyv_ifsub_@sfx@(gt_min, vzero, a, vmin);
npyv_store_@sfx@(dst, neg);
}

Expand All @@ -66,13 +67,13 @@ simd_divide_by_scalar_contig_@sfx@(char **args, npy_intp len)
npyv_lanetype_@sfx@ a = *src;
if (a == NPY_MIN_INT@len@) {
raise_err = 1;
*dst = 0;
*dst = NPY_MIN_INT@len@;
} else {
*dst = -a;
}
}
if (raise_err) {
npy_set_floatstatus_divbyzero();
npy_set_floatstatus_overflow();
}
} else {
for (; len >= vstep; len -= vstep, src += vstep, dst += vstep) {
Expand Down Expand Up @@ -253,7 +254,8 @@ vsx4_simd_divide_contig_@sfx@(char **args, npy_intp len)
const npyv_@sfx@ vneg_one = npyv_setall_@sfx@(-1);
const npyv_@sfx@ vzero = npyv_zero_@sfx@();
const npyv_@sfx@ vmin = npyv_setall_@sfx@(NPY_MIN_INT@len@);
npyv_b@len@ warn = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
npyv_b@len@ warn_zero = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
npyv_b@len@ warn_overflow = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
const int vstep = npyv_nlanes_@sfx@;

for (; len >= vstep; len -= vstep, src1 += vstep, src2 += vstep,
Expand All @@ -267,10 +269,8 @@ vsx4_simd_divide_contig_@sfx@(char **args, npy_intp len)
npyv_b@len@ amin = npyv_cmpeq_@sfx@(a, vmin);
npyv_b@len@ bneg_one = npyv_cmpeq_@sfx@(b, vneg_one);
npyv_b@len@ overflow = npyv_and_@sfx@(bneg_one, amin);
npyv_b@len@ error = npyv_or_@sfx@(bzero, overflow);
// in case of overflow or b = 0, 'cvtozero' forces quo/rem to be 0
npyv_@sfx@ cvtozero = npyv_select_@sfx@(error, vzero, vneg_one);
warn = npyv_or_@sfx@(error, warn);
warn_zero = npyv_or_@sfx@(bzero, warn_zero);
warn_overflow = npyv_or_@sfx@(overflow, warn_overflow);
// handle mixed case the way Python does
// ((a > 0) == (b > 0) || rem == 0)
npyv_b@len@ a_gt_zero = npyv_cmpgt_@sfx@(a, vzero);
Expand All @@ -280,21 +280,30 @@ vsx4_simd_divide_contig_@sfx@(char **args, npy_intp len)
npyv_b@len@ or = npyv_or_@sfx@(ab_eq_cond, rem_zero);
npyv_@sfx@ to_sub = npyv_select_@sfx@(or, vzero, vneg_one);
quo = npyv_add_@sfx@(quo, to_sub);
npyv_store_@sfx@(dst1, npyv_and_@sfx@(cvtozero, quo));
// Divide by zero
quo = npyv_select_@sfx@(bzero, vzero, quo);
// Overflow
quo = npyv_select_@sfx@(overflow, vmin, quo);
npyv_store_@sfx@(dst1, quo);
}

if (!vec_all_eq(warn, vzero)) {
if (!vec_all_eq(warn_zero, vzero)) {
npy_set_floatstatus_divbyzero();
}
if (!vec_all_eq(warn_overflow, vzero)) {
npy_set_floatstatus_overflow();
}

for (; len > 0; --len, ++src1, ++src2, ++dst1) {
const npyv_lanetype_@sfx@ a = *src1;
const npyv_lanetype_@sfx@ b = *src2;
if (b == 0 || (a == NPY_MIN_INT@len@ && b == -1)) {
if (NPY_UNLIKELY(b == 0)) {
npy_set_floatstatus_divbyzero();
*dst1 = 0;
}
else {
} else if (NPY_UNLIKELY((a == NPY_MIN_INT@len@) && (b == -1))) {
npy_set_floatstatus_overflow();
*dst1 = NPY_MIN_INT@len@;
} else {
*dst1 = a / b;
if (((a > 0) != (b > 0)) && ((*dst1 * b) != a)) {
*dst1 -= 1;
Expand Down Expand Up @@ -340,8 +349,14 @@ NPY_FINLINE @type@ floor_div_@TYPE@(const @type@ n, const @type@ d)
* (i.e. a different approach than npy_set_floatstatus_divbyzero()).
*/
if (NPY_UNLIKELY(d == 0 || (n == NPY_MIN_@TYPE@ && d == -1))) {
npy_set_floatstatus_divbyzero();
return 0;
if (d == 0) {
npy_set_floatstatus_divbyzero();
return 0;
}
else {
npy_set_floatstatus_overflow();
return NPY_MIN_@TYPE@;
}
}
@type@ r = n / d;
// Negative quotients needs to be rounded down
Expand Down
115 changes: 80 additions & 35 deletions numpy/core/src/umath/loops_modulo.dispatch.c.src
Expand Up @@ -12,6 +12,21 @@
// Provides the various *_LOOP macros
#include "fast_loop_macros.h"


/*
 * True when the integer division x / y cannot be performed safely:
 * division by zero, or (for signed types) min_val / -1, which overflows
 * two's-complement representation. `is_signed` selects the extra check.
 * Every parameter is fully parenthesized so callers may pass compound
 * expressions (e.g. a bitwise expression as `y`) without precedence bugs,
 * and the parameter formerly named with the C keyword `signed` is renamed.
 */
#define DIVIDEBYZERO_OVERFLOW_CHECK(x, y, min_val, is_signed) \
    (NPY_UNLIKELY( \
        (is_signed) ? \
            (((y) == 0) || (((x) == (min_val)) && ((y) == -1))) : \
            ((y) == 0)) \
    )

/*
 * Raise the floating-point divide-by-zero status flag when x is zero.
 * The argument is parenthesized so compound expressions compare correctly.
 */
#define FLAG_IF_DIVIDEBYZERO(x) do {     \
    if (NPY_UNLIKELY((x) == 0)) {        \
        npy_set_floatstatus_divbyzero(); \
    }                                    \
} while (0)


#if NPY_SIMD && defined(NPY_HAVE_VSX4)
typedef struct {
npyv_u32x2 hi;
Expand Down Expand Up @@ -166,7 +181,6 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
const int vstep = npyv_nlanes_@sfx@;
#if @id@ == 2 /* divmod */
npyv_lanetype_@sfx@ *dst2 = (npyv_lanetype_@sfx@ *) args[3];
const npyv_@sfx@ vneg_one = npyv_setall_@sfx@(-1);
npyv_b@len@ warn = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());

for (; len >= vstep; len -= vstep, src1 += vstep, src2 += vstep,
Expand All @@ -176,11 +190,11 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
npyv_@sfx@ quo = vsx4_div_@sfx@(a, b);
npyv_@sfx@ rem = npyv_sub_@sfx@(a, vec_mul(b, quo));
npyv_b@len@ bzero = npyv_cmpeq_@sfx@(b, vzero);
// when b is 0, 'cvtozero' forces the modulo to be 0 too
npyv_@sfx@ cvtozero = npyv_select_@sfx@(bzero, vzero, vneg_one);
// when b is 0, forces the remainder to be 0 too
rem = npyv_select_@sfx@(bzero, vzero, rem);
warn = npyv_or_@sfx@(bzero, warn);
npyv_store_@sfx@(dst1, quo);
npyv_store_@sfx@(dst2, npyv_and_@sfx@(cvtozero, rem));
npyv_store_@sfx@(dst2, rem);
}

if (!vec_all_eq(warn, vzero)) {
Expand Down Expand Up @@ -290,7 +304,8 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
npyv_lanetype_@sfx@ *dst2 = (npyv_lanetype_@sfx@ *) args[3];
const npyv_@sfx@ vneg_one = npyv_setall_@sfx@(-1);
const npyv_@sfx@ vmin = npyv_setall_@sfx@(NPY_MIN_INT@len@);
npyv_b@len@ warn = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
npyv_b@len@ warn_zero = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());
npyv_b@len@ warn_overflow = npyv_cvt_b@len@_@sfx@(npyv_zero_@sfx@());

for (; len >= vstep; len -= vstep, src1 += vstep, src2 += vstep,
dst1 += vstep, dst2 += vstep) {
Expand All @@ -310,10 +325,8 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
npyv_b@len@ amin = npyv_cmpeq_@sfx@(a, vmin);
npyv_b@len@ bneg_one = npyv_cmpeq_@sfx@(b, vneg_one);
npyv_b@len@ overflow = npyv_and_@sfx@(bneg_one, amin);
npyv_b@len@ error = npyv_or_@sfx@(bzero, overflow);
// in case of overflow or b = 0, 'cvtozero' forces quo/rem to be 0
npyv_@sfx@ cvtozero = npyv_select_@sfx@(error, vzero, vneg_one);
warn = npyv_or_@sfx@(error, warn);
warn_zero = npyv_or_@sfx@(bzero, warn_zero);
warn_overflow = npyv_or_@sfx@(overflow, warn_overflow);
#endif
#if @id@ >= 1 /* remainder and divmod */
// handle mixed case the way Python does
Expand All @@ -329,8 +342,14 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
#if @id@ == 2 /* divmod */
npyv_@sfx@ to_sub = npyv_select_@sfx@(or, vzero, vneg_one);
quo = npyv_add_@sfx@(quo, to_sub);
npyv_store_@sfx@(dst1, npyv_and_@sfx@(cvtozero, quo));
npyv_store_@sfx@(dst2, npyv_and_@sfx@(cvtozero, rem));
// Divide by zero
quo = npyv_select_@sfx@(bzero, vzero, quo);
rem = npyv_select_@sfx@(bzero, vzero, rem);
// Overflow
quo = npyv_select_@sfx@(overflow, vmin, quo);
rem = npyv_select_@sfx@(overflow, vzero, rem);
npyv_store_@sfx@(dst1, quo);
npyv_store_@sfx@(dst2, rem);
#else /* fmod and remainder */
npyv_store_@sfx@(dst1, rem);
if (NPY_UNLIKELY(vec_any_eq(b, vzero))) {
Expand All @@ -340,17 +359,27 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
}

#if @id@ == 2 /* divmod */
if (!vec_all_eq(warn, vzero)) {
if (!vec_all_eq(warn_zero, vzero)) {
npy_set_floatstatus_divbyzero();
}
if (!vec_all_eq(warn_overflow, vzero)) {
npy_set_floatstatus_overflow();
}

for (; len > 0; --len, ++src1, ++src2, ++dst1, ++dst2) {
const npyv_lanetype_@sfx@ a = *src1;
const npyv_lanetype_@sfx@ b = *src2;
if (b == 0 || (a == NPY_MIN_INT@len@ && b == -1)) {
npy_set_floatstatus_divbyzero();
*dst1 = 0;
*dst2 = 0;
if (DIVIDEBYZERO_OVERFLOW_CHECK(a, b, NPY_MIN_INT@len@, NPY_TRUE)) {
if (b == 0) {
npy_set_floatstatus_divbyzero();
*dst1 = 0;
*dst2 = 0;
}
else {
npy_set_floatstatus_overflow();
*dst1 = NPY_MIN_INT@len@;
*dst2 = 0;
}
}
else {
*dst1 = a / b;
Expand All @@ -365,8 +394,8 @@ vsx4_simd_@func@_contig_@sfx@(char **args, npy_intp len)
for (; len > 0; --len, ++src1, ++src2, ++dst1) {
const npyv_lanetype_@sfx@ a = *src1;
const npyv_lanetype_@sfx@ b = *src2;
if (NPY_UNLIKELY(b == 0)) {
npy_set_floatstatus_divbyzero();
if (DIVIDEBYZERO_OVERFLOW_CHECK(a, b, NPY_MIN_INT@len@, NPY_TRUE)) {
FLAG_IF_DIVIDEBYZERO(b);
*dst1 = 0;
} else{
*dst1 = a % b;
Expand Down Expand Up @@ -415,8 +444,6 @@ vsx4_simd_@func@_by_scalar_contig_@sfx@(char **args, npy_intp len)
// (a == NPY_MIN_INT@len@ && b == -1)
npyv_b@len@ amin = npyv_cmpeq_@sfx@(a, vmin);
npyv_b@len@ overflow = npyv_and_@sfx@(bneg_one, amin);
// in case of overflow, 'cvtozero' forces quo/rem to be 0
npyv_@sfx@ cvtozero = npyv_select_@sfx@(overflow, vzero, vneg_one);
warn = npyv_or_@sfx@(overflow, warn);
#endif
#if @id@ >= 1 /* remainder and divmod */
Expand All @@ -432,23 +459,26 @@ vsx4_simd_@func@_by_scalar_contig_@sfx@(char **args, npy_intp len)
#if @id@ == 2 /* divmod */
npyv_@sfx@ to_sub = npyv_select_@sfx@(or, vzero, vneg_one);
quo = npyv_add_@sfx@(quo, to_sub);
npyv_store_@sfx@(dst1, npyv_and_@sfx@(cvtozero, quo));
npyv_store_@sfx@(dst2, npyv_and_@sfx@(cvtozero, rem));
// Overflow: set quo to minimum and rem to 0
quo = npyv_select_@sfx@(overflow, vmin, quo);
rem = npyv_select_@sfx@(overflow, vzero, rem);
npyv_store_@sfx@(dst1, quo);
npyv_store_@sfx@(dst2, rem);
#else /* fmod and remainder */
npyv_store_@sfx@(dst1, rem);
#endif
}

#if @id@ == 2 /* divmod */
if (!vec_all_eq(warn, vzero)) {
npy_set_floatstatus_divbyzero();
npy_set_floatstatus_overflow();
}

for (; len > 0; --len, ++src1, ++dst1, ++dst2) {
const npyv_lanetype_@sfx@ a = *src1;
if (a == NPY_MIN_INT@len@ && scalar == -1) {
npy_set_floatstatus_divbyzero();
*dst1 = 0;
if (NPY_UNLIKELY(a == NPY_MIN_INT@len@ && scalar == -1)) {
npy_set_floatstatus_overflow();
*dst1 = NPY_MIN_INT@len@;
*dst2 = 0;
}
else {
Expand Down Expand Up @@ -524,8 +554,12 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_fmod)
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
if (NPY_UNLIKELY(in2 == 0)) {
npy_set_floatstatus_divbyzero();
#if @signed@
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, NPY_MIN_@TYPE@, NPY_TRUE)) {
#else
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, 0, NPY_FALSE)) {
#endif
FLAG_IF_DIVIDEBYZERO(in2);
*((@type@ *)op1) = 0;
} else{
*((@type@ *)op1)= in1 % in2;
Expand All @@ -552,8 +586,12 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_remainder)
BINARY_LOOP {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
if (NPY_UNLIKELY(in2 == 0)) {
npy_set_floatstatus_divbyzero();
#if @signed@
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, NPY_MIN_@TYPE@, NPY_TRUE)) {
#else
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, 0, NPY_FALSE)) {
#endif
FLAG_IF_DIVIDEBYZERO(in2);
*((@type@ *)op1) = 0;
} else{
#if @signed@
Expand Down Expand Up @@ -593,10 +631,17 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_divmod)
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
/* see FIXME note for divide above */
if (NPY_UNLIKELY(in2 == 0 || (in1 == NPY_MIN_@TYPE@ && in2 == -1))) {
npy_set_floatstatus_divbyzero();
*((@type@ *)op1) = 0;
*((@type@ *)op2) = 0;
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, NPY_MIN_@TYPE@, NPY_TRUE)) {
if (in2 == 0) {
npy_set_floatstatus_divbyzero();
*((@type@ *)op1) = 0;
*((@type@ *)op2) = 0;
}
else {
npy_set_floatstatus_overflow();
*((@type@ *)op1) = NPY_MIN_@TYPE@;
*((@type@ *)op2) = 0;
}
}
else {
/* handle mixed case the way Python does */
Expand All @@ -616,7 +661,7 @@ NPY_NO_EXPORT void NPY_CPU_DISPATCH_CURFX(@TYPE@_divmod)
BINARY_LOOP_TWO_OUT {
const @type@ in1 = *(@type@ *)ip1;
const @type@ in2 = *(@type@ *)ip2;
if (NPY_UNLIKELY(in2 == 0)) {
if (DIVIDEBYZERO_OVERFLOW_CHECK(in1, in2, 0, NPY_FALSE)) {
npy_set_floatstatus_divbyzero();
*((@type@ *)op1) = 0;
*((@type@ *)op2) = 0;
Expand Down
16 changes: 13 additions & 3 deletions numpy/core/src/umath/scalarmath.c.src
Expand Up @@ -156,16 +156,25 @@ static NPY_INLINE int
* #type = npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
* npy_long, npy_ulong, npy_longlong, npy_ulonglong#
* #neg = (1,0)*5#
* #NAME = BYTE, UBYTE, SHORT, USHORT, INT, UINT,
* LONG, ULONG, LONGLONG, ULONGLONG#
*/

#if @neg@
/* NOTE(review): this macro deliberately captures `a` and `b` from the
 * expansion site (no parameters) — usable only where those locals exist.
 * Signed case also flags NPY_MIN_@NAME@ / -1, which would overflow. */
#define DIVIDEBYZERO_CHECK (b == 0 || (a == NPY_MIN_@NAME@ && b == -1))
#else
#define DIVIDEBYZERO_CHECK (b == 0)
#endif

static NPY_INLINE int
@name@_ctype_divide(@type@ a, @type@ b, @type@ *out) {
if (b == 0) {
*out = 0;
return NPY_FPE_DIVIDEBYZERO;
}
#if @neg@
else if (b == -1 && a < 0 && a == -a) {
*out = a / b;
else if (b == -1 && a == NPY_MIN_@NAME@) {
*out = NPY_MIN_@NAME@;
return NPY_FPE_OVERFLOW;
}
#endif
Expand All @@ -188,7 +197,7 @@ static NPY_INLINE int

static NPY_INLINE int
@name@_ctype_remainder(@type@ a, @type@ b, @type@ *out) {
if (a == 0 || b == 0) {
if (DIVIDEBYZERO_CHECK) {
*out = 0;
if (b == 0) {
return NPY_FPE_DIVIDEBYZERO;
Expand All @@ -209,6 +218,7 @@ static NPY_INLINE int
#endif
return 0;
}
#undef DIVIDEBYZERO_CHECK
/**end repeat**/

/**begin repeat
Expand Down
2 changes: 1 addition & 1 deletion numpy/core/tests/test_regression.py
Expand Up @@ -1496,7 +1496,7 @@ def test_type(t):
min = np.array([np.iinfo(t).min])
min //= -1

with np.errstate(divide="ignore"):
with np.errstate(over="ignore"):
for t in (np.int8, np.int16, np.int32, np.int64, int):
test_type(t)

Expand Down

0 comments on commit 9bf22bb

Please sign in to comment.