-
-
Notifications
You must be signed in to change notification settings - Fork 9.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
BUG: Fix timedelta*float and median/percentile/quantile NaT handling #21726
Changes from all commits
f7d286c
9a7a625
7610433
30c21ca
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1124,15 +1124,17 @@ TIMEDELTA_md_m_multiply(char **args, npy_intp const *dimensions, npy_intp const | |
BINARY_LOOP { | ||
const npy_timedelta in1 = *(npy_timedelta *)ip1; | ||
const double in2 = *(double *)ip2; | ||
if (in1 == NPY_DATETIME_NAT) { | ||
if (in1 == NPY_DATETIME_NAT || npy_isnan(in2)) { | ||
*((npy_timedelta *)op1) = NPY_DATETIME_NAT; | ||
} | ||
else { | ||
double result = in1 * in2; | ||
if (npy_isfinite(result)) { | ||
*((npy_timedelta *)op1) = (npy_timedelta)result; | ||
} | ||
else { | ||
/* `nearbyint` avoids warnings (should not matter here, though) */ | ||
double result = nearbyint(in1 * in2); | ||
npy_timedelta int_res = (npy_timedelta)result; | ||
*((npy_timedelta *)op1) = int_res; | ||
if ((double)int_res != result || int_res == NPY_DATETIME_NAT) { | ||
/* Conversion creates a new NaT from non-NaN/NaT input */ | ||
npy_set_floatstatus_invalid(); | ||
*((npy_timedelta *)op1) = NPY_DATETIME_NAT; | ||
} | ||
} | ||
|
@@ -1145,15 +1147,17 @@ TIMEDELTA_dm_m_multiply(char **args, npy_intp const *dimensions, npy_intp const | |
BINARY_LOOP { | ||
const double in1 = *(double *)ip1; | ||
const npy_timedelta in2 = *(npy_timedelta *)ip2; | ||
if (in2 == NPY_DATETIME_NAT) { | ||
if (in2 == NPY_DATETIME_NAT || npy_isnan(in1)) { | ||
*((npy_timedelta *)op1) = NPY_DATETIME_NAT; | ||
} | ||
else { | ||
double result = in1 * in2; | ||
if (npy_isfinite(result)) { | ||
*((npy_timedelta *)op1) = (npy_timedelta)result; | ||
} | ||
else { | ||
/* `nearbyint` avoids warnings (should not matter here, though) */ | ||
double result = nearbyint(in1 * in2); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure about this, and also: is this actually enough? The input is effectively int64, so by casting it to double we throw away precision, which means that:
Whoops. So maybe the whole expectation that this rounds correctly is too much, or maybe we should consider the above a serious bug? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I think this should be considered a bug if and only if the analogous behavior in int64 is considered a bug:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, it is the same thing. But yeah, maybe it is just what it is; as Chuck keeps saying, maybe we really need a high-precision floating point time format (double-double, or whatever)... |
||
npy_timedelta int_res = (npy_timedelta)result; | ||
*((npy_timedelta *)op1) = int_res; | ||
if ((double)int_res != result || int_res == NPY_DATETIME_NAT) { | ||
/* Conversion creates a new NaT from non-NaN/NaT input */ | ||
npy_set_floatstatus_invalid(); | ||
*((npy_timedelta *)op1) = NPY_DATETIME_NAT; | ||
} | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3836,8 +3836,10 @@ def _median(a, axis=None, out=None, overwrite_input=False): | |
kth = [szh - 1, szh] | ||
else: | ||
kth = [(sz - 1) // 2] | ||
# Check if the array contains any nan's | ||
if np.issubdtype(a.dtype, np.inexact): | ||
|
||
# Check if the array contains any nan's or NaT's (unordered values) | ||
supports_nans = np.issubdtype(a.dtype, np.inexact) or a.dtype.kind == 'm' | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If you want to support dt64 here, something like:
? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, we could. I won't make it a priority though (i.e. we can follow up if we want these changes and allow median). We could also replace the call to |
||
if supports_nans: | ||
kth.append(-1) | ||
|
||
if overwrite_input: | ||
|
@@ -3869,7 +3871,7 @@ def _median(a, axis=None, out=None, overwrite_input=False): | |
# using out array if needed. | ||
rout = mean(part[indexer], axis=axis, out=out) | ||
# Check if the array contains any nan's | ||
if np.issubdtype(a.dtype, np.inexact) and sz > 0: | ||
if supports_nans and sz > 0: | ||
# If nans are possible, warn and replace by nans like mean would. | ||
rout = np.lib.utils._median_nancheck(part, rout, axis) | ||
|
||
|
@@ -4655,11 +4657,16 @@ def _quantile( | |
# --- Setup | ||
arr = np.asanyarray(arr) | ||
values_count = arr.shape[axis] | ||
|
||
# Check if the array contains any nan's or NaT's (unordered values) | ||
supports_nans = ( | ||
np.issubdtype(arr.dtype, np.inexact) or arr.dtype.kind in 'Mm') | ||
|
||
# The dimensions of `q` are prepended to the output shape, so we need the | ||
# axis being sampled from `arr` to be last. | ||
DATA_AXIS = 0 | ||
if axis != DATA_AXIS: # But moveaxis is slow, so only call it if axis!=0. | ||
arr = np.moveaxis(arr, axis, destination=DATA_AXIS) | ||
|
||
if axis != 0: # But moveaxis is slow, so only call it if necessary. | ||
arr = np.moveaxis(arr, axis, destination=0) | ||
# --- Computation of indexes | ||
# Index where to find the value in the sorted array. | ||
# Virtual because it is a floating point value, not a valid index. | ||
|
@@ -4674,14 +4681,14 @@ def _quantile( | |
virtual_indexes = np.asanyarray(virtual_indexes) | ||
if np.issubdtype(virtual_indexes.dtype, np.integer): | ||
# No interpolation needed, take the points along axis | ||
if np.issubdtype(arr.dtype, np.inexact): | ||
if supports_nans: | ||
# may contain nan, which would sort to the end | ||
arr.partition(concatenate((virtual_indexes.ravel(), [-1])), axis=0) | ||
slices_having_nans = np.isnan(arr[-1]) | ||
slices_having_nans = np.isnan(arr[-1, ...]) | ||
else: | ||
# cannot contain nan | ||
arr.partition(virtual_indexes.ravel(), axis=0) | ||
slices_having_nans = np.array(False, dtype=bool) | ||
slices_having_nans = False | ||
result = take(arr, virtual_indexes, axis=0, out=out) | ||
else: | ||
previous_indexes, next_indexes = _get_indexes(arr, | ||
|
@@ -4693,16 +4700,14 @@ def _quantile( | |
previous_indexes.ravel(), | ||
next_indexes.ravel(), | ||
))), | ||
axis=DATA_AXIS) | ||
if np.issubdtype(arr.dtype, np.inexact): | ||
slices_having_nans = np.isnan( | ||
take(arr, indices=-1, axis=DATA_AXIS) | ||
) | ||
axis=0) | ||
if supports_nans: | ||
slices_having_nans = np.isnan(arr[-1, ...]) | ||
else: | ||
slices_having_nans = None | ||
slices_having_nans = False | ||
# --- Get values from indexes | ||
previous = np.take(arr, previous_indexes, axis=DATA_AXIS) | ||
next = np.take(arr, next_indexes, axis=DATA_AXIS) | ||
previous = arr[previous_indexes] | ||
next = arr[next_indexes] | ||
# --- Linear interpolation | ||
gamma = _get_gamma(virtual_indexes, previous_indexes, method) | ||
result_shape = virtual_indexes.shape + (1,) * (arr.ndim - 1) | ||
|
@@ -4711,10 +4716,13 @@ def _quantile( | |
next, | ||
gamma, | ||
out=out) | ||
if np.any(slices_having_nans): | ||
|
||
if slices_having_nans is not False and slices_having_nans.any(): | ||
if result.ndim == 0 and out is None: | ||
# can't write to a scalar | ||
result = arr.dtype.type(np.nan) | ||
result = arr[-1] | ||
elif result.dtype.kind in 'mM': | ||
result[..., slices_having_nans] = "NaT" | ||
else: | ||
result[..., slices_having_nans] = np.nan | ||
return result | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is `nearbyint` similar to Python's `round`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Roughly speaking, yes. Of course it will round C-style. It is really the same thing as `np.rint`, except that it should give a warning (which should not really matter. If it sets a warning it should be the one we set anyway, but...)