Skip to content

Commit

Permalink
CLN: follow-ups (#40000)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Feb 24, 2021
1 parent 8a307c1 commit 0b5c12d
Show file tree
Hide file tree
Showing 8 changed files with 38 additions and 22 deletions.
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Expand Up @@ -163,6 +163,11 @@ repos:
entry: np\.bool[^_8]
language: pygrep
types_or: [python, cython, rst]
- id: np-object
name: Check for use of np.object instead of np.object_
entry: np\.object[^_8]
language: pygrep
types_or: [python, cython, rst]
- id: no-os-remove
name: Check code for instances of os.remove
entry: os\.remove
Expand Down
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/algos/isin.py
Expand Up @@ -104,7 +104,7 @@ def time_isin(self, dtype, exponent, title):

class IsinWithRandomFloat:
params = [
[np.float64, np.object],
[np.float64, np.object_],
[
1_300,
2_000,
Expand Down Expand Up @@ -134,7 +134,7 @@ def time_isin(self, dtype, size, title):

class IsinWithArangeSorted:
params = [
[np.float64, np.int64, np.uint64, np.object],
[np.float64, np.int64, np.uint64, np.object_],
[
1_000,
2_000,
Expand All @@ -155,7 +155,7 @@ def time_isin(self, dtype, size):

class IsinWithArange:
params = [
[np.float64, np.int64, np.uint64, np.object],
[np.float64, np.int64, np.uint64, np.object_],
[
1_000,
2_000,
Expand Down
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v0.24.0.rst
Expand Up @@ -1755,8 +1755,8 @@ Missing

- Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`)
- Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object_``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object_``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
- :class:`DataFrame` and :class:`Series` now properly handle numpy masked arrays with hardened masks. Previously, constructing a DataFrame or Series from a masked array with a hard mask would create a pandas object containing the underlying value, rather than the expected NaN. (:issue:`24574`)
- Bug in :class:`DataFrame` constructor where ``dtype`` argument was not honored when handling numpy masked record arrays. (:issue:`24874`)

Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/lib.pyx
Expand Up @@ -1468,7 +1468,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
if is_decimal_array(values):
return "decimal"

elif is_complex(val):
elif util.is_complex_object(val):
if is_complex_array(values):
return "complex"

Expand Down
29 changes: 19 additions & 10 deletions pandas/core/array_algos/take.py
@@ -1,3 +1,5 @@
from __future__ import annotations

from typing import Optional

import numpy as np
Expand All @@ -6,6 +8,7 @@
algos as libalgos,
lib,
)
from pandas._typing import ArrayLike

from pandas.core.dtypes.cast import maybe_promote
from pandas.core.dtypes.common import (
Expand All @@ -14,20 +17,17 @@
)
from pandas.core.dtypes.missing import na_value_for_dtype

from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
extract_array,
)
from pandas.core.construction import ensure_wrapped_if_datetimelike


def take_nd(
arr,
arr: ArrayLike,
indexer,
axis: int = 0,
out: Optional[np.ndarray] = None,
fill_value=lib.no_default,
allow_fill: bool = True,
):
) -> ArrayLike:

"""
Specialized Cython take which sets NaN values in one pass
Expand All @@ -37,7 +37,7 @@ def take_nd(
Parameters
----------
arr : array-like
arr : np.ndarray or ExtensionArray
Input array.
indexer : ndarray
1-D array of indices to take, subarrays corresponding to -1 value
Expand All @@ -57,20 +57,29 @@ def take_nd(
Returns
-------
subarray : array-like
subarray : np.ndarray or ExtensionArray
May be the same type as the input, or cast to an ndarray.
"""
if fill_value is lib.no_default:
fill_value = na_value_for_dtype(arr.dtype, compat=False)

arr = extract_array(arr, extract_numpy=True)

if not isinstance(arr, np.ndarray):
# i.e. ExtensionArray;
# this check also catches DatetimeArray, TimedeltaArray
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)

arr = np.asarray(arr)
return _take_nd_ndarray(arr, indexer, axis, out, fill_value, allow_fill)


def _take_nd_ndarray(
arr: np.ndarray,
indexer,
axis: int,
out: Optional[np.ndarray],
fill_value,
allow_fill: bool,
) -> np.ndarray:

indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
arr, indexer, axis, out, fill_value, allow_fill
Expand Down
8 changes: 3 additions & 5 deletions pandas/core/internals/blocks.py
Expand Up @@ -1186,7 +1186,6 @@ def coerce_to_target_dtype(self, other) -> Block:

return self.astype(new_dtype, copy=False)

@final
def interpolate(
self,
method: str = "pad",
Expand Down Expand Up @@ -1293,11 +1292,10 @@ def _interpolate(

# only deal with floats
if self.dtype.kind != "f":
if self.dtype.kind not in ["i", "u"]:
return [self]
data = data.astype(np.float64)
# because we already checked can_hold_na, we don't have an int dtype here
return [self]

if fill_value is None:
if is_valid_na_for_dtype(fill_value, self.dtype):
fill_value = self.fill_value

if method in ("krogh", "piecewise_polynomial", "pchip"):
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/reshape/merge.py
Expand Up @@ -874,12 +874,16 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
if take_left is None:
lvals = result[name]._values
else:
# TODO: can we pin down take_left's type earlier?
take_left = extract_array(take_left, extract_numpy=True)
lfill = na_value_for_dtype(take_left.dtype)
lvals = algos.take_nd(take_left, left_indexer, fill_value=lfill)

if take_right is None:
rvals = result[name]._values
else:
# TODO: can we pin down take_right's type earlier?
take_right = extract_array(take_right, extract_numpy=True)
rfill = na_value_for_dtype(take_right.dtype)
rvals = algos.take_nd(take_right, right_indexer, fill_value=rfill)

Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_base.py
Expand Up @@ -218,7 +218,7 @@ def test_resample_empty_dtypes(index, dtype, resample_method):
getattr(empty_series_dti.resample("d"), resample_method)()
except DataError:
# Ignore these since some combinations are invalid
# (ex: doing mean with dtype of np.object)
# (ex: doing mean with dtype of np.object_)
pass


Expand Down

0 comments on commit 0b5c12d

Please sign in to comment.