CLN: follow-ups (#40000)

pandas-dev · Feb 24, 2021 · 0b5c12d · 0b5c12d
1 parent 8a307c1
commit 0b5c12d
Show file tree

Hide file tree

Showing 8 changed files with 38 additions and 22 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -163,6 +163,11 @@ repos:
         entry: np\.bool[^_8]
         language: pygrep
         types_or: [python, cython, rst]
+    -   id: np-object
+        name: Check for use of np.object instead of np.object_
+        entry: np\.object[^_8]
+        language: pygrep
+        types_or: [python, cython, rst]
     -   id: no-os-remove
         name: Check code for instances of os.remove
         entry: os\.remove

diff --git a/asv_bench/benchmarks/algos/isin.py b/asv_bench/benchmarks/algos/isin.py
@@ -104,7 +104,7 @@ def time_isin(self, dtype, exponent, title):
 
 class IsinWithRandomFloat:
     params = [
-        [np.float64, np.object],
+        [np.float64, np.object_],
         [
             1_300,
             2_000,
@@ -134,7 +134,7 @@ def time_isin(self, dtype, size, title):
 
 class IsinWithArangeSorted:
     params = [
-        [np.float64, np.int64, np.uint64, np.object],
+        [np.float64, np.int64, np.uint64, np.object_],
         [
             1_000,
             2_000,
@@ -155,7 +155,7 @@ def time_isin(self, dtype, size):
 
 class IsinWithArange:
     params = [
-        [np.float64, np.int64, np.uint64, np.object],
+        [np.float64, np.int64, np.uint64, np.object_],
         [
             1_000,
             2_000,

diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -1755,8 +1755,8 @@ Missing
 
 - Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`)
 - Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
-- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
-- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
+- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object_``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
+- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object_``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
 - :class:`DataFrame` and :class:`Series` now properly handle numpy masked arrays with hardened masks. Previously, constructing a DataFrame or Series from a masked array with a hard mask would create a pandas object containing the underlying value, rather than the expected NaN. (:issue:`24574`)
 - Bug in :class:`DataFrame` constructor where ``dtype`` argument was not honored when handling numpy masked record arrays. (:issue:`24874`)
 

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -1468,7 +1468,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
         if is_decimal_array(values):
             return "decimal"
 
-    elif is_complex(val):
+    elif util.is_complex_object(val):
         if is_complex_array(values):
             return "complex"
 

diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from typing import Optional
 
 import numpy as np
@@ -6,6 +8,7 @@
     algos as libalgos,
     lib,
 )
+from pandas._typing import ArrayLike
 
 from pandas.core.dtypes.cast import maybe_promote
 from pandas.core.dtypes.common import (
@@ -14,20 +17,17 @@
 )
 from pandas.core.dtypes.missing import na_value_for_dtype
 
-from pandas.core.construction import (
-    ensure_wrapped_if_datetimelike,
-    extract_array,
-)
+from pandas.core.construction import ensure_wrapped_if_datetimelike
 
 
 def take_nd(
-    arr,
+    arr: ArrayLike,
     indexer,
     axis: int = 0,
     out: Optional[np.ndarray] = None,
     fill_value=lib.no_default,
     allow_fill: bool = True,
-):
+) -> ArrayLike:
 
     """
     Specialized Cython take which sets NaN values in one pass
@@ -37,7 +37,7 @@ def take_nd(
 
     Parameters
     ----------
-    arr : array-like
+    arr : np.ndarray or ExtensionArray
         Input array.
     indexer : ndarray
         1-D array of indices to take, subarrays corresponding to -1 value
@@ -57,20 +57,29 @@ def take_nd(
 
     Returns
     -------
-    subarray : array-like
+    subarray : np.ndarray or ExtensionArray
         May be the same type as the input, or cast to an ndarray.
     """
     if fill_value is lib.no_default:
         fill_value = na_value_for_dtype(arr.dtype, compat=False)
 
-    arr = extract_array(arr, extract_numpy=True)
-
     if not isinstance(arr, np.ndarray):
         # i.e. ExtensionArray,
         # which also covers DatetimeArray and TimedeltaArray
         return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
 
     arr = np.asarray(arr)
+    return _take_nd_ndarray(arr, indexer, axis, out, fill_value, allow_fill)
+
+
+def _take_nd_ndarray(
+    arr: np.ndarray,
+    indexer,
+    axis: int,
+    out: Optional[np.ndarray],
+    fill_value,
+    allow_fill: bool,
+) -> np.ndarray:
 
     indexer, dtype, fill_value, mask_info = _take_preprocess_indexer_and_fill_value(
         arr, indexer, axis, out, fill_value, allow_fill

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1186,7 +1186,6 @@ def coerce_to_target_dtype(self, other) -> Block:
 
         return self.astype(new_dtype, copy=False)
 
-    @final
     def interpolate(
         self,
         method: str = "pad",
@@ -1293,11 +1292,10 @@ def _interpolate(
 
         # only deal with floats
         if self.dtype.kind != "f":
-            if self.dtype.kind not in ["i", "u"]:
-                return [self]
-            data = data.astype(np.float64)
+            # because we already checked can_hold_na, we don't have an int dtype here
+            return [self]
 
-        if fill_value is None:
+        if is_valid_na_for_dtype(fill_value, self.dtype):
             fill_value = self.fill_value
 
         if method in ("krogh", "piecewise_polynomial", "pchip"):

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -874,12 +874,16 @@ def _maybe_add_join_keys(self, result, left_indexer, right_indexer):
                 if take_left is None:
                     lvals = result[name]._values
                 else:
+                    # TODO: can we pin down take_left's type earlier?
+                    take_left = extract_array(take_left, extract_numpy=True)
                     lfill = na_value_for_dtype(take_left.dtype)
                     lvals = algos.take_nd(take_left, left_indexer, fill_value=lfill)
 
                 if take_right is None:
                     rvals = result[name]._values
                 else:
+                    # TODO: can we pin down take_right's type earlier?
+                    take_right = extract_array(take_right, extract_numpy=True)
                     rfill = na_value_for_dtype(take_right.dtype)
                     rvals = algos.take_nd(take_right, right_indexer, fill_value=rfill)
 

diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
@@ -218,7 +218,7 @@ def test_resample_empty_dtypes(index, dtype, resample_method):
         getattr(empty_series_dti.resample("d"), resample_method)()
     except DataError:
         # Ignore these since some combinations are invalid
-        # (ex: doing mean with dtype of np.object)
+        # (ex: doing mean with dtype of np.object_)
         pass