DEPR: don't make Index instantiate Int64/UInt64/Float64Index

pandas-dev · Nov 6, 2022 · 5ff5584 · 5ff5584
1 parent 0b93117
commit 5ff5584
Show file tree

Hide file tree

Showing 33 changed files with 255 additions and 289 deletions.
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -593,10 +593,7 @@ def _create_mi_with_dt64tz_level():
     "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"),
     "period": tm.makePeriodIndex(100),
     "timedelta": tm.makeTimedeltaIndex(100),
-    "int": tm.makeIntIndex(100),
-    "uint": tm.makeUIntIndex(100),
     "range": tm.makeRangeIndex(100),
-    "float": tm.makeFloatIndex(100),
     "complex64": tm.makeFloatIndex(100).astype("complex64"),
     "complex128": tm.makeFloatIndex(100).astype("complex128"),
     "num_int64": tm.makeNumericIndex(100, dtype="int64"),

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -89,6 +89,7 @@
     ensure_platform_int,
     is_bool_dtype,
     is_categorical_dtype,
+    is_complex_dtype,
     is_dtype_equal,
     is_ea_or_datetimelike_dtype,
     is_extension_array_dtype,
@@ -104,6 +105,7 @@
     is_scalar,
     is_signed_integer_dtype,
     is_string_dtype,
+    is_unsigned_integer_dtype,
     needs_i8_conversion,
     pandas_dtype,
     validate_all_hashable,
@@ -588,18 +590,14 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):
 
             return TimedeltaIndex
 
-        elif dtype.kind == "f":
-            from pandas.core.api import Float64Index
-
-            return Float64Index
-        elif dtype.kind == "u":
-            from pandas.core.api import UInt64Index
-
-            return UInt64Index
-        elif dtype.kind == "i":
-            from pandas.core.api import Int64Index
+        elif (
+            is_numeric_dtype(dtype)
+            and not is_bool_dtype(dtype)
+            and not is_complex_dtype(dtype)
+        ):
+            from pandas.core.api import NumericIndex
 
-            return Int64Index
+            return NumericIndex
 
         elif dtype.kind == "O":
             # NB: assuming away MultiIndex
@@ -1040,14 +1038,29 @@ def astype(self, dtype, copy: bool = True):
                 new_values = astype_nansafe(values, dtype=dtype, copy=copy)
 
         # pass copy=False because any copying will be done in the astype above
-        if self._is_backward_compat_public_numeric_index:
-            # this block is needed so e.g. NumericIndex[int8].astype("int32") returns
-            # NumericIndex[int32] and not Int64Index with dtype int64.
+        if not self._is_backward_compat_public_numeric_index and not isinstance(
+            self, ABCRangeIndex
+        ):
+            # this block is needed so e.g. Int64Index.astype("int32") returns
+            # Int64Index and not a NumericIndex with dtype int32.
             # When Int64Index etc. are removed from the code base, remove this also.
             if isinstance(dtype, np.dtype) and is_numeric_dtype(dtype):
-                return self._constructor(
-                    new_values, name=self.name, dtype=dtype, copy=False
+                from pandas.core.api import (
+                    Float64Index,
+                    Int64Index,
+                    UInt64Index,
                 )
+
+                if is_signed_integer_dtype(dtype):
+                    klass = Int64Index
+                elif is_unsigned_integer_dtype(dtype):
+                    klass = UInt64Index
+                elif is_float_dtype(dtype):
+                    klass = Float64Index
+                else:
+                    klass = Index
+                return klass(new_values, name=self.name, dtype=dtype, copy=False)
+
         return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
 
     _index_shared_docs[
@@ -5247,6 +5260,7 @@ def putmask(self, mask, value) -> Index:
         if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
             # e.g. None -> np.nan, see also Block._standardize_fill_value
             value = self._na_value
+
         try:
             converted = self._validate_fill_value(value)
         except (LossySetitemError, ValueError, TypeError) as err:
@@ -6115,13 +6129,6 @@ def map(self, mapper, na_action=None):
                 new_values, self.dtype, same_dtype=same_dtype
             )
 
-        if self._is_backward_compat_public_numeric_index and is_numeric_dtype(
-            new_values.dtype
-        ):
-            return self._constructor(
-                new_values, dtype=dtype, copy=False, name=self.name
-            )
-
         return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)
 
     # TODO: De-duplicate with map, xref GH#32349
@@ -6598,10 +6605,17 @@ def insert(self, loc: int, item) -> Index:
             loc = loc if loc >= 0 else loc - 1
             new_values[loc] = item
 
-        if self._typ == "numericindex":
-            # Use self._constructor instead of Index to retain NumericIndex GH#43921
-            # TODO(2.0) can use Index instead of self._constructor
-            return self._constructor._with_infer(new_values, name=self.name)
+        if not self._is_backward_compat_public_numeric_index:
+            from pandas.core.indexes.numeric import NumericIndex
+
+            if not isinstance(self, ABCRangeIndex) or not isinstance(
+                self, NumericIndex
+            ):
+                return Index._with_infer(new_values, name=self.name)
+            else:
+                # Use self._constructor instead of Index to retain old-style num. index
+                # TODO(2.0) can use Index instead of self._constructor
+                return self._constructor._with_infer(new_values, name=self.name)
         else:
             return Index._with_infer(new_values, name=self.name)
 

diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
@@ -13,6 +13,7 @@
 )
 from pandas._typing import (
     Dtype,
+    DtypeObj,
     npt,
 )
 from pandas.util._decorators import (
@@ -174,6 +175,10 @@ def _ensure_array(cls, data, dtype, copy: bool):
             raise ValueError("Index data must be 1-dimensional")
 
         subarr = np.asarray(subarr)
+        if subarr.dtype == "float16":
+            # float16 not supported (no indexing engine)
+            subarr = subarr.astype("float32")
+
         return subarr
 
     @classmethod
@@ -198,6 +203,9 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
             return cls._default_dtype
 
         dtype = pandas_dtype(dtype)
+        if dtype == np.float16:
+            # float16 not supported (no indexing engine)
+            dtype = np.dtype(np.float32)
         assert isinstance(dtype, np.dtype)
 
         if cls._is_backward_compat_public_numeric_index:
@@ -347,7 +355,26 @@ def _format_native_types(
 """
 
 
-class IntegerIndex(NumericIndex):
+class TempBaseIndex(NumericIndex):  # shared base of IntegerIndex and Float64Index
+    @classmethod
+    def _dtype_to_subclass(cls, dtype: DtypeObj):  # dtype -> legacy index class
+        if is_unsigned_integer_dtype(dtype):  # is_integer_dtype also matches uint
+            from pandas.core.api import UInt64Index
+
+            return UInt64Index
+        elif is_integer_dtype(dtype):  # only signed ints reach this branch
+            from pandas.core.api import Int64Index
+
+            return Int64Index
+        elif is_float_dtype(dtype):
+            from pandas.core.api import Float64Index
+
+            return Float64Index
+        else:
+            return super()._dtype_to_subclass(dtype)
+
+
+class IntegerIndex(TempBaseIndex):
     """
     This is an abstract class for Int64Index, UInt64Index.
     """
@@ -391,7 +418,7 @@ def _engine_type(self) -> type[libindex.UInt64Engine]:
         return libindex.UInt64Engine
 
 
-class Float64Index(NumericIndex):
+class Float64Index(TempBaseIndex):
     _index_descr_args = {
         "klass": "Float64Index",
         "dtype": "float64",

diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -185,9 +185,9 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex:
     # error: Return type "Type[Int64Index]" of "_constructor" incompatible with return
     # type "Type[RangeIndex]" in supertype "Index"
     @cache_readonly
-    def _constructor(self) -> type[Int64Index]:  # type: ignore[override]
+    def _constructor(self) -> type[NumericIndex]:  # type: ignore[override]
         """return the class to use for construction"""
-        return Int64Index
+        return NumericIndex
 
     # error: Signature of "_data" incompatible with supertype "Index"
     @cache_readonly

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -64,7 +64,6 @@
 )
 from pandas.core.indexers import maybe_convert_indices
 from pandas.core.indexes.api import (
-    Float64Index,
     Index,
     ensure_index,
 )
@@ -1582,7 +1581,7 @@ def _equal_values(self: BlockManager, other: BlockManager) -> bool:
     def quantile(
         self: T,
         *,
-        qs: Float64Index,
+        qs: Index,  # of dtype float 64
         axis: AxisInt = 0,
         interpolation: QuantileInterpolation = "linear",
     ) -> T:
@@ -1610,7 +1609,7 @@ def quantile(
         assert axis == 1  # only ever called this way
 
         new_axes = list(self.axes)
-        new_axes[1] = Float64Index(qs)
+        new_axes[1] = Index(qs, dtype=np.float64)
 
         blocks = [
             blk.quantile(axis=axis, qs=qs, interpolation=interpolation)

diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py
@@ -186,7 +186,7 @@ def test_apply_datetimetz():
     # change dtype
     # GH 14506 : Returned dtype changed from int32 to int64
     result = s.apply(lambda x: x.hour)
-    exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
+    exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
     tm.assert_series_equal(result, exp)
 
     # not vectorized
@@ -766,7 +766,7 @@ def test_map_datetimetz():
     # change dtype
     # GH 14506 : Returned dtype changed from int32 to int64
     result = s.map(lambda x: x.hour)
-    exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
+    exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
     tm.assert_series_equal(result, exp)
 
     # not vectorized

diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py
@@ -287,7 +287,7 @@ def test_arrow_array():
     with pytest.raises(TypeError, match="Not supported to convert IntervalArray"):
         pa.array(intervals, type="float64")
 
-    with pytest.raises(TypeError, match="different 'subtype'"):
+    with pytest.raises(TypeError, match="Not supported to convert IntervalArray"):
         pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left"))
 
 

diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py
@@ -41,7 +41,12 @@ def test_from_coo(self):
         sp_array = scipy.sparse.coo_matrix((data, (row, col)), dtype="int")
         result = pd.Series.sparse.from_coo(sp_array)
 
-        index = pd.MultiIndex.from_arrays([[0, 0, 1, 3], [0, 2, 1, 3]])
+        index = pd.MultiIndex.from_arrays(
+            [
+                np.array([0, 0, 1, 3], dtype=np.int32),
+                np.array([0, 2, 1, 3], dtype=np.int32),
+            ],
+        )
         expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]")
         tm.assert_series_equal(result, expected)
 
@@ -212,7 +217,15 @@ def test_series_from_coo(self, dtype, dense_index):
 
         A = scipy.sparse.eye(3, format="coo", dtype=dtype)
         result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
-        index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])
+
+        index_dtype = np.int64 if dense_index else np.int32
+        index = pd.MultiIndex.from_tuples(
+            [
+                np.array([0, 0], dtype=index_dtype),
+                np.array([1, 1], dtype=index_dtype),
+                np.array([2, 2], dtype=index_dtype),
+            ],
+        )
         expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index)
         if dense_index:
             expected = expected.reindex(pd.MultiIndex.from_product(index.levels))

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -736,7 +736,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager):
 
         # positional slicing only via iloc!
         msg = (
-            "cannot do positional indexing on Float64Index with "
+            "cannot do positional indexing on NumericIndex with "
             r"these indexers \[1.0\] of type float"
         )
         with pytest.raises(TypeError, match=msg):

diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py
@@ -159,7 +159,7 @@ def test_set_index_cast(self):
         df = DataFrame(
             {"A": [1.1, 2.2, 3.3], "B": [5.0, 6.1, 7.2]}, index=[2010, 2011, 2012]
         )
-        df2 = df.set_index(df.index.astype(np.int32))
+        df2 = df.set_index(df.index.astype(np.int64))
         tm.assert_frame_equal(df, df2)
 
     # A has duplicate values, C does not

diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -835,11 +835,7 @@ def test_insert_non_na(self, simple_index):
 
         result = index.insert(0, index[0])
 
-        cls = type(index)
-        if cls is RangeIndex:
-            cls = Int64Index
-
-        expected = cls([index[0]] + list(index), dtype=index.dtype)
+        expected = Index([index[0]] + list(index), dtype=index.dtype)
         tm.assert_index_equal(result, expected, exact=True)
 
     def test_insert_na(self, nulls_fixture, simple_index):
@@ -850,7 +846,7 @@ def test_insert_na(self, nulls_fixture, simple_index):
         if na_val is pd.NaT:
             expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object)
         else:
-            expected = Float64Index([index[0], np.nan] + list(index[1:]))
+            expected = Index([index[0], np.nan] + list(index[1:]))
 
             if index._is_backward_compat_public_numeric_index:
                 # GH#43921 we preserve NumericIndex

diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py
@@ -812,7 +812,7 @@ def test_date_range_span_dst_transition(self, tzstr):
 
         dr = date_range("2012-11-02", periods=10, tz=tzstr)
         result = dr.hour
-        expected = pd.Index([0] * 10)
+        expected = pd.Index([0] * 10, dtype="int32")
         tm.assert_index_equal(result, expected)
 
     @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])

diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py
@@ -281,8 +281,9 @@ def test_datetime_name_accessors(self, time_locale):
 
     def test_nanosecond_field(self):
         dti = DatetimeIndex(np.arange(10))
+        expected = Index(np.arange(10, dtype=np.int32))
 
-        tm.assert_index_equal(dti.nanosecond, Index(np.arange(10, dtype=np.int64)))
+        tm.assert_index_equal(dti.nanosecond, expected)
 
 
 def test_iter_readonly():