Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: move NumericIndexes into base Index, part 1 #49494

Closed
Closed
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
5 changes: 3 additions & 2 deletions asv_bench/benchmarks/hash_functions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np

import pandas as pd
from pandas.core.api import Int64Index, UInt64Index, Float64Index


class UniqueForLargePyObjectInts:
Expand Down Expand Up @@ -57,7 +58,7 @@ def time_unique(self, exponent):
class NumericSeriesIndexing:

params = [
(pd.Int64Index, pd.UInt64Index, pd.Float64Index),
(Int64Index, UInt64Index, Float64Index),
(10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6),
]
param_names = ["index_dtype", "N"]
Expand All @@ -75,7 +76,7 @@ def time_loc_slice(self, index, N):
class NumericSeriesIndexingShuffled:

params = [
(pd.Int64Index, pd.UInt64Index, pd.Float64Index),
(Int64Index, UInt64Index, Float64Index),
(10**4, 10**5, 5 * 10**5, 10**6, 5 * 10**6),
]
param_names = ["index_dtype", "N"]
Expand Down
5 changes: 3 additions & 2 deletions asv_bench/benchmarks/index_cached_properties.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pandas as pd
from pandas.core.api import Float64Index, UInt64Index


class IndexCache:
Expand Down Expand Up @@ -40,9 +41,9 @@ def setup(self, index_type):
elif index_type == "TimedeltaIndex":
self.idx = pd.TimedeltaIndex(range(N))
elif index_type == "Float64Index":
self.idx = pd.Float64Index(range(N))
self.idx = Float64Index(range(N))
elif index_type == "UInt64Index":
self.idx = pd.UInt64Index(range(N))
self.idx = UInt64Index(range(N))
elif index_type == "CategoricalIndex":
self.idx = pd.CategoricalIndex(range(N), range(N))
else:
Expand Down
32 changes: 0 additions & 32 deletions pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,38 +183,6 @@
__git_version__ = v.get("full-revisionid")
del get_versions, v

# GH 27101
__deprecated_num_index_names = ["Float64Index", "Int64Index", "UInt64Index"]


def __dir__() -> list[str]:
# GH43028
# Int64Index etc. are deprecated, but we still want them to be available in the dir.
# Remove in Pandas 2.0, when we remove Int64Index etc. from the code base.
return list(globals().keys()) + __deprecated_num_index_names


def __getattr__(name):
import warnings

if name in __deprecated_num_index_names:
warnings.warn(
f"pandas.{name} is deprecated "
"and will be removed from pandas in a future version. "
"Use pandas.Index with the appropriate dtype instead.",
FutureWarning,
stacklevel=2,
)
from pandas.core.api import Float64Index, Int64Index, UInt64Index

return {
"Float64Index": Float64Index,
"Int64Index": Int64Index,
"UInt64Index": UInt64Index,
}[name]

raise AttributeError(f"module 'pandas' has no attribute '{name}'")


# module level doc-string
__doc__ = """
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11203,7 +11203,7 @@ def quantile(
)
if method == "single":
# error: Argument "qs" to "quantile" of "BlockManager" has incompatible type
# "Index"; expected "Float64Index"
# "Index"; expected "NumericIndex"
res = data._mgr.quantile(
qs=q, axis=1, interpolation=interpolation # type: ignore[arg-type]
)
Expand Down
68 changes: 41 additions & 27 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@
ensure_platform_int,
is_bool_dtype,
is_categorical_dtype,
is_complex_dtype,
is_dtype_equal,
is_ea_or_datetimelike_dtype,
is_extension_array_dtype,
Expand All @@ -106,6 +107,7 @@
is_scalar,
is_signed_integer_dtype,
is_string_dtype,
is_unsigned_integer_dtype,
needs_i8_conversion,
pandas_dtype,
validate_all_hashable,
Expand Down Expand Up @@ -590,18 +592,14 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):

return TimedeltaIndex

elif dtype.kind == "f":
from pandas.core.api import Float64Index

return Float64Index
elif dtype.kind == "u":
from pandas.core.api import UInt64Index

return UInt64Index
elif dtype.kind == "i":
from pandas.core.api import Int64Index
elif (
is_numeric_dtype(dtype)
and not is_bool_dtype(dtype)
and not is_complex_dtype(dtype)
):
from pandas.core.api import NumericIndex

return Int64Index
return NumericIndex

elif dtype.kind == "O":
# NB: assuming away MultiIndex
Expand Down Expand Up @@ -1057,14 +1055,29 @@ def astype(self, dtype, copy: bool = True):
) from err

# pass copy=False because any copying will be done in the astype above
if self._is_backward_compat_public_numeric_index:
# this block is needed so e.g. NumericIndex[int8].astype("int32") returns
# NumericIndex[int32] and not Int64Index with dtype int64.
if not self._is_backward_compat_public_numeric_index and not isinstance(
self, ABCRangeIndex
):
# this block is needed so e.g. Int64Index.astype("int32") returns
# Int64Index and not a NumericIndex with dtype int32.
# When Int64Index etc. are removed from the code base, remove this also.
if isinstance(dtype, np.dtype) and is_numeric_dtype(dtype):
return self._constructor(
new_values, name=self.name, dtype=dtype, copy=False
from pandas.core.api import (
Float64Index,
Int64Index,
UInt64Index,
)

if is_signed_integer_dtype(dtype):
klass = Int64Index
elif is_unsigned_integer_dtype(dtype):
klass = UInt64Index
elif is_float_dtype(dtype):
klass = Float64Index
else:
klass = Index
return klass(new_values, name=self.name, dtype=dtype, copy=False)

return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)

_index_shared_docs[
Expand Down Expand Up @@ -5265,6 +5278,7 @@ def putmask(self, mask, value) -> Index:
if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
# e.g. None -> np.nan, see also Block._standardize_fill_value
value = self._na_value

try:
converted = self._validate_fill_value(value)
except (LossySetitemError, ValueError, TypeError) as err:
Expand Down Expand Up @@ -6133,13 +6147,6 @@ def map(self, mapper, na_action=None):
new_values, self.dtype, same_dtype=same_dtype
)

if self._is_backward_compat_public_numeric_index and is_numeric_dtype(
new_values.dtype
):
return self._constructor(
new_values, dtype=dtype, copy=False, name=self.name
)

return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)

# TODO: De-duplicate with map, xref GH#32349
Expand Down Expand Up @@ -6616,10 +6623,17 @@ def insert(self, loc: int, item) -> Index:
loc = loc if loc >= 0 else loc - 1
new_values[loc] = item

if self._typ == "numericindex":
# Use self._constructor instead of Index to retain NumericIndex GH#43921
# TODO(2.0) can use Index instead of self._constructor
return self._constructor._with_infer(new_values, name=self.name)
if not self._is_backward_compat_public_numeric_index:
from pandas.core.indexes.numeric import NumericIndex

if not isinstance(self, ABCRangeIndex) or not isinstance(
self, NumericIndex
):
return Index._with_infer(new_values, name=self.name)
else:
# Use self._constructor instead of Index to retain old-style num. index
# TODO(2.0) can use Index instead of self._constructor
return self._constructor._with_infer(new_values, name=self.name)
else:
return Index._with_infer(new_values, name=self.name)

Expand Down
35 changes: 31 additions & 4 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)
from pandas._typing import (
Dtype,
DtypeObj,
npt,
)
from pandas.util._decorators import (
Expand Down Expand Up @@ -74,8 +75,8 @@ class NumericIndex(Index):
Notes
-----
A NumericIndex instance can **only** contain numpy int64/32/16/8, uint64/32/16/8 or
float64/32/16 dtype. In particular, ``NumericIndex`` *can not* hold Pandas numeric
dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.).
float64/32 dtype. In particular, ``NumericIndex`` *can not* hold numpy float16
dtype or Pandas numeric dtypes (:class:`Int64Dtype`, :class:`Int32Dtype` etc.).
"""

_typ = "numericindex"
Expand Down Expand Up @@ -174,6 +175,10 @@ def _ensure_array(cls, data, dtype, copy: bool):
raise ValueError("Index data must be 1-dimensional")

subarr = np.asarray(subarr)
if subarr.dtype == "float16":
# float16 not supported (no indexing engine)
subarr = subarr.astype("float32")

return subarr

@classmethod
Expand All @@ -198,6 +203,9 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
return cls._default_dtype

dtype = pandas_dtype(dtype)
if dtype == np.float16:
# float16 not supported (no indexing engine)
dtype = np.dtype(np.float32)
assert isinstance(dtype, np.dtype)

if cls._is_backward_compat_public_numeric_index:
Expand Down Expand Up @@ -347,7 +355,26 @@ def _format_native_types(
"""


class IntegerIndex(NumericIndex):
class TempBaseIndex(NumericIndex):
    """
    Shared base class for ``IntegerIndex`` and ``Float64Index``.

    Overrides ``_dtype_to_subclass`` so that integer, unsigned integer and
    float dtypes resolve to ``Int64Index``, ``UInt64Index`` and
    ``Float64Index`` respectively.
    """

    # NOTE(review): the "Temp" prefix suggests this class is meant to go away
    # together with Int64Index/UInt64Index/Float64Index -- confirm.

    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        """
        Return the old-style numeric index class matching ``dtype``.

        Parameters
        ----------
        dtype : DtypeObj
            The dtype to look up.

        Returns
        -------
        type
            ``UInt64Index`` for unsigned integer dtypes, ``Int64Index`` for
            other integer dtypes, ``Float64Index`` for float dtypes; otherwise
            whatever the parent ``_dtype_to_subclass`` returns.
        """
        # The unsigned check must come before the general integer check:
        # is_integer_dtype also accepts unsigned dtypes, so testing it first
        # would make the UInt64Index branch unreachable.
        if is_unsigned_integer_dtype(dtype):
            from pandas.core.api import UInt64Index

            return UInt64Index
        elif is_integer_dtype(dtype):
            from pandas.core.api import Int64Index

            return Int64Index
        elif is_float_dtype(dtype):
            from pandas.core.api import Float64Index

            return Float64Index
        else:
            return super()._dtype_to_subclass(dtype)


class IntegerIndex(TempBaseIndex):
"""
This is an abstract class for Int64Index, UInt64Index.
"""
Expand Down Expand Up @@ -391,7 +418,7 @@ def _engine_type(self) -> type[libindex.UInt64Engine]:
return libindex.UInt64Engine


class Float64Index(NumericIndex):
class Float64Index(TempBaseIndex):
_index_descr_args = {
"klass": "Float64Index",
"dtype": "float64",
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,9 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex:
# error: Return type "Type[Int64Index]" of "_constructor" incompatible with return
# type "Type[RangeIndex]" in supertype "Index"
@cache_readonly
def _constructor(self) -> type[Int64Index]: # type: ignore[override]
def _constructor(self) -> type[NumericIndex]: # type: ignore[override]
"""return the class to use for construction"""
return Int64Index
return NumericIndex

# error: Signature of "_data" incompatible with supertype "Index"
@cache_readonly
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,8 @@
)
from pandas.core.indexers import maybe_convert_indices
from pandas.core.indexes.api import (
Float64Index,
Index,
NumericIndex,
ensure_index,
)
from pandas.core.internals.base import (
Expand Down Expand Up @@ -1582,7 +1582,7 @@ def _equal_values(self: BlockManager, other: BlockManager) -> bool:
def quantile(
self: T,
*,
qs: Float64Index,
qs: NumericIndex,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

comment about dtype?

axis: AxisInt = 0,
interpolation: QuantileInterpolation = "linear",
) -> T:
Expand Down Expand Up @@ -1610,7 +1610,7 @@ def quantile(
assert axis == 1 # only ever called this way

new_axes = list(self.axes)
new_axes[1] = Float64Index(qs)
new_axes[1] = Index(qs, dtype=np.float64)

blocks = [
blk.quantile(axis=axis, qs=qs, interpolation=interpolation)
Expand Down
3 changes: 1 addition & 2 deletions pandas/io/feather_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

from pandas.core.api import (
DataFrame,
Int64Index,
RangeIndex,
)
from pandas.core.shared_docs import _shared_docs
Expand Down Expand Up @@ -62,7 +61,7 @@ def to_feather(
# validate that we have only a default index
# raise on anything else as we don't serialize the index

if not isinstance(df.index, (Int64Index, RangeIndex)):
if df.index.dtype != "int64":
typ = type(df.index)
raise ValueError(
f"feather does not support serializing {typ} "
Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,14 @@ class TestPDApi(Base):
"DatetimeIndex",
"ExcelFile",
"ExcelWriter",
"Float64Index",
"Flags",
"Grouper",
"HDFStore",
"Index",
"Int64Index",
"MultiIndex",
"Period",
"PeriodIndex",
"RangeIndex",
"UInt64Index",
"Series",
"SparseDtype",
"StringDtype",
Expand Down Expand Up @@ -93,7 +90,7 @@ class TestPDApi(Base):
]

# these are already deprecated; awaiting removal
deprecated_classes: list[str] = ["Float64Index", "Int64Index", "UInt64Index"]
deprecated_classes: list[str] = []

# external modules exposed in pandas namespace
modules: list[str] = []
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/apply/test_series_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ def test_apply_datetimetz():
# change dtype
# GH 14506 : Returned dtype changed from int32 to int64
result = s.apply(lambda x: x.hour)
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
tm.assert_series_equal(result, exp)

# not vectorized
Expand Down Expand Up @@ -766,7 +766,7 @@ def test_map_datetimetz():
# change dtype
# GH 14506 : Returned dtype changed from int32 to int64
result = s.map(lambda x: x.hour)
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
tm.assert_series_equal(result, exp)

# not vectorized
Expand Down