Skip to content

Commit

Permalink
DEPR: don't make Index instantiate Int64/UInt64/Float64Index
Browse files Browse the repository at this point in the history
  • Loading branch information
Terji Petersen authored and Terji Petersen committed Nov 6, 2022
1 parent 0b93117 commit 5ff5584
Show file tree
Hide file tree
Showing 33 changed files with 255 additions and 289 deletions.
3 changes: 0 additions & 3 deletions pandas/conftest.py
Expand Up @@ -593,10 +593,7 @@ def _create_mi_with_dt64tz_level():
"datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"),
"period": tm.makePeriodIndex(100),
"timedelta": tm.makeTimedeltaIndex(100),
"int": tm.makeIntIndex(100),
"uint": tm.makeUIntIndex(100),
"range": tm.makeRangeIndex(100),
"float": tm.makeFloatIndex(100),
"complex64": tm.makeFloatIndex(100).astype("complex64"),
"complex128": tm.makeFloatIndex(100).astype("complex128"),
"num_int64": tm.makeNumericIndex(100, dtype="int64"),
Expand Down
68 changes: 41 additions & 27 deletions pandas/core/indexes/base.py
Expand Up @@ -89,6 +89,7 @@
ensure_platform_int,
is_bool_dtype,
is_categorical_dtype,
is_complex_dtype,
is_dtype_equal,
is_ea_or_datetimelike_dtype,
is_extension_array_dtype,
Expand All @@ -104,6 +105,7 @@
is_scalar,
is_signed_integer_dtype,
is_string_dtype,
is_unsigned_integer_dtype,
needs_i8_conversion,
pandas_dtype,
validate_all_hashable,
Expand Down Expand Up @@ -588,18 +590,14 @@ def _dtype_to_subclass(cls, dtype: DtypeObj):

return TimedeltaIndex

elif dtype.kind == "f":
from pandas.core.api import Float64Index

return Float64Index
elif dtype.kind == "u":
from pandas.core.api import UInt64Index

return UInt64Index
elif dtype.kind == "i":
from pandas.core.api import Int64Index
elif (
is_numeric_dtype(dtype)
and not is_bool_dtype(dtype)
and not is_complex_dtype(dtype)
):
from pandas.core.api import NumericIndex

return Int64Index
return NumericIndex

elif dtype.kind == "O":
# NB: assuming away MultiIndex
Expand Down Expand Up @@ -1040,14 +1038,29 @@ def astype(self, dtype, copy: bool = True):
new_values = astype_nansafe(values, dtype=dtype, copy=copy)

# pass copy=False because any copying will be done in the astype above
if self._is_backward_compat_public_numeric_index:
# this block is needed so e.g. NumericIndex[int8].astype("int32") returns
# NumericIndex[int32] and not Int64Index with dtype int64.
if not self._is_backward_compat_public_numeric_index and not isinstance(
self, ABCRangeIndex
):
# this block is needed so e.g. Int64Index.astype("int32") returns
# Int64Index and not a NumericIndex with dtype int32.
# When Int64Index etc. are removed from the code base, remove this also.
if isinstance(dtype, np.dtype) and is_numeric_dtype(dtype):
return self._constructor(
new_values, name=self.name, dtype=dtype, copy=False
from pandas.core.api import (
Float64Index,
Int64Index,
UInt64Index,
)

if is_signed_integer_dtype(dtype):
klass = Int64Index
elif is_unsigned_integer_dtype(dtype):
klass = UInt64Index
elif is_float_dtype(dtype):
klass = Float64Index
else:
klass = Index
return klass(new_values, name=self.name, dtype=dtype, copy=False)

return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)

_index_shared_docs[
Expand Down Expand Up @@ -5247,6 +5260,7 @@ def putmask(self, mask, value) -> Index:
if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
# e.g. None -> np.nan, see also Block._standardize_fill_value
value = self._na_value

try:
converted = self._validate_fill_value(value)
except (LossySetitemError, ValueError, TypeError) as err:
Expand Down Expand Up @@ -6115,13 +6129,6 @@ def map(self, mapper, na_action=None):
new_values, self.dtype, same_dtype=same_dtype
)

if self._is_backward_compat_public_numeric_index and is_numeric_dtype(
new_values.dtype
):
return self._constructor(
new_values, dtype=dtype, copy=False, name=self.name
)

return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)

# TODO: De-duplicate with map, xref GH#32349
Expand Down Expand Up @@ -6598,10 +6605,17 @@ def insert(self, loc: int, item) -> Index:
loc = loc if loc >= 0 else loc - 1
new_values[loc] = item

if self._typ == "numericindex":
# Use self._constructor instead of Index to retain NumericIndex GH#43921
# TODO(2.0) can use Index instead of self._constructor
return self._constructor._with_infer(new_values, name=self.name)
if not self._is_backward_compat_public_numeric_index:
from pandas.core.indexes.numeric import NumericIndex

if not isinstance(self, ABCRangeIndex) or not isinstance(
self, NumericIndex
):
return Index._with_infer(new_values, name=self.name)
else:
# Use self._constructor instead of Index to retain old-style num. index
# TODO(2.0) can use Index instead of self._constructor
return self._constructor._with_infer(new_values, name=self.name)
else:
return Index._with_infer(new_values, name=self.name)

Expand Down
31 changes: 29 additions & 2 deletions pandas/core/indexes/numeric.py
Expand Up @@ -13,6 +13,7 @@
)
from pandas._typing import (
Dtype,
DtypeObj,
npt,
)
from pandas.util._decorators import (
Expand Down Expand Up @@ -174,6 +175,10 @@ def _ensure_array(cls, data, dtype, copy: bool):
raise ValueError("Index data must be 1-dimensional")

subarr = np.asarray(subarr)
if subarr.dtype == "float16":
# float16 not supported (no indexing engine)
subarr = subarr.astype("float32")

return subarr

@classmethod
Expand All @@ -198,6 +203,9 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None:
return cls._default_dtype

dtype = pandas_dtype(dtype)
if dtype == np.float16:
# float16 not supported (no indexing engine)
dtype = np.dtype(np.float32)
assert isinstance(dtype, np.dtype)

if cls._is_backward_compat_public_numeric_index:
Expand Down Expand Up @@ -347,7 +355,26 @@ def _format_native_types(
"""


class IntegerIndex(NumericIndex):
class TempBaseIndex(NumericIndex):
    """
    Shared base class for the old-style numeric indexes.

    IntegerIndex and Float64Index derive from this class. It overrides
    ``_dtype_to_subclass`` so that integer and float dtypes resolve to
    Int64Index, UInt64Index or Float64Index; every other dtype is delegated
    to the parent implementation.
    """

    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        """
        Return the index class to use for ``dtype``.

        Parameters
        ----------
        dtype : DtypeObj
            The dtype to resolve.

        Returns
        -------
        type
            UInt64Index for unsigned integer dtypes, Int64Index for other
            integer dtypes, Float64Index for float dtypes, otherwise
            whatever the parent class resolves ``dtype`` to.
        """
        # The unsigned check has to come first: is_integer_dtype is also True
        # for unsigned integer dtypes, so testing it first would make the
        # UInt64Index branch unreachable and map uint dtypes to Int64Index.
        # The index classes are imported locally, as in the original code
        # (presumably to avoid a circular import -- confirm).
        if is_unsigned_integer_dtype(dtype):
            from pandas.core.api import UInt64Index

            return UInt64Index
        elif is_integer_dtype(dtype):
            from pandas.core.api import Int64Index

            return Int64Index
        elif is_float_dtype(dtype):
            from pandas.core.api import Float64Index

            return Float64Index
        else:
            return super()._dtype_to_subclass(dtype)


class IntegerIndex(TempBaseIndex):
"""
This is an abstract class for Int64Index, UInt64Index.
"""
Expand Down Expand Up @@ -391,7 +418,7 @@ def _engine_type(self) -> type[libindex.UInt64Engine]:
return libindex.UInt64Engine


class Float64Index(NumericIndex):
class Float64Index(TempBaseIndex):
_index_descr_args = {
"klass": "Float64Index",
"dtype": "float64",
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/range.py
Expand Up @@ -185,9 +185,9 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex:
# error: Return type "Type[Int64Index]" of "_constructor" incompatible with return
# type "Type[RangeIndex]" in supertype "Index"
@cache_readonly
def _constructor(self) -> type[Int64Index]: # type: ignore[override]
def _constructor(self) -> type[NumericIndex]: # type: ignore[override]
"""return the class to use for construction"""
return Int64Index
return NumericIndex

# error: Signature of "_data" incompatible with supertype "Index"
@cache_readonly
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/internals/managers.py
Expand Up @@ -64,7 +64,6 @@
)
from pandas.core.indexers import maybe_convert_indices
from pandas.core.indexes.api import (
Float64Index,
Index,
ensure_index,
)
Expand Down Expand Up @@ -1582,7 +1581,7 @@ def _equal_values(self: BlockManager, other: BlockManager) -> bool:
def quantile(
self: T,
*,
qs: Float64Index,
qs: Index, # of dtype float 64
axis: AxisInt = 0,
interpolation: QuantileInterpolation = "linear",
) -> T:
Expand Down Expand Up @@ -1610,7 +1609,7 @@ def quantile(
assert axis == 1 # only ever called this way

new_axes = list(self.axes)
new_axes[1] = Float64Index(qs)
new_axes[1] = Index(qs, dtype=np.float64)

blocks = [
blk.quantile(axis=axis, qs=qs, interpolation=interpolation)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/apply/test_series_apply.py
Expand Up @@ -186,7 +186,7 @@ def test_apply_datetimetz():
# change dtype
# GH 14506 : Returned dtype changed from int32 to int64
result = s.apply(lambda x: x.hour)
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
tm.assert_series_equal(result, exp)

# not vectorized
Expand Down Expand Up @@ -766,7 +766,7 @@ def test_map_datetimetz():
# change dtype
# GH 14506 : Returned dtype changed from int32 to int64
result = s.map(lambda x: x.hour)
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
tm.assert_series_equal(result, exp)

# not vectorized
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/interval/test_interval.py
Expand Up @@ -287,7 +287,7 @@ def test_arrow_array():
with pytest.raises(TypeError, match="Not supported to convert IntervalArray"):
pa.array(intervals, type="float64")

with pytest.raises(TypeError, match="different 'subtype'"):
with pytest.raises(TypeError, match="Not supported to convert IntervalArray"):
pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left"))


Expand Down
17 changes: 15 additions & 2 deletions pandas/tests/arrays/sparse/test_accessor.py
Expand Up @@ -41,7 +41,12 @@ def test_from_coo(self):
sp_array = scipy.sparse.coo_matrix((data, (row, col)), dtype="int")
result = pd.Series.sparse.from_coo(sp_array)

index = pd.MultiIndex.from_arrays([[0, 0, 1, 3], [0, 2, 1, 3]])
index = pd.MultiIndex.from_arrays(
[
np.array([0, 0, 1, 3], dtype=np.int32),
np.array([0, 2, 1, 3], dtype=np.int32),
],
)
expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]")
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -212,7 +217,15 @@ def test_series_from_coo(self, dtype, dense_index):

A = scipy.sparse.eye(3, format="coo", dtype=dtype)
result = pd.Series.sparse.from_coo(A, dense_index=dense_index)
index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)])

index_dtype = np.int64 if dense_index else np.int32
index = pd.MultiIndex.from_tuples(
[
np.array([0, 0], dtype=index_dtype),
np.array([1, 1], dtype=index_dtype),
np.array([2, 2], dtype=index_dtype),
],
)
expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index)
if dense_index:
expected = expected.reindex(pd.MultiIndex.from_product(index.levels))
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/indexing/test_indexing.py
Expand Up @@ -736,7 +736,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager):

# positional slicing only via iloc!
msg = (
"cannot do positional indexing on Float64Index with "
"cannot do positional indexing on NumericIndex with "
r"these indexers \[1.0\] of type float"
)
with pytest.raises(TypeError, match=msg):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/methods/test_set_index.py
Expand Up @@ -159,7 +159,7 @@ def test_set_index_cast(self):
df = DataFrame(
{"A": [1.1, 2.2, 3.3], "B": [5.0, 6.1, 7.2]}, index=[2010, 2011, 2012]
)
df2 = df.set_index(df.index.astype(np.int32))
df2 = df.set_index(df.index.astype(np.int64))
tm.assert_frame_equal(df, df2)

# A has duplicate values, C does not
Expand Down
8 changes: 2 additions & 6 deletions pandas/tests/indexes/common.py
Expand Up @@ -835,11 +835,7 @@ def test_insert_non_na(self, simple_index):

result = index.insert(0, index[0])

cls = type(index)
if cls is RangeIndex:
cls = Int64Index

expected = cls([index[0]] + list(index), dtype=index.dtype)
expected = Index([index[0]] + list(index), dtype=index.dtype)
tm.assert_index_equal(result, expected, exact=True)

def test_insert_na(self, nulls_fixture, simple_index):
Expand All @@ -850,7 +846,7 @@ def test_insert_na(self, nulls_fixture, simple_index):
if na_val is pd.NaT:
expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object)
else:
expected = Float64Index([index[0], np.nan] + list(index[1:]))
expected = Index([index[0], np.nan] + list(index[1:]))

if index._is_backward_compat_public_numeric_index:
# GH#43921 we preserve NumericIndex
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexes/datetimes/test_date_range.py
Expand Up @@ -812,7 +812,7 @@ def test_date_range_span_dst_transition(self, tzstr):

dr = date_range("2012-11-02", periods=10, tz=tzstr)
result = dr.hour
expected = pd.Index([0] * 10)
expected = pd.Index([0] * 10, dtype="int32")
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/indexes/datetimes/test_misc.py
Expand Up @@ -281,8 +281,9 @@ def test_datetime_name_accessors(self, time_locale):

def test_nanosecond_field(self):
dti = DatetimeIndex(np.arange(10))
expected = Index(np.arange(10, dtype=np.int32))

tm.assert_index_equal(dti.nanosecond, Index(np.arange(10, dtype=np.int64)))
tm.assert_index_equal(dti.nanosecond, expected)


def test_iter_readonly():
Expand Down

0 comments on commit 5ff5584

Please sign in to comment.