diff --git a/pandas/conftest.py b/pandas/conftest.py index 4639799d2ee03e..d74ec1afce1274 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -593,10 +593,7 @@ def _create_mi_with_dt64tz_level(): "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"), "period": tm.makePeriodIndex(100), "timedelta": tm.makeTimedeltaIndex(100), - "int": tm.makeIntIndex(100), - "uint": tm.makeUIntIndex(100), "range": tm.makeRangeIndex(100), - "float": tm.makeFloatIndex(100), "complex64": tm.makeFloatIndex(100).astype("complex64"), "complex128": tm.makeFloatIndex(100).astype("complex128"), "num_int64": tm.makeNumericIndex(100, dtype="int64"), diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 10c2349f05dfd7..0b5e70694e95a6 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -89,6 +89,7 @@ ensure_platform_int, is_bool_dtype, is_categorical_dtype, + is_complex_dtype, is_dtype_equal, is_ea_or_datetimelike_dtype, is_extension_array_dtype, @@ -104,6 +105,7 @@ is_scalar, is_signed_integer_dtype, is_string_dtype, + is_unsigned_integer_dtype, needs_i8_conversion, pandas_dtype, validate_all_hashable, @@ -588,18 +590,14 @@ def _dtype_to_subclass(cls, dtype: DtypeObj): return TimedeltaIndex - elif dtype.kind == "f": - from pandas.core.api import Float64Index - - return Float64Index - elif dtype.kind == "u": - from pandas.core.api import UInt64Index - - return UInt64Index - elif dtype.kind == "i": - from pandas.core.api import Int64Index + elif ( + is_numeric_dtype(dtype) + and not is_bool_dtype(dtype) + and not is_complex_dtype(dtype) + ): + from pandas.core.api import NumericIndex - return Int64Index + return NumericIndex elif dtype.kind == "O": # NB: assuming away MultiIndex @@ -1040,14 +1038,29 @@ def astype(self, dtype, copy: bool = True): new_values = astype_nansafe(values, dtype=dtype, copy=copy) # pass copy=False because any copying will be done in the astype above - if self._is_backward_compat_public_numeric_index: - # this block 
is needed so e.g. NumericIndex[int8].astype("int32") returns - # NumericIndex[int32] and not Int64Index with dtype int64. + if not self._is_backward_compat_public_numeric_index and not isinstance( + self, ABCRangeIndex + ): + # this block is needed so e.g. Int64Index.astype("int32") returns + # Int64Index and not a NumericIndex with dtype int32. # When Int64Index etc. are removed from the code base, removed this also. if isinstance(dtype, np.dtype) and is_numeric_dtype(dtype): - return self._constructor( - new_values, name=self.name, dtype=dtype, copy=False + from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, ) + + if is_signed_integer_dtype(dtype): + klass = Int64Index + elif is_unsigned_integer_dtype(dtype): + klass = UInt64Index + elif is_float_dtype(dtype): + klass = Float64Index + else: + klass = Index + return klass(new_values, name=self.name, dtype=dtype, copy=False) + return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False) _index_shared_docs[ @@ -5247,6 +5260,7 @@ def putmask(self, mask, value) -> Index: if self.dtype != object and is_valid_na_for_dtype(value, self.dtype): # e.g. 
None -> np.nan, see also Block._standardize_fill_value value = self._na_value + try: converted = self._validate_fill_value(value) except (LossySetitemError, ValueError, TypeError) as err: @@ -6115,13 +6129,6 @@ def map(self, mapper, na_action=None): new_values, self.dtype, same_dtype=same_dtype ) - if self._is_backward_compat_public_numeric_index and is_numeric_dtype( - new_values.dtype - ): - return self._constructor( - new_values, dtype=dtype, copy=False, name=self.name - ) - return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name) # TODO: De-duplicate with map, xref GH#32349 @@ -6598,10 +6605,17 @@ def insert(self, loc: int, item) -> Index: loc = loc if loc >= 0 else loc - 1 new_values[loc] = item - if self._typ == "numericindex": - # Use self._constructor instead of Index to retain NumericIndex GH#43921 - # TODO(2.0) can use Index instead of self._constructor - return self._constructor._with_infer(new_values, name=self.name) + if not self._is_backward_compat_public_numeric_index: + from pandas.core.indexes.numeric import NumericIndex + + if not isinstance(self, ABCRangeIndex) or not isinstance( + self, NumericIndex + ): + return Index._with_infer(new_values, name=self.name) + else: + # Use self._constructor instead of Index to retain old-style num. 
index + # TODO(2.0) can use Index instead of self._constructor + return self._constructor._with_infer(new_values, name=self.name) else: return Index._with_infer(new_values, name=self.name) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 816a1752c5bf02..95378da165d014 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -13,6 +13,7 @@ ) from pandas._typing import ( Dtype, + DtypeObj, npt, ) from pandas.util._decorators import ( @@ -174,6 +175,10 @@ def _ensure_array(cls, data, dtype, copy: bool): raise ValueError("Index data must be 1-dimensional") subarr = np.asarray(subarr) + if subarr.dtype == "float16": + # float16 not supported (no indexing engine) + subarr = subarr.astype("float32") + return subarr @classmethod @@ -198,6 +203,9 @@ def _ensure_dtype(cls, dtype: Dtype | None) -> np.dtype | None: return cls._default_dtype dtype = pandas_dtype(dtype) + if dtype == np.float16: + # float16 not supported (no indexing engine) + dtype = np.dtype(np.float32) assert isinstance(dtype, np.dtype) if cls._is_backward_compat_public_numeric_index: @@ -347,7 +355,26 @@ def _format_native_types( """ -class IntegerIndex(NumericIndex): +class TempBaseIndex(NumericIndex): + @classmethod + def _dtype_to_subclass(cls, dtype: DtypeObj): + if is_unsigned_integer_dtype(dtype): # is_integer_dtype also matches uint + from pandas.core.api import UInt64Index + + return UInt64Index + elif is_integer_dtype(dtype): + from pandas.core.api import Int64Index + + return Int64Index + elif is_float_dtype(dtype): + from pandas.core.api import Float64Index + + return Float64Index + else: + return super()._dtype_to_subclass(dtype) + + +class IntegerIndex(TempBaseIndex): """ This is an abstract class for Int64Index, UInt64Index. 
""" @@ -391,7 +418,7 @@ def _engine_type(self) -> type[libindex.UInt64Engine]: return libindex.UInt64Engine -class Float64Index(NumericIndex): +class Float64Index(TempBaseIndex): _index_descr_args = { "klass": "Float64Index", "dtype": "float64", diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index f15c244d8b6280..df44eb827698de 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -185,9 +185,9 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex: # error: Return type "Type[Int64Index]" of "_constructor" incompatible with return # type "Type[RangeIndex]" in supertype "Index" @cache_readonly - def _constructor(self) -> type[Int64Index]: # type: ignore[override] + def _constructor(self) -> type[NumericIndex]: # type: ignore[override] """return the class to use for construction""" - return Int64Index + return NumericIndex # error: Signature of "_data" incompatible with supertype "Index" @cache_readonly diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 91bb3a128ae273..2844688b54364e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -64,7 +64,6 @@ ) from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.api import ( - Float64Index, Index, ensure_index, ) @@ -1582,7 +1581,7 @@ def _equal_values(self: BlockManager, other: BlockManager) -> bool: def quantile( self: T, *, - qs: Float64Index, + qs: Index, # of dtype float 64 axis: AxisInt = 0, interpolation: QuantileInterpolation = "linear", ) -> T: @@ -1610,7 +1609,7 @@ def quantile( assert axis == 1 # only ever called this way new_axes = list(self.axes) - new_axes[1] = Float64Index(qs) + new_axes[1] = Index(qs, dtype=np.float64) blocks = [ blk.quantile(axis=axis, qs=qs, interpolation=interpolation) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 9b51ea7fef5f8a..204486e2624eda 100644 --- 
a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -186,7 +186,7 @@ def test_apply_datetimetz(): # change dtype # GH 14506 : Returned dtype changed from int32 to int64 result = s.apply(lambda x: x.hour) - exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) + exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32) tm.assert_series_equal(result, exp) # not vectorized @@ -766,7 +766,7 @@ def test_map_datetimetz(): # change dtype # GH 14506 : Returned dtype changed from int32 to int64 result = s.map(lambda x: x.hour) - exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) + exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32) tm.assert_series_equal(result, exp) # not vectorized diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index 2a6bea32553429..92437e8ebdc895 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -287,7 +287,7 @@ def test_arrow_array(): with pytest.raises(TypeError, match="Not supported to convert IntervalArray"): pa.array(intervals, type="float64") - with pytest.raises(TypeError, match="different 'subtype'"): + with pytest.raises(TypeError, match="Not supported to convert IntervalArray"): pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left")) diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index 36af5d32ae4616..df5dd8b8b182c5 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -41,7 +41,12 @@ def test_from_coo(self): sp_array = scipy.sparse.coo_matrix((data, (row, col)), dtype="int") result = pd.Series.sparse.from_coo(sp_array) - index = pd.MultiIndex.from_arrays([[0, 0, 1, 3], [0, 2, 1, 3]]) + index = pd.MultiIndex.from_arrays( + [ + np.array([0, 0, 1, 3], dtype=np.int32), + np.array([0, 2, 1, 3], dtype=np.int32), + ], + ) expected = 
pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]") tm.assert_series_equal(result, expected) @@ -212,7 +217,15 @@ def test_series_from_coo(self, dtype, dense_index): A = scipy.sparse.eye(3, format="coo", dtype=dtype) result = pd.Series.sparse.from_coo(A, dense_index=dense_index) - index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + index_dtype = np.int64 if dense_index else np.int32 + index = pd.MultiIndex.from_tuples( + [ + np.array([0, 0], dtype=index_dtype), + np.array([1, 1], dtype=index_dtype), + np.array([2, 2], dtype=index_dtype), + ], + ) expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index) if dense_index: expected = expected.reindex(pd.MultiIndex.from_product(index.levels)) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 4e4d0590830dec..a3adbe9e1e0bc9 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -736,7 +736,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager): # positional slicing only via iloc! 
msg = ( - "cannot do positional indexing on Float64Index with " + "cannot do positional indexing on NumericIndex with " r"these indexers \[1.0\] of type float" ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 8e5f11840fbe55..f3b5a51bac2120 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -159,7 +159,7 @@ def test_set_index_cast(self): df = DataFrame( {"A": [1.1, 2.2, 3.3], "B": [5.0, 6.1, 7.2]}, index=[2010, 2011, 2012] ) - df2 = df.set_index(df.index.astype(np.int32)) + df2 = df.set_index(df.index.astype(np.int64)) tm.assert_frame_equal(df, df2) # A has duplicate values, C does not diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index f2141b0b74ac66..331bd384108fef 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -835,11 +835,7 @@ def test_insert_non_na(self, simple_index): result = index.insert(0, index[0]) - cls = type(index) - if cls is RangeIndex: - cls = Int64Index - - expected = cls([index[0]] + list(index), dtype=index.dtype) + expected = Index([index[0]] + list(index), dtype=index.dtype) tm.assert_index_equal(result, expected, exact=True) def test_insert_na(self, nulls_fixture, simple_index): @@ -850,7 +846,7 @@ def test_insert_na(self, nulls_fixture, simple_index): if na_val is pd.NaT: expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object) else: - expected = Float64Index([index[0], np.nan] + list(index[1:])) + expected = Index([index[0], np.nan] + list(index[1:])) if index._is_backward_compat_public_numeric_index: # GH#43921 we preserve NumericIndex diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 142679e292b38a..31878f54614f31 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ 
b/pandas/tests/indexes/datetimes/test_date_range.py @@ -812,7 +812,7 @@ def test_date_range_span_dst_transition(self, tzstr): dr = date_range("2012-11-02", periods=10, tz=tzstr) result = dr.hour - expected = pd.Index([0] * 10) + expected = pd.Index([0] * 10, dtype="int32") tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 1dc01a3d7f937f..a41645f46314ae 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -281,8 +281,9 @@ def test_datetime_name_accessors(self, time_locale): def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) + expected = Index(np.arange(10, dtype=np.int32)) - tm.assert_index_equal(dti.nanosecond, Index(np.arange(10, dtype=np.int64))) + tm.assert_index_equal(dti.nanosecond, expected) def test_iter_readonly(): diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 0bc2862e550218..81128efff6c15e 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -113,28 +113,28 @@ def test_dti_tz_convert_hour_overflow_dst(self): ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2009-05-12 09:50:32"] tt = DatetimeIndex(ts).tz_localize("US/Eastern") ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2009-05-12 13:50:32"] tt = DatetimeIndex(ts).tz_localize("UTC") ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"] 
tt = DatetimeIndex(ts).tz_localize("US/Eastern") ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"] tt = DatetimeIndex(ts).tz_localize("UTC") ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) @@ -149,7 +149,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern @@ -160,7 +160,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC @@ -171,7 +171,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern @@ -182,7 +182,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) @pytest.mark.parametrize("freq, n", [("H", 1), ("T", 60), ("S", 3600)]) @@ -194,7 +194,7 @@ def test_dti_tz_convert_trans_pos_plus_1__bug(self, freq, n): idx = idx.tz_convert("Europe/Moscow") expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) - tm.assert_index_equal(idx.hour, 
Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) def test_dti_tz_convert_dst(self): for freq, n in [("H", 1), ("T", 60), ("S", 3600)]: @@ -207,7 +207,7 @@ def test_dti_tz_convert_dst(self): np.array([18, 19, 20, 21, 22, 23, 0, 1, 3, 4, 5]), np.array([n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) idx = date_range( "2014-03-08 18:00", "2014-03-09 05:00", freq=freq, tz="US/Eastern" @@ -217,7 +217,7 @@ def test_dti_tz_convert_dst(self): np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), np.array([n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) # End DST idx = date_range( @@ -228,7 +228,7 @@ def test_dti_tz_convert_dst(self): np.array([19, 20, 21, 22, 23, 0, 1, 1, 2, 3, 4]), np.array([n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) idx = date_range( "2014-11-01 18:00", "2014-11-02 05:00", freq=freq, tz="US/Eastern" @@ -238,30 +238,30 @@ def test_dti_tz_convert_dst(self): np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) # daily # Start DST idx = date_range("2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="UTC") idx = idx.tz_convert("US/Eastern") - tm.assert_index_equal(idx.hour, Index([19, 19])) + tm.assert_index_equal(idx.hour, Index([19, 19], dtype=np.int32)) idx = date_range( "2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="US/Eastern" ) idx = idx.tz_convert("UTC") - tm.assert_index_equal(idx.hour, Index([5, 5])) + tm.assert_index_equal(idx.hour, Index([5, 5], dtype=np.int32)) # End DST idx = date_range("2014-11-01 00:00", "2014-11-02 
00:00", freq="D", tz="UTC") idx = idx.tz_convert("US/Eastern") - tm.assert_index_equal(idx.hour, Index([20, 20])) + tm.assert_index_equal(idx.hour, Index([20, 20], dtype=np.int32)) idx = date_range( "2014-11-01 00:00", "2014-11-02 000:00", freq="D", tz="US/Eastern" ) idx = idx.tz_convert("UTC") - tm.assert_index_equal(idx.hour, Index([4, 4])) + tm.assert_index_equal(idx.hour, Index([4, 4], dtype=np.int32)) def test_tz_convert_roundtrip(self, tz_aware_fixture): tz = tz_aware_fixture @@ -1134,7 +1134,7 @@ def test_field_access_localize(self, prefix): "2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan" ) - expected = Index(np.arange(10, dtype=np.int64)) + expected = Index(np.arange(10, dtype=np.int32)) tm.assert_index_equal(dr.hour, expected) @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py index eaa4e0a7b5256a..d2e0277867a642 100644 --- a/pandas/tests/indexes/multi/test_setops.py +++ b/pandas/tests/indexes/multi/test_setops.py @@ -10,6 +10,10 @@ Series, ) import pandas._testing as tm +from pandas.api.types import ( + is_float_dtype, + is_unsigned_integer_dtype, +) @pytest.mark.parametrize("case", [0.5, "xxx"]) @@ -624,7 +628,10 @@ def test_union_duplicates(index, request): expected = mi2.sort_values() tm.assert_index_equal(result, expected) - if mi2.levels[0].dtype == np.uint64 and (mi2.get_level_values(0) < 2**63).all(): + if ( + is_unsigned_integer_dtype(mi2.levels[0]) + and (mi2.get_level_values(0) < 2**63).all() + ): # GH#47294 - union uses lib.fast_zip, converting data to Python integers # and loses type information. Result is then unsigned only when values are # sufficiently large to require unsigned dtype. 
This happens only if other @@ -632,6 +639,13 @@ def test_union_duplicates(index, request): expected = expected.set_levels( [expected.levels[0].astype(int), expected.levels[1]] ) + elif is_float_dtype(mi2.levels[0]): + # mi2 has duplicates, which is a different path than above. Fix that path + # to use correct float dtype? + expected = expected.set_levels( + [expected.levels[0].astype(float), expected.levels[1]] + ) + result = mi1.union(mi2) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/numeric/test_astype.py b/pandas/tests/indexes/numeric/test_astype.py index ee75f56eac7ce3..b556907e29206a 100644 --- a/pandas/tests/indexes/numeric/test_astype.py +++ b/pandas/tests/indexes/numeric/test_astype.py @@ -67,7 +67,7 @@ def test_astype_float64_to_float_dtype(self, dtype): idx = Float64Index([0, 1.1, 2]) result = idx.astype(dtype) - expected = Index(idx.values.astype(dtype)) + expected = Float64Index(idx.values.astype(dtype)) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("dtype", ["M8[ns]", "m8[ns]"]) diff --git a/pandas/tests/indexes/numeric/test_numeric.py b/pandas/tests/indexes/numeric/test_numeric.py index 4a6fc3a42b3ee1..7eb7cd79ca932b 100644 --- a/pandas/tests/indexes/numeric/test_numeric.py +++ b/pandas/tests/indexes/numeric/test_numeric.py @@ -10,7 +10,6 @@ ) import pandas._testing as tm from pandas.core.indexes.api import ( - Float64Index, Int64Index, NumericIndex, UInt64Index, @@ -160,10 +159,9 @@ def test_type_coercion_fail(self, any_int_numpy_dtype): Index([1, 2, 3.5], dtype=any_int_numpy_dtype) def test_type_coercion_valid(self, float_numpy_dtype): - # There is no Float32Index, so we always - # generate Float64Index. 
idx = Index([1, 2, 3.5], dtype=float_numpy_dtype) - tm.assert_index_equal(idx, Index([1, 2, 3.5]), exact=True) + result = NumericIndex([1, 2, 3.5], dtype=float_numpy_dtype) + tm.assert_index_equal(idx, result, exact=True) def test_equals_numeric(self): index_cls = self._index_cls @@ -268,39 +266,6 @@ def test_fillna_float64(self): tm.assert_index_equal(idx.fillna("obj"), exp, exact=True) -class TestFloat64Index(TestFloatNumericIndex): - _index_cls = Float64Index - - @pytest.fixture - def dtype(self, request): - return np.float64 - - @pytest.fixture( - params=["int64", "uint64", "object", "category", "datetime64"], - ) - def invalid_dtype(self, request): - return request.param - - def test_constructor_from_base_index(self, dtype): - index_cls = self._index_cls - - result = Index(np.array([np.nan], dtype=dtype)) - assert isinstance(result, index_cls) - assert result.dtype == dtype - assert pd.isna(result.values).all() - - def test_constructor_32bit(self, dtype): - index_cls = self._index_cls - - index = index_cls(np.array([1.0, 2, 3, 4, 5]), dtype=np.float32) - assert isinstance(index, index_cls) - assert index.dtype == np.float64 - - index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.float32) - assert isinstance(index, index_cls) - assert index.dtype == np.float64 - - class NumericInt(NumericBase): def test_view(self, dtype): index_cls = self._index_cls @@ -507,14 +472,14 @@ def test_constructor_np_signed(self, any_signed_int_numpy_dtype): # GH#47475 scalar = np.dtype(any_signed_int_numpy_dtype).type(1) result = Index([scalar]) - expected = Int64Index([1]) + expected = NumericIndex([1], dtype=any_signed_int_numpy_dtype) tm.assert_index_equal(result, expected) def test_constructor_np_unsigned(self, any_unsigned_int_numpy_dtype): # GH#47475 scalar = np.dtype(any_unsigned_int_numpy_dtype).type(1) result = Index([scalar]) - expected = UInt64Index([1]) + expected = NumericIndex([1], dtype=any_unsigned_int_numpy_dtype) tm.assert_index_equal(result, expected) def 
test_coerce_list(self): @@ -527,31 +492,6 @@ def test_coerce_list(self): assert type(arr) is Index -class TestInt64Index(TestIntNumericIndex): - _index_cls = Int64Index - - @pytest.fixture - def dtype(self): - return np.int64 - - @pytest.fixture( - params=["float64", "uint64", "object", "category", "datetime64"], - ) - def invalid_dtype(self, request): - return request.param - - def test_constructor_32bit(self, dtype): - index_cls = self._index_cls - - index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.int32) - assert isinstance(index, index_cls) - assert index.dtype == np.int64 - - index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.int32) - assert isinstance(index, index_cls) - assert index.dtype == np.int64 - - class TestUIntNumericIndex(NumericInt): _index_cls = NumericIndex @@ -580,64 +520,6 @@ def index(self, request): return self._index_cls(request.param, dtype=np.uint64) -class TestUInt64Index(TestUIntNumericIndex): - - _index_cls = UInt64Index - - @pytest.fixture - def dtype(self): - return np.uint64 - - @pytest.fixture( - params=["int64", "float64", "object", "category", "datetime64"], - ) - def invalid_dtype(self, request): - return request.param - - def test_constructor(self, dtype): - index_cls = self._index_cls - exact = True if index_cls is UInt64Index else "equiv" - - idx = index_cls([1, 2, 3]) - res = Index([1, 2, 3], dtype=dtype) - tm.assert_index_equal(res, idx, exact=exact) - - idx = index_cls([1, 2**63]) - res = Index([1, 2**63], dtype=dtype) - tm.assert_index_equal(res, idx, exact=exact) - - idx = index_cls([1, 2**63]) - res = Index([1, 2**63]) - tm.assert_index_equal(res, idx, exact=exact) - - idx = Index([-1, 2**63], dtype=object) - res = Index(np.array([-1, 2**63], dtype=object)) - tm.assert_index_equal(res, idx, exact=exact) - - # https://github.com/pandas-dev/pandas/issues/29526 - idx = index_cls([1, 2**63 + 1], dtype=dtype) - res = Index([1, 2**63 + 1], dtype=dtype) - tm.assert_index_equal(res, idx, exact=exact) - - def 
test_constructor_does_not_cast_to_float(self): - # https://github.com/numpy/numpy/issues/19146 - values = [0, np.iinfo(np.uint64).max] - - result = UInt64Index(values) - assert list(result) == values - - def test_constructor_32bit(self, dtype): - index_cls = self._index_cls - - index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.uint32) - assert isinstance(index, index_cls) - assert index.dtype == np.uint64 - - index = index_cls(np.array([1, 2, 3, 4, 5]), dtype=np.uint32) - assert isinstance(index, index_cls) - assert index.dtype == np.uint64 - - @pytest.mark.parametrize( "box", [list, lambda x: np.array(x, dtype=object), lambda x: Index(x, dtype=object)], diff --git a/pandas/tests/indexes/ranges/test_join.py b/pandas/tests/indexes/ranges/test_join.py index ed21996de891b9..c3c2560693d3d5 100644 --- a/pandas/tests/indexes/ranges/test_join.py +++ b/pandas/tests/indexes/ranges/test_join.py @@ -66,7 +66,7 @@ def test_join_inner(self): elidx = np.array([8, 9], dtype=np.intp) eridx = np.array([9, 7], dtype=np.intp) - assert isinstance(res, Int64Index) + assert isinstance(res, Index) and res.dtype == np.int64 tm.assert_index_equal(res, eres) tm.assert_numpy_array_equal(lidx, elidx) tm.assert_numpy_array_equal(ridx, eridx) diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 003c69a6a11a67..b77289d5aadcd8 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -218,7 +218,7 @@ def test_delete_not_preserving_rangeindex(self): loc = [0, 3, 5] result = idx.delete(loc) - expected = Int64Index([1, 2, 4]) + expected = Index([1, 2, 4]) tm.assert_index_equal(result, expected, exact=True) result = idx.delete(loc[::-1]) @@ -536,8 +536,8 @@ def test_len_specialised(self, step): ([RI(2), RI(2, 5), RI(5, 8, 4)], RI(0, 6)), ([RI(2), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])), ([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)), - ([RI(3), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])), 
- ([RI(3), F64([-1, 3.1, 15.0])], F64([0, 1, 2, -1, 3.1, 15.0])), + ([RI(3), OI([-1, 3, 15])], OI([0, 1, 2, -1, 3, 15])), + ([RI(3), OI([-1, 3.1, 15.0])], OI([0, 1, 2, -1, 3.1, 15.0])), ([RI(3), OI(["a", None, 14])], OI([0, 1, 2, "a", None, 14])), ([RI(3, 1), OI(["a", None, 14])], OI(["a", None, 14])), ] diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py index 71bd2f5590b8f5..29ba5a91498db1 100644 --- a/pandas/tests/indexes/ranges/test_setops.py +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -16,16 +16,15 @@ Index, Int64Index, RangeIndex, - UInt64Index, ) class TestRangeIndexSetOps: - @pytest.mark.parametrize("klass", [RangeIndex, Int64Index, UInt64Index]) - def test_intersection_mismatched_dtype(self, klass): + @pytest.mark.parametrize("dtype", [None, "int64", "uint64"]) + def test_intersection_mismatched_dtype(self, dtype): # check that we cast to float, not object index = RangeIndex(start=0, stop=20, step=2, name="foo") - index = klass(index) + index = Index(index, dtype=dtype) flt = index.astype(np.float64) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index ef041d7f9e119e..8a11e1849b50a5 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -547,7 +547,7 @@ def test_map_tseries_indices_return_index(self, attr): def test_map_tseries_indices_accsr_return_index(self): date_index = tm.makeDateIndex(24, freq="h", name="hourly") - expected = Int64Index(range(24), name="hourly") + expected = Index(range(24), dtype="int32", name="hourly") tm.assert_index_equal(expected, date_index.map(lambda x: x.hour), exact=True) @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/test_index_new.py b/pandas/tests/indexes/test_index_new.py index 9a57e3e08a59c0..20ca03b052c344 100644 --- a/pandas/tests/indexes/test_index_new.py +++ b/pandas/tests/indexes/test_index_new.py @@ -10,8 +10,6 @@ import numpy as np import pytest -from 
pandas.core.dtypes.common import is_unsigned_integer_dtype - from pandas import ( NA, Categorical, @@ -31,11 +29,7 @@ timedelta_range, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, - UInt64Index, -) +from pandas.core.api import NumericIndex class TestIndexConstructorInference: @@ -54,12 +48,7 @@ def test_construction_list_tuples_nan(self, na_value, vtype): ) def test_constructor_int_dtype_float(self, dtype): # GH#18400 - if is_unsigned_integer_dtype(dtype): - index_type = UInt64Index - else: - index_type = Int64Index - - expected = index_type([0, 1, 2, 3]) + expected = NumericIndex([0, 1, 2, 3], dtype=dtype) result = Index([0.0, 1.0, 2.0, 3.0], dtype=dtype) tm.assert_index_equal(result, expected) @@ -258,7 +247,7 @@ def test_constructor_int_dtype_nan_raises(self, dtype): ) def test_constructor_dtypes_to_int64(self, vals): index = Index(vals, dtype=int) - assert isinstance(index, Int64Index) + assert isinstance(index, NumericIndex) and index.dtype == "int64" @pytest.mark.parametrize( "vals", @@ -272,7 +261,7 @@ def test_constructor_dtypes_to_int64(self, vals): ) def test_constructor_dtypes_to_float64(self, vals): index = Index(vals, dtype=float) - assert isinstance(index, Float64Index) + assert isinstance(index, NumericIndex) and index.dtype == "float64" @pytest.mark.parametrize( "vals", diff --git a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 453ece35a68e7f..93bf64f91dea0c 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -10,10 +10,8 @@ isna, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - NumericIndex, -) +from pandas.api.types import is_complex_dtype +from pandas.core.api import NumericIndex from pandas.core.arrays import BooleanArray from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -81,8 +79,21 @@ def test_numpy_ufuncs_basic(index, func): 
tm.assert_index_equal(result, exp) if type(index) is not Index or index.dtype == bool: - # i.e NumericIndex - assert isinstance(result, Float64Index) + assert type(result) is NumericIndex + if is_complex_dtype(index): + assert result.dtype == "complex64" + elif index.dtype in [ + "bool", + "int8", + "int16", + "uint8", + "uint16", + "float16", + "float32", + ]: + assert result.dtype == "float32" + else: + assert result.dtype == "float64" else: # e.g. np.exp with Int64 -> Float64 assert type(result) is Index diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 01efbfb9ae0c09..7d1f6aa2df11df 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -14,7 +14,6 @@ ) import pandas._testing as tm from pandas.core.arrays import TimedeltaArray -from pandas.core.indexes.api import Int64Index from pandas.tests.indexes.datetimelike import DatetimeLike randn = np.random.randn @@ -56,7 +55,7 @@ def test_map(self): f = lambda x: x.days result = rng.map(f) - exp = Int64Index([f(x) for x in rng]) + exp = Index([f(x) for x in rng], dtype=np.int32) tm.assert_index_equal(result, exp) def test_pass_TimedeltaIndex_to_index(self): @@ -70,15 +69,16 @@ def test_pass_TimedeltaIndex_to_index(self): def test_fields(self): rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s") - tm.assert_index_equal(rng.days, Index([1, 1], dtype="int64")) + tm.assert_index_equal(rng.days, Index([1, 1], dtype=np.int32)) tm.assert_index_equal( rng.seconds, - Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype="int64"), + Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype=np.int32), ) tm.assert_index_equal( - rng.microseconds, Index([100 * 1000 + 123, 100 * 1000 + 123], dtype="int64") + rng.microseconds, + Index([100 * 1000 + 123, 100 * 1000 + 123], dtype=np.int32), ) - tm.assert_index_equal(rng.nanoseconds, Index([456, 456], 
dtype="int64")) + tm.assert_index_equal(rng.nanoseconds, Index([456, 456], dtype=np.int32)) msg = "'TimedeltaIndex' object has no attribute '{}'" with pytest.raises(AttributeError, match=msg.format("hours")): diff --git a/pandas/tests/io/pytables/test_put.py b/pandas/tests/io/pytables/test_put.py index 2699d339504129..0c6fc11bb737ac 100644 --- a/pandas/tests/io/pytables/test_put.py +++ b/pandas/tests/io/pytables/test_put.py @@ -250,7 +250,7 @@ def test_column_multiindex(setup_path): df = DataFrame(np.arange(12).reshape(3, 4), columns=index) expected = df.copy() if isinstance(expected.index, RangeIndex): - expected.index = Int64Index(expected.index) + expected.index = Index(expected.index.to_numpy()) with ensure_clean_store(setup_path) as store: @@ -282,7 +282,7 @@ def test_column_multiindex(setup_path): df = DataFrame(np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo")) expected = df.copy() if isinstance(expected.index, RangeIndex): - expected.index = Int64Index(expected.index) + expected.index = Index(expected.index.to_numpy()) with ensure_clean_store(setup_path) as store: diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 75683a1d96bfb0..922ce663ab4936 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -829,6 +829,7 @@ def test_s3_roundtrip_for_dir( # GH #35791 if partition_col: + expected_df = expected_df.astype(dict.fromkeys(partition_col, np.int32)) partition_col_type = "category" expected_df[partition_col] = expected_df[partition_col].astype( diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 0dbe45eeb1e823..66093fa20abe34 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -441,14 +441,13 @@ def test_merge_datetime_index(self, klass): if klass is not None: on_vector = klass(on_vector) - expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]}) + exp_years = 
np.array([2016, 2017, 2018], dtype=np.int32) + expected = DataFrame({"a": [1, 2, 3], "key_1": exp_years}) result = df.merge(df, on=["a", on_vector], how="inner") tm.assert_frame_equal(result, expected) - expected = DataFrame( - {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]} - ) + expected = DataFrame({"key_0": exp_years, "a_x": [1, 2, 3], "a_y": [1, 2, 3]}) result = df.merge(df, on=[df.index.year], how="inner") tm.assert_frame_equal(result, expected) @@ -852,14 +851,13 @@ def test_merge_datetime_index(self, box): if box is not None: on_vector = box(on_vector) - expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]}) + exp_years = np.array([2016, 2017, 2018], dtype=np.int32) + expected = DataFrame({"a": [1, 2, 3], "key_1": exp_years}) result = df.merge(df, on=["a", on_vector], how="inner") tm.assert_frame_equal(result, expected) - expected = DataFrame( - {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]} - ) + expected = DataFrame({"key_0": exp_years, "a_x": [1, 2, 3], "a_y": [1, 2, 3]}) result = df.merge(df, on=[df.index.year], how="inner") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 14ea670fa6cf90..14d95139baf6fc 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -411,7 +411,14 @@ def test_pivot_no_values(self): res = df.pivot_table(index=df.index.month, columns=df.index.day) exp_columns = MultiIndex.from_tuples([("A", 1), ("A", 2)]) - exp = DataFrame([[2.5, 4.0], [2.0, np.nan]], index=[1, 2], columns=exp_columns) + exp_columns = exp_columns.set_levels( + exp_columns.levels[1].astype(np.int32), level=1 + ) + exp = DataFrame( + [[2.5, 4.0], [2.0, np.nan]], + index=Index([1, 2], dtype=np.int32), + columns=exp_columns, + ) tm.assert_frame_equal(res, exp) df = DataFrame( @@ -424,7 +431,9 @@ def test_pivot_no_values(self): res = df.pivot_table(index=df.index.month, columns=Grouper(key="dt", 
freq="M")) exp_columns = MultiIndex.from_tuples([("A", pd.Timestamp("2011-01-31"))]) exp_columns.names = [None, "dt"] - exp = DataFrame([3.25, 2.0], index=[1, 2], columns=exp_columns) + exp = DataFrame( + [3.25, 2.0], index=Index([1, 2], dtype=np.int32), columns=exp_columns + ) tm.assert_frame_equal(res, exp) res = df.pivot_table( @@ -1604,7 +1613,7 @@ def test_pivot_dtaccessor(self): expected = DataFrame( {7: [0, 3], 8: [1, 4], 9: [2, 5]}, index=exp_idx, - columns=Index([7, 8, 9], name="dt1"), + columns=Index([7, 8, 9], dtype=np.int32, name="dt1"), ) tm.assert_frame_equal(result, expected) @@ -1614,8 +1623,8 @@ def test_pivot_dtaccessor(self): expected = DataFrame( {7: [0, 3], 8: [1, 4], 9: [2, 5]}, - index=Index([1, 2], name="dt2"), - columns=Index([7, 8, 9], name="dt1"), + index=Index([1, 2], dtype=np.int32, name="dt2"), + columns=Index([7, 8, 9], dtype=np.int32, name="dt1"), ) tm.assert_frame_equal(result, expected) @@ -1627,10 +1636,16 @@ def test_pivot_dtaccessor(self): ) exp_col = MultiIndex.from_arrays( - [[7, 7, 8, 8, 9, 9], [1, 2] * 3], names=["dt1", "dt2"] + [ + np.array([7, 7, 8, 8, 9, 9], dtype=np.int32), + np.array([1, 2] * 3, dtype=np.int32), + ], + names=["dt1", "dt2"], ) expected = DataFrame( - np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"), index=[2013], columns=exp_col + np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"), + index=Index([2013], dtype=np.int32), + columns=exp_col, ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index 3a9ddaebf29341..4d0be7464cb3d9 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -23,8 +23,8 @@ def test_datetimeindex(self): # make sure that the ordering on datetimeindex is consistent x = date_range("2000-01-01", periods=2) result1, result2 = (Index(y).day for y in cartesian_product([x, x])) - expected1 = Index([1, 1, 2, 2]) - expected2 = Index([1, 2, 1, 2]) + expected1 = Index([1, 1, 2, 2], dtype=np.int32) 
+ expected2 = Index([1, 2, 1, 2], dtype=np.int32) tm.assert_index_equal(result1, expected1) tm.assert_index_equal(result2, expected2) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index ccd79d5cc58f4a..d1bd3441b7ae10 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -229,14 +229,14 @@ def test_dt_namespace_accessor_index_and_values(self): dti = date_range("20140204", periods=3, freq="s") ser = Series(dti, index=index, name="xxx") exp = Series( - np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx" + np.array([2014, 2014, 2014], dtype="int32"), index=index, name="xxx" ) tm.assert_series_equal(ser.dt.year, exp) - exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx") + exp = Series(np.array([2, 2, 2], dtype="int32"), index=index, name="xxx") tm.assert_series_equal(ser.dt.month, exp) - exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx") + exp = Series(np.array([0, 1, 2], dtype="int32"), index=index, name="xxx") tm.assert_series_equal(ser.dt.second, exp) exp = Series([ser[0]] * 3, index=index, name="xxx") @@ -386,7 +386,7 @@ def test_dt_namespace_accessor_categorical(self): dti = DatetimeIndex(["20171111", "20181212"]).repeat(2) ser = Series(pd.Categorical(dti), name="foo") result = ser.dt.year - expected = Series([2017, 2017, 2018, 2018], name="foo") + expected = Series([2017, 2017, 2018, 2018], dtype="int32", name="foo") tm.assert_series_equal(result, expected) def test_dt_tz_localize_categorical(self, tz_aware_fixture): @@ -748,6 +748,7 @@ def test_hour_index(self): result = dt_series.dt.hour expected = Series( [0, 1, 2, 3, 4], + dtype="int32", index=[2, 6, 7, 8, 11], ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 74d05b7e43b2f6..0744ee6fdbf402 100644 --- 
a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1159,7 +1159,7 @@ def expected(self, val): return Series(res_values) -@pytest.mark.parametrize("val", [512, np.int16(512)]) +@pytest.mark.parametrize("val", [np.int16(512), np.int16(512)]) class TestSetitemIntoIntegerSeriesNeedsUpcast(SetitemCastingEquivalents): @pytest.fixture def obj(self): @@ -1174,7 +1174,7 @@ def expected(self): return Series([1, 512, 3], dtype=np.int16) -@pytest.mark.parametrize("val", [2**33 + 1.0, 2**33 + 1.1, 2**62]) +@pytest.mark.parametrize("val", [2**30 + 1.0, 2**33 + 1.1, 2**62]) class TestSmallIntegerSetitemUpcast(SetitemCastingEquivalents): # https://github.com/pandas-dev/pandas/issues/39584#issuecomment-941212124 @pytest.fixture @@ -1187,10 +1187,12 @@ def key(self): @pytest.fixture def expected(self, val): - if val % 1 != 0: + if val > np.iinfo(np.int64).max: dtype = "f8" - else: + elif val > np.iinfo(np.int32).max: dtype = "i8" + else: + dtype = "i4" return Series([val, 2, 3], dtype=dtype) diff --git a/pandas/tests/util/test_assert_categorical_equal.py b/pandas/tests/util/test_assert_categorical_equal.py index 29a0805bceb987..fda4fa770fd9be 100644 --- a/pandas/tests/util/test_assert_categorical_equal.py +++ b/pandas/tests/util/test_assert_categorical_equal.py @@ -22,8 +22,8 @@ def test_categorical_equal_order_mismatch(check_category_order): msg = """Categorical\\.categories are different Categorical\\.categories values are different \\(100\\.0 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[4, 3, 2, 1\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3, 4]\\, dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[4, 3, 2, 1\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_categorical_equal(c1, c2, **kwargs) else: @@ -34,8 +34,8 @@ def test_categorical_equal_categories_mismatch(): msg = """Categorical\\.categories are different 
Categorical\\.categories values are different \\(25\\.0 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 3, 5\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 3, 5\\], dtype='int64'\\)""" c1 = Categorical([1, 2, 3, 4]) c2 = Categorical([1, 2, 3, 5]) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 71799c73f35c6c..0052ea671a5b0c 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -17,7 +17,7 @@ def test_index_equal_levels_mismatch(): msg = """Index are different Index levels are different -\\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[left\\]: 1, NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) \\[right\\]: 2, MultiIndex\\(\\[\\('A', 1\\), \\('A', 2\\), \\('B', 3\\), @@ -35,8 +35,8 @@ def test_index_equal_values_mismatch(check_exact): msg = """MultiIndex level \\[1\\] are different MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) -\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" idx1 = MultiIndex.from_tuples([("A", 2), ("A", 2), ("B", 3), ("B", 4)]) idx2 = MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3), ("B", 4)]) @@ -49,8 +49,8 @@ def test_index_equal_length_mismatch(check_exact): msg = """Index are different Index length are different -\\[left\\]: 3, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: 4, Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" +\\[left\\]: 3, NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: 4, NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" idx1 = Index([1, 2, 3]) idx2 = Index([1, 2, 3, 4]) @@ -67,22 +67,29 @@ def 
test_index_equal_class(exact): tm.assert_index_equal(idx1, idx2, exact=exact) -@pytest.mark.parametrize( - "idx_values, msg_str", - [ - [[1, 2, 3.0], "Float64Index\\(\\[1\\.0, 2\\.0, 3\\.0\\], dtype='float64'\\)"], - [range(3), "RangeIndex\\(start=0, stop=3, step=1\\)"], - ], -) -def test_index_equal_class_mismatch(check_exact, idx_values, msg_str): - msg = f"""Index are different +def test_int_float_index_equal_class_mismatch(check_exact): + msg = """Index are different + +Attribute "inferred_type" are different +\\[left\\]: integer +\\[right\\]: floating""" + + idx1 = Index([1, 2, 3]) + idx2 = Index([1, 2, 3], dtype=np.float64) + + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, exact=True, check_exact=check_exact) + + +def test_range_index_equal_class_mismatch(check_exact): + msg = """Index are different Index classes are different -\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: {msg_str}""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: """ idx1 = Index([1, 2, 3]) - idx2 = Index(idx_values) + idx2 = RangeIndex(range(3)) with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, exact=True, check_exact=check_exact) @@ -96,8 +103,8 @@ def test_index_equal_values_close(check_exact): msg = """Index are different Index values are different \\(33\\.33333 %\\) -\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) -\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)""" +\\[left\\]: NumericIndex\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: NumericIndex\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, check_exact=check_exact) @@ -114,8 +121,8 @@ def test_index_equal_values_less_close(check_exact, rtol): msg = """Index are different Index values are different \\(33\\.33333 %\\) -\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], 
dtype='float64'\\) -\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)""" +\\[left\\]: NumericIndex\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: NumericIndex\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, **kwargs) @@ -131,8 +138,8 @@ def test_index_equal_values_too_far(check_exact, rtol): msg = """Index are different Index values are different \\(33\\.33333 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 4\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 4\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, **kwargs) @@ -146,8 +153,8 @@ def test_index_equal_value_oder_mismatch(check_exact, rtol, check_order): msg = """Index are different Index values are different \\(66\\.66667 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[3, 2, 1\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[3, 2, 1\\], dtype='int64'\\)""" if check_order: with pytest.raises(AssertionError, match=msg): @@ -168,8 +175,8 @@ def test_index_equal_level_values_mismatch(check_exact, rtol): msg = """MultiIndex level \\[1\\] are different MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) -\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, **kwargs) @@ -225,7 +232,7 @@ def test_index_equal_range_categories(check_categorical, exact): Index classes are different \\[left\\]: RangeIndex\\(start=0, stop=10, step=1\\) 
-\\[right\\]: Int64Index\\(\\[0, 1, 2, 3, 4, 5, 6, 7, 8, 9\\], dtype='int64'\\)""" +\\[right\\]: NumericIndex\\(\\[0, 1, 2, 3, 4, 5, 6, 7, 8, 9\\], dtype='int64'\\)""" rcat = CategoricalIndex(RangeIndex(10)) icat = CategoricalIndex(list(range(10)))