From d010c4a8443b3f0dc0dc8bc9160f275d6c6f262b Mon Sep 17 00:00:00 2001 From: Terji Petersen Date: Fri, 13 Jan 2023 18:12:56 +0000 Subject: [PATCH] DEPR: Remove int64 uint64 float64 index part 1 (#49560) * DEPR: don't make Index instantiate Int64/Uint64/Flaot64Index * fix precommit * fix various issues * np.int_ -> np.intp * update * float16 & groupby.nlargest/nsmallest tests * small fix * tests/window/test_groupby.py types * Revert "np.int_ -> np.intp" This reverts commit d759966d843e4bbf98f428e908dafc0690f4e898. * fixes for comments * fix intp->int_ error * fix interval * fix groupby value_counts * fix comments * fix more comments * fix stuff * fix Co-authored-by: Terji Petersen Co-authored-by: Terji Petersen --- pandas/core/arrays/datetimes.py | 18 +- pandas/core/arrays/interval.py | 8 +- pandas/core/arrays/timedeltas.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/indexes/accessors.py | 8 +- pandas/core/indexes/base.py | 98 ++++++----- pandas/core/indexes/range.py | 4 +- pandas/core/internals/blocks.py | 7 +- pandas/core/internals/managers.py | 5 +- pandas/core/strings/accessor.py | 2 +- pandas/io/feather_format.py | 4 +- pandas/io/formats/info.py | 4 +- pandas/tests/apply/test_series_apply.py | 8 +- pandas/tests/arrays/categorical/test_repr.py | 8 +- pandas/tests/arrays/interval/test_interval.py | 2 +- pandas/tests/arrays/sparse/test_accessor.py | 19 ++- .../tests/extension/decimal/test_decimal.py | 3 +- .../frame/constructors/test_from_dict.py | 2 +- pandas/tests/frame/indexing/test_indexing.py | 2 +- pandas/tests/frame/indexing/test_where.py | 3 +- pandas/tests/frame/methods/test_set_index.py | 8 - pandas/tests/frame/methods/test_to_csv.py | 10 +- .../tests/frame/methods/test_value_counts.py | 4 +- pandas/tests/frame/test_reductions.py | 2 +- .../tests/groupby/aggregate/test_aggregate.py | 19 ++- pandas/tests/groupby/aggregate/test_cython.py | 2 +- pandas/tests/groupby/test_any_all.py | 10 +- pandas/tests/groupby/test_categorical.py | 6 +- pandas/tests/groupby/test_filters.py | 4 +- pandas/tests/groupby/test_function.py | 8 +- pandas/tests/groupby/test_groupby.py | 23 ++- pandas/tests/groupby/test_grouping.py | 8 +- pandas/tests/groupby/test_min_max.py | 4 +- pandas/tests/groupby/test_nunique.py | 2 +- pandas/tests/groupby/test_quantile.py | 18 +- pandas/tests/groupby/test_size.py | 3 + pandas/tests/groupby/test_value_counts.py | 9 +- .../tests/groupby/transform/test_transform.py | 2 +- pandas/tests/indexes/common.py | 8 +- .../datetimelike_/test_drop_duplicates.py | 17 +- .../indexes/datetimes/test_date_range.py | 2 +- pandas/tests/indexes/datetimes/test_misc.py | 3 +- .../tests/indexes/datetimes/test_timezones.py | 36 ++-- .../indexes/interval/test_constructors.py | 70 ++++---- .../tests/indexes/interval/test_interval.py | 13 +- pandas/tests/indexes/multi/test_setops.py | 20 ++- pandas/tests/indexes/numeric/test_astype.py | 10 +- pandas/tests/indexes/numeric/test_numeric.py | 10 +- pandas/tests/indexes/ranges/test_join.py | 2 +- pandas/tests/indexes/ranges/test_range.py | 14 +- pandas/tests/indexes/ranges/test_setops.py | 7 +- pandas/tests/indexes/test_base.py | 4 +- pandas/tests/indexes/test_index_new.py | 9 +- pandas/tests/indexes/test_numpy_compat.py | 17 +- .../indexes/timedeltas/test_timedelta.py | 12 +- .../tests/indexing/multiindex/test_partial.py | 8 +- .../tests/indexing/multiindex/test_slice.py | 2 +- pandas/tests/indexing/test_indexing.py | 2 +- pandas/tests/indexing/test_loc.py | 21 ++- pandas/tests/indexing/test_partial.py | 6 +- pandas/tests/io/excel/test_writers.py | 6 +- pandas/tests/io/formats/test_format.py | 1 - pandas/tests/io/json/test_pandas.py | 4 +- pandas/tests/io/test_parquet.py | 1 + pandas/tests/io/test_sql.py | 2 +- pandas/tests/io/test_stata.py | 161 ++++++++++++------ pandas/tests/resample/test_resample_api.py | 2 +- .../tests/resample/test_resampler_grouper.py | 2 +- pandas/tests/reshape/merge/test_merge.py | 9 +- pandas/tests/reshape/merge/test_multi.py | 19 +-- pandas/tests/reshape/test_crosstab.py | 2 +- pandas/tests/reshape/test_get_dummies.py | 5 +- pandas/tests/reshape/test_melt.py | 4 +- pandas/tests/reshape/test_pivot.py | 29 +++- pandas/tests/reshape/test_util.py | 4 +- .../series/accessors/test_dt_accessor.py | 9 +- pandas/tests/series/indexing/test_setitem.py | 3 +- pandas/tests/series/methods/test_reindex.py | 2 +- pandas/tests/series/test_repr.py | 8 +- pandas/tests/test_algos.py | 2 +- .../util/test_assert_categorical_equal.py | 8 +- pandas/tests/util/test_assert_index_equal.py | 63 ++++--- pandas/tests/window/conftest.py | 1 - pandas/tests/window/test_groupby.py | 12 +- pandas/tests/window/test_pairwise.py | 6 +- 85 files changed, 573 insertions(+), 436 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 6b21c242fdca8..f1da99825d5fd 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1384,7 +1384,7 @@ def isocalendar(self) -> DataFrame: 0 2000 1 2001 2 2002 - dtype: int64 + dtype: int32 """, ) month = _field_accessor( @@ -1407,7 +1407,7 @@ def isocalendar(self) -> DataFrame: 0 1 1 2 2 3 - dtype: int64 + dtype: int32 """, ) day = _field_accessor( @@ -1430,7 +1430,7 @@ def isocalendar(self) -> DataFrame: 0 1 1 2 2 3 - dtype: int64 + dtype: int32 """, ) hour = _field_accessor( @@ -1453,7 +1453,7 @@ def isocalendar(self) -> DataFrame: 0 0 1 1 2 2 - dtype: int64 + dtype: int32 """, ) minute = _field_accessor( @@ -1476,7 +1476,7 @@ def isocalendar(self) -> DataFrame: 0 0 1 1 2 2 - dtype: int64 + dtype: int32 """, ) second = _field_accessor( @@ -1499,7 +1499,7 @@ def isocalendar(self) -> DataFrame: 0 0 1 1 2 2 - dtype: int64 + dtype: int32 """, ) microsecond = _field_accessor( @@ -1522,7 +1522,7 @@ def isocalendar(self) -> DataFrame: 0 0 1 1 2 2 - dtype: int64 + dtype: int32 """, ) nanosecond = _field_accessor( @@ -1545,7 +1545,7 @@ def isocalendar(self) -> DataFrame: 0 0 1 1 2 2 - dtype: int64 + dtype: int32 """, ) _dayofweek_doc = """ @@ -1580,7 +1580,7 @@ def isocalendar(self) -> DataFrame: 2017-01-06 4 2017-01-07 5 2017-01-08 6 - Freq: D, dtype: int64 + Freq: D, dtype: int32 """ day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc) dayofweek = day_of_week diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index f47ab83b6bde1..338cfa18fbe66 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -46,7 +46,10 @@ from pandas.errors import IntCastingNaNError from pandas.util._decorators import Appender -from pandas.core.dtypes.cast import LossySetitemError +from pandas.core.dtypes.cast import ( + LossySetitemError, + maybe_upcast_numeric_to_64bit, +) from pandas.core.dtypes.common import ( is_categorical_dtype, is_dtype_equal, @@ -304,7 +307,10 @@ def _ensure_simple_new_inputs( from pandas.core.indexes.base import ensure_index left = ensure_index(left, copy=copy) + left = maybe_upcast_numeric_to_64bit(left) + right = ensure_index(right, copy=copy) + right = maybe_upcast_numeric_to_64bit(right) if closed is None and isinstance(dtype, IntervalDtype): closed = dtype.closed diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index bc65d06789e5a..9b85212336f0e 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -771,7 +771,7 @@ def total_seconds(self) -> npt.NDArray[np.float64]: dtype='timedelta64[ns]', freq=None) >>> idx.total_seconds() - Float64Index([0.0, 86400.0, 172800.0, 259200.0, 345600.0], + NumericIndex([0.0, 86400.0, 172800.0, 259200.0, 345600.0], dtype='float64') """ pps = periods_per_second(self._creso) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2d26f7318fb8a..4ef91f6de5e27 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6096,7 +6096,7 @@ def astype( 0 1 1 2 dtype: category - Categories (2, int64): [1, 2] + Categories (2, int32): [1, 2] Convert to ordered categorical type with custom ordering: diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index da2a0a2a87137..9e4680d2205b9 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -161,7 +161,7 @@ class DatetimeProperties(Properties): 0 0 1 1 2 2 - dtype: int64 + dtype: int32 >>> hours_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h")) >>> hours_series @@ -173,7 +173,7 @@ class DatetimeProperties(Properties): 0 0 1 1 2 2 - dtype: int64 + dtype: int32 >>> quarters_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="q")) >>> quarters_series @@ -185,7 +185,7 @@ class DatetimeProperties(Properties): 0 1 1 2 2 3 - dtype: int64 + dtype: int32 Returns a Series indexed like the original Series. Raises TypeError if the Series does not contain datetimelike values. @@ -303,7 +303,7 @@ class TimedeltaProperties(Properties): 0 1 1 2 2 3 - dtype: int64 + dtype: int32 """ def to_pytimedelta(self) -> np.ndarray: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b9727beba1026..ea684c0693d8b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -79,7 +79,7 @@ can_hold_element, common_dtype_categorical_compat, ensure_dtype_can_hold_na, - find_common_type, + find_result_type, infer_dtype_from, maybe_cast_pointwise_result, np_can_hold_element, @@ -107,6 +107,7 @@ is_scalar, is_signed_integer_dtype, is_string_dtype, + is_unsigned_integer_dtype, needs_i8_conversion, pandas_dtype, validate_all_hashable, @@ -124,6 +125,7 @@ ABCDatetimeIndex, ABCMultiIndex, ABCPeriodIndex, + ABCRangeIndex, ABCSeries, ABCTimedeltaIndex, ) @@ -308,7 +310,7 @@ class Index(IndexOpsMixin, PandasObject): Examples -------- >>> pd.Index([1, 2, 3]) - Int64Index([1, 2, 3], dtype='int64') + NumericIndex([1, 2, 3], dtype='int64') >>> pd.Index(list('abc')) Index(['a', 'b', 'c'], dtype='object') @@ -564,18 +566,10 @@ def _dtype_to_subclass(cls, dtype: DtypeObj): return TimedeltaIndex - elif dtype.kind == "f": - from pandas.core.api import Float64Index + elif dtype.kind in ["i", "f", "u"]: + from pandas.core.api import NumericIndex - return Float64Index - elif dtype.kind == "u": - from pandas.core.api import UInt64Index - - return UInt64Index - elif dtype.kind == "i": - from pandas.core.api import Int64Index - - return Int64Index + return NumericIndex elif dtype.kind == "O": # NB: assuming away MultiIndex @@ -987,18 +981,34 @@ def astype(self, dtype, copy: bool = True): new_values = astype_array(values, dtype=dtype, copy=copy) # pass copy=False because any copying will be done in the astype above - if self._is_backward_compat_public_numeric_index: - # this block is needed so e.g. NumericIndex[int8].astype("int32") returns - # NumericIndex[int32] and not Int64Index with dtype int64. + if not self._is_backward_compat_public_numeric_index and not isinstance( + self, ABCRangeIndex + ): + # this block is needed so e.g. Int64Index.astype("int32") returns + # Int64Index and not a NumericIndex with dtype int32. # When Int64Index etc. are removed from the code base, removed this also. if ( isinstance(dtype, np.dtype) and is_numeric_dtype(dtype) and not is_complex_dtype(dtype) ): - return self._constructor( - new_values, name=self.name, dtype=dtype, copy=False + from pandas.core.api import ( + Float64Index, + Int64Index, + UInt64Index, ) + + klass: type[Index] + if is_signed_integer_dtype(dtype): + klass = Int64Index + elif is_unsigned_integer_dtype(dtype): + klass = UInt64Index + elif is_float_dtype(dtype): + klass = Float64Index + else: + klass = Index + return klass(new_values, name=self.name, dtype=dtype, copy=False) + return Index(new_values, name=self.name, dtype=new_values.dtype, copy=False) _index_shared_docs[ @@ -1697,9 +1707,9 @@ def set_names( -------- >>> idx = pd.Index([1, 2, 3, 4]) >>> idx - Int64Index([1, 2, 3, 4], dtype='int64') + NumericIndex([1, 2, 3, 4], dtype='int64') >>> idx.set_names('quarter') - Int64Index([1, 2, 3, 4], dtype='int64', name='quarter') + NumericIndex([1, 2, 3, 4], dtype='int64', name='quarter') >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], ... [2018, 2019]]) @@ -1985,7 +1995,7 @@ def droplevel(self, level: IndexLabel = 0): names=['x', 'y']) >>> mi.droplevel(['x', 'y']) - Int64Index([5, 6], dtype='int64', name='z') + NumericIndex([5, 6], dtype='int64', name='z') """ if not isinstance(level, (tuple, list)): level = [level] @@ -2582,7 +2592,7 @@ def isna(self) -> npt.NDArray[np.bool_]: >>> idx = pd.Index([5.2, 6.0, np.NaN]) >>> idx - Float64Index([5.2, 6.0, nan], dtype='float64') + NumericIndex([5.2, 6.0, nan], dtype='float64') >>> idx.isna() array([False, False, True]) @@ -2639,7 +2649,7 @@ def notna(self) -> npt.NDArray[np.bool_]: >>> idx = pd.Index([5.2, 6.0, np.NaN]) >>> idx - Float64Index([5.2, 6.0, nan], dtype='float64') + NumericIndex([5.2, 6.0, nan], dtype='float64') >>> idx.notna() array([ True, True, False]) @@ -2950,7 +2960,7 @@ def union(self, other, sort=None): >>> idx1 = pd.Index([1, 2, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.union(idx2) - Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') + NumericIndex([1, 2, 3, 4, 5, 6], dtype='int64') Union mismatched dtypes @@ -3142,7 +3152,7 @@ def intersection(self, other, sort: bool = False): >>> idx1 = pd.Index([1, 2, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.intersection(idx2) - Int64Index([3, 4], dtype='int64') + NumericIndex([3, 4], dtype='int64') """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) @@ -3289,9 +3299,9 @@ def difference(self, other, sort=None): >>> idx1 = pd.Index([2, 1, 3, 4]) >>> idx2 = pd.Index([3, 4, 5, 6]) >>> idx1.difference(idx2) - Int64Index([1, 2], dtype='int64') + NumericIndex([1, 2], dtype='int64') >>> idx1.difference(idx2, sort=False) - Int64Index([2, 1], dtype='int64') + NumericIndex([2, 1], dtype='int64') """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) @@ -3372,7 +3382,7 @@ def symmetric_difference(self, other, result_name=None, sort=None): >>> idx1 = pd.Index([1, 2, 3, 4]) >>> idx2 = pd.Index([2, 3, 4, 5]) >>> idx1.symmetric_difference(idx2) - Int64Index([1, 5], dtype='int64') + NumericIndex([1, 5], dtype='int64') """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) @@ -4915,7 +4925,7 @@ def __contains__(self, key: Any) -> bool: -------- >>> idx = pd.Index([1, 2, 3, 4]) >>> idx - Int64Index([1, 2, 3, 4], dtype='int64') + NumericIndex([1, 2, 3, 4], dtype='int64') >>> 2 in idx True @@ -5066,6 +5076,7 @@ def putmask(self, mask, value) -> Index: if self.dtype != object and is_valid_na_for_dtype(value, self.dtype): # e.g. None -> np.nan, see also Block._standardize_fill_value value = self._na_value + try: converted = self._validate_fill_value(value) except (LossySetitemError, ValueError, TypeError) as err: @@ -5112,7 +5123,7 @@ def equals(self, other: Any) -> bool: -------- >>> idx1 = pd.Index([1, 2, 3]) >>> idx1 - Int64Index([1, 2, 3], dtype='int64') + NumericIndex([1, 2, 3], dtype='int64') >>> idx1.equals(pd.Index([1, 2, 3])) True @@ -5129,10 +5140,10 @@ def equals(self, other: Any) -> bool: >>> ascending_idx = pd.Index([1, 2, 3]) >>> ascending_idx - Int64Index([1, 2, 3], dtype='int64') + NumericIndex([1, 2, 3], dtype='int64') >>> descending_idx = pd.Index([3, 2, 1]) >>> descending_idx - Int64Index([3, 2, 1], dtype='int64') + NumericIndex([3, 2, 1], dtype='int64') >>> ascending_idx.equals(descending_idx) False @@ -5140,10 +5151,10 @@ def equals(self, other: Any) -> bool: >>> int64_idx = pd.Index([1, 2, 3], dtype='int64') >>> int64_idx - Int64Index([1, 2, 3], dtype='int64') + NumericIndex([1, 2, 3], dtype='int64') >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64') >>> uint64_idx - UInt64Index([1, 2, 3], dtype='uint64') + NumericIndex([1, 2, 3], dtype='uint64') >>> int64_idx.equals(uint64_idx) True """ @@ -5366,18 +5377,18 @@ def sort_values( -------- >>> idx = pd.Index([10, 100, 1, 1000]) >>> idx - Int64Index([10, 100, 1, 1000], dtype='int64') + NumericIndex([10, 100, 1, 1000], dtype='int64') Sort values in ascending order (default behavior). >>> idx.sort_values() - Int64Index([1, 10, 100, 1000], dtype='int64') + NumericIndex([1, 10, 100, 1000], dtype='int64') Sort values in descending order, and also get the indices `idx` was sorted by. >>> idx.sort_values(ascending=False, return_indexer=True) - (Int64Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2])) + (NumericIndex([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2])) """ idx = ensure_key_mapped(self, key) @@ -5853,7 +5864,7 @@ def _find_common_type_compat(self, target) -> DtypeObj: ): return _dtype_obj - dtype = find_common_type([self.dtype, target_dtype]) + dtype = find_result_type(self._values, target) dtype = common_dtype_categorical_compat([self, target], dtype) return dtype @@ -5959,13 +5970,6 @@ def map(self, mapper, na_action=None): new_values, self.dtype, same_dtype=same_dtype ) - if self._is_backward_compat_public_numeric_index and is_numeric_dtype( - new_values.dtype - ): - return self._constructor( - new_values, dtype=dtype, copy=False, name=self.name - ) - return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name) # TODO: De-duplicate with map, xref GH#32349 @@ -6033,7 +6037,7 @@ def isin(self, values, level=None) -> npt.NDArray[np.bool_]: -------- >>> idx = pd.Index([1,2,3]) >>> idx - Int64Index([1, 2, 3], dtype='int64') + NumericIndex([1, 2, 3], dtype='int64') Check whether each index value in a list of values. @@ -6837,7 +6841,7 @@ def ensure_index_from_sequences(sequences, names=None) -> Index: Examples -------- >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"]) - Int64Index([1, 2, 3], dtype='int64', name='name') + NumericIndex([1, 2, 3], dtype='int64', name='name') >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"]) MultiIndex([('a', 'a'), diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index a9b35b99e4b51..e17a0d070be6a 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -189,9 +189,9 @@ def _simple_new( # type: ignore[override] # error: Return type "Type[Int64Index]" of "_constructor" incompatible with return # type "Type[RangeIndex]" in supertype "Index" @cache_readonly - def _constructor(self) -> type[Int64Index]: # type: ignore[override] + def _constructor(self) -> type[NumericIndex]: # type: ignore[override] """return the class to use for construction""" - return Int64Index + return NumericIndex # error: Signature of "_data" incompatible with supertype "Index" @cache_readonly diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index aa859fac04921..f7787aa52623b 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -108,10 +108,7 @@ from pandas.core.indexers import check_setitem_lengths if TYPE_CHECKING: - from pandas.core.api import ( - Float64Index, - Index, - ) + from pandas.core.api import Index from pandas.core.arrays._mixins import NDArrayBackedExtensionArray # comparison is faster than is_object_dtype @@ -1281,7 +1278,7 @@ def shift( @final def quantile( self, - qs: Float64Index, + qs: Index, # with dtype float64 interpolation: QuantileInterpolation = "linear", axis: AxisInt = 0, ) -> Block: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 7fc25b7ea1cad..2ce294f257d75 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -64,7 +64,6 @@ ) from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.api import ( - Float64Index, Index, ensure_index, ) @@ -1542,7 +1541,7 @@ def _equal_values(self: BlockManager, other: BlockManager) -> bool: def quantile( self: T, *, - qs: Float64Index, + qs: Index, # with dtype float 64 axis: AxisInt = 0, interpolation: QuantileInterpolation = "linear", ) -> T: @@ -1570,7 +1569,7 @@ def quantile( assert axis == 1 # only ever called this way new_axes = list(self.axes) - new_axes[1] = Float64Index(qs) + new_axes[1] = Index(qs, dtype=np.float64) blocks = [ blk.quantile(axis=axis, qs=qs, interpolation=interpolation) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 05fbb68e1f19b..a9b3816b41774 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -2247,7 +2247,7 @@ def count(self, pat, flags: int = 0): This is also available on Index >>> pd.Index(['A', 'A', 'Aaba', 'cat']).str.count('a') - Int64Index([0, 0, 2, 1], dtype='int64') + NumericIndex([0, 0, 2, 1], dtype='int64') """ result = self._data.array._str_count(pat, flags) return self._wrap_result(result, returns_string=False) diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 4ecd5b7604088..e781da74e97aa 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -17,7 +17,7 @@ from pandas.core.api import ( DataFrame, - Int64Index, + NumericIndex, RangeIndex, ) from pandas.core.shared_docs import _shared_docs @@ -62,7 +62,7 @@ def to_feather( # validate that we have only a default index # raise on anything else as we don't serialize the index - if not isinstance(df.index, (Int64Index, RangeIndex)): + if not (isinstance(df.index, NumericIndex) and df.index.dtype == "int64"): typ = type(df.index) raise ValueError( f"feather does not support serializing {typ} " diff --git a/pandas/io/formats/info.py b/pandas/io/formats/info.py index 5e87db93cf56c..27b61d502e9de 100644 --- a/pandas/io/formats/info.py +++ b/pandas/io/formats/info.py @@ -165,7 +165,7 @@ >>> s = pd.Series(text_values, index=int_values) >>> s.info() - Int64Index: 5 entries, 1 to 5 + NumericIndex: 5 entries, 1 to 5 Series name: None Non-Null Count Dtype -------------- ----- @@ -177,7 +177,7 @@ >>> s.info(verbose=False) - Int64Index: 5 entries, 1 to 5 + NumericIndex: 5 entries, 1 to 5 dtypes: object(1) memory usage: 80.0+ bytes diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index f66253badaf75..53dee6e15c3e0 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -183,10 +183,8 @@ def test_apply_datetimetz(): exp = Series(exp_values, name="XX") tm.assert_series_equal(result, exp) - # change dtype - # GH 14506 : Returned dtype changed from int32 to int64 result = s.apply(lambda x: x.hour) - exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) + exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32) tm.assert_series_equal(result, exp) # not vectorized @@ -761,10 +759,8 @@ def test_map_datetimetz(): exp = Series(exp_values, name="XX") tm.assert_series_equal(result, exp) - # change dtype - # GH 14506 : Returned dtype changed from int32 to int64 result = s.map(lambda x: x.hour) - exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) + exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32) tm.assert_series_equal(result, exp) # not vectorized diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index b44af07cee01d..e8b43ea98c032 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -111,7 +111,7 @@ def test_categorical_repr(self): assert repr(c) == exp - c = Categorical(np.arange(20)) + c = Categorical(np.arange(20, dtype=np.int64)) exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0, 1, 2, 3, ..., 16, 17, 18, 19]""" @@ -138,7 +138,7 @@ def test_categorical_repr_ordered(self): assert repr(c) == exp - c = Categorical(np.arange(20), ordered=True) + c = Categorical(np.arange(20, dtype=np.int64), ordered=True) exp = """[0, 1, 2, 3, 4, ..., 15, 16, 17, 18, 19] Length: 20 Categories (20, int64): [0 < 1 < 2 < 3 ... 16 < 17 < 18 < 19]""" @@ -380,7 +380,7 @@ def test_categorical_index_repr(self): exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=False, dtype='category')""" # noqa:E501 assert repr(idx) == exp - i = CategoricalIndex(Categorical(np.arange(10))) + i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64))) exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=False, dtype='category')""" # noqa:E501 assert repr(i) == exp @@ -389,7 +389,7 @@ def test_categorical_index_repr_ordered(self): exp = """CategoricalIndex([1, 2, 3], categories=[1, 2, 3], ordered=True, dtype='category')""" # noqa:E501 assert repr(i) == exp - i = CategoricalIndex(Categorical(np.arange(10), ordered=True)) + i = CategoricalIndex(Categorical(np.arange(10, dtype=np.int64), ordered=True)) exp = """CategoricalIndex([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], categories=[0, 1, 2, 3, ..., 6, 7, 8, 9], ordered=True, dtype='category')""" # noqa:E501 assert repr(i) == exp diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py index b0601b9770d64..b2476a7a076fc 100644 --- a/pandas/tests/arrays/interval/test_interval.py +++ b/pandas/tests/arrays/interval/test_interval.py @@ -287,7 +287,7 @@ def test_arrow_array(): with pytest.raises(TypeError, match="Not supported to convert IntervalArray"): pa.array(intervals, type="float64") - with pytest.raises(TypeError, match="different 'subtype'|to convert IntervalArray"): + with pytest.raises(TypeError, match="Not supported to convert IntervalArray"): pa.array(intervals, type=ArrowIntervalType(pa.float64(), "left")) diff --git a/pandas/tests/arrays/sparse/test_accessor.py b/pandas/tests/arrays/sparse/test_accessor.py index 36af5d32ae461..9ac0d9d0401ed 100644 --- a/pandas/tests/arrays/sparse/test_accessor.py +++ b/pandas/tests/arrays/sparse/test_accessor.py @@ -41,7 +41,12 @@ def test_from_coo(self): sp_array = scipy.sparse.coo_matrix((data, (row, col)), dtype="int") result = pd.Series.sparse.from_coo(sp_array) - index = pd.MultiIndex.from_arrays([[0, 0, 1, 3], [0, 2, 1, 3]]) + index = pd.MultiIndex.from_arrays( + [ + np.array([0, 0, 1, 3], dtype=np.int32), + np.array([0, 2, 1, 3], dtype=np.int32), + ], + ) expected = pd.Series([4, 9, 7, 5], index=index, dtype="Sparse[int]") tm.assert_series_equal(result, expected) @@ -212,7 +217,17 @@ def test_series_from_coo(self, dtype, dense_index): A = scipy.sparse.eye(3, format="coo", dtype=dtype) result = pd.Series.sparse.from_coo(A, dense_index=dense_index) - index = pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + # TODO: GH49560: scipy.sparse.eye always has A.row and A.col dtype as int32. + # fix index_dtype to follow scipy.sparse convention (always int32)? + index_dtype = np.int64 if dense_index else np.int32 + index = pd.MultiIndex.from_tuples( + [ + np.array([0, 0], dtype=index_dtype), + np.array([1, 1], dtype=index_dtype), + np.array([2, 2], dtype=index_dtype), + ], + ) expected = pd.Series(SparseArray(np.array([1, 1, 1], dtype=dtype)), index=index) if dense_index: expected = expected.reindex(pd.MultiIndex.from_product(index.levels)) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index c025ba379cc9a..cdacbf9c5bb5a 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -445,7 +445,8 @@ def DecimalArray__my_sum(self): result = df.groupby("id")["decimals"].agg(lambda x: x.values.my_sum()) tm.assert_series_equal(result, expected, check_names=False) s = pd.Series(DecimalArray(data)) - result = s.groupby(np.array([0, 0, 0, 1, 1])).agg(lambda x: x.values.my_sum()) + grouper = np.array([0, 0, 0, 1, 1], dtype=np.int64) + result = s.groupby(grouper).agg(lambda x: x.values.my_sum()) tm.assert_series_equal(result, expected, check_names=False) diff --git a/pandas/tests/frame/constructors/test_from_dict.py b/pandas/tests/frame/constructors/test_from_dict.py index 6cba95e42463d..c04213c215f0d 100644 --- a/pandas/tests/frame/constructors/test_from_dict.py +++ b/pandas/tests/frame/constructors/test_from_dict.py @@ -85,7 +85,7 @@ def test_constructor_list_of_series(self): expected = DataFrame.from_dict(sdict, orient="index") tm.assert_frame_equal(result, expected.reindex(result.index)) - result2 = DataFrame(data, index=np.arange(6)) + result2 = DataFrame(data, index=np.arange(6, dtype=np.int64)) tm.assert_frame_equal(result, result2) result = DataFrame([Series(dtype=object)]) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 57a132c1088c7..8bb5948029ca1 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -737,7 +737,7 @@ def test_getitem_setitem_float_labels(self, using_array_manager): # positional slicing only via iloc! msg = ( - "cannot do positional indexing on Float64Index with " + "cannot do positional indexing on NumericIndex with " r"these indexers \[1.0\] of type float" ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index e37c881472b65..002fde78cfd35 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -642,7 +642,8 @@ def test_df_where_change_dtype(self): @pytest.mark.parametrize("kwargs", [{}, {"other": None}]) def test_df_where_with_category(self, kwargs): # GH#16979 - df = DataFrame(np.arange(2 * 3).reshape(2, 3), columns=list("ABC")) + data = np.arange(2 * 3, dtype=np.int64).reshape(2, 3) + df = DataFrame(data, columns=list("ABC")) mask = np.array([[True, False, False], [False, False, True]]) # change type to category diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 8e5f11840fbe5..303eed0b813f4 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -154,14 +154,6 @@ def test_set_index_names(self): # Check equality tm.assert_index_equal(df.set_index([df.index, idx2]).index, mi2) - def test_set_index_cast(self): - # issue casting an index then set_index - df = DataFrame( - {"A": [1.1, 2.2, 3.3], "B": [5.0, 6.1, 7.2]}, index=[2010, 2011, 2012] - ) - df2 = df.set_index(df.index.astype(np.int32)) - tm.assert_frame_equal(df, df2) - # A has duplicate values, C does not @pytest.mark.parametrize("keys", ["A", "C", ["A", "B"], ("tuple", "as", "label")]) @pytest.mark.parametrize("inplace", [True, False]) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 519be89c8793a..a7e077c0d7408 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -62,8 +62,8 @@ def test_to_csv_from_csv1(self, float_frame, datetime_frame): # corner case dm = DataFrame( { - "s1": Series(range(3), index=np.arange(3)), - "s2": Series(range(2), index=np.arange(2)), + "s1": Series(range(3), index=np.arange(3, dtype=np.int64)), + "s2": Series(range(2), index=np.arange(2, dtype=np.int64)), } ) dm.to_csv(path) @@ -388,7 +388,7 @@ def test_to_csv_dup_cols(self, nrows): @pytest.mark.slow def test_to_csv_empty(self): - df = DataFrame(index=np.arange(10)) + df = DataFrame(index=np.arange(10, dtype=np.int64)) result, expected = self._return_result_expected(df, 1000) tm.assert_frame_equal(result, expected, check_column_type=False) @@ -486,7 +486,7 @@ def test_to_csv_multiindex(self, float_frame, datetime_frame): frame = float_frame old_index = frame.index - arrays = np.arange(len(old_index) * 2).reshape(2, -1) + arrays = np.arange(len(old_index) * 2, dtype=np.int64).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) frame.index = new_index @@ -510,7 +510,7 @@ def test_to_csv_multiindex(self, float_frame, datetime_frame): # try multiindex with dates tsframe = datetime_frame old_index = tsframe.index - new_index = [old_index, np.arange(len(old_index))] + new_index = [old_index, np.arange(len(old_index), dtype=np.int64)] tsframe.index = MultiIndex.from_arrays(new_index) tsframe.to_csv(path, index_label=["time", "foo"]) diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py index 6e8528845ea6b..9859ffb83da66 100644 --- a/pandas/tests/frame/methods/test_value_counts.py +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -88,7 +88,7 @@ def test_data_frame_value_counts_empty(): df_no_cols = pd.DataFrame() result = df_no_cols.value_counts() - expected = pd.Series([], dtype=np.int64) + expected = pd.Series([], dtype=np.int64, index=np.array([], dtype=np.intp)) tm.assert_series_equal(result, expected) @@ -97,7 +97,7 @@ def test_data_frame_value_counts_empty_normalize(): df_no_cols = pd.DataFrame() result = df_no_cols.value_counts(normalize=True) - expected = pd.Series([], dtype=np.float64) + expected = pd.Series([], dtype=np.float64, index=np.array([], dtype=np.intp)) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index f1d176e59373f..2d395a7cbd608 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -583,7 +583,7 @@ def test_mode_sortwarning(self): def test_mode_empty_df(self): df = DataFrame([], columns=["a", "b"]) result = df.mode() - expected = DataFrame([], columns=["a", "b"], index=Index([], dtype=int)) + expected = DataFrame([], columns=["a", "b"], index=Index([], dtype=np.int64)) tm.assert_frame_equal(result, expected) def test_operators_timedelta64(self): diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 461ae05aedb82..29bfaf99b744b 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -673,7 +673,8 @@ def test_agg_split_object_part_datetime(): "D": ["b"], "E": [pd.Timestamp("2000")], "F": [1], - } + }, + index=np.array([0]), ) tm.assert_frame_equal(result, expected) @@ -684,7 +685,7 @@ def test_series_named_agg(self): gr = df.groupby([0, 0, 1, 1]) result = gr.agg(a="sum", b="min") expected = DataFrame( - {"a": [3, 7], "b": [1, 3]}, columns=["a", "b"], index=[0, 1] + {"a": [3, 7], "b": [1, 3]}, columns=["a", "b"], index=np.array([0, 1]) ) tm.assert_frame_equal(result, expected) @@ -706,13 +707,13 @@ def test_series_named_agg_duplicates_no_raises(self): # GH28426 gr = Series([1, 2, 3]).groupby([0, 0, 1]) grouped = gr.agg(a="sum", b="sum") - expected = DataFrame({"a": [3, 3], "b": [3, 3]}) + expected = DataFrame({"a": [3, 3], "b": [3, 3]}, index=np.array([0, 1])) tm.assert_frame_equal(expected, grouped) def test_mangled(self): gr = Series([1, 2, 3]).groupby([0, 0, 1]) result = gr.agg(a=lambda x: 0, b=lambda x: 1) - expected = DataFrame({"a": [0, 0], "b": [1, 1]}) + expected = DataFrame({"a": [0, 0], "b": [1, 1]}, index=np.array([0, 1])) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( @@ -1013,8 +1014,7 @@ def test_multiindex_custom_func(func): (1, 4): {0: 4.0, 1: 7.0}, (2, 3): {0: 2.0, 1: 1.0}, } - expected = DataFrame(expected_dict) - expected.columns = df.columns + expected = DataFrame(expected_dict, index=np.array([0, 1]), columns=df.columns) tm.assert_frame_equal(result, expected) @@ -1096,7 +1096,8 @@ def test_basic(self): def test_mangle_series_groupby(self): gr = Series([1, 2, 3, 4]).groupby([0, 0, 1, 1]) result = gr.agg([lambda x: 0, lambda x: 1]) - expected = DataFrame({"": [0, 0], "": [1, 1]}) + exp_data = {"": [0, 0], "": [1, 1]} + expected = DataFrame(exp_data, index=np.array([0, 1])) tm.assert_frame_equal(result, expected) @pytest.mark.xfail(reason="GH-26611. kwargs for multi-agg.") @@ -1384,7 +1385,7 @@ def test_groupby_aggregate_directory(reduction_func): def test_group_mean_timedelta_nat(): # GH43132 data = Series(["1 day", "3 days", "NaT"], dtype="timedelta64[ns]") - expected = Series(["2 days"], dtype="timedelta64[ns]") + expected = Series(["2 days"], dtype="timedelta64[ns]", index=np.array([0])) result = data.groupby([0, 0, 0]).mean() @@ -1407,7 +1408,7 @@ def test_group_mean_timedelta_nat(): def test_group_mean_datetime64_nat(input_data, expected_output): # GH43132 data = to_datetime(Series(input_data)) - expected = to_datetime(Series(expected_output)) + expected = to_datetime(Series(expected_output, index=np.array([0]))) result = data.groupby([0, 0, 0]).mean() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 08c25fb74be83..7e7d3d682f20f 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -212,7 +212,7 @@ def test_cython_agg_empty_buckets_nanops(observed): # GH-18869 can't call nanops on empty groups, so hardcode expected # for these df = DataFrame([11, 12, 13], columns=["a"]) - grps = range(0, 25, 5) + grps = np.arange(0, 25, 5, dtype=np.int_) # add / sum result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general( "sum", alt=None, numeric_only=True diff --git a/pandas/tests/groupby/test_any_all.py b/pandas/tests/groupby/test_any_all.py index e49238a9e6656..4e6631cb763fe 100644 --- a/pandas/tests/groupby/test_any_all.py +++ b/pandas/tests/groupby/test_any_all.py @@ -68,7 +68,7 @@ def test_bool_aggs_dup_column_labels(bool_agg_func): grp_by = df.groupby([0]) result = getattr(grp_by, bool_agg_func)() - expected = df + expected = df.set_axis(np.array([0])) tm.assert_frame_equal(result, expected) @@ -92,7 +92,7 @@ def test_masked_kleene_logic(bool_agg_func, skipna, data): # The result should match aggregating on the whole series. Correctness # there is verified in test_reductions.py::test_any_all_boolean_kleene_logic expected_data = getattr(ser, bool_agg_func)(skipna=skipna) - expected = Series(expected_data, dtype="boolean") + expected = Series(expected_data, index=np.array([0]), dtype="boolean") result = ser.groupby([0, 0, 0]).agg(bool_agg_func, skipna=skipna) tm.assert_series_equal(result, expected) @@ -135,7 +135,7 @@ def test_masked_mixed_types(dtype1, dtype2, exp_col1, exp_col2): ) result = df.groupby([1, 1]).agg("all", skipna=False) - expected = DataFrame({"col1": exp_col1, "col2": exp_col2}, index=[1]) + expected = DataFrame({"col1": exp_col1, "col2": exp_col2}, index=np.array([1])) tm.assert_frame_equal(result, expected) @@ -148,7 +148,7 @@ def test_masked_bool_aggs_skipna(bool_agg_func, dtype, skipna, frame_or_series): expected_res = True if not skipna and bool_agg_func == "all": expected_res = pd.NA - expected = frame_or_series([expected_res], index=[1], dtype="boolean") + expected = frame_or_series([expected_res], index=np.array([1]), dtype="boolean") result = obj.groupby([1, 1]).agg(bool_agg_func, skipna=skipna) tm.assert_equal(result, expected) @@ -167,7 +167,7 @@ def test_object_type_missing_vals(bool_agg_func, data, expected_res, frame_or_se # GH#37501 obj = frame_or_series(data, dtype=object) result = obj.groupby([1] * len(data)).agg(bool_agg_func) - expected = frame_or_series([expected_res], index=[1], dtype="bool") + expected = frame_or_series([expected_res], index=np.array([1]), dtype="bool") tm.assert_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index e6f0705c2c647..30bf5eb39cf51 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1507,7 +1507,7 @@ def test_groupby_agg_categorical_columns(func, expected_values): def test_groupby_agg_non_numeric(): df = DataFrame({"A": Categorical(["a", "a", "b"], categories=["a", "b", "c"])}) - expected = DataFrame({"A": [2, 1]}, index=[1, 2]) + expected = DataFrame({"A": [2, 1]}, index=np.array([1, 2])) result = df.groupby([1, 2, 1]).agg(Series.nunique) tm.assert_frame_equal(result, expected) @@ -1538,9 +1538,7 @@ def test_read_only_category_no_sort(): df = DataFrame( {"a": [1, 3, 5, 7], "b": Categorical([1, 1, 2, 2], categories=Index(cats))} ) - expected = DataFrame( - data={"a": [2.0, 6.0]}, index=CategoricalIndex([1, 2], name="b") - ) + expected = DataFrame(data={"a": [2.0, 6.0]}, index=CategoricalIndex(cats, name="b")) result = df.groupby("b", sort=False).mean() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_filters.py b/pandas/tests/groupby/test_filters.py index 9db4a14929724..c121689232bb2 100644 --- a/pandas/tests/groupby/test_filters.py +++ b/pandas/tests/groupby/test_filters.py @@ -232,7 +232,7 @@ def test_filter_using_len(): actual = grouped.filter(lambda x: len(x) > 2) expected = DataFrame( {"A": np.arange(2, 6), "B": list("bbbb"), "C": np.arange(2, 6)}, - index=np.arange(2, 6), + index=np.arange(2, 6, dtype=np.int64), ) tm.assert_frame_equal(actual, expected) @@ -244,7 +244,7 @@ def test_filter_using_len(): s = df["B"] grouped = s.groupby(s) actual = grouped.filter(lambda x: len(x) > 2) - expected = Series(4 * ["b"], index=np.arange(2, 6), name="B") + expected = Series(4 * ["b"], index=np.arange(2, 6, dtype=np.int64), name="B") tm.assert_series_equal(actual, expected) actual = grouped.filter(lambda x: len(x) > 4) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 875037b390883..59c117408295b 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -770,16 +770,20 @@ def test_nsmallest(): "data, groups", [([0, 1, 2, 3], [0, 0, 1, 1]), ([0], [0])], ) +@pytest.mark.parametrize("dtype", [None, *tm.ALL_INT_NUMPY_DTYPES]) @pytest.mark.parametrize("method", ["nlargest", "nsmallest"]) -def test_nlargest_and_smallest_noop(data, groups, method): +def test_nlargest_and_smallest_noop(data, groups, dtype, method): # GH 15272, GH 16345, GH 29129 # Test nlargest/smallest when it results in a noop, # i.e. input is sorted and group size <= n + if dtype is not None: + data = np.array(data, dtype=dtype) if method == "nlargest": data = list(reversed(data)) ser = Series(data, name="a") result = getattr(ser.groupby(groups), method)(n=2) - expected = Series(data, index=MultiIndex.from_arrays([groups, ser.index]), name="a") + expidx = np.array(groups, dtype=np.int_) if isinstance(groups, list) else groups + expected = Series(data, index=MultiIndex.from_arrays([expidx, ser.index]), name="a") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 3baf2d86010f7..5384b228850f4 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -92,7 +92,9 @@ def test_groupby_nonobject_dtype(mframe, df_mixed_floats): result = grouped.sum() expected = mframe.groupby(key.astype("O")).sum() - tm.assert_frame_equal(result, expected) + assert result.index.dtype == np.int8 + assert expected.index.dtype == np.int64 + tm.assert_frame_equal(result, expected, check_index_type=False) # GH 3911, mixed frame non-conversion df = df_mixed_floats.copy() @@ -227,6 +229,7 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index): 2: tsframe[tsframe.index.month == 2].quantile(0.8), } expected = DataFrame(ex_data).T + expected.index = expected.index.astype(np.int32) if not as_index: # TODO: try to get this more consistent? expected.index = Index(range(2)) @@ -795,7 +798,7 @@ def test_groupby_as_index_cython(df): data.groupby(["A"]).mean() expected = data.groupby(["A"]).mean(numeric_only=True) expected.insert(0, "A", expected.index) - expected.index = np.arange(len(expected)) + expected.index = RangeIndex(len(expected)) tm.assert_frame_equal(result, expected) # multi-key @@ -806,7 +809,7 @@ def test_groupby_as_index_cython(df): arrays = list(zip(*expected.index.values)) expected.insert(0, "A", arrays[0]) expected.insert(1, "B", arrays[1]) - expected.index = np.arange(len(expected)) + expected.index = RangeIndex(len(expected)) tm.assert_frame_equal(result, expected) @@ -1016,8 +1019,12 @@ def test_groupby_level_mapper(mframe): result0 = mframe.groupby(mapper0, level=0).sum() result1 = mframe.groupby(mapper1, level=1).sum() - mapped_level0 = np.array([mapper0.get(x) for x in deleveled["first"]]) - mapped_level1 = np.array([mapper1.get(x) for x in deleveled["second"]]) + mapped_level0 = np.array( + [mapper0.get(x) for x in deleveled["first"]], dtype=np.int64 + ) + mapped_level1 = np.array( + [mapper1.get(x) for x in deleveled["second"]], dtype=np.int64 + ) expected0 = mframe.groupby(mapped_level0).sum() expected1 = mframe.groupby(mapped_level1).sum() expected0.index.name, expected1.index.name = "first", "second" @@ -2469,7 +2476,7 @@ def test_groupby_duplicate_columns(): ).astype(object) df.columns = ["A", "B", "B"] result = df.groupby([0, 0, 0, 0]).min() - expected = DataFrame([["e", "a", 1]], columns=["A", "B", "B"]) + expected = DataFrame([["e", "a", 1]], index=np.array([0]), columns=["A", "B", "B"]) tm.assert_frame_equal(result, expected) @@ -2770,7 +2777,7 @@ def test_groupby_overflow(val, dtype): result = df.groupby("a").sum() expected = DataFrame( {"b": [val * 2]}, - index=Index([1], name="a", dtype=f"{dtype}64"), + index=Index([1], name="a", dtype=f"{dtype}8"), dtype=f"{dtype}64", ) tm.assert_frame_equal(result, expected) @@ -2782,7 +2789,7 @@ def test_groupby_overflow(val, dtype): result = df.groupby("a").prod() expected = DataFrame( {"b": [val * val]}, - index=Index([1], name="a", dtype=f"{dtype}64"), + index=Index([1], name="a", dtype=f"{dtype}8"), dtype=f"{dtype}64", ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 98727719f8658..30cfe638c8540 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -403,8 +403,9 @@ def test_groupby_dict_mapping(self): result = s.groupby(mapping).mean() result2 = s.groupby(mapping).agg(np.mean) - expected = s.groupby([0, 0, 1, 1]).mean() - expected2 = s.groupby([0, 0, 1, 1]).mean() + exp_key = np.array([0, 0, 1, 1], dtype=np.int64) + expected = s.groupby(exp_key).mean() + expected2 = s.groupby(exp_key).mean() tm.assert_series_equal(result, expected) tm.assert_series_equal(result, result2) tm.assert_series_equal(result, expected2) @@ -692,7 +693,8 @@ def test_groupby_empty(self): gr = s.groupby([]) result = gr.mean() - tm.assert_series_equal(result, s) + expected = s.set_axis(Index([], dtype=np.intp)) + tm.assert_series_equal(result, expected) # check group properties assert len(gr.grouper.groupings) == 1 diff --git a/pandas/tests/groupby/test_min_max.py b/pandas/tests/groupby/test_min_max.py index e85a4c95a2b34..2a997b3c84216 100644 --- a/pandas/tests/groupby/test_min_max.py +++ b/pandas/tests/groupby/test_min_max.py @@ -81,7 +81,8 @@ def test_min_date_with_nans(): def test_max_inat(): # GH#40767 dont interpret iNaT as NaN ser = Series([1, iNaT]) - gb = ser.groupby([1, 1]) + key = np.array([1, 1], dtype=np.int64) + gb = ser.groupby(key) result = gb.max(min_count=2) expected = Series({1: 1}, dtype=np.int64) @@ -107,6 +108,7 @@ def test_max_inat_not_all_na(): # Note: in converting to float64, the iNaT + 1 maps to iNaT, i.e. is lossy expected = Series({1: np.nan, 2: np.nan, 3: iNaT + 1}) + expected.index = expected.index.astype(np.int_) tm.assert_series_equal(result, expected, check_exact=True) diff --git a/pandas/tests/groupby/test_nunique.py b/pandas/tests/groupby/test_nunique.py index f69798637b26c..282c91c82f5b1 100644 --- a/pandas/tests/groupby/test_nunique.py +++ b/pandas/tests/groupby/test_nunique.py @@ -172,7 +172,7 @@ def test_nunique_preserves_column_level_names(): # GH 23222 test = DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0")) result = test.groupby([0, 0, 0]).nunique() - expected = DataFrame([2], columns=test.columns) + expected = DataFrame([2], index=np.array([0]), columns=test.columns) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_quantile.py b/pandas/tests/groupby/test_quantile.py index 56b9b35f1f688..62978596ff4fb 100644 --- a/pandas/tests/groupby/test_quantile.py +++ b/pandas/tests/groupby/test_quantile.py @@ -61,7 +61,8 @@ def test_quantile(interpolation, a_vals, b_vals, q, request): def test_quantile_array(): # https://github.com/pandas-dev/pandas/issues/27526 df = DataFrame({"A": [0, 1, 2, 3, 4]}) - result = df.groupby([0, 0, 1, 1, 1]).quantile([0.25]) + key = np.array([0, 0, 1, 1, 1], dtype=np.int64) + result = df.groupby(key).quantile([0.25]) index = pd.MultiIndex.from_product([[0, 1], [0.25]]) expected = DataFrame({"A": [0.25, 2.50]}, index=index) @@ -70,7 +71,8 @@ def test_quantile_array(): df = DataFrame({"A": [0, 1, 2, 3], "B": [4, 5, 6, 7]}) index = pd.MultiIndex.from_product([[0, 1], [0.25, 0.75]]) - result = df.groupby([0, 0, 1, 1]).quantile([0.25, 0.75]) + key = np.array([0, 0, 1, 1], dtype=np.int64) + result = df.groupby(key).quantile([0.25, 0.75]) expected = DataFrame( {"A": [0.25, 0.75, 2.25, 2.75], "B": [4.25, 4.75, 6.25, 6.75]}, index=index ) @@ -79,9 +81,8 @@ def test_quantile_array(): def test_quantile_array2(): # https://github.com/pandas-dev/pandas/pull/28085#issuecomment-524066959 - df = DataFrame( - np.random.RandomState(0).randint(0, 5, size=(10, 3)), columns=list("ABC") - ) + arr = np.random.RandomState(0).randint(0, 5, size=(10, 3), dtype=np.int64) + df = DataFrame(arr, columns=list("ABC")) result = df.groupby("A").quantile([0.3, 0.7]) expected = DataFrame( { @@ -97,14 +98,15 @@ def test_quantile_array2(): def test_quantile_array_no_sort(): df = DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]}) - result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75]) + key = np.array([1, 0, 1], dtype=np.int64) + result = df.groupby(key, sort=False).quantile([0.25, 0.5, 0.75]) expected = DataFrame( {"A": [0.5, 1.0, 1.5, 1.0, 1.0, 1.0], "B": [3.5, 4.0, 4.5, 4.0, 4.0, 4.0]}, index=pd.MultiIndex.from_product([[1, 0], [0.25, 0.5, 0.75]]), ) tm.assert_frame_equal(result, expected) - result = df.groupby([1, 0, 1], sort=False).quantile([0.75, 0.25]) + result = df.groupby(key, sort=False).quantile([0.75, 0.25]) expected = DataFrame( {"A": [1.5, 0.5, 1.0, 1.0], "B": [4.5, 3.5, 4.0, 4.0]}, index=pd.MultiIndex.from_product([[1, 0], [0.75, 0.25]]), @@ -135,7 +137,7 @@ def test_groupby_quantile_with_arraylike_q_and_int_columns(frame_size, groupby, nrow, ncol = frame_size df = DataFrame(np.array([ncol * [_ % 4] for _ in range(nrow)]), columns=range(ncol)) - idx_levels = [list(range(min(nrow, 4)))] * len(groupby) + [q] + idx_levels = [np.arange(min(nrow, 4))] * len(groupby) + [q] idx_codes = [[x for x in range(min(nrow, 4)) for _ in q]] * len(groupby) + [ list(range(len(q))) * min(nrow, 4) ] diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py index d067efe3b61e5..c0c98562eda68 100644 --- a/pandas/tests/groupby/test_size.py +++ b/pandas/tests/groupby/test_size.py @@ -36,6 +36,9 @@ def test_size_axis_1(df, axis_1, by, sort, dropna): expected = Series(counts, dtype="int64") if sort: expected = expected.sort_index() + if tm.is_integer_dtype(expected.index) and not any(x is None for x in by): + expected.index = expected.index.astype(np.int_) + grouped = df.groupby(by=by, axis=axis_1, sort=sort, dropna=dropna) result = grouped.size() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index f67fea9cd6c0e..3d1228d65ac7c 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -183,7 +183,7 @@ def test_series_groupby_value_counts_on_categorical(): data=[1, 0], index=MultiIndex.from_arrays( [ - [0, 0], + np.array([0, 0]), CategoricalIndex( ["a", "b"], categories=["a", "b"], ordered=False, dtype="category" ), @@ -880,7 +880,7 @@ def test_mixed_groupings(normalize, expected_label, expected_values): result = gp.value_counts(sort=True, normalize=normalize) expected = DataFrame( { - "level_0": [4, 4, 5], + "level_0": np.array([4, 4, 5], dtype=np.int_), "A": [1, 1, 2], "level_2": [8, 8, 7], "B": [1, 3, 2], @@ -903,7 +903,8 @@ def test_column_label_duplicates(test, columns, expected_names, as_index): # Test for duplicate input column labels and generated duplicate labels df = DataFrame([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]], columns=columns) expected_data = [(1, 0, 7, 3, 5, 9), (2, 1, 8, 4, 6, 10)] - result = df.groupby(["a", [0, 1], "d"], as_index=as_index).value_counts() + keys = ["a", np.array([0, 1], dtype=np.int64), "d"] + result = df.groupby(keys, as_index=as_index).value_counts() if as_index: expected = Series( data=(1, 1), @@ -942,7 +943,7 @@ def test_result_label_duplicates(normalize, expected_label): def test_ambiguous_grouping(): # Test that groupby is not confused by groupings length equal to row count df = DataFrame({"a": [1, 1]}) - gb = df.groupby([1, 1]) + gb = df.groupby(np.array([1, 1], dtype=np.int64)) result = gb.value_counts() expected = Series([2], index=MultiIndex.from_tuples([[1, 1]], names=[None, "a"])) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index fedc219b42681..528f417ea1039 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -305,7 +305,7 @@ def test_transform_datetime_to_numeric(): lambda x: x.dt.dayofweek - x.dt.dayofweek.min() ) - expected = Series([0, 1], name="b") + expected = Series([0, 1], dtype=np.int32, name="b") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 8ddc2b6349bde..9063d5cf5d60a 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -862,11 +862,7 @@ def test_insert_non_na(self, simple_index): result = index.insert(0, index[0]) - cls = type(index) - if cls is RangeIndex: - cls = Int64Index - - expected = cls([index[0]] + list(index), dtype=index.dtype) + expected = Index([index[0]] + list(index), dtype=index.dtype) tm.assert_index_equal(result, expected, exact=True) def test_insert_na(self, nulls_fixture, simple_index): @@ -877,7 +873,7 @@ def test_insert_na(self, nulls_fixture, simple_index): if na_val is pd.NaT: expected = Index([index[0], pd.NaT] + list(index[1:]), dtype=object) else: - expected = Float64Index([index[0], np.nan] + list(index[1:])) + expected = Index([index[0], np.nan] + list(index[1:])) if index._is_backward_compat_public_numeric_index: # GH#43921 we preserve NumericIndex diff --git a/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py b/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py index c56fc84b540c0..e5da06cb005f6 100644 --- a/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py +++ b/pandas/tests/indexes/datetimelike_/test_drop_duplicates.py @@ -35,12 +35,20 @@ def test_drop_duplicates_metadata(self, idx): @pytest.mark.parametrize( "keep, expected, index", [ - ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), - ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), + ( + "first", + np.concatenate(([False] * 10, [True] * 5)), + np.arange(0, 10, dtype=np.int64), + ), + ( + "last", + np.concatenate(([True] * 5, [False] * 10)), + np.arange(5, 15, dtype=np.int64), + ), ( False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), - np.arange(5, 10), + np.arange(5, 10, dtype=np.int64), ), ], ) @@ -55,7 +63,8 @@ def test_drop_duplicates(self, keep, expected, index, idx): tm.assert_index_equal(result, expected) result = Series(idx).drop_duplicates(keep=keep) - tm.assert_series_equal(result, Series(expected, index=index)) + expected = Series(expected, index=index) + tm.assert_series_equal(result, expected) class TestDropDuplicatesPeriodIndex(DropDuplicates): diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index ec9a6842dc91e..11bc785c66b70 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -812,7 +812,7 @@ def test_date_range_span_dst_transition(self, tzstr): dr = date_range("2012-11-02", periods=10, tz=tzstr) result = dr.hour - expected = pd.Index([0] * 10) + expected = pd.Index([0] * 10, dtype="int32") tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"]) diff --git a/pandas/tests/indexes/datetimes/test_misc.py b/pandas/tests/indexes/datetimes/test_misc.py index 1dc01a3d7f937..a41645f46314a 100644 --- a/pandas/tests/indexes/datetimes/test_misc.py +++ b/pandas/tests/indexes/datetimes/test_misc.py @@ -281,8 +281,9 @@ def test_datetime_name_accessors(self, time_locale): def test_nanosecond_field(self): dti = DatetimeIndex(np.arange(10)) + expected = Index(np.arange(10, dtype=np.int32)) - tm.assert_index_equal(dti.nanosecond, Index(np.arange(10, dtype=np.int64))) + tm.assert_index_equal(dti.nanosecond, expected) def test_iter_readonly(): diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index ecc8da512a95c..65207a4d7a60f 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -119,28 +119,28 @@ def test_dti_tz_convert_hour_overflow_dst(self): ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2009-05-12 09:50:32"] tt = DatetimeIndex(ts).tz_localize("US/Eastern") ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2009-05-12 13:50:32"] tt = DatetimeIndex(ts).tz_localize("UTC") ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"] tt = DatetimeIndex(ts).tz_localize("US/Eastern") ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"] tt = DatetimeIndex(ts).tz_localize("UTC") ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) @@ -155,7 +155,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # sorted case UTC -> US/Eastern @@ -166,7 +166,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case US/Eastern -> UTC @@ -177,7 +177,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("UTC") - expected = Index([13, 14, 13]) + expected = Index([13, 14, 13], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) # unsorted case UTC -> US/Eastern @@ -188,7 +188,7 @@ def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz): ] tt = DatetimeIndex(ts) ut = tt.tz_convert("US/Eastern") - expected = Index([9, 9, 9]) + expected = Index([9, 9, 9], dtype=np.int32) tm.assert_index_equal(ut.hour, expected) @pytest.mark.parametrize("freq, n", [("H", 1), ("T", 60), ("S", 3600)]) @@ -200,7 +200,7 @@ def test_dti_tz_convert_trans_pos_plus_1__bug(self, freq, n): idx = idx.tz_convert("Europe/Moscow") expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1])) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) def test_dti_tz_convert_dst(self): for freq, n in [("H", 1), ("T", 60), ("S", 3600)]: @@ -213,7 +213,7 @@ def test_dti_tz_convert_dst(self): np.array([18, 19, 20, 21, 22, 23, 0, 1, 3, 4, 5]), np.array([n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) idx = date_range( "2014-03-08 18:00", "2014-03-09 05:00", freq=freq, tz="US/Eastern" @@ -223,7 +223,7 @@ def test_dti_tz_convert_dst(self): np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), np.array([n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) # End DST idx = date_range( @@ -234,7 +234,7 @@ def test_dti_tz_convert_dst(self): np.array([19, 20, 21, 22, 23, 0, 1, 1, 2, 3, 4]), np.array([n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) idx = date_range( "2014-11-01 18:00", "2014-11-02 05:00", freq=freq, tz="US/Eastern" @@ -244,30 +244,30 @@ def test_dti_tz_convert_dst(self): np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]), np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1]), ) - tm.assert_index_equal(idx.hour, Index(expected)) + tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32)) # daily # Start DST idx = date_range("2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="UTC") idx = idx.tz_convert("US/Eastern") - tm.assert_index_equal(idx.hour, Index([19, 19])) + tm.assert_index_equal(idx.hour, Index([19, 19], dtype=np.int32)) idx = date_range( "2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="US/Eastern" ) idx = idx.tz_convert("UTC") - tm.assert_index_equal(idx.hour, Index([5, 5])) + tm.assert_index_equal(idx.hour, Index([5, 5], dtype=np.int32)) # End DST idx = date_range("2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="UTC") idx = idx.tz_convert("US/Eastern") - tm.assert_index_equal(idx.hour, Index([20, 20])) + tm.assert_index_equal(idx.hour, Index([20, 20], dtype=np.int32)) idx = date_range( "2014-11-01 00:00", "2014-11-02 000:00", freq="D", tz="US/Eastern" ) idx = idx.tz_convert("UTC") - tm.assert_index_equal(idx.hour, Index([4, 4])) + tm.assert_index_equal(idx.hour, Index([4, 4], dtype=np.int32)) def test_tz_convert_roundtrip(self, tz_aware_fixture): tz = tz_aware_fixture @@ -1125,7 +1125,7 @@ def test_field_access_localize(self, prefix): "2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan" ) - expected = Index(np.arange(10, dtype=np.int64)) + expected = Index(np.arange(10, dtype=np.int32)) tm.assert_index_equal(dr.hour, expected) @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")]) diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py index ce0de97befec3..6184b3288f886 100644 --- a/pandas/tests/indexes/interval/test_constructors.py +++ b/pandas/tests/indexes/interval/test_constructors.py @@ -18,10 +18,7 @@ timedelta_range, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, -) +from pandas.core.api import NumericIndex from pandas.core.arrays import IntervalArray import pandas.core.common as com @@ -38,35 +35,44 @@ class ConstructorTests: get_kwargs_from_breaks to the expected format. """ - @pytest.mark.parametrize( - "breaks", - [ - [3, 14, 15, 92, 653], - np.arange(10, dtype="int64"), - Int64Index(range(-10, 11)), - Float64Index(np.arange(20, 30, 0.5)), - date_range("20180101", periods=10), - date_range("20180101", periods=10, tz="US/Eastern"), - timedelta_range("1 day", periods=10), - ], + @pytest.fixture( + params=[ + ([3, 14, 15, 92, 653], np.int64), + (np.arange(10, dtype="int64"), np.int64), + (NumericIndex(range(-10, 11), dtype=np.int64), np.int64), + (NumericIndex(range(10, 31), dtype=np.uint64), np.uint64), + (NumericIndex(np.arange(20, 30, 0.5), dtype=np.float64), np.float64), + (date_range("20180101", periods=10), " Float64 assert type(result) is Index diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 01efbfb9ae0c0..7d1f6aa2df11d 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -14,7 +14,6 @@ ) import pandas._testing as tm from pandas.core.arrays import TimedeltaArray -from pandas.core.indexes.api import Int64Index from pandas.tests.indexes.datetimelike import DatetimeLike randn = np.random.randn @@ -56,7 +55,7 @@ def test_map(self): f = lambda x: x.days result = rng.map(f) - exp = Int64Index([f(x) for x in rng]) + exp = Index([f(x) for x in rng], dtype=np.int32) tm.assert_index_equal(result, exp) def test_pass_TimedeltaIndex_to_index(self): @@ -70,15 +69,16 @@ def test_pass_TimedeltaIndex_to_index(self): def test_fields(self): rng = timedelta_range("1 days, 10:11:12.100123456", periods=2, freq="s") - tm.assert_index_equal(rng.days, Index([1, 1], dtype="int64")) + tm.assert_index_equal(rng.days, Index([1, 1], dtype=np.int32)) tm.assert_index_equal( rng.seconds, - Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype="int64"), + Index([10 * 3600 + 11 * 60 + 12, 10 * 3600 + 11 * 60 + 13], dtype=np.int32), ) tm.assert_index_equal( - rng.microseconds, Index([100 * 1000 + 123, 100 * 1000 + 123], dtype="int64") + rng.microseconds, + Index([100 * 1000 + 123, 100 * 1000 + 123], dtype=np.int32), ) - tm.assert_index_equal(rng.nanoseconds, Index([456, 456], dtype="int64")) + tm.assert_index_equal(rng.nanoseconds, Index([456, 456], dtype=np.int32)) msg = "'TimedeltaIndex' object has no attribute '{}'" with pytest.raises(AttributeError, match=msg.format("hours")): diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index c81473cb945bc..0c63326118ac3 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -10,10 +10,6 @@ to_datetime, ) import pandas._testing as tm -from pandas.core.api import ( - Float64Index, - Int64Index, -) class TestMultiIndexPartial: @@ -160,9 +156,9 @@ def test_getitem_intkey_leading_level( mi = ser.index assert isinstance(mi, MultiIndex) if dtype is int: - assert isinstance(mi.levels[0], Int64Index) + assert mi.levels[0].dtype == np.int_ else: - assert isinstance(mi.levels[0], Float64Index) + assert mi.levels[0].dtype == np.float64 assert 14 not in mi.levels[0] assert not mi.levels[0]._should_fallback_to_positional diff --git a/pandas/tests/indexing/multiindex/test_slice.py b/pandas/tests/indexing/multiindex/test_slice.py index 767df7dec305d..dd16cca28677c 100644 --- a/pandas/tests/indexing/multiindex/test_slice.py +++ b/pandas/tests/indexing/multiindex/test_slice.py @@ -729,7 +729,7 @@ def test_multiindex_label_slicing_with_negative_step(self): def test_multiindex_slice_first_level(self): # GH 12697 freq = ["a", "b", "c", "d"] - idx = MultiIndex.from_product([freq, np.arange(500)]) + idx = MultiIndex.from_product([freq, range(500)]) df = DataFrame(list(range(2000)), index=idx, columns=["Test"]) df_slice = df.loc[pd.IndexSlice[:, 30:70], :] result = df_slice.loc["a"] diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 910406b967770..ff4695808ee75 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -41,7 +41,7 @@ def test_setitem_ndarray_1d(self): # GH5508 # len of indexer vs length of the 1d ndarray - df = DataFrame(index=Index(np.arange(1, 11))) + df = DataFrame(index=Index(np.arange(1, 11), dtype=np.int64)) df["foo"] = np.zeros(10, dtype=np.float64) df["bar"] = np.zeros(10, dtype=complex) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 61e95de3caf0d..5ab4802aacf66 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -436,7 +436,7 @@ def test_loc_to_fail(self): ) msg = ( - r"\"None of \[Int64Index\(\[1, 2\], dtype='int64'\)\] are " + rf"\"None of \[NumericIndex\(\[1, 2\], dtype='{np.int_().dtype}'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): @@ -454,7 +454,7 @@ def test_loc_to_fail2(self): s.loc[-1] msg = ( - r"\"None of \[Int64Index\(\[-1, -2\], dtype='int64'\)\] are " + rf"\"None of \[NumericIndex\(\[-1, -2\], dtype='{np.int_().dtype}'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): @@ -470,7 +470,7 @@ def test_loc_to_fail2(self): s["a"] = 2 msg = ( - r"\"None of \[Int64Index\(\[-2\], dtype='int64'\)\] are " + rf"\"None of \[NumericIndex\(\[-2\], dtype='{np.int_().dtype}'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): @@ -487,7 +487,7 @@ def test_loc_to_fail3(self): df = DataFrame([["a"], ["b"]], index=[1, 2], columns=["value"]) msg = ( - r"\"None of \[Int64Index\(\[3\], dtype='int64'\)\] are " + rf"\"None of \[NumericIndex\(\[3\], dtype='{np.int_().dtype}'\)\] are " r"in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): @@ -504,12 +504,11 @@ def test_loc_getitem_list_with_fail(self): s.loc[[2]] - with pytest.raises( - KeyError, - match=re.escape( - "\"None of [Int64Index([3], dtype='int64')] are in the [index]\"" - ), - ): + msg = ( + f"\"None of [NumericIndex([3], dtype='{np.int_().dtype}')] " + 'are in the [index]"' + ) + with pytest.raises(KeyError, match=re.escape(msg)): s.loc[[3]] # a non-match and a match @@ -1199,7 +1198,7 @@ def test_loc_setitem_empty_append_raises(self): df = DataFrame(columns=["x", "y"]) df.index = df.index.astype(np.int64) msg = ( - r"None of \[Int64Index\(\[0, 1\], dtype='int64'\)\] " + rf"None of \[NumericIndex\(\[0, 1\], dtype='{np.int_().dtype}'\)\] " r"are in the \[index\]" ) with pytest.raises(KeyError, match=msg): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index ddf1e68f55501..c26d57e7b97e3 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -403,8 +403,8 @@ def test_series_partial_set(self): # raises as nothing is in the index msg = ( - r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are " - r"in the \[index\]\"" + rf"\"None of \[NumericIndex\(\[3, 3, 3\], dtype='{np.int_().dtype}'\)\] " + r"are in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): ser.loc[[3, 3, 3]] @@ -484,7 +484,7 @@ def test_series_partial_set_with_name(self): # raises as nothing is in the index msg = ( - r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64', " + rf"\"None of \[NumericIndex\(\[3, 3, 3\], dtype='{np.int_().dtype}', " r"name='idx'\)\] are in the \[index\]\"" ) with pytest.raises(KeyError, match=msg): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 4e1b9da0987e1..ec6484c5c2149 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -737,7 +737,7 @@ def test_to_excel_periodindex(self, tsframe, path): tm.assert_frame_equal(xp, rs.to_period("M")) def test_to_excel_multiindex(self, merge_cells, frame, path): - arrays = np.arange(len(frame.index) * 2).reshape(2, -1) + arrays = np.arange(len(frame.index) * 2, dtype=np.int64).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) frame.index = new_index @@ -763,7 +763,7 @@ def test_to_excel_multiindex_nan_label(self, merge_cells, path): # sure they are handled correctly for either setting of # merge_cells def test_to_excel_multiindex_cols(self, merge_cells, frame, path): - arrays = np.arange(len(frame.index) * 2).reshape(2, -1) + arrays = np.arange(len(frame.index) * 2, dtype=np.int64).reshape(2, -1) new_index = MultiIndex.from_arrays(arrays, names=["first", "second"]) frame.index = new_index @@ -786,7 +786,7 @@ def test_to_excel_multiindex_cols(self, merge_cells, frame, path): def test_to_excel_multiindex_dates(self, merge_cells, tsframe, path): # try multiindex with dates - new_index = [tsframe.index, np.arange(len(tsframe.index))] + new_index = [tsframe.index, np.arange(len(tsframe.index), dtype=np.int64)] tsframe.index = MultiIndex.from_arrays(new_index) tsframe.index.names = ["time", "foo"] diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index d04e5183d980f..e3322e414e0cd 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1328,7 +1328,6 @@ def test_to_string(self): # big mixed biggie = DataFrame( {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, - index=np.arange(200), ) biggie.loc[:20, "A"] = np.nan diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index d1de676a4eb2e..77ec95329861a 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -422,12 +422,12 @@ def test_frame_mixedtype_orient(self): # GH10289 left = read_json(inp, orient=orient, convert_axes=False) tm.assert_frame_equal(left, right) - right.index = np.arange(len(df)) + right.index = pd.RangeIndex(len(df)) inp = df.to_json(orient="records") left = read_json(inp, orient="records", convert_axes=False) tm.assert_frame_equal(left, right) - right.columns = np.arange(df.shape[1]) + right.columns = pd.RangeIndex(df.shape[1]) inp = df.to_json(orient="values") left = read_json(inp, orient="values", convert_axes=False) tm.assert_frame_equal(left, right) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 12a3801ef1344..b5841593d4f45 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -820,6 +820,7 @@ def test_s3_roundtrip_for_dir( # GH #35791 if partition_col: + expected_df = expected_df.astype(dict.fromkeys(partition_col, np.int32)) partition_col_type = "category" expected_df[partition_col] = expected_df[partition_col].astype( diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index f83b6b0373a87..1dbea86f74086 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2852,7 +2852,7 @@ def test_basic(self, sqlite_buildin): frame["txt"] = ["a"] * len(frame) frame2 = frame.copy() - new_idx = Index(np.arange(len(frame2))) + 10 + new_idx = Index(np.arange(len(frame2)), dtype=np.int64) + 10 frame2["Idx"] = new_idx.copy() assert ( sql.to_sql(frame2, name="test_table2", con=sqlite_buildin, index=False) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 2859bf593f0e0..0a9ed30dd55d6 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -299,7 +299,10 @@ def test_read_write_dta5(self): with tm.ensure_clean() as path: original.to_stata(path, convert_dates=None) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), original) + + expected = original.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) def test_write_dta6(self, datapath): original = self.read_csv(datapath("io", "data", "stata", "stata3.csv")) @@ -392,7 +395,10 @@ def test_read_write_dta11(self): original.to_stata(path, convert_dates=None) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + expected = formatted.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_read_write_dta12(self, version): @@ -429,7 +435,10 @@ def test_read_write_dta12(self, version): assert len(w) == 1 written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + expected = formatted.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) def test_read_write_dta13(self): s1 = Series(2**9, dtype=np.int16) @@ -444,7 +453,10 @@ def test_read_write_dta13(self): with tm.ensure_clean() as path: original.to_stata(path) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted) + + expected = formatted.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) @pytest.mark.parametrize( @@ -461,7 +473,9 @@ def test_read_write_reread_dta14(self, file, parsed_114, version, datapath): parsed_114.to_stata(path, convert_dates={"date_td": "td"}, version=version) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), parsed_114) + expected = parsed_114.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @pytest.mark.parametrize( "file", ["stata6_113", "stata6_114", "stata6_115", "stata6_117"] @@ -516,11 +530,15 @@ def test_numeric_column_names(self): original.to_stata(path) written_and_read_again = self.read_dta(path) - written_and_read_again = written_and_read_again.set_index("index") - columns = list(written_and_read_again.columns) - convert_col_name = lambda x: int(x[1]) - written_and_read_again.columns = map(convert_col_name, columns) - tm.assert_frame_equal(original, written_and_read_again) + + written_and_read_again = written_and_read_again.set_index("index") + columns = list(written_and_read_again.columns) + convert_col_name = lambda x: int(x[1]) + written_and_read_again.columns = map(convert_col_name, columns) + + expected = original.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(expected, written_and_read_again) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_nan_to_missing_value(self, version): @@ -530,11 +548,15 @@ def test_nan_to_missing_value(self, version): s2[1::2] = np.nan original = DataFrame({"s1": s1, "s2": s2}) original.index.name = "index" + with tm.ensure_clean() as path: original.to_stata(path, version=version) written_and_read_again = self.read_dta(path) - written_and_read_again = written_and_read_again.set_index("index") - tm.assert_frame_equal(written_and_read_again, original) + + written_and_read_again = written_and_read_again.set_index("index") + expected = original.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again, expected) def test_no_index(self): columns = ["x", "y"] @@ -554,7 +576,10 @@ def test_string_no_dates(self): with tm.ensure_clean() as path: original.to_stata(path) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), original) + + expected = original.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) def test_large_value_conversion(self): s0 = Series([1, 99], dtype=np.int8) @@ -568,11 +593,13 @@ def test_large_value_conversion(self): original.to_stata(path) written_and_read_again = self.read_dta(path) - modified = original.copy() - modified["s1"] = Series(modified["s1"], dtype=np.int16) - modified["s2"] = Series(modified["s2"], dtype=np.int32) - modified["s3"] = Series(modified["s3"], dtype=np.float64) - tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) + + modified = original.copy() + modified["s1"] = Series(modified["s1"], dtype=np.int16) + modified["s2"] = Series(modified["s2"], dtype=np.int32) + modified["s3"] = Series(modified["s3"], dtype=np.float64) + modified.index = original.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) def test_dates_invalid_column(self): original = DataFrame([datetime(2006, 11, 19, 23, 13, 20)]) @@ -582,9 +609,11 @@ def test_dates_invalid_column(self): original.to_stata(path, convert_dates={0: "tc"}) written_and_read_again = self.read_dta(path) - modified = original.copy() - modified.columns = ["_0"] - tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) + + modified = original.copy() + modified.columns = ["_0"] + modified.index = original.index.astype(np.int32) + tm.assert_frame_equal(written_and_read_again.set_index("index"), modified) def test_105(self, datapath): # Data obtained from: @@ -625,21 +654,32 @@ def test_date_export_formats(self): datetime(2006, 1, 1), ] # Year - expected = DataFrame([expected_values], columns=columns) - expected.index.name = "index" + expected = DataFrame( + [expected_values], + index=pd.Index([0], dtype=np.int32, name="index"), + columns=columns, + ) + with tm.ensure_clean() as path: original.to_stata(path, convert_dates=conversions) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) + + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) def test_write_missing_strings(self): original = DataFrame([["1"], [None]], columns=["foo"]) - expected = DataFrame([["1"], [""]], columns=["foo"]) - expected.index.name = "index" + + expected = DataFrame( + [["1"], [""]], + index=pd.Index([0, 1], dtype=np.int32, name="index"), + columns=["foo"], + ) + with tm.ensure_clean() as path: original.to_stata(path) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) + + tm.assert_frame_equal(written_and_read_again.set_index("index"), expected) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) @pytest.mark.parametrize("byteorder", [">", "<"]) @@ -657,6 +697,7 @@ def test_bool_uint(self, byteorder, version): ) original.index.name = "index" expected = original.copy() + expected.index = original.index.astype(np.int32) expected_types = ( np.int8, np.int8, @@ -672,8 +713,9 @@ def test_bool_uint(self, byteorder, version): with tm.ensure_clean() as path: original.to_stata(path, byteorder=byteorder, version=version) written_and_read_again = self.read_dta(path) - written_and_read_again = written_and_read_again.set_index("index") - tm.assert_frame_equal(written_and_read_again, expected) + + written_and_read_again = written_and_read_again.set_index("index") + tm.assert_frame_equal(written_and_read_again, expected) def test_variable_labels(self, datapath): with StataReader(datapath("io", "data", "stata", "stata7_115.dta")) as rdr: @@ -824,11 +866,12 @@ def test_big_dates(self, datapath): expected.index.name = "index" expected.to_stata(path, convert_dates=date_conversion) written_and_read_again = self.read_dta(path) - tm.assert_frame_equal( - written_and_read_again.set_index("index"), - expected, - check_datetimelike_compat=True, - ) + + tm.assert_frame_equal( + written_and_read_again.set_index("index"), + expected.set_index(expected.index.astype(np.int32)), + check_datetimelike_compat=True, + ) def test_dtype_conversion(self, datapath): expected = self.read_csv(datapath("io", "data", "stata", "stata6.csv")) @@ -942,7 +985,7 @@ def test_categorical_writing(self, version): original = pd.concat( [original[col].astype("category") for col in original], axis=1 ) - expected.index.name = "index" + expected.index = expected.index.set_names("index").astype(np.int32) expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str) expected["unlabeled"] = expected["unlabeled"].apply(str) @@ -961,8 +1004,9 @@ def test_categorical_writing(self, version): with tm.ensure_clean() as path: original.to_stata(path, version=version) written_and_read_again = self.read_dta(path) - res = written_and_read_again.set_index("index") - tm.assert_frame_equal(res, expected) + + res = written_and_read_again.set_index("index") + tm.assert_frame_equal(res, expected) def test_categorical_warnings_and_errors(self): # Warning for non-string labels @@ -1006,15 +1050,17 @@ def test_categorical_with_stata_missing_values(self, version): with tm.ensure_clean() as path: original.to_stata(path, version=version) written_and_read_again = self.read_dta(path) - res = written_and_read_again.set_index("index") - expected = original.copy() - for col in expected: - cat = expected[col]._values - new_cats = cat.remove_unused_categories().categories - cat = cat.set_categories(new_cats, ordered=True) - expected[col] = cat - tm.assert_frame_equal(res, expected) + res = written_and_read_again.set_index("index") + + expected = original.copy() + for col in expected: + cat = expected[col]._values + new_cats = cat.remove_unused_categories().categories + cat = cat.set_categories(new_cats, ordered=True) + expected[col] = cat + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(res, expected) @pytest.mark.parametrize("file", ["stata10_115", "stata10_117"]) def test_categorical_order(self, file, datapath): @@ -1462,8 +1508,11 @@ def test_out_of_range_float(self): with tm.ensure_clean() as path: original.to_stata(path) reread = read_stata(path) - original["ColumnTooBig"] = original["ColumnTooBig"].astype(np.float64) - tm.assert_frame_equal(original, reread.set_index("index")) + + original["ColumnTooBig"] = original["ColumnTooBig"].astype(np.float64) + expected = original.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(reread.set_index("index"), expected) @pytest.mark.parametrize("infval", [np.inf, -np.inf]) def test_inf(self, infval): @@ -1890,7 +1939,10 @@ def test_compression(compression, version, use_dict, infer): elif compression is None: fp = path reread = read_stata(fp, index_col="index") - tm.assert_frame_equal(reread, df) + + expected = df.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(reread, expected) @pytest.mark.parametrize("method", ["zip", "infer"]) @@ -1911,20 +1963,29 @@ def test_compression_dict(method, file_ext): else: fp = path reread = read_stata(fp, index_col="index") - tm.assert_frame_equal(reread, df) + + expected = df.copy() + expected.index = expected.index.astype(np.int32) + tm.assert_frame_equal(reread, expected) @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) def test_chunked_categorical(version): df = DataFrame({"cats": Series(["a", "b", "a", "b", "c"], dtype="category")}) df.index.name = "index" + + expected = df.copy() + expected.index = expected.index.astype(np.int32) + with tm.ensure_clean() as path: df.to_stata(path, version=version) with StataReader(path, chunksize=2, order_categoricals=False) as reader: for i, block in enumerate(reader): block = block.set_index("index") assert "cats" in block - tm.assert_series_equal(block.cats, df.cats.iloc[2 * i : 2 * (i + 1)]) + tm.assert_series_equal( + block.cats, expected.cats.iloc[2 * i : 2 * (i + 1)] + ) def test_chunked_categorical_partial(datapath): diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 5f1e0904b8c3c..51a65d88d7b32 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -646,7 +646,7 @@ def test_selection_api_validation(): # non DatetimeIndex msg = ( "Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " - "but got an instance of 'Int64Index'" + "but got an instance of 'NumericIndex'" ) with pytest.raises(TypeError, match=msg): df.resample("2D", level="v") diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index a521e24aa6022..0c8e303b4ac56 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -316,7 +316,7 @@ def test_resample_groupby_with_label(): result = df.groupby("col0").resample("1W", label="left").sum() mi = [ - np.array([0, 0, 1, 2]), + np.array([0, 0, 1, 2], dtype=np.int64), pd.to_datetime( np.array(["1999-12-26", "2000-01-02", "2000-01-02", "2000-01-02"]) ), diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index fc2069c5d1e42..35d10eafb5ba7 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -355,7 +355,7 @@ def test_merge_join_key_dtype_cast(self): lkey = np.array([1]) rkey = np.array([2]) df = merge(df1, df2, left_on=lkey, right_on=rkey, how="outer") - assert df["key_0"].dtype == "int64" + assert df["key_0"].dtype == np.int_ def test_handle_join_key_pass_array(self): left = DataFrame( @@ -379,9 +379,8 @@ def test_handle_join_key_pass_array(self): rkey = np.array([1, 1, 2, 3, 4, 5]) merged = merge(left, right, left_on=lkey, right_on=rkey, how="outer") - tm.assert_series_equal( - merged["key_0"], Series([1, 1, 1, 1, 2, 2, 3, 4, 5], name="key_0") - ) + expected = Series([1, 1, 1, 1, 2, 2, 3, 4, 5], dtype=np.int_, name="key_0") + tm.assert_series_equal(merged["key_0"], expected) left = DataFrame({"value": np.arange(3)}) right = DataFrame({"rvalue": np.arange(6)}) @@ -2469,7 +2468,7 @@ def test_categorical_non_unique_monotonic(n_categories): df2 = DataFrame( [[6]], columns=["value"], - index=CategoricalIndex([0], categories=np.arange(n_categories)), + index=CategoricalIndex([0], categories=list(range(n_categories))), ) result = merge(df1, df2, how="left", left_index=True, right_index=True) diff --git a/pandas/tests/reshape/merge/test_multi.py b/pandas/tests/reshape/merge/test_multi.py index 0dbe45eeb1e82..d71584817cf43 100644 --- a/pandas/tests/reshape/merge/test_multi.py +++ b/pandas/tests/reshape/merge/test_multi.py @@ -6,6 +6,7 @@ DataFrame, Index, MultiIndex, + RangeIndex, Series, Timestamp, ) @@ -118,7 +119,7 @@ def run_asserts(left, right, sort): out = merge(left, right.reset_index(), on=icols, sort=sort, how="left") - res.index = np.arange(len(res)) + res.index = RangeIndex(len(res)) tm.assert_frame_equal(out, res) lc = list(map(chr, np.arange(ord("a"), ord("z") + 1))) @@ -370,7 +371,7 @@ def test_left_join_index_multi_match(self): # GH7331 - maintain left frame order in left merge result = merge(left, right.reset_index(), how="left", on="tag") - expected.index = np.arange(len(expected)) + expected.index = RangeIndex(len(expected)) tm.assert_frame_equal(result, expected) def test_left_merge_na_buglet(self): @@ -441,14 +442,13 @@ def test_merge_datetime_index(self, klass): if klass is not None: on_vector = klass(on_vector) - expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]}) + exp_years = np.array([2016, 2017, 2018], dtype=np.int32) + expected = DataFrame({"a": [1, 2, 3], "key_1": exp_years}) result = df.merge(df, on=["a", on_vector], how="inner") tm.assert_frame_equal(result, expected) - expected = DataFrame( - {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]} - ) + expected = DataFrame({"key_0": exp_years, "a_x": [1, 2, 3], "a_y": [1, 2, 3]}) result = df.merge(df, on=[df.index.year], how="inner") tm.assert_frame_equal(result, expected) @@ -852,14 +852,13 @@ def test_merge_datetime_index(self, box): if box is not None: on_vector = box(on_vector) - expected = DataFrame({"a": [1, 2, 3], "key_1": [2016, 2017, 2018]}) + exp_years = np.array([2016, 2017, 2018], dtype=np.int32) + expected = DataFrame({"a": [1, 2, 3], "key_1": exp_years}) result = df.merge(df, on=["a", on_vector], how="inner") tm.assert_frame_equal(result, expected) - expected = DataFrame( - {"key_0": [2016, 2017, 2018], "a_x": [1, 2, 3], "a_y": [1, 2, 3]} - ) + expected = DataFrame({"key_0": exp_years, "a_x": [1, 2, 3], "a_y": [1, 2, 3]}) result = df.merge(df, on=[df.index.year], how="inner") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index bea0ddbe8a667..55f1696ac776c 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -112,7 +112,7 @@ def test_crosstab_non_aligned(self): # GH 17005 a = Series([0, 1, 1], index=["a", "b", "c"]) b = Series([3, 4, 3, 4, 3], index=["a", "b", "c", "d", "f"]) - c = np.array([3, 4, 3]) + c = np.array([3, 4, 3], dtype=np.int64) expected = DataFrame( [[1, 0], [1, 1]], diff --git a/pandas/tests/reshape/test_get_dummies.py b/pandas/tests/reshape/test_get_dummies.py index be0c4b37c2fd5..8a7985280eff4 100644 --- a/pandas/tests/reshape/test_get_dummies.py +++ b/pandas/tests/reshape/test_get_dummies.py @@ -11,6 +11,7 @@ Categorical, CategoricalIndex, DataFrame, + RangeIndex, Series, get_dummies, ) @@ -471,7 +472,7 @@ def test_get_dummies_basic_drop_first_one_level(self, sparse): s_series = Series(s_list) s_series_index = Series(s_list, list("ABC")) - expected = DataFrame(index=np.arange(3)) + expected = DataFrame(index=RangeIndex(3)) result = get_dummies(s_list, drop_first=True, sparse=sparse) tm.assert_frame_equal(result, expected) @@ -504,7 +505,7 @@ def test_get_dummies_basic_drop_first_NA(self, sparse): res_just_na = get_dummies( [np.nan], dummy_na=True, drop_first=True, sparse=sparse ) - exp_just_na = DataFrame(index=np.arange(1)) + exp_just_na = DataFrame(index=RangeIndex(1)) tm.assert_frame_equal(res_just_na, exp_just_na) def test_dataframe_dummies_drop_first(self, df, sparse): diff --git a/pandas/tests/reshape/test_melt.py b/pandas/tests/reshape/test_melt.py index 040e2b961eef3..792707dc080f8 100644 --- a/pandas/tests/reshape/test_melt.py +++ b/pandas/tests/reshape/test_melt.py @@ -831,7 +831,7 @@ def test_invalid_separator(self): "A": [], "B": [], } - expected = DataFrame(exp_data).astype({"year": "int"}) + expected = DataFrame(exp_data).astype({"year": np.int64}) expected = expected.set_index(["id", "year"])[ ["X", "A2010", "A2011", "B2010", "A", "B"] ] @@ -894,7 +894,7 @@ def test_invalid_suffixtype(self): "A": [], "B": [], } - expected = DataFrame(exp_data).astype({"year": "int"}) + expected = DataFrame(exp_data).astype({"year": np.int64}) expected = expected.set_index(["id", "year"]) expected.index = expected.index.set_levels([0, 1], level=0) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 9a72a8dadf8d0..3655d4cf1b28a 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -409,7 +409,14 @@ def test_pivot_no_values(self): res = df.pivot_table(index=df.index.month, columns=df.index.day) exp_columns = MultiIndex.from_tuples([("A", 1), ("A", 2)]) - exp = DataFrame([[2.5, 4.0], [2.0, np.nan]], index=[1, 2], columns=exp_columns) + exp_columns = exp_columns.set_levels( + exp_columns.levels[1].astype(np.int32), level=1 + ) + exp = DataFrame( + [[2.5, 4.0], [2.0, np.nan]], + index=Index([1, 2], dtype=np.int32), + columns=exp_columns, + ) tm.assert_frame_equal(res, exp) df = DataFrame( @@ -422,7 +429,9 @@ def test_pivot_no_values(self): res = df.pivot_table(index=df.index.month, columns=Grouper(key="dt", freq="M")) exp_columns = MultiIndex.from_tuples([("A", pd.Timestamp("2011-01-31"))]) exp_columns.names = [None, "dt"] - exp = DataFrame([3.25, 2.0], index=[1, 2], columns=exp_columns) + exp = DataFrame( + [3.25, 2.0], index=Index([1, 2], dtype=np.int32), columns=exp_columns + ) tm.assert_frame_equal(res, exp) res = df.pivot_table( @@ -1614,7 +1623,7 @@ def test_pivot_dtaccessor(self): expected = DataFrame( {7: [0, 3], 8: [1, 4], 9: [2, 5]}, index=exp_idx, - columns=Index([7, 8, 9], name="dt1"), + columns=Index([7, 8, 9], dtype=np.int32, name="dt1"), ) tm.assert_frame_equal(result, expected) @@ -1624,8 +1633,8 @@ def test_pivot_dtaccessor(self): expected = DataFrame( {7: [0, 3], 8: [1, 4], 9: [2, 5]}, - index=Index([1, 2], name="dt2"), - columns=Index([7, 8, 9], name="dt1"), + index=Index([1, 2], dtype=np.int32, name="dt2"), + columns=Index([7, 8, 9], dtype=np.int32, name="dt1"), ) tm.assert_frame_equal(result, expected) @@ -1637,10 +1646,16 @@ def test_pivot_dtaccessor(self): ) exp_col = MultiIndex.from_arrays( - [[7, 7, 8, 8, 9, 9], [1, 2] * 3], names=["dt1", "dt2"] + [ + np.array([7, 7, 8, 8, 9, 9], dtype=np.int32), + np.array([1, 2] * 3, dtype=np.int32), + ], + names=["dt1", "dt2"], ) expected = DataFrame( - np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"), index=[2013], columns=exp_col + np.array([[0, 3, 1, 4, 2, 5]], dtype="int64"), + index=Index([2013], dtype=np.int32), + columns=exp_col, ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_util.py b/pandas/tests/reshape/test_util.py index 3a9ddaebf2934..4d0be7464cb3d 100644 --- a/pandas/tests/reshape/test_util.py +++ b/pandas/tests/reshape/test_util.py @@ -23,8 +23,8 @@ def test_datetimeindex(self): # make sure that the ordering on datetimeindex is consistent x = date_range("2000-01-01", periods=2) result1, result2 = (Index(y).day for y in cartesian_product([x, x])) - expected1 = Index([1, 1, 2, 2]) - expected2 = Index([1, 2, 1, 2]) + expected1 = Index([1, 1, 2, 2], dtype=np.int32) + expected2 = Index([1, 2, 1, 2], dtype=np.int32) tm.assert_index_equal(result1, expected1) tm.assert_index_equal(result2, expected2) diff --git a/pandas/tests/series/accessors/test_dt_accessor.py b/pandas/tests/series/accessors/test_dt_accessor.py index 689c8ba845a6c..adb11b88cf667 100644 --- a/pandas/tests/series/accessors/test_dt_accessor.py +++ b/pandas/tests/series/accessors/test_dt_accessor.py @@ -229,14 +229,14 @@ def test_dt_namespace_accessor_index_and_values(self): dti = date_range("20140204", periods=3, freq="s") ser = Series(dti, index=index, name="xxx") exp = Series( - np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx" + np.array([2014, 2014, 2014], dtype="int32"), index=index, name="xxx" ) tm.assert_series_equal(ser.dt.year, exp) - exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx") + exp = Series(np.array([2, 2, 2], dtype="int32"), index=index, name="xxx") tm.assert_series_equal(ser.dt.month, exp) - exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx") + exp = Series(np.array([0, 1, 2], dtype="int32"), index=index, name="xxx") tm.assert_series_equal(ser.dt.second, exp) exp = Series([ser[0]] * 3, index=index, name="xxx") @@ -386,7 +386,7 @@ def test_dt_namespace_accessor_categorical(self): dti = DatetimeIndex(["20171111", "20181212"]).repeat(2) ser = Series(pd.Categorical(dti), name="foo") result = ser.dt.year - expected = Series([2017, 2017, 2018, 2018], name="foo") + expected = Series([2017, 2017, 2018, 2018], dtype="int32", name="foo") tm.assert_series_equal(result, expected) def test_dt_tz_localize_categorical(self, tz_aware_fixture): @@ -741,6 +741,7 @@ def test_hour_index(self): result = dt_series.dt.hour expected = Series( [0, 1, 2, 3, 4], + dtype="int32", index=[2, 6, 7, 8, 11], ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index fa196cf7d3f32..1a52e02ee314f 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -793,7 +793,8 @@ def test_index_where(self, obj, key, expected, val): mask[key] = True res = Index(obj).where(~mask, val) - tm.assert_index_equal(res, Index(expected, dtype=expected.dtype)) + expected_idx = Index(expected, dtype=expected.dtype) + tm.assert_index_equal(res, expected_idx) def test_index_putmask(self, obj, key, expected, val): mask = np.zeros(obj.shape, dtype=bool) diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py index 698d66ebe7c29..b00858d2779bc 100644 --- a/pandas/tests/series/methods/test_reindex.py +++ b/pandas/tests/series/methods/test_reindex.py @@ -126,7 +126,7 @@ def test_reindex_pad(): reindexed2 = s2.reindex(s.index, method="ffill") tm.assert_series_equal(reindexed, reindexed2) - expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8], index=np.arange(10)) + expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8]) tm.assert_series_equal(reindexed, expected) # GH4604 diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index a12bc1df37269..57dcd06f8f524 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -322,7 +322,7 @@ def test_categorical_series_repr(self): assert repr(s) == exp s = Series(Categorical(np.arange(10))) - exp = """0 0 + exp = f"""0 0 1 1 2 2 3 3 @@ -333,7 +333,7 @@ def test_categorical_series_repr(self): 8 8 9 9 dtype: category -Categories (10, int64): [0, 1, 2, 3, ..., 6, 7, 8, 9]""" +Categories (10, {np.int_().dtype}): [0, 1, 2, 3, ..., 6, 7, 8, 9]""" assert repr(s) == exp @@ -348,7 +348,7 @@ def test_categorical_series_repr_ordered(self): assert repr(s) == exp s = Series(Categorical(np.arange(10), ordered=True)) - exp = """0 0 + exp = f"""0 0 1 1 2 2 3 3 @@ -359,7 +359,7 @@ def test_categorical_series_repr_ordered(self): 8 8 9 9 dtype: category -Categories (10, int64): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]""" +Categories (10, {np.int_().dtype}): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]""" assert repr(s) == exp diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 19dd65e6435f7..ed1d346889566 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1776,7 +1776,7 @@ def test_pad(self): # corner case old = Index([5, 10]) - new = Index(np.arange(5)) + new = Index(np.arange(5, dtype=np.int64)) filler = libalgos.pad["int64_t"](old.values, new.values) expect_filler = np.array([-1, -1, -1, -1, -1], dtype=np.intp) tm.assert_numpy_array_equal(filler, expect_filler) diff --git a/pandas/tests/util/test_assert_categorical_equal.py b/pandas/tests/util/test_assert_categorical_equal.py index 29a0805bceb98..89b59ecb53413 100644 --- a/pandas/tests/util/test_assert_categorical_equal.py +++ b/pandas/tests/util/test_assert_categorical_equal.py @@ -22,8 +22,8 @@ def test_categorical_equal_order_mismatch(check_category_order): msg = """Categorical\\.categories are different Categorical\\.categories values are different \\(100\\.0 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[4, 3, 2, 1\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[4, 3, 2, 1\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_categorical_equal(c1, c2, **kwargs) else: @@ -34,8 +34,8 @@ def test_categorical_equal_categories_mismatch(): msg = """Categorical\\.categories are different Categorical\\.categories values are different \\(25\\.0 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 3, 5\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 3, 5\\], dtype='int64'\\)""" c1 = Categorical([1, 2, 3, 4]) c2 = Categorical([1, 2, 3, 5]) diff --git a/pandas/tests/util/test_assert_index_equal.py b/pandas/tests/util/test_assert_index_equal.py index 71799c73f35c6..0052ea671a5b0 100644 --- a/pandas/tests/util/test_assert_index_equal.py +++ b/pandas/tests/util/test_assert_index_equal.py @@ -17,7 +17,7 @@ def test_index_equal_levels_mismatch(): msg = """Index are different Index levels are different -\\[left\\]: 1, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[left\\]: 1, NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) \\[right\\]: 2, MultiIndex\\(\\[\\('A', 1\\), \\('A', 2\\), \\('B', 3\\), @@ -35,8 +35,8 @@ def test_index_equal_values_mismatch(check_exact): msg = """MultiIndex level \\[1\\] are different MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) -\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" idx1 = MultiIndex.from_tuples([("A", 2), ("A", 2), ("B", 3), ("B", 4)]) idx2 = MultiIndex.from_tuples([("A", 1), ("A", 2), ("B", 3), ("B", 4)]) @@ -49,8 +49,8 @@ def test_index_equal_length_mismatch(check_exact): msg = """Index are different Index length are different -\\[left\\]: 3, Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: 4, Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" +\\[left\\]: 3, NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: 4, NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" idx1 = Index([1, 2, 3]) idx2 = Index([1, 2, 3, 4]) @@ -67,22 +67,29 @@ def test_index_equal_class(exact): tm.assert_index_equal(idx1, idx2, exact=exact) -@pytest.mark.parametrize( - "idx_values, msg_str", - [ - [[1, 2, 3.0], "Float64Index\\(\\[1\\.0, 2\\.0, 3\\.0\\], dtype='float64'\\)"], - [range(3), "RangeIndex\\(start=0, stop=3, step=1\\)"], - ], -) -def test_index_equal_class_mismatch(check_exact, idx_values, msg_str): - msg = f"""Index are different +def test_int_float_index_equal_class_mismatch(check_exact): + msg = """Index are different + +Attribute "inferred_type" are different +\\[left\\]: integer +\\[right\\]: floating""" + + idx1 = Index([1, 2, 3]) + idx2 = Index([1, 2, 3], dtype=np.float64) + + with pytest.raises(AssertionError, match=msg): + tm.assert_index_equal(idx1, idx2, exact=True, check_exact=check_exact) + + +def test_range_index_equal_class_mismatch(check_exact): + msg = """Index are different Index classes are different -\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: {msg_str}""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: """ idx1 = Index([1, 2, 3]) - idx2 = Index(idx_values) + idx2 = RangeIndex(range(3)) with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, exact=True, check_exact=check_exact) @@ -96,8 +103,8 @@ def test_index_equal_values_close(check_exact): msg = """Index are different Index values are different \\(33\\.33333 %\\) -\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) -\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)""" +\\[left\\]: NumericIndex\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: NumericIndex\\(\\[1.0, 2.0, 3.0000000001\\], dtype='float64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, check_exact=check_exact) @@ -114,8 +121,8 @@ def test_index_equal_values_less_close(check_exact, rtol): msg = """Index are different Index values are different \\(33\\.33333 %\\) -\\[left\\]: Float64Index\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) -\\[right\\]: Float64Index\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)""" +\\[left\\]: NumericIndex\\(\\[1.0, 2.0, 3.0], dtype='float64'\\) +\\[right\\]: NumericIndex\\(\\[1.0, 2.0, 3.0001\\], dtype='float64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, **kwargs) @@ -131,8 +138,8 @@ def test_index_equal_values_too_far(check_exact, rtol): msg = """Index are different Index values are different \\(33\\.33333 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 4\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 4\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, **kwargs) @@ -146,8 +153,8 @@ def test_index_equal_value_oder_mismatch(check_exact, rtol, check_order): msg = """Index are different Index values are different \\(66\\.66667 %\\) -\\[left\\]: Int64Index\\(\\[1, 2, 3\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[3, 2, 1\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[1, 2, 3\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[3, 2, 1\\], dtype='int64'\\)""" if check_order: with pytest.raises(AssertionError, match=msg): @@ -168,8 +175,8 @@ def test_index_equal_level_values_mismatch(check_exact, rtol): msg = """MultiIndex level \\[1\\] are different MultiIndex level \\[1\\] values are different \\(25\\.0 %\\) -\\[left\\]: Int64Index\\(\\[2, 2, 3, 4\\], dtype='int64'\\) -\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" +\\[left\\]: NumericIndex\\(\\[2, 2, 3, 4\\], dtype='int64'\\) +\\[right\\]: NumericIndex\\(\\[1, 2, 3, 4\\], dtype='int64'\\)""" with pytest.raises(AssertionError, match=msg): tm.assert_index_equal(idx1, idx2, **kwargs) @@ -225,7 +232,7 @@ def test_index_equal_range_categories(check_categorical, exact): Index classes are different \\[left\\]: RangeIndex\\(start=0, stop=10, step=1\\) -\\[right\\]: Int64Index\\(\\[0, 1, 2, 3, 4, 5, 6, 7, 8, 9\\], dtype='int64'\\)""" +\\[right\\]: NumericIndex\\(\\[0, 1, 2, 3, 4, 5, 6, 7, 8, 9\\], dtype='int64'\\)""" rcat = CategoricalIndex(RangeIndex(10)) icat = CategoricalIndex(list(range(10))) diff --git a/pandas/tests/window/conftest.py b/pandas/tests/window/conftest.py index d05bb3d51bcde..23fdb56f3a2b2 100644 --- a/pandas/tests/window/conftest.py +++ b/pandas/tests/window/conftest.py @@ -130,7 +130,6 @@ def frame(): return DataFrame( np.random.randn(100, 10), index=bdate_range(datetime(2009, 1, 1), periods=100), - columns=np.arange(10), ) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 41b2ee70d7987..9216fff89e074 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -811,7 +811,7 @@ def test_groupby_rolling_var(self, window, min_periods, closed, expected): expected_result = DataFrame( np.array(expected, dtype="float64"), index=MultiIndex( - levels=[[1, 2], [0, 1, 2, 3, 4, 5, 6, 7]], + levels=[np.array([1, 2]), [0, 1, 2, 3, 4, 5, 6, 7]], codes=[[0, 0, 0, 0, 1, 1, 1, 1], [0, 2, 4, 6, 1, 3, 5, 7]], ), ) @@ -897,10 +897,11 @@ def test_as_index_false(self, by, expected_data): ) tm.assert_frame_equal(result, expected) - def test_nan_and_zero_endpoints(self): + def test_nan_and_zero_endpoints(self, any_int_numpy_dtype): # https://github.com/twosigma/pandas/issues/53 + typ = np.dtype(any_int_numpy_dtype).type size = 1000 - idx = np.repeat(0, size) + idx = np.repeat(typ(0), size) idx[-1] = 1 val = 5e25 @@ -919,7 +920,10 @@ def test_nan_and_zero_endpoints(self): arr, name="adl2", index=MultiIndex.from_arrays( - [[0] * 999 + [1], [0] * 999 + [1]], names=["index", "index"] + [ + Index([0] * 999 + [1], dtype=typ, name="index"), + Index([0] * 999 + [1], dtype=typ, name="index"), + ], ), ) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/window/test_pairwise.py b/pandas/tests/window/test_pairwise.py index 315b3003f716b..2b656a5d4e7b9 100644 --- a/pandas/tests/window/test_pairwise.py +++ b/pandas/tests/window/test_pairwise.py @@ -429,7 +429,11 @@ def test_multindex_columns_pairwise_func(self): expected = DataFrame( np.nan, index=MultiIndex.from_arrays( - [np.repeat(np.arange(5), 2), ["M", "N"] * 5, ["P", "Q"] * 5], + [ + np.repeat(np.arange(5, dtype=np.int64), 2), + ["M", "N"] * 5, + ["P", "Q"] * 5, + ], names=[None, "a", "b"], ), columns=columns,