Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Partially replace pd.Int64Index with pd.Index #2339

Merged
merged 29 commits into from
Apr 4, 2022
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
38323bd
replace Int64Index in sktime/forecasting/base/_fh.py
Mar 29, 2022
3fba403
replace Int64Index in sktime/utils/datetime.py
Mar 29, 2022
36e31f2
replace Int64Index in sktime/forecasting/tests/_config.py
Mar 29, 2022
909781e
replace Int64Index in sktime/forecasting/base/adapters/_statsmodels.py
Mar 29, 2022
480b2de
replace Int64Index in sktime/utils/_testing/series.py
Mar 29, 2022
e8d0cd0
remove pd.Int64Index from RELATIVE_TYPES and ABSOLUTE_TYPES and VALID_INDEX_TYPES…
Mar 29, 2022
cf3c3b0
clean up
Mar 29, 2022
cd6a63c
clean up
Mar 29, 2022
1f47de6
Merge branch 'upstream-main' into 2338-khrapovs-replace-int64index
Mar 29, 2022
b08bc5b
Merge branch 'main' into 2338-khrapovs-replace-int64index
Mar 29, 2022
61681b8
Merge branch 'main' into 2338-khrapovs-replace-int64index
Mar 30, 2022
385d5a0
use is_integer instead of is_numeric
Mar 30, 2022
dcd6c1c
Merge branch 'main' into 2338-khrapovs-replace-int64index
Mar 31, 2022
7d95e1e
Merge branch 'main' into 2338-khrapovs-replace-int64index
Apr 1, 2022
041e7b1
new function is_integer_index
Apr 1, 2022
c4ce5c2
new function is_in_valid_index_types
Apr 1, 2022
5e85ac3
revert pd.Int64Index to VALID_INDEX_TYPES
Apr 1, 2022
810fa93
functions is_relative_fh_type and is_absolute_fh_type
Apr 1, 2022
626fa1b
Merge branch 'main' into 2338-khrapovs-replace-int64index
Apr 1, 2022
c4344fc
Merge branch 'upstream-main' into 2338-khrapovs-replace-int64index
Apr 1, 2022
2e8b30f
Merge branch 'main' into 2338-khrapovs-replace-int64index
Apr 2, 2022
caf9bc2
Merge branch 'main' into 2338-khrapovs-replace-int64index
Apr 3, 2022
2d4836c
isinstance(x, VALID_INDEX_TYPES) or is_integer_index(x)
Apr 3, 2022
193d4de
use is_in_valid_index_types everywhere
Apr 3, 2022
5780916
Merge branch 'main' into 2338-khrapovs-replace-int64index
Apr 3, 2022
e853c22
move functions is_in_valid_relative_index_types and is_in_valid_absolute_index_types…
Apr 3, 2022
d6e631e
Merge branch 'main' into 2338-khrapovs-replace-int64index
Apr 3, 2022
6f285bc
use is_integer_index in test_fh
Apr 3, 2022
ad24fcb
Merge branch 'main' into 2338-khrapovs-replace-int64index
Apr 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
32 changes: 19 additions & 13 deletions sktime/forecasting/base/_fh.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
)
from sktime.utils.validation.series import VALID_INDEX_TYPES

RELATIVE_TYPES = (pd.Int64Index, pd.RangeIndex, pd.TimedeltaIndex)
ABSOLUTE_TYPES = (pd.Int64Index, pd.RangeIndex, pd.DatetimeIndex, pd.PeriodIndex)
RELATIVE_TYPES = (pd.RangeIndex, pd.TimedeltaIndex)
ABSOLUTE_TYPES = (pd.RangeIndex, pd.DatetimeIndex, pd.PeriodIndex)
assert set(RELATIVE_TYPES).issubset(VALID_INDEX_TYPES)
assert set(ABSOLUTE_TYPES).issubset(VALID_INDEX_TYPES)
VALID_FORECASTING_HORIZON_TYPES = (int, list, np.ndarray, pd.Index)
Expand Down Expand Up @@ -95,19 +95,22 @@ def _check_values(values: Union[VALID_FORECASTING_HORIZON_TYPES]) -> pd.Index:
# isinstance() does not work here because index types inherit from each
# other, hence we check for type equality here
if type(values) in VALID_INDEX_TYPES:
if (type(values) in VALID_INDEX_TYPES) or (
isinstance(values, pd.Index) and values.is_numeric()
):
pass

# convert single integer to pandas index, no further checks needed
# convert single integer or timedelta or dateoffset
# to pandas index, no further checks needed
elif is_int(values):
return pd.Int64Index([values], dtype=int)
values = pd.Index([values], dtype=int)

elif is_timedelta_or_date_offset(values):
return pd.Index([values])
values = pd.Index([values])

# convert np.array or list to pandas index
elif is_array(values) and array_is_int(values):
values = pd.Int64Index(values, dtype=int)
values = pd.Index(values, dtype=int)

elif is_array(values) and array_is_timedelta_or_date_offset(values):
values = pd.Index(values)
Expand Down Expand Up @@ -180,18 +183,21 @@ def __init__(
# check types, note that isinstance() does not work here because index
# types inherit from each other, hence we check for type equality
error_msg = f"`values` type is not compatible with `is_relative={is_relative}`."
values_in_relative_types = type(values) in RELATIVE_TYPES
values_in_absolute_types = type(values) in ABSOLUTE_TYPES
values_is_numeric_index = isinstance(values, pd.Index) and values.is_numeric()
if is_relative is None:
if type(values) in RELATIVE_TYPES:
if values_in_relative_types or values_is_numeric_index:
is_relative = True
elif type(values) in ABSOLUTE_TYPES:
elif values_in_absolute_types or values_is_numeric_index:
is_relative = False
else:
raise TypeError(f"{type(values)} is not a supported fh index type")
if is_relative:
if not type(values) in RELATIVE_TYPES:
if not (values_in_relative_types or values_is_numeric_index):
raise TypeError(error_msg)
else:
if not type(values) in ABSOLUTE_TYPES:
if not (values_in_absolute_types or values_is_numeric_index):
raise TypeError(error_msg)

self._values = values
Expand Down Expand Up @@ -262,7 +268,7 @@ def to_numpy(self, **kwargs) -> np.ndarray:
# We cache the results from `to_relative()` and `to_absolute()` calls to speed up
# computations, as these are the basic methods and often required internally when
# calling different methods.
@lru_cache(typed=True)
@lru_cache(typed=True) # noqa: B019
def to_relative(self, cutoff=None):
"""Return forecasting horizon values relative to a cutoff.

Expand Down Expand Up @@ -319,7 +325,7 @@ def to_relative(self, cutoff=None):

return self._new(relative, is_relative=True)

@lru_cache(typed=True)
@lru_cache(typed=True) # noqa: B019
def to_absolute(self, cutoff):
"""Return absolute version of forecasting horizon values.

Expand Down
4 changes: 2 additions & 2 deletions sktime/forecasting/base/adapters/_statsmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def _fit(self, y, X=None, fh=None):
"""
# statsmodels does not support pd.Int64Index as required,
# so we coerce it here to pd.RangeIndex
if isinstance(y, pd.Series) and type(y.index) == pd.Int64Index:
if isinstance(y, pd.Series) and y.index.is_numeric():
y, X = _coerce_int_to_range_index(y, X)
self._fit_forecaster(y, X)
return self
Expand Down Expand Up @@ -113,7 +113,7 @@ def _coerce_int_to_range_index(y, X=None):
np.testing.assert_array_equal(y.index, new_index)
except AssertionError:
raise ValueError(
"Coercion of pd.Int64Index to pd.RangeIndex "
"Coercion of integer pd.Index to pd.RangeIndex "
"failed. Please provide `y_train` with a "
"pd.RangeIndex."
)
Expand Down
10 changes: 6 additions & 4 deletions sktime/forecasting/base/tests/test_fh.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def test_fh(index_type, fh_type, is_relative, steps):
# get expected outputs
if isinstance(steps, int):
steps = np.array([steps])
fh_relative = pd.Int64Index(steps).sort_values()
fh_relative = pd.Index(steps).sort_values()
fh_absolute = y.index[np.where(y.index == cutoff)[0] + steps].sort_values()
fh_indexer = fh_relative - 1
fh_oos = fh.to_pandas()[fh_relative > 0]
Expand Down Expand Up @@ -139,7 +139,7 @@ def test_check_fh_values_duplicate_input_values(arg):


GOOD_ABSOLUTE_INPUT_ARGS = (
pd.Int64Index([1, 2, 3]),
pd.Index([1, 2, 3]),
pd.period_range("2000-01-01", periods=3, freq="D"),
pd.date_range("2000-01-01", periods=3, freq="M"),
np.array([1, 2, 3]),
Expand All @@ -152,7 +152,9 @@ def test_check_fh_values_duplicate_input_values(arg):
def test_check_fh_absolute_values_input_conversion_to_pandas_index(arg):
"""Test conversion of absolute horizons to pandas index."""
output = ForecastingHorizon(arg, is_relative=False).to_pandas()
assert type(output) in VALID_INDEX_TYPES
assert (type(output) in VALID_INDEX_TYPES) or (
isinstance(output, pd.Index) and output.is_numeric()
)


GOOD_RELATIVE_INPUT_ARGS = [
Expand Down Expand Up @@ -212,7 +214,7 @@ def test_coerce_duration_to_int(duration):
ret = _coerce_duration_to_int(duration, freq=_get_freq(duration))

# check output type is always integer
assert type(ret) in (pd.Int64Index, np.integer, int)
assert (type(ret) in (np.integer, int)) or ret.is_numeric()

# check result
if isinstance(duration, pd.Index):
Expand Down
2 changes: 1 addition & 1 deletion sktime/forecasting/tests/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
]

INDEX_TYPE_LOOKUP = {
"int": pd.Int64Index,
"int": pd.Index,
"range": pd.RangeIndex,
"datetime": pd.DatetimeIndex,
"period": pd.PeriodIndex,
Expand Down
2 changes: 1 addition & 1 deletion sktime/utils/_testing/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def _make_index(n_timepoints, index_type=None):

elif index_type == "int":
start = 3
return pd.Int64Index(np.arange(start, start + n_timepoints))
return pd.Index(np.arange(start, start + n_timepoints), dtype=int)

else:
raise ValueError(f"index_class: {index_type} is not supported")
4 changes: 2 additions & 2 deletions sktime/utils/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def _coerce_duration_to_int(duration, freq=None):
duration[0], pd.tseries.offsets.BaseOffset
):
count = _get_intervals_count_and_unit(freq)[0]
return pd.Int64Index([d.n / count for d in duration])
return pd.Index([d.n / count for d in duration], dtype=int)
elif isinstance(duration, (pd.Timedelta, pd.TimedeltaIndex)):
count, unit = _get_intervals_count_and_unit(freq)
# integer conversion only works reliably with non-ambiguous units (
Expand Down Expand Up @@ -100,7 +100,7 @@ def _shift(x, by=1):
Shifted time point
"""
assert isinstance(x, (pd.Period, pd.Timestamp, int, np.integer)), type(x)
assert isinstance(by, (int, np.integer, pd.Int64Index)), type(by)
assert isinstance(by, (int, np.integer)) or by.is_numeric(), type(by)
if isinstance(x, pd.Timestamp):
if not hasattr(x, "freq") or x.freq is None:
raise ValueError("No `freq` information available")
Expand Down
12 changes: 3 additions & 9 deletions sktime/utils/validation/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,7 @@

# We currently support the following types for input data and time index types.
VALID_DATA_TYPES = (pd.DataFrame, pd.Series, np.ndarray)
VALID_INDEX_TYPES = (
pd.Int64Index,
pd.RangeIndex,
pd.PeriodIndex,
pd.DatetimeIndex,
pd.TimedeltaIndex,
)
VALID_INDEX_TYPES = (pd.RangeIndex, pd.PeriodIndex, pd.DatetimeIndex, pd.TimedeltaIndex)


def _check_is_univariate(y, var_name="input"):
Expand Down Expand Up @@ -185,7 +179,7 @@ def check_time_index(

# We check for type equality here because isinstance does not
# work reliably: index types inherit from each other.
if not type(index) in VALID_INDEX_TYPES:
if not ((type(index) in VALID_INDEX_TYPES) or index.is_numeric()):
raise NotImplementedError(
f"{type(index)} is not supported for {var_name}, use "
f"one of {VALID_INDEX_TYPES} instead."
Expand All @@ -194,7 +188,7 @@ def check_time_index(
if enforce_index_type and type(index) is not enforce_index_type:
raise NotImplementedError(
f"{type(index)} is not supported for {var_name}, use "
f"type: {enforce_index_type} instead."
f"type: {enforce_index_type} or integer pd.Index instead."
)

# Check time index is ordered in time
Expand Down
2 changes: 1 addition & 1 deletion sktime/utils/validation/tests/test_forecasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from sktime.utils.validation.forecasting import check_fh

empty_input = (np.array([]), [], pd.Int64Index([]))
empty_input = (np.array([], dtype=int), [], pd.Index([], dtype=int))


@pytest.mark.parametrize("arg", empty_input)
Expand Down