Skip to content

Commit

Permalink
[ENH] Allow pd.Timedelta values in ForecastingHorizon (#2333)
Browse files Browse the repository at this point in the history
Contribution towards #1737.

This PR adds `TEST_FHS_TIMEDELTA` test cases to `sktime.forecasting.base.tests.test_fh.test_fh`. In order to pass the tests, two changes were required in `ForecastingHorizon` itself: in `sktime.forecasting.base._fh.ForecastingHorizon._is_in_sample` and in `sktime.forecasting.base._fh.ForecastingHorizon._is_out_of_sample`. Both methods perform a comparison with zero, which in the case of `pd.Timedelta` values should actually be `pd.Timedelta(0)`.

Note that `sktime.forecasting.base._fh.ForecastingHorizon.to_indexer` returns `None` in the new timedelta case. I have no idea what this method should return in this case. Any suggestions are very much welcome.
  • Loading branch information
Stanislav Khrapov committed Apr 4, 2022
1 parent 4b9003c commit 2ec3327
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 38 deletions.
31 changes: 21 additions & 10 deletions sktime/forecasting/base/_fh.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
is_in_valid_absolute_index_types,
is_in_valid_index_types,
is_in_valid_relative_index_types,
is_integer_index,
)

VALID_FORECASTING_HORIZON_TYPES = (int, list, np.ndarray, pd.Index)
Expand Down Expand Up @@ -142,7 +143,7 @@ class ForecastingHorizon:
Parameters
----------
values : pd.Index, np.array, list or int
values : pd.Index, pd.TimedeltaIndex, np.array, list, pd.Timedelta, or int
Values of forecasting horizon
is_relative : bool, optional (default=None)
- If True, a relative ForecastingHorizon is created:
Expand Down Expand Up @@ -208,7 +209,7 @@ def _new(
Parameters
----------
values : pd.Index, np.array, list or int
values : pd.Index, pd.TimedeltaIndex, np.array, list, pd.Timedelta, or int
Values of forecasting horizon.
is_relative : bool, default=same as self.is_relative
- If None, determined automatically: same as self.is_relative
Expand Down Expand Up @@ -382,11 +383,13 @@ def to_out_of_sample(self, cutoff=None):
out_of_sample = self.to_pandas()[is_out_of_sample]
return self._new(out_of_sample)

def _is_in_sample(self, cutoff=None):
def _is_in_sample(self, cutoff=None) -> np.ndarray:
"""Get index location of in-sample values."""
return self.to_relative(cutoff).to_pandas() <= 0
relative = self.to_relative(cutoff).to_pandas()
null = 0 if is_integer_index(relative) else pd.Timedelta(0)
return relative <= null

def is_all_in_sample(self, cutoff=None):
def is_all_in_sample(self, cutoff=None) -> bool:
"""Whether the forecasting horizon is purely in-sample for given cutoff.
Parameters
Expand All @@ -401,12 +404,11 @@ def is_all_in_sample(self, cutoff=None):
"""
return sum(self._is_in_sample(cutoff)) == len(self)

def _is_out_of_sample(self, cutoff=None):
def _is_out_of_sample(self, cutoff=None) -> np.ndarray:
"""Get index location of out-of-sample values."""
# return ~self._in_sample_idx(cutoff)
return self.to_relative(cutoff).to_pandas() > 0
return np.logical_not(self._is_in_sample(cutoff))

def is_all_out_of_sample(self, cutoff=None):
def is_all_out_of_sample(self, cutoff=None) -> bool:
"""Whether the forecasting horizon is purely out-of-sample for given cutoff.
Parameters
Expand Down Expand Up @@ -442,7 +444,16 @@ def to_indexer(self, cutoff=None, from_cutoff=True):
Indexer.
"""
if from_cutoff:
return self.to_relative(cutoff).to_pandas() - 1
relative_index = self.to_relative(cutoff).to_pandas()
if is_integer_index(relative_index):
return relative_index - 1
else:
# What does indexer mean if fh is timedelta?
msg = (
"The indexer for timedelta-like forecasting horizon "
"is not yet implemented"
)
raise NotImplementedError(msg)
else:
relative = self.to_relative(cutoff)
return relative - relative.to_pandas()[0]
Expand Down
75 changes: 56 additions & 19 deletions sktime/forecasting/base/tests/test_fh.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from sktime.forecasting.tests._config import (
INDEX_TYPE_LOOKUP,
TEST_FHS,
TEST_FHS_TIMEDELTA,
VALID_INDEX_FH_COMBINATIONS,
)
from sktime.utils._testing.forecasting import _make_fh, make_forecasting_problem
Expand Down Expand Up @@ -48,9 +49,21 @@ def _assert_index_equal(a, b):
@pytest.mark.parametrize(
"index_type, fh_type, is_relative", VALID_INDEX_FH_COMBINATIONS
)
@pytest.mark.parametrize("steps", TEST_FHS)
@pytest.mark.parametrize("steps", [*TEST_FHS, *TEST_FHS_TIMEDELTA])
def test_fh(index_type, fh_type, is_relative, steps):
"""Testing ForecastingHorizon conversions."""
int_types = ["int64", "int32"]
steps_is_int = (
isinstance(steps, (int, np.integer)) or np.array(steps).dtype in int_types
)
steps_is_timedelta = isinstance(steps, pd.Timedelta) or (
isinstance(steps, list) and isinstance(pd.Index(steps), pd.TimedeltaIndex)
)
steps_and_fh_incompatible = (fh_type == "timedelta" and steps_is_int) or (
fh_type != "timedelta" and steps_is_timedelta
)
if steps_and_fh_incompatible:
pytest.skip("steps and fh_type are incompatible")
# generate data
y = make_forecasting_problem(index_type=index_type)
if index_type == "int":
Expand All @@ -74,12 +87,27 @@ def test_fh(index_type, fh_type, is_relative, steps):
# get expected outputs
if isinstance(steps, int):
steps = np.array([steps])
fh_relative = pd.Index(steps).sort_values()
fh_absolute = y.index[np.where(y.index == cutoff)[0] + steps].sort_values()
fh_indexer = fh_relative - 1
fh_oos = fh.to_pandas()[fh_relative > 0]
elif isinstance(steps, pd.Timedelta):
steps = pd.Index([steps])
else:
steps = pd.Index(steps)

if steps.dtype in int_types:
fh_relative = pd.Index(steps, dtype="int64").sort_values()
fh_absolute = y.index[np.where(y.index == cutoff)[0] + steps].sort_values()
fh_indexer = fh_relative - 1
else:
fh_relative = steps.sort_values()
fh_absolute = (cutoff + steps).sort_values()
fh_indexer = None

if steps.dtype in int_types:
null = 0
else:
null = pd.Timedelta(0)
fh_oos = fh.to_pandas()[fh_relative > null]
is_oos = len(fh_oos) == len(fh)
fh_ins = fh.to_pandas()[fh_relative <= 0]
fh_ins = fh.to_pandas()[fh_relative <= null]
is_ins = len(fh_ins) == len(fh)

# check outputs
Expand All @@ -91,8 +119,12 @@ def test_fh(index_type, fh_type, is_relative, steps):
_assert_index_equal(fh_relative, fh.to_relative(cutoff).to_pandas())
assert fh.to_relative(cutoff).is_relative

# check index-like representation
_assert_index_equal(fh_indexer, fh.to_indexer(cutoff))
if steps.dtype in int_types:
# check index-like representation
_assert_index_equal(fh_indexer, fh.to_indexer(cutoff))
else:
with pytest.raises(NotImplementedError):
fh.to_indexer(cutoff)

# check in-sample representation
# we only compare the numpy array here because the expected solution is
Expand Down Expand Up @@ -241,17 +273,22 @@ def test_coerce_duration_to_int_with_non_allowed_durations(duration):
@pytest.mark.parametrize("index_type", INDEX_TYPE_LOOKUP.keys())
def test_get_duration(n_timepoints, index_type):
"""Test getting of duration."""
index = _make_index(n_timepoints, index_type)
duration = _get_duration(index)
# check output type is duration type
assert isinstance(
duration, (pd.Timedelta, pd.tseries.offsets.BaseOffset, int, np.integer)
)

# check integer output
duration = _get_duration(index, coerce_to_int=True)
assert isinstance(duration, (int, np.integer))
assert duration == n_timepoints - 1
if index_type != "timedelta":
index = _make_index(n_timepoints, index_type)
duration = _get_duration(index)
# check output type is duration type
assert isinstance(
duration, (pd.Timedelta, pd.tseries.offsets.BaseOffset, int, np.integer)
)

# check integer output
duration = _get_duration(index, coerce_to_int=True)
assert isinstance(duration, (int, np.integer))
assert duration == n_timepoints - 1
else:
match = "index_class: timedelta is not supported"
with pytest.raises(ValueError, match=match):
_make_index(n_timepoints, index_type)


FIXED_FREQUENCY_STRINGS = ["10T", "H", "D", "2D"]
Expand Down
5 changes: 5 additions & 0 deletions sktime/forecasting/model_selection/tests/test_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,11 @@ def test_window_splitter_in_sample_fh_greater_than_window_length(CV):
@pytest.mark.parametrize("values", TEST_OOS_FHS)
def test_split_by_fh(index_type, fh_type, is_relative, values):
"""Test temporal_train_test_split."""
if fh_type == "timedelta":
pytest.skip(
"ForecastingHorizon with timedelta values "
"is currently experimental and not supported everywhere"
)
y = _make_series(20, index_type=index_type)
cutoff = y.index[10]
fh = _make_fh(cutoff, values, fh_type, is_relative)
Expand Down
8 changes: 5 additions & 3 deletions sktime/forecasting/tests/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@
*TEST_STEP_LENGTHS_DATEOFFSET,
]

TEST_OOS_FHS = [1, np.array([2, 5])] # out-of-sample
TEST_OOS_FHS = [1, np.array([2, 5], dtype="int64")] # out-of-sample
TEST_INS_FHS = [
-3, # single in-sample
np.array([-2, -5]), # multiple in-sample
np.array([-2, -5], dtype="int64"), # multiple in-sample
0, # last training point
np.array([-3, 2]), # mixed in-sample and out-of-sample
np.array([-3, 2], dtype="int64"), # mixed in-sample and out-of-sample
]
TEST_FHS = [*TEST_OOS_FHS, *TEST_INS_FHS]

Expand Down Expand Up @@ -104,11 +104,13 @@
("period", "period", False),
("datetime", "int", True),
("datetime", "datetime", False),
("datetime", "timedelta", True),
]

INDEX_TYPE_LOOKUP = {
"int": pd.Index,
"range": pd.RangeIndex,
"datetime": pd.DatetimeIndex,
"period": pd.PeriodIndex,
"timedelta": pd.TimedeltaIndex,
}
22 changes: 16 additions & 6 deletions sktime/forecasting/tests/test_all_forecasters.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@
# names for index/fh combinations to display in tests
index_fh_comb_names = [f"{x[0]}-{x[1]}-{x[2]}" for x in VALID_INDEX_FH_COMBINATIONS]

pytest_skip_msg = (
"ForecastingHorizon with timedelta values "
"is currently experimental and not supported everywhere"
)


class ForecasterFixtureGenerator(BaseFixtureGenerator):
"""Fixture generator for forecasting tests.
Expand Down Expand Up @@ -202,6 +207,9 @@ def test_predict_time_index(
):
"""Check that predicted time index matches forecasting horizon."""
index_type, fh_type, is_relative = index_fh_comb
if fh_type == "timedelta":
pytest.skip(pytest_skip_msg)

y_train = _make_series(
n_columns=n_columns, index_type=index_type, n_timepoints=50
)
Expand All @@ -224,6 +232,8 @@ def test_predict_residuals(
):
"""Check that predict_residuals method works as expected."""
index_type, fh_type, is_relative = index_fh_comb
if fh_type == "timedelta":
pytest.skip(pytest_skip_msg)

y_train = _make_series(
n_columns=n_columns, index_type=index_type, n_timepoints=50
Expand All @@ -250,14 +260,12 @@ def test_predict_residuals(
"fh_int_oos", TEST_OOS_FHS, ids=[f"fh={fh}" for fh in TEST_OOS_FHS]
)
def test_predict_time_index_with_X(
self,
estimator_instance,
n_columns,
index_fh_comb,
fh_int_oos,
self, estimator_instance, n_columns, index_fh_comb, fh_int_oos
):
"""Check that predicted time index matches forecasting horizon."""
index_type, fh_type, is_relative = index_fh_comb
if fh_type == "timedelta":
pytest.skip(pytest_skip_msg)

z, X = make_forecasting_problem(index_type=index_type, make_X=True)

Expand All @@ -284,6 +292,9 @@ def test_predict_time_index_in_sample_full(
):
"""Check that predicted time index equals fh for full in-sample predictions."""
index_type, fh_type, is_relative = index_fh_comb
if fh_type == "timedelta":
pytest.skip(pytest_skip_msg)

y_train = _make_series(n_columns=n_columns, index_type=index_type)
cutoff = y_train.index[-1]
steps = -np.arange(len(y_train))
Expand Down Expand Up @@ -455,7 +466,6 @@ def test_score(self, estimator_instance, n_columns, fh_int_oos):
y_pred = estimator_instance.predict()

fh_idx = check_fh(fh_int_oos).to_indexer() # get zero based index
actual = estimator_instance.score(y_test.iloc[fh_idx], fh=fh_int_oos)
expected = mean_absolute_percentage_error(
y_pred, y_test.iloc[fh_idx], symmetric=True
)
Expand Down
3 changes: 3 additions & 0 deletions sktime/utils/_testing/forecasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ def _make_fh(cutoff, steps, fh_type, is_relative):
if isinstance(steps, (int, np.integer)):
steps = np.array([steps], dtype=int)

elif isinstance(steps, pd.Timedelta):
steps = [steps]

if is_relative:
return ForecastingHorizon(fh_class(steps), is_relative=is_relative)

Expand Down

0 comments on commit 2ec3327

Please sign in to comment.