Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Allow pd.Timedelta values in ForecastingHorizon #2333

Merged
merged 27 commits into from
Apr 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
efa1f2a
Test ForecastingHorizon with timedelta values
Mar 28, 2022
892c7ec
Merge branch 'upstream-main' into 1737-khrapovs-test-timedelta-foreca…
Mar 28, 2022
b7932b5
clean up
Mar 28, 2022
c4cd9d6
docstrings
Mar 28, 2022
3fa69c0
use TEST_WINDOW_LENGTHS_INT in sktime.forecasting.tests.test_all_fore…
Mar 29, 2022
65bf0cc
skip forecaster tests with timedelta fh
Mar 29, 2022
3503d01
skip timedelta fh in sktime.forecasting.model_selection.tests.test_spl…
Mar 29, 2022
9e090fd
use "int64" in type checking
Mar 29, 2022
b832b32
use "int32" in type checking
Mar 29, 2022
d1d19c6
use "int32" in type checking
Mar 29, 2022
dfd830d
clean up
Mar 29, 2022
12fd025
clean up
Mar 29, 2022
16d89d5
Merge branch 'main' into 1737-khrapovs-test-timedelta-forecasting-hor…
Mar 29, 2022
65cf207
Merge branch 'main' into 1737-khrapovs-test-timedelta-forecasting-hor…
Mar 30, 2022
30ce39e
Merge branch 'main' into 1737-khrapovs-test-timedelta-forecasting-hor…
Mar 31, 2022
d22aac4
use logical not of _is_in_sample in _is_out_of_sample
Mar 31, 2022
6bca5cf
Merge branch 'main' into 1737-khrapovs-test-timedelta-forecasting-hor…
Apr 1, 2022
72126a1
Merge branch 'main' into 1737-khrapovs-test-timedelta-forecasting-hor…
Apr 1, 2022
9d15c87
Merge branch 'upstream-main' into 1737-khrapovs-test-timedelta-foreca…
Apr 1, 2022
1ae1214
Merge branch 'main' into 1737-khrapovs-test-timedelta-forecasting-hor…
Apr 2, 2022
3a1088a
Merge branch 'main' into 1737-khrapovs-test-timedelta-forecasting-hor…
Apr 3, 2022
7136985
Merge branch 'main' into 1737-khrapovs-test-timedelta-forecasting-hor…
Apr 3, 2022
84e815f
Merge branch 'main' into 1737-khrapovs-test-timedelta-forecasting-hor…
Apr 3, 2022
0473cee
to_indexer throws NotImplementedError for non-integer indices
Apr 3, 2022
918f160
clean up
Apr 3, 2022
db67ef2
Merge branch 'main' into 1737-khrapovs-test-timedelta-forecasting-hor…
Apr 4, 2022
3d389ac
merge with main
Apr 4, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
31 changes: 21 additions & 10 deletions sktime/forecasting/base/_fh.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
is_in_valid_absolute_index_types,
is_in_valid_index_types,
is_in_valid_relative_index_types,
is_integer_index,
)

VALID_FORECASTING_HORIZON_TYPES = (int, list, np.ndarray, pd.Index)
Expand Down Expand Up @@ -142,7 +143,7 @@ class ForecastingHorizon:

Parameters
----------
values : pd.Index, np.array, list or int
values : pd.Index, pd.TimedeltaIndex, np.array, list, pd.Timedelta, or int
Values of forecasting horizon
is_relative : bool, optional (default=None)
- If True, a relative ForecastingHorizon is created:
Expand Down Expand Up @@ -208,7 +209,7 @@ def _new(

Parameters
----------
values : pd.Index, np.array, list or int
values : pd.Index, pd.TimedeltaIndex, np.array, list, pd.Timedelta, or int
Values of forecasting horizon.
is_relative : bool, default=same as self.is_relative
- If None, determined automatically: same as self.is_relative
Expand Down Expand Up @@ -382,11 +383,13 @@ def to_out_of_sample(self, cutoff=None):
out_of_sample = self.to_pandas()[is_out_of_sample]
return self._new(out_of_sample)

def _is_in_sample(self, cutoff=None):
def _is_in_sample(self, cutoff=None) -> np.ndarray:
"""Get index location of in-sample values."""
return self.to_relative(cutoff).to_pandas() <= 0
relative = self.to_relative(cutoff).to_pandas()
null = 0 if is_integer_index(relative) else pd.Timedelta(0)
return relative <= null

def is_all_in_sample(self, cutoff=None):
def is_all_in_sample(self, cutoff=None) -> bool:
"""Whether the forecasting horizon is purely in-sample for given cutoff.

Parameters
Expand All @@ -401,12 +404,11 @@ def is_all_in_sample(self, cutoff=None):
"""
return sum(self._is_in_sample(cutoff)) == len(self)

def _is_out_of_sample(self, cutoff=None):
def _is_out_of_sample(self, cutoff=None) -> np.ndarray:
"""Get index location of out-of-sample values."""
# return ~self._in_sample_idx(cutoff)
return self.to_relative(cutoff).to_pandas() > 0
fkiraly marked this conversation as resolved.
Show resolved Hide resolved
return np.logical_not(self._is_in_sample(cutoff))

def is_all_out_of_sample(self, cutoff=None):
def is_all_out_of_sample(self, cutoff=None) -> bool:
"""Whether the forecasting horizon is purely out-of-sample for given cutoff.

Parameters
Expand Down Expand Up @@ -442,7 +444,16 @@ def to_indexer(self, cutoff=None, from_cutoff=True):
Indexer.
"""
if from_cutoff:
return self.to_relative(cutoff).to_pandas() - 1
relative_index = self.to_relative(cutoff).to_pandas()
if is_integer_index(relative_index):
return relative_index - 1
else:
# What does indexer mean if fh is timedelta?
msg = (
"The indexer for timedelta-like forecasting horizon "
"is not yet implemented"
)
raise NotImplementedError(msg)
else:
relative = self.to_relative(cutoff)
return relative - relative.to_pandas()[0]
Expand Down
75 changes: 56 additions & 19 deletions sktime/forecasting/base/tests/test_fh.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from sktime.forecasting.tests._config import (
INDEX_TYPE_LOOKUP,
TEST_FHS,
TEST_FHS_TIMEDELTA,
VALID_INDEX_FH_COMBINATIONS,
)
from sktime.utils._testing.forecasting import _make_fh, make_forecasting_problem
Expand Down Expand Up @@ -48,9 +49,21 @@ def _assert_index_equal(a, b):
@pytest.mark.parametrize(
"index_type, fh_type, is_relative", VALID_INDEX_FH_COMBINATIONS
)
@pytest.mark.parametrize("steps", TEST_FHS)
@pytest.mark.parametrize("steps", [*TEST_FHS, *TEST_FHS_TIMEDELTA])
def test_fh(index_type, fh_type, is_relative, steps):
"""Testing ForecastingHorizon conversions."""
int_types = ["int64", "int32"]
steps_is_int = (
isinstance(steps, (int, np.integer)) or np.array(steps).dtype in int_types
)
steps_is_timedelta = isinstance(steps, pd.Timedelta) or (
isinstance(steps, list) and isinstance(pd.Index(steps), pd.TimedeltaIndex)
)
steps_and_fh_incompatible = (fh_type == "timedelta" and steps_is_int) or (
fh_type != "timedelta" and steps_is_timedelta
)
if steps_and_fh_incompatible:
pytest.skip("steps and fh_type are incompatible")
# generate data
y = make_forecasting_problem(index_type=index_type)
if index_type == "int":
Expand All @@ -74,12 +87,27 @@ def test_fh(index_type, fh_type, is_relative, steps):
# get expected outputs
if isinstance(steps, int):
steps = np.array([steps])
fh_relative = pd.Index(steps).sort_values()
fh_absolute = y.index[np.where(y.index == cutoff)[0] + steps].sort_values()
fh_indexer = fh_relative - 1
fh_oos = fh.to_pandas()[fh_relative > 0]
elif isinstance(steps, pd.Timedelta):
steps = pd.Index([steps])
else:
steps = pd.Index(steps)

if steps.dtype in int_types:
fh_relative = pd.Index(steps, dtype="int64").sort_values()
fh_absolute = y.index[np.where(y.index == cutoff)[0] + steps].sort_values()
fh_indexer = fh_relative - 1
else:
fh_relative = steps.sort_values()
fh_absolute = (cutoff + steps).sort_values()
fh_indexer = None

if steps.dtype in int_types:
null = 0
else:
null = pd.Timedelta(0)
fh_oos = fh.to_pandas()[fh_relative > null]
is_oos = len(fh_oos) == len(fh)
fh_ins = fh.to_pandas()[fh_relative <= 0]
fh_ins = fh.to_pandas()[fh_relative <= null]
is_ins = len(fh_ins) == len(fh)

# check outputs
Expand All @@ -91,8 +119,12 @@ def test_fh(index_type, fh_type, is_relative, steps):
_assert_index_equal(fh_relative, fh.to_relative(cutoff).to_pandas())
assert fh.to_relative(cutoff).is_relative

# check index-like representation
_assert_index_equal(fh_indexer, fh.to_indexer(cutoff))
if steps.dtype in int_types:
# check index-like representation
_assert_index_equal(fh_indexer, fh.to_indexer(cutoff))
else:
with pytest.raises(NotImplementedError):
fh.to_indexer(cutoff)

# check in-sample representation
# we only compare the numpy array here because the expected solution is
Expand Down Expand Up @@ -241,17 +273,22 @@ def test_coerce_duration_to_int_with_non_allowed_durations(duration):
@pytest.mark.parametrize("index_type", INDEX_TYPE_LOOKUP.keys())
def test_get_duration(n_timepoints, index_type):
"""Test getting of duration."""
index = _make_index(n_timepoints, index_type)
duration = _get_duration(index)
# check output type is duration type
assert isinstance(
duration, (pd.Timedelta, pd.tseries.offsets.BaseOffset, int, np.integer)
)

# check integer output
duration = _get_duration(index, coerce_to_int=True)
assert isinstance(duration, (int, np.integer))
assert duration == n_timepoints - 1
if index_type != "timedelta":
index = _make_index(n_timepoints, index_type)
duration = _get_duration(index)
# check output type is duration type
assert isinstance(
duration, (pd.Timedelta, pd.tseries.offsets.BaseOffset, int, np.integer)
)

# check integer output
duration = _get_duration(index, coerce_to_int=True)
assert isinstance(duration, (int, np.integer))
assert duration == n_timepoints - 1
else:
match = "index_class: timedelta is not supported"
with pytest.raises(ValueError, match=match):
_make_index(n_timepoints, index_type)


FIXED_FREQUENCY_STRINGS = ["10T", "H", "D", "2D"]
Expand Down
5 changes: 5 additions & 0 deletions sktime/forecasting/model_selection/tests/test_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,11 @@ def test_window_splitter_in_sample_fh_greater_than_window_length(CV):
@pytest.mark.parametrize("values", TEST_OOS_FHS)
def test_split_by_fh(index_type, fh_type, is_relative, values):
"""Test temporal_train_test_split."""
if fh_type == "timedelta":
pytest.skip(
"ForecastingHorizon with timedelta values "
"is currently experimental and not supported everywhere"
)
y = _make_series(20, index_type=index_type)
cutoff = y.index[10]
fh = _make_fh(cutoff, values, fh_type, is_relative)
Expand Down
8 changes: 5 additions & 3 deletions sktime/forecasting/tests/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@
*TEST_STEP_LENGTHS_DATEOFFSET,
]

TEST_OOS_FHS = [1, np.array([2, 5])] # out-of-sample
TEST_OOS_FHS = [1, np.array([2, 5], dtype="int64")] # out-of-sample
TEST_INS_FHS = [
-3, # single in-sample
np.array([-2, -5]), # multiple in-sample
np.array([-2, -5], dtype="int64"), # multiple in-sample
0, # last training point
np.array([-3, 2]), # mixed in-sample and out-of-sample
np.array([-3, 2], dtype="int64"), # mixed in-sample and out-of-sample
]
TEST_FHS = [*TEST_OOS_FHS, *TEST_INS_FHS]

Expand Down Expand Up @@ -104,11 +104,13 @@
("period", "period", False),
("datetime", "int", True),
("datetime", "datetime", False),
("datetime", "timedelta", True),
]

# Maps index-type names to the corresponding pandas index classes; the keys
# are used to parametrize tests over index types.
INDEX_TYPE_LOOKUP = {
"int": pd.Index,
"range": pd.RangeIndex,
"datetime": pd.DatetimeIndex,
"period": pd.PeriodIndex,
"timedelta": pd.TimedeltaIndex,
}
22 changes: 16 additions & 6 deletions sktime/forecasting/tests/test_all_forecasters.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@
# names for index/fh combinations to display in tests
index_fh_comb_names = [f"{x[0]}-{x[1]}-{x[2]}" for x in VALID_INDEX_FH_COMBINATIONS]

# Skip reason passed to pytest.skip by tests in this module when the
# forecasting-horizon type under test is "timedelta".
pytest_skip_msg = (
"ForecastingHorizon with timedelta values "
"is currently experimental and not supported everywhere"
)


class ForecasterFixtureGenerator(BaseFixtureGenerator):
"""Fixture generator for forecasting tests.
Expand Down Expand Up @@ -202,6 +207,9 @@ def test_predict_time_index(
):
"""Check that predicted time index matches forecasting horizon."""
index_type, fh_type, is_relative = index_fh_comb
if fh_type == "timedelta":
pytest.skip(pytest_skip_msg)

y_train = _make_series(
n_columns=n_columns, index_type=index_type, n_timepoints=50
)
Expand All @@ -224,6 +232,8 @@ def test_predict_residuals(
):
"""Check that predict_residuals method works as expected."""
index_type, fh_type, is_relative = index_fh_comb
if fh_type == "timedelta":
pytest.skip(pytest_skip_msg)

y_train = _make_series(
n_columns=n_columns, index_type=index_type, n_timepoints=50
Expand All @@ -250,14 +260,12 @@ def test_predict_residuals(
"fh_int_oos", TEST_OOS_FHS, ids=[f"fh={fh}" for fh in TEST_OOS_FHS]
)
def test_predict_time_index_with_X(
self,
estimator_instance,
n_columns,
index_fh_comb,
fh_int_oos,
self, estimator_instance, n_columns, index_fh_comb, fh_int_oos
):
"""Check that predicted time index matches forecasting horizon."""
index_type, fh_type, is_relative = index_fh_comb
if fh_type == "timedelta":
pytest.skip(pytest_skip_msg)

z, X = make_forecasting_problem(index_type=index_type, make_X=True)

Expand All @@ -284,6 +292,9 @@ def test_predict_time_index_in_sample_full(
):
"""Check that predicted time index equals fh for full in-sample predictions."""
index_type, fh_type, is_relative = index_fh_comb
if fh_type == "timedelta":
pytest.skip(pytest_skip_msg)

y_train = _make_series(n_columns=n_columns, index_type=index_type)
cutoff = y_train.index[-1]
steps = -np.arange(len(y_train))
Expand Down Expand Up @@ -455,7 +466,6 @@ def test_score(self, estimator_instance, n_columns, fh_int_oos):
y_pred = estimator_instance.predict()

fh_idx = check_fh(fh_int_oos).to_indexer() # get zero based index
actual = estimator_instance.score(y_test.iloc[fh_idx], fh=fh_int_oos)
expected = mean_absolute_percentage_error(
y_pred, y_test.iloc[fh_idx], symmetric=True
)
Expand Down
3 changes: 3 additions & 0 deletions sktime/utils/_testing/forecasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ def _make_fh(cutoff, steps, fh_type, is_relative):
if isinstance(steps, (int, np.integer)):
steps = np.array([steps], dtype=int)

elif isinstance(steps, pd.Timedelta):
steps = [steps]

if is_relative:
return ForecastingHorizon(fh_class(steps), is_relative=is_relative)

Expand Down