Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Extract cached ForecastingHorizon methods to functions and avoid B019 error #2364

Merged
merged 5 commits into from
Apr 1, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
176 changes: 110 additions & 66 deletions sktime/forecasting/base/_fh.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,6 @@ def to_numpy(self, **kwargs) -> np.ndarray:
"""
return self.to_pandas().to_numpy(**kwargs)

# We cache the results from `to_relative()` and `to_absolute()` calls to speed up
# computations, as these are the basic methods and often required internally when
# calling different methods.
@lru_cache(typed=True)
def to_relative(self, cutoff=None):
"""Return forecasting horizon values relative to a cutoff.

Expand All @@ -277,49 +273,8 @@ def to_relative(self, cutoff=None):
fh : ForecastingHorizon
Relative representation of forecasting horizon.
"""
if self.is_relative:
return self._new()
return _to_relative(fh=self, cutoff=cutoff)

else:
absolute = self.to_pandas()
_check_cutoff(cutoff, absolute)

# We cannot use the freq from the ForecastingHorizon itself (or its
# wrapped pd.DatetimeIndex) because it may be none for non-regular
# indices, so instead we use the freq of cutoff.
freq = _get_freq(cutoff)

if isinstance(absolute, pd.DatetimeIndex):
# coerce to pd.Period for reliable arithmetics and computations of
# time deltas
absolute = _coerce_to_period(absolute, freq)
cutoff = _coerce_to_period(cutoff, freq)

# TODO: Replace the following line if the bug in pandas is fixed
# and its version is restricted in sktime dependencies
# Compute relative values
# The following line circumvents the bug in pandas
# periods = pd.period_range(start="2021-01-01", periods=3, freq="2H")
# periods - periods[0]
# Out: Index([<0 * Hours>, <4 * Hours>, <8 * Hours>], dtype = 'object')
# [v - periods[0] for v in periods]
# Out: Index([<0 * Hours>, <2 * Hours>, <4 * Hours>], dtype='object')
# TODO: v0.12.0: Check if this comment below can be removed,
# so check if pandas has released the fix to PyPI:
# This bug was reported: https://github.com/pandas-dev/pandas/issues/45999
# and fixed: https://github.com/pandas-dev/pandas/pull/46006
# Most likely it will be released with pandas 1.5
# Once the bug is fixed the line should simply be:
# relative = absolute - cutoff
relative = pd.Index([date - cutoff for date in absolute])

# Coerce durations (time deltas) into integer values for given frequency
if isinstance(absolute, (pd.PeriodIndex, pd.DatetimeIndex)):
relative = _coerce_duration_to_int(relative, freq=freq)

return self._new(relative, is_relative=True)

@lru_cache(typed=True)
def to_absolute(self, cutoff):
"""Return absolute version of forecasting horizon values.

Expand All @@ -334,26 +289,7 @@ def to_absolute(self, cutoff):
fh : ForecastingHorizon
Absolute representation of forecasting horizon.
"""
if not self.is_relative:
return self._new()

else:
relative = self.to_pandas()
_check_cutoff(cutoff, relative)
is_timestamp = isinstance(cutoff, pd.Timestamp)

if is_timestamp:
# coerce to pd.Period for reliable arithmetic operations and
# computations of time deltas
cutoff = _coerce_to_period(cutoff, freq=cutoff.freqstr)

absolute = cutoff + relative

if is_timestamp:
# coerce back to DatetimeIndex after operation
absolute = absolute.to_timestamp(cutoff.freqstr)

return self._new(absolute, is_relative=False)
return _to_absolute(fh=self, cutoff=cutoff)

def to_absolute_int(self, start, cutoff=None):
"""Return absolute values as zero-based integer index starting from `start`.
Expand Down Expand Up @@ -516,6 +452,114 @@ def __repr__(self):
return f"{class_name}({pandas_repr}, is_relative={self.is_relative})"


# This function needs to be outside ForecastingHorizon
# since the lru_cache decorator has known, problematic interactions
# with object methods, see B019 error of flake8-bugbear for a detail explanation.
# See more here: https://github.com/alan-turing-institute/sktime/issues/2338
# We cache the results from `to_relative()` and `to_absolute()` calls to speed up
# computations, as these are the basic methods and often required internally when
# calling different methods.
@lru_cache(typed=True)
def _to_relative(fh: ForecastingHorizon, cutoff=None) -> ForecastingHorizon:
"""Return forecasting horizon values relative to a cutoff.

Parameters
----------
fh : ForecastingHorizon
cutoff : pd.Period, pd.Timestamp, int, optional (default=None)
Cutoff value required to convert a relative forecasting
horizon to an absolute one (and vice versa).

Returns
-------
fh : ForecastingHorizon
Relative representation of forecasting horizon.
"""
if fh.is_relative:
return fh._new()

else:
absolute = fh.to_pandas()
_check_cutoff(cutoff, absolute)

# We cannot use the freq from the ForecastingHorizon itself (or its
# wrapped pd.DatetimeIndex) because it may be none for non-regular
# indices, so instead we use the freq of cutoff.
freq = _get_freq(cutoff)

if isinstance(absolute, pd.DatetimeIndex):
# coerce to pd.Period for reliable arithmetics and computations of
# time deltas
absolute = _coerce_to_period(absolute, freq)
cutoff = _coerce_to_period(cutoff, freq)

# TODO: Replace when we upgrade our lower pandas bound
# to a version where this is fixed
# Compute relative values
# The following line circumvents the bug in pandas
# periods = pd.period_range(start="2021-01-01", periods=3, freq="2H")
# periods - periods[0]
# Out: Index([<0 * Hours>, <4 * Hours>, <8 * Hours>], dtype = 'object')
# [v - periods[0] for v in periods]
# Out: Index([<0 * Hours>, <2 * Hours>, <4 * Hours>], dtype='object')
# TODO: v0.12.0: Check if this comment below can be removed,
# so check if pandas has released the fix to PyPI:
# This bug was reported: https://github.com/pandas-dev/pandas/issues/45999
# and fixed: https://github.com/pandas-dev/pandas/pull/46006
# Most likely it will be released with pandas 1.5
# Once the bug is fixed the line should simply be:
# relative = absolute - cutoff
relative = pd.Index([date - cutoff for date in absolute])

# Coerce durations (time deltas) into integer values for given frequency
if isinstance(absolute, (pd.PeriodIndex, pd.DatetimeIndex)):
relative = _coerce_duration_to_int(relative, freq=freq)

return fh._new(relative, is_relative=True)


# This function needs to be outside ForecastingHorizon
# since the lru_cache decorator has known, problematic interactions
# with object methods, see B019 error of flake8-bugbear for a detail explanation.
# See more here: https://github.com/alan-turing-institute/sktime/issues/2338
@lru_cache(typed=True)
def _to_absolute(fh: ForecastingHorizon, cutoff) -> ForecastingHorizon:
"""Return absolute version of forecasting horizon values.

Parameters
----------
fh : ForecastingHorizon
cutoff : pd.Period, pd.Timestamp, int
Cutoff value is required to convert a relative forecasting
horizon to an absolute one (and vice versa).

Returns
-------
fh : ForecastingHorizon
Absolute representation of forecasting horizon.
"""
if not fh.is_relative:
return fh._new()

else:
relative = fh.to_pandas()
_check_cutoff(cutoff, relative)
is_timestamp = isinstance(cutoff, pd.Timestamp)

if is_timestamp:
# coerce to pd.Period for reliable arithmetic operations and
# computations of time deltas
cutoff = _coerce_to_period(cutoff, freq=cutoff.freqstr)

absolute = cutoff + relative

if is_timestamp:
# coerce back to DatetimeIndex after operation
absolute = absolute.to_timestamp(cutoff.freqstr)

return fh._new(absolute, is_relative=False)


def _check_cutoff(cutoff, index):
"""Check if the cutoff is valid based on time index of forecasting horizon.

Expand Down