Skip to content

Commit

Permalink
Change frequency offset based on Pandas version (#754)
Browse files Browse the repository at this point in the history
  • Loading branch information
martinvonk committed May 15, 2024
1 parent 521c239 commit c410865
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 35 deletions.
41 changes: 24 additions & 17 deletions pastas/stats/dutch.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,18 @@
from typing import Optional, Union

from numpy import nan
from pandas import Series, Timedelta, concat, date_range
from packaging.version import parse as parse_version
from pandas import Series, Timedelta
from pandas import __version__ as pd_version
from pandas import concat, date_range

from pastas.timeseries_utils import get_sample
from pastas.typing import Function, TimestampType

pandas_version = parse_version(pd_version)

year_offset = "YE" if pandas_version >= parse_version("2.2.0") else "A"


def q_ghg(
series: Series,
Expand Down Expand Up @@ -101,7 +108,7 @@ def q_gvg(
inspring = _in_spring(series)
if any(inspring):
if by_year:
return series.loc[inspring].resample("A").median().mean()
return series.loc[inspring].resample(year_offset).median().mean()
else:
return series.loc[inspring].median()
else:
Expand All @@ -117,7 +124,7 @@ def ghg(
output: str = "mean",
min_n_meas: int = 16,
min_n_years: int = 8,
year_offset: str = "A-MAR",
year_offset: str = year_offset + "-MAR",
) -> Union[Series, float]:
"""Calculate the 'Gemiddelde Hoogste Grondwaterstand' (Average High
Groundwater Level)
Expand All @@ -144,8 +151,8 @@ def ghg(
Minimum number of measurements per year (at maximum 24).
min_n_years: int, optional
Minimum number of years.
year_offset: resampling offset. Use 'a' for calendar years
(jan 1 to dec 31) and 'a-mar' for hydrological years (apr 1 to mar 31).
year_offset: resampling offset. Use 'YE' ('A' for pandas < 2.2) for calendar years
(jan 1 to dec 31) and 'YE-MAR' ('A-MAR') for hydrological years (apr 1 to mar 31).
Returns
-------
Expand Down Expand Up @@ -205,7 +212,7 @@ def glg(
output: str = "mean",
min_n_meas: int = 16,
min_n_years: int = 8,
year_offset: str = "A-MAR",
year_offset: str = year_offset + "-MAR",
) -> Union[Series, float]:
"""Calculate the 'Gemiddelde Laagste Grondwaterstand' (Average Low GW Level).
Expand All @@ -231,8 +238,8 @@ def glg(
Minimum number of measurements per year (at maximum 24).
min_n_years: int, optional
Minimum number of years.
year_offset: resampling offset. Use 'a' for calendar years
(jan 1 to dec 31) and 'a-mar' for hydrological years (apr 1 to mar 31).
year_offset: resampling offset. Use 'YE' ('A' for pandas < 2.2) for calendar years
(jan 1 to dec 31) and 'YE-MAR' ('A-MAR') for hydrological years (apr 1 to mar 31).
Returns
-------
Expand Down Expand Up @@ -292,7 +299,7 @@ def gvg(
output: str = "mean",
min_n_meas: int = 2,
min_n_years: int = 8,
year_offset: str = "A",
year_offset: str = year_offset,
) -> Union[Series, float]:
"""Calculate the 'Gemiddelde Voorjaars Grondwaterstand' (Average Spring GW Level).
Expand All @@ -318,8 +325,8 @@ def gvg(
Minimum number of measurements per year (at maximum 3).
min_n_years: int, optional
Minimum number of years.
year_offset: resampling offset. Use 'a' for calendar years
(jan 1 to dec 31) and 'a-mar' for hydrological years (apr 1 to mar 31).
year_offset: resampling offset. Use "YE" ("A" for pandas < 2.2) for calendar years
(jan 1 to dec 31) and "YE-MAR" ("A-MAR") for hydrological years (apr 1 to mar 31).
Returns
-------
Expand Down Expand Up @@ -367,7 +374,7 @@ def gg(
output: str = "mean",
min_n_meas: int = 16,
min_n_years: int = 8,
year_offset: str = "A-MAR",
year_offset: str = year_offset + "-MAR",
) -> Union[Series, float]:
"""Calculate the 'Gemiddelde Grondwaterstand' (Average Groundwater Level).
Expand All @@ -393,8 +400,8 @@ def gg(
Minimum number of measurements per year (at maximum 24).
min_n_years: int, optional
Minimum number of years.
year_offset: resampling offset. Use 'a' for calendar years (jan 1 to dec 31) and
'a-mar' for hydrological years (apr 1 to mar 31).
year_offset: resampling offset. Use 'YE' ('A' for pandas < 2.2) for calendar years
(jan 1 to dec 31) and 'YE-MAR' ('A-MAR') for hydrological years (apr 1 to mar 31).
Returns
-------
Expand Down Expand Up @@ -512,8 +519,8 @@ def _gxg(
min_n_years: int
Minimum number of years.
year_offset: string
resampling offset. Use 'a' for calendar years (jan 1 to dec 31) and 'a-mar'
for hydrological years (apr 1 to mar 31)
resampling offset. Use 'YE' ('A' for pandas < 2.2) for calendar years (jan 1 to
dec 31) and 'YE-MAR' ('A-MAR') for hydrological years (apr 1 to mar 31).
Returns
Expand Down Expand Up @@ -652,6 +659,6 @@ def _q_gxg(
series = series.loc[:tmax]
series = series.resample("d").median()
if by_year:
return series.resample("A").apply(lambda s: s.quantile(q)).mean()
return series.resample(year_offset).apply(lambda s: s.quantile(q)).mean()
else:
return series.quantile(q)
31 changes: 21 additions & 10 deletions pastas/stats/signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,22 @@
sqrt,
where,
)
from pandas import DataFrame, DatetimeIndex, Series, Timedelta, concat, cut, to_datetime
from packaging.version import parse as parse_version
from pandas import DataFrame, DatetimeIndex, Series, Timedelta
from pandas import __version__ as pd_version
from pandas import concat, cut, to_datetime
from scipy.optimize import curve_fit
from scipy.stats import linregress

import pastas as ps
from pastas.stats.core import acf

pandas_version = parse_version(pd_version)

year_offset = "YE" if pandas_version >= parse_version("2.2.0") else "A"

month_offset = "ME" if pandas_version >= parse_version("2.2.0") else "M"

__all__ = [
"cv_period_mean",
"cv_date_min",
Expand Down Expand Up @@ -86,7 +95,9 @@ def _normalize(series: Series) -> Series:
return series


def cv_period_mean(series: Series, normalize: bool = False, freq: str = "M") -> float:
def cv_period_mean(
series: Series, normalize: bool = False, freq: str = month_offset
) -> float:
"""Coefficient of variation of the mean head over a period (default monthly).
Parameters
Expand Down Expand Up @@ -340,7 +351,7 @@ def _martens(series: Series, normalize: bool = False) -> Tuple[Series, Series]:
if normalize:
series = _normalize(series)

s = series.resample("M")
s = series.resample(month_offset)
s_min = s.min()
s_max = s.max()
hl = s_min.groupby(s_min.index.year).nsmallest(3).groupby(level=0).mean()
Expand Down Expand Up @@ -440,7 +451,7 @@ def _colwell_components(
bins: int
number of bins to determine the states of the groundwater.
freq: str, optional
frequency to resample the series to. Possible options are "D", "W", or "M".
frequency to resample the series to. Possible options are "D", "W", "M" or "ME".
method: str, optional
Method to use for resampling. Only "mean" is allowed now.
normalize: bool, optional
Expand Down Expand Up @@ -474,7 +485,7 @@ def _colwell_components(
)
df = DataFrame(binned, dtype=float)

if freq == "M":
if freq in ("M", "ME"):
df["time"] = df.index.isocalendar().month
elif freq == "W":
df["time"] = df.index.isocalendar().week
Expand Down Expand Up @@ -1003,7 +1014,7 @@ def reversals_avg(series: Series) -> float:
reversals = (
(series_diff[series_diff != 0.0] > 0).astype(int).diff().replace(-1, 1)
)
return reversals.resample("A").sum().mean()
return reversals.resample(year_offset).sum().mean()


def reversals_cv(series: Series) -> float:
Expand Down Expand Up @@ -1043,7 +1054,7 @@ def reversals_cv(series: Series) -> float:
reversals = (
(series_diff[series_diff != 0.0] > 0).astype(int).diff().replace(-1, 1)
)
annual_sum = reversals.resample("A").sum()
annual_sum = reversals.resample(year_offset).sum()
return annual_sum.std(ddof=1) / annual_sum.mean()


Expand Down Expand Up @@ -1075,7 +1086,7 @@ def mean_annual_maximum(series: Series, normalize: bool = True) -> float:
if normalize:
series = _normalize(series)

return series.resample("A").max().mean()
return series.resample(year_offset).max().mean()


def bimodality_coefficient(series: Series, normalize: bool = True) -> float:
Expand Down Expand Up @@ -1583,7 +1594,7 @@ def baselevel_stability(series: Series, normalize: bool = True, period="30D") ->

_, ht = _baselevel(series, normalize=normalize, period=period)

return ht.resample("A").mean().max() - ht.resample("A").mean().min()
return ht.resample(year_offset).mean().max() - ht.resample(year_offset).mean().min()


def autocorr_time(series: Series, cutoff: float = 0.8, **kwargs) -> float:
Expand Down Expand Up @@ -1747,7 +1758,7 @@ def summary(
--------
>>> idx = date_range("2000", "2010")
>>> data = np.random.rand(len(idx), 3)
>>> df = DataFrame(index=idx, data=data, columns=["A", "B", "C"], dtype=float)
>>> df = DataFrame(index=idx, data=data, columns=["A", "B", "C"], dtype=float)
>>> ps.stats.signatures.summary(df)
"""
Expand Down
10 changes: 8 additions & 2 deletions pastas/stressmodels.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,10 @@
from typing import List, Optional, Tuple, Union

import numpy as np
from pandas import DataFrame, Series, Timedelta, Timestamp, concat, date_range
from packaging.version import parse as parse_version
from pandas import DataFrame, Series, Timedelta, Timestamp
from pandas import __version__ as pd_version
from pandas import concat, date_range
from scipy.signal import fftconvolve

from pastas.typing import (
Expand All @@ -39,6 +42,8 @@
from .timeseries import TimeSeries
from .utils import validate_name

pandas_version = parse_version(pd_version)

logger = getLogger(__name__)

__all__ = [
Expand Down Expand Up @@ -1321,7 +1326,8 @@ def __init__(
# precipitation in the world, then the precipitation is very likely in m/d
# and not in mm/d. In this case a warning is given for nonlinear models.

if self.prec.series.resample("A").sum().max() < 12:
freq_offset = "YE" if pandas_version >= parse_version("2.2.0") else "A"
if self.prec.series.resample(freq_offset).sum().max() < 12:
msg = (
"The maximum annual precipitation is smaller than 12 m/d. Please "
"double-check if the stresses are in mm/d and not in m/d."
Expand Down
4 changes: 2 additions & 2 deletions pastas/timeseries_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,13 @@ def _get_stress_dt(freq: str) -> float:
except:
num = offset.n
freq = offset._prefix
if freq in ["A", "Y", "AS", "YS", "BA", "BY", "BAS", "BYS"]:
if freq in ["A", "Y", "AS", "YS", "YE", "BA", "BY", "BAS", "BYS"]:
# year
dt = num * 365
elif freq in ["BQ", "BQS", "Q", "QS"]:
# quarter
dt = num * 90
elif freq in ["BM", "BMS", "CBM", "CBMS", "M", "MS"]:
elif freq in ["BM", "BMS", "CBM", "CBMS", "M", "MS", "ME"]:
# month
dt = num * 30
elif freq in ["SM", "SMS"]:
Expand Down
3 changes: 2 additions & 1 deletion tests/test_dataset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pytest
from pastas.dataset import list_datasets, load_dataset
from pandas import DataFrame

from pastas.dataset import list_datasets, load_dataset


def test_load_single_csv():
# Test loading a single csv file
Expand Down
6 changes: 3 additions & 3 deletions tests/test_gxg.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_glg(self) -> None:
[x.month + x.day for x in idx],
index=idx,
)
v = ps.stats.glg(s, year_offset="A")
v = ps.stats.glg(s, year_offset="YE")
assert v == 16.0

def test_glg_fill_limit(self) -> None:
Expand All @@ -54,7 +54,7 @@ def test_glg_fill_limit(self) -> None:
fill_method="linear",
limit=15,
output="yearly",
year_offset="A",
year_offset="YE",
min_n_meas=1,
)
assert v.notna().sum() == 2
Expand All @@ -67,7 +67,7 @@ def test_glg_fill_limit_null(self) -> None:
fill_method="linear",
limit=None,
output="yearly",
year_offset="A",
year_offset="YE",
min_n_meas=1,
)
assert v.notna().sum() == 3
Expand Down

0 comments on commit c410865

Please sign in to comment.