Change frequency offset based on Pandas version (#754)

pastas · May 15, 2024 · c410865 · c410865
1 parent 521c239
commit c410865
Show file tree

Hide file tree

Showing 6 changed files with 60 additions and 35 deletions.
diff --git a/pastas/stats/dutch.py b/pastas/stats/dutch.py
@@ -8,11 +8,18 @@
 from typing import Optional, Union
 
 from numpy import nan
-from pandas import Series, Timedelta, concat, date_range
+from packaging.version import parse as parse_version
+from pandas import Series, Timedelta
+from pandas import __version__ as pd_version
+from pandas import concat, date_range
 
 from pastas.timeseries_utils import get_sample
 from pastas.typing import Function, TimestampType
 
+pandas_version = parse_version(pd_version)
+
+year_offset = "YE" if pandas_version >= parse_version("2.2.0") else "A"
+
 
 def q_ghg(
     series: Series,
@@ -101,7 +108,7 @@ def q_gvg(
     inspring = _in_spring(series)
     if any(inspring):
         if by_year:
-            return series.loc[inspring].resample("A").median().mean()
+            return series.loc[inspring].resample(year_offset).median().mean()
         else:
             return series.loc[inspring].median()
     else:
@@ -117,7 +124,7 @@ def ghg(
     output: str = "mean",
     min_n_meas: int = 16,
     min_n_years: int = 8,
-    year_offset: str = "A-MAR",
+    year_offset: str = year_offset + "-MAR",
 ) -> Union[Series, float]:
     """Calculate the 'Gemiddelde Hoogste Grondwaterstand' (Average High
     Groundwater Level)
@@ -144,8 +151,8 @@ def ghg(
         Minimum number of measurements per year (at maximum 24).
     min_n_years: int, optional
         Minimum number of years.
-    year_offset: resampling offset. Use 'a' for calendar years
-        (jan 1 to dec 31) and 'a-mar' for hydrological years (apr 1 to mar 31).
+    year_offset: resampling offset. Use 'YE' ('A' on pandas < 2.2) for calendar years
+        (jan 1 to dec 31), 'YE-MAR' ('A-MAR') for hydrological years (apr 1 to mar 31).
 
     Returns
     -------
@@ -205,7 +212,7 @@ def glg(
     output: str = "mean",
     min_n_meas: int = 16,
     min_n_years: int = 8,
-    year_offset: str = "A-MAR",
+    year_offset: str = year_offset + "-MAR",
 ) -> Union[Series, float]:
     """Calculate the 'Gemiddelde Laagste Grondwaterstand' (Average Low GW Level).
 
@@ -231,8 +238,8 @@ def glg(
         Minimum number of measurements per year (at maximum 24).
     min_n_years: int, optional
         Minimum number of years.
-    year_offset: resampling offset. Use 'a' for calendar years
-        (jan 1 to dec 31) and 'a-mar' for hydrological years (apr 1 to mar 31).
+    year_offset: resampling offset. Use 'YE' ('A' on pandas < 2.2) for calendar years
+        (jan 1 to dec 31), 'YE-MAR' ('A-MAR') for hydrological years (apr 1 to mar 31).
 
     Returns
     -------
@@ -292,7 +299,7 @@ def gvg(
     output: str = "mean",
     min_n_meas: int = 2,
     min_n_years: int = 8,
-    year_offset: str = "A",
+    year_offset: str = year_offset,
 ) -> Union[Series, float]:
     """Calculate the 'Gemiddelde Voorjaars Grondwaterstand' (Average Spring GW Level).
 
@@ -318,8 +325,8 @@ def gvg(
         Minimum number of measurements per year (at maximum 3).
     min_n_years: int, optional
         Minimum number of years.
-    year_offset: resampling offset. Use 'a' for calendar years
-        (jan 1 to dec 31) and 'a-mar' for hydrological years (apr 1 to mar 31).
+    year_offset: resampling offset. Use 'YE' ('A' on pandas < 2.2) for calendar years
+        (jan 1 to dec 31), 'YE-MAR' ('A-MAR') for hydrological years (apr 1 to mar 31).
 
     Returns
     -------
@@ -367,7 +374,7 @@ def gg(
     output: str = "mean",
     min_n_meas: int = 16,
     min_n_years: int = 8,
-    year_offset: str = "A-MAR",
+    year_offset: str = year_offset + "-MAR",
 ) -> Union[Series, float]:
     """Calculate the 'Gemiddelde Grondwaterstand' (Average Groundwater Level).
 
@@ -393,8 +400,8 @@ def gg(
         Minimum number of measurements per year (at maximum 24).
     min_n_years: int, optional
         Minimum number of years.
-    year_offset: resampling offset. Use 'a' for calendar years (jan 1 to dec 31) and
-    'a-mar' for hydrological years (apr 1 to mar 31).
+    year_offset: resampling offset. Use 'YE' ('A' on pandas < 2.2) for calendar years
+        (jan 1 to dec 31), 'YE-MAR' ('A-MAR') for hydrological years (apr 1 to mar 31).
 
     Returns
     -------
@@ -512,8 +519,8 @@ def _gxg(
     min_n_years: int
         Minimum number of years.
     year_offset: string
-        resampling offset. Use 'a' for calendar years (jan 1 to dec 31) and 'a-mar'
-        for hydrological years (apr 1 to mar 31)
+        resampling offset. Use 'YE' ('A' on pandas < 2.2) for calendar years (jan 1 to
+        dec 31) and 'YE-MAR' ('A-MAR') for hydrological years (apr 1 to mar 31).
 
 
     Returns
@@ -652,6 +659,6 @@ def _q_gxg(
         series = series.loc[:tmax]
     series = series.resample("d").median()
     if by_year:
-        return series.resample("A").apply(lambda s: s.quantile(q)).mean()
+        return series.resample(year_offset).apply(lambda s: s.quantile(q)).mean()
     else:
         return series.quantile(q)
diff --git a/pastas/stats/signatures.py b/pastas/stats/signatures.py
@@ -23,13 +23,22 @@
     sqrt,
     where,
 )
-from pandas import DataFrame, DatetimeIndex, Series, Timedelta, concat, cut, to_datetime
+from packaging.version import parse as parse_version
+from pandas import DataFrame, DatetimeIndex, Series, Timedelta
+from pandas import __version__ as pd_version
+from pandas import concat, cut, to_datetime
 from scipy.optimize import curve_fit
 from scipy.stats import linregress
 
 import pastas as ps
 from pastas.stats.core import acf
 
+pandas_version = parse_version(pd_version)
+
+year_offset = "YE" if pandas_version >= parse_version("2.2.0") else "A"
+
+month_offset = "ME" if pandas_version >= parse_version("2.2.0") else "M"
+
 __all__ = [
     "cv_period_mean",
     "cv_date_min",
@@ -86,7 +95,9 @@ def _normalize(series: Series) -> Series:
     return series
 
 
-def cv_period_mean(series: Series, normalize: bool = False, freq: str = "M") -> float:
+def cv_period_mean(
+    series: Series, normalize: bool = False, freq: str = month_offset
+) -> float:
     """Coefficient of variation of the mean head over a period (default monthly).
 
     Parameters
@@ -340,7 +351,7 @@ def _martens(series: Series, normalize: bool = False) -> Tuple[Series, Series]:
     if normalize:
         series = _normalize(series)
 
-    s = series.resample("M")
+    s = series.resample(month_offset)
     s_min = s.min()
     s_max = s.max()
     hl = s_min.groupby(s_min.index.year).nsmallest(3).groupby(level=0).mean()
@@ -440,7 +451,7 @@ def _colwell_components(
     bins: int
         number of bins to determine the states of the groundwater.
     freq: str, optional
-        frequency to resample the series to. Possible options are "D", "W", or "M".
+        frequency to resample the series to. Possible options are "D", "W", "M" or "ME".
     method: str, optional
         Method to use for resampling. Only "mean" is allowed now.
     normalize: bool, optional
@@ -474,7 +485,7 @@ def _colwell_components(
     )
     df = DataFrame(binned, dtype=float)
 
-    if freq == "M":
+    if freq in ("M", "ME"):
         df["time"] = df.index.isocalendar().month
     elif freq == "W":
         df["time"] = df.index.isocalendar().week
@@ -1003,7 +1014,7 @@ def reversals_avg(series: Series) -> float:
         reversals = (
             (series_diff[series_diff != 0.0] > 0).astype(int).diff().replace(-1, 1)
         )
-        return reversals.resample("A").sum().mean()
+        return reversals.resample(year_offset).sum().mean()
 
 
 def reversals_cv(series: Series) -> float:
@@ -1043,7 +1054,7 @@ def reversals_cv(series: Series) -> float:
         reversals = (
             (series_diff[series_diff != 0.0] > 0).astype(int).diff().replace(-1, 1)
         )
-        annual_sum = reversals.resample("A").sum()
+        annual_sum = reversals.resample(year_offset).sum()
         return annual_sum.std(ddof=1) / annual_sum.mean()
 
 
@@ -1075,7 +1086,7 @@ def mean_annual_maximum(series: Series, normalize: bool = True) -> float:
     if normalize:
         series = _normalize(series)
 
-    return series.resample("A").max().mean()
+    return series.resample(year_offset).max().mean()
 
 
 def bimodality_coefficient(series: Series, normalize: bool = True) -> float:
@@ -1583,7 +1594,7 @@ def baselevel_stability(series: Series, normalize: bool = True, period="30D") ->
 
     _, ht = _baselevel(series, normalize=normalize, period=period)
 
-    return ht.resample("A").mean().max() - ht.resample("A").mean().min()
+    return ht.resample(year_offset).mean().max() - ht.resample(year_offset).mean().min()
 
 
 def autocorr_time(series: Series, cutoff: float = 0.8, **kwargs) -> float:
@@ -1747,7 +1758,7 @@ def summary(
     --------
     >>> idx = date_range("2000", "2010")
     >>> data = np.random.rand(len(idx), 3)
-    >>> df = DataFrame(index=idx, data=data, columns=["A", "B", "C"], dtype=float)
+    >>> df = DataFrame(index=idx, data=data, columns=["A", "B", "C"], dtype=float)
     >>> ps.stats.signatures.summary(df)
 
     """

diff --git a/pastas/stressmodels.py b/pastas/stressmodels.py
@@ -21,7 +21,10 @@
 from typing import List, Optional, Tuple, Union
 
 import numpy as np
-from pandas import DataFrame, Series, Timedelta, Timestamp, concat, date_range
+from packaging.version import parse as parse_version
+from pandas import DataFrame, Series, Timedelta, Timestamp
+from pandas import __version__ as pd_version
+from pandas import concat, date_range
 from scipy.signal import fftconvolve
 
 from pastas.typing import (
@@ -39,6 +42,8 @@
 from .timeseries import TimeSeries
 from .utils import validate_name
 
+pandas_version = parse_version(pd_version)
+
 logger = getLogger(__name__)
 
 __all__ = [
@@ -1321,7 +1326,8 @@ def __init__(
             # precipitation in the world, then the precipitation is very likely in m/d
             # and not in mm/d. In this case a warning is given for nonlinear models.
 
-            if self.prec.series.resample("A").sum().max() < 12:
+            freq_offset = "YE" if pandas_version >= parse_version("2.2.0") else "A"
+            if self.prec.series.resample(freq_offset).sum().max() < 12:
                 msg = (
                     "The maximum annual precipitation is smaller than 12 m/d. Please "
                     "double-check if the stresses are in mm/d and not in m/d."

diff --git a/pastas/timeseries_utils.py b/pastas/timeseries_utils.py
@@ -93,13 +93,13 @@ def _get_stress_dt(freq: str) -> float:
     except:
         num = offset.n
         freq = offset._prefix
-        if freq in ["A", "Y", "AS", "YS", "BA", "BY", "BAS", "BYS"]:
+        if freq in ["A", "Y", "AS", "YS", "YE", "BA", "BY", "BAS", "BYS"]:
             # year
             dt = num * 365
         elif freq in ["BQ", "BQS", "Q", "QS"]:
             # quarter
             dt = num * 90
-        elif freq in ["BM", "BMS", "CBM", "CBMS", "M", "MS"]:
+        elif freq in ["BM", "BMS", "CBM", "CBMS", "M", "MS", "ME"]:
             # month
             dt = num * 30
         elif freq in ["SM", "SMS"]:

diff --git a/tests/test_dataset.py b/tests/test_dataset.py
@@ -1,7 +1,8 @@
 import pytest
-from pastas.dataset import list_datasets, load_dataset
 from pandas import DataFrame
 
+from pastas.dataset import list_datasets, load_dataset
+
 
 def test_load_single_csv():
     # Test loading a single csv file

diff --git a/tests/test_gxg.py b/tests/test_gxg.py
@@ -43,7 +43,7 @@ def test_glg(self) -> None:
             [x.month + x.day for x in idx],
             index=idx,
         )
-        v = ps.stats.glg(s, year_offset="A")
+        v = ps.stats.glg(s, year_offset="YE")
         assert v == 16.0
 
     def test_glg_fill_limit(self) -> None:
@@ -54,7 +54,7 @@ def test_glg_fill_limit(self) -> None:
             fill_method="linear",
             limit=15,
             output="yearly",
-            year_offset="A",
+            year_offset="YE",
             min_n_meas=1,
         )
         assert v.notna().sum() == 2
@@ -67,7 +67,7 @@ def test_glg_fill_limit_null(self) -> None:
             fill_method="linear",
             limit=None,
             output="yearly",
-            year_offset="A",
+            year_offset="YE",
             min_n_meas=1,
         )
         assert v.notna().sum() == 3