From d2aa44f50f6ac4789d4e351e4e52a53a358da42e Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <emailformattr@gmail.com>
Date: Thu, 31 Mar 2022 11:01:23 -0700
Subject: [PATCH] BUG: groupby().rolling(freq) with monotonic dates within
 groups #46065  (#46567)

---
 doc/source/whatsnew/v1.4.2.rst                |  1 +
 pandas/core/window/rolling.py                 | 18 +++++
 pandas/tests/window/test_groupby.py           | 77 +++++++++++++++++++
 pandas/tests/window/test_rolling.py           | 12 ---
 pandas/tests/window/test_timeseries_window.py | 60 ---------------
 5 files changed, 96 insertions(+), 72 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.2.rst b/doc/source/whatsnew/v1.4.2.rst
index 76b2a5d6ffd47..e98e419283508 100644
--- a/doc/source/whatsnew/v1.4.2.rst
+++ b/doc/source/whatsnew/v1.4.2.rst
@@ -32,6 +32,7 @@ Bug fixes
 - Fix some cases for subclasses that define their ``_constructor`` properties as general callables (:issue:`46018`)
 - Fixed "longtable" formatting in :meth:`.Styler.to_latex` when ``column_format`` is given in extended format (:issue:`46037`)
 - Fixed incorrect rendering in :meth:`.Styler.format` with ``hyperlinks="html"`` when the url contains a colon or other special characters (:issue:`46389`)
+- Fixed :meth:`Groupby.rolling` with a frequency window that would raise a ``ValueError`` even if the datetimes within each group were monotonic (:issue:`46061`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
index d4569816f9f7a..ac3d8b3dabb2b 100644
--- a/pandas/core/window/rolling.py
+++ b/pandas/core/window/rolling.py
@@ -2680,3 +2680,21 @@ def _get_window_indexer(self) -> GroupbyIndexer:
             indexer_kwargs=indexer_kwargs,
         )
         return window_indexer
+
+    def _validate_datetimelike_monotonic(self):
+        """
+        Validate that self._on has no NaT and is monotonic within each group
+        """
+        # GH 46061
+        if self._on.hasnans:
+            self._raise_monotonic_error("values must not have NaT")
+        for group_indices in self._grouper.indices.values():
+            group_on = self._on.take(group_indices)
+            if not (
+                group_on.is_monotonic_increasing or group_on.is_monotonic_decreasing
+            ):
+                on = "index" if self.on is None else self.on
+                raise ValueError(
+                    f"Each group within {on} must be monotonic. "
+                    f"Sort the values in {on} first."
+                )
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
index b4d0f6562f2d5..5f4805eaa01d2 100644
--- a/pandas/tests/window/test_groupby.py
+++ b/pandas/tests/window/test_groupby.py
@@ -927,6 +927,83 @@ def test_nan_and_zero_endpoints(self):
         )
         tm.assert_series_equal(result, expected)
 
+    def test_groupby_rolling_non_monotonic(self):
+        # GH 43909
+
+        shuffled = [3, 0, 1, 2]
+        sec = 1_000
+        df = DataFrame(
+            [{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
+        )
+        with pytest.raises(ValueError, match=r".* must be monotonic"):
+            df.groupby("c").rolling(on="t", window="3s")
+
+    def test_groupby_monotonic(self):
+
+        # GH 15130
+        # we don't need to validate monotonicity when grouping
+
+        # GH 43909 / GH 46061: "date" is sorted here, so it is monotonic
+        # within every group and rolling must succeed rather than raise.
+
+        data = [
+            ["David", "1/1/2015", 100],
+            ["David", "1/5/2015", 500],
+            ["David", "5/30/2015", 50],
+            ["David", "7/25/2015", 50],
+            ["Ryan", "1/4/2014", 100],
+            ["Ryan", "1/19/2015", 500],
+            ["Ryan", "3/31/2016", 50],
+            ["Joe", "7/1/2015", 100],
+            ["Joe", "9/9/2015", 500],
+            ["Joe", "10/15/2015", 50],
+        ]
+
+        df = DataFrame(data=data, columns=["name", "date", "amount"])
+        df["date"] = to_datetime(df["date"])
+        df = df.sort_values("date")
+
+        expected = (
+            df.set_index("date")
+            .groupby("name")
+            .apply(lambda x: x.rolling("180D")["amount"].sum())
+        )
+        result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
+        tm.assert_series_equal(result, expected)
+
+    def test_datelike_on_monotonic_within_each_group(self):
+        # GH 13966 (similar to #15130, closed by #15175)
+
+        # superseded by 43909
+        # GH 46061: OK if the on is monotonic relative to each group
+
+        dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
+        df = DataFrame(
+            {
+                "A": [1] * 20 + [2] * 12 + [3] * 8,
+                "B": np.concatenate((dates, dates)),
+                "C": np.arange(40),
+            }
+        )
+
+        expected = (
+            df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
+        )
+        result = df.groupby("A").rolling("4s", on="B").C.mean()
+        tm.assert_series_equal(result, expected)
+
+    def test_datelike_on_not_monotonic_within_each_group(self):
+        # GH 46061
+        df = DataFrame(
+            {
+                "A": [1] * 3 + [2] * 3,
+                "B": [Timestamp(year, 1, 1) for year in [2020, 2021, 2019]] * 2,
+                "C": range(6),
+            }
+        )
+        with pytest.raises(ValueError, match="Each group within B must be monotonic."):
+            df.groupby("A").rolling("365D", on="B")
+
 
 class TestExpanding:
     def setup_method(self):
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py
index 7e9bc121f06ff..89c90836ae957 100644
--- a/pandas/tests/window/test_rolling.py
+++ b/pandas/tests/window/test_rolling.py
@@ -1456,18 +1456,6 @@ def test_groupby_rolling_nan_included():
     tm.assert_frame_equal(result, expected)
 
 
-def test_groupby_rolling_non_monotonic():
-    # GH 43909
-
-    shuffled = [3, 0, 1, 2]
-    sec = 1_000
-    df = DataFrame(
-        [{"t": Timestamp(2 * x * sec), "x": x + 1, "c": 42} for x in shuffled]
-    )
-    with pytest.raises(ValueError, match=r".* must be monotonic"):
-        df.groupby("c").rolling(on="t", window="3s")
-
-
 @pytest.mark.parametrize("method", ["skew", "kurt"])
 def test_rolling_skew_kurt_numerical_stability(method):
     # GH#6929
diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py
index ee28b27b17365..907c654570273 100644
--- a/pandas/tests/window/test_timeseries_window.py
+++ b/pandas/tests/window/test_timeseries_window.py
@@ -9,7 +9,6 @@
     Series,
     Timestamp,
     date_range,
-    to_datetime,
 )
 import pandas._testing as tm
 
@@ -649,65 +648,6 @@ def agg_by_day(x):
 
         tm.assert_frame_equal(result, expected)
 
-    def test_groupby_monotonic(self):
-
-        # GH 15130
-        # we don't need to validate monotonicity when grouping
-
-        # GH 43909 we should raise an error here to match
-        # behaviour of non-groupby rolling.
-
-        data = [
-            ["David", "1/1/2015", 100],
-            ["David", "1/5/2015", 500],
-            ["David", "5/30/2015", 50],
-            ["David", "7/25/2015", 50],
-            ["Ryan", "1/4/2014", 100],
-            ["Ryan", "1/19/2015", 500],
-            ["Ryan", "3/31/2016", 50],
-            ["Joe", "7/1/2015", 100],
-            ["Joe", "9/9/2015", 500],
-            ["Joe", "10/15/2015", 50],
-        ]
-
-        df = DataFrame(data=data, columns=["name", "date", "amount"])
-        df["date"] = to_datetime(df["date"])
-        df = df.sort_values("date")
-
-        expected = (
-            df.set_index("date")
-            .groupby("name")
-            .apply(lambda x: x.rolling("180D")["amount"].sum())
-        )
-        result = df.groupby("name").rolling("180D", on="date")["amount"].sum()
-        tm.assert_series_equal(result, expected)
-
-    def test_non_monotonic_raises(self):
-        # GH 13966 (similar to #15130, closed by #15175)
-
-        # superseded by 43909
-
-        dates = date_range(start="2016-01-01 09:30:00", periods=20, freq="s")
-        df = DataFrame(
-            {
-                "A": [1] * 20 + [2] * 12 + [3] * 8,
-                "B": np.concatenate((dates, dates)),
-                "C": np.arange(40),
-            }
-        )
-
-        expected = (
-            df.set_index("B").groupby("A").apply(lambda x: x.rolling("4s")["C"].mean())
-        )
-        with pytest.raises(ValueError, match=r".* must be monotonic"):
-            df.groupby("A").rolling(
-                "4s", on="B"
-            ).C.mean()  # should raise for non-monotonic t series
-
-        df2 = df.sort_values("B")
-        result = df2.groupby("A").rolling("4s", on="B").C.mean()
-        tm.assert_series_equal(result, expected)
-
     def test_rolling_cov_offset(self):
         # GH16058