From f55f514bfaa83fb0c23223d07c8242214e3d3611 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 26 Nov 2020 22:35:58 +0100 Subject: [PATCH 1/7] BUG: fix wrong error message in deprecated 2D indexing of Series with datetime values --- pandas/core/series.py | 5 +++-- pandas/tests/series/indexing/test_getitem.py | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index d493ac0a8c051..56c95ca8b238f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -901,10 +901,11 @@ def _get_values_tuple(self, key): def _get_values(self, indexer): try: return self._constructor(self._mgr.get_slice(indexer)).__finalize__(self) - except ValueError: + except (ValueError, AssertionError): # mpl compat if we look up e.g. ser[:, np.newaxis]; # see tests.series.timeseries.test_mpl_compat_hack - return self._values[indexer] + # the asarray is needed to avoid returning a 2D DatetimeArray + return np.asarray(self._values[indexer]) def _get_value(self, label, takeable: bool = False): """ diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 3686337141420..714afb0956c19 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -389,10 +389,19 @@ def test_getitem_generator(string_series): tm.assert_series_equal(result2, expected) -def test_getitem_ndim_deprecated(): - s = Series([0, 1]) +@pytest.mark.parametrize( + "series", + [ + Series([0, 1]), + Series(date_range("2012-01-01", periods=2)), + Series(date_range("2012-01-01", periods=2, tz="CET")), + ], +) +def test_getitem_ndim_deprecated(series): with tm.assert_produces_warning(FutureWarning): - s[:, None] + res = series[:, None] + + assert isinstance(res, np.ndarray) def test_getitem_multilevel_scalar_slice_not_implemented( From a2cf40f8db0084c7d78f78f4e32edf4b95625638 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 26 Nov 2020 22:50:07 +0100 Subject: [PATCH 2/7] avoid catching AssertionError --- pandas/core/internals/blocks.py | 18 ++++++++++++++++++ pandas/core/series.py | 2 +- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f6ff38201fdfa..f41d70b7d61ff 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2382,6 +2382,24 @@ def quantile(self, qs, interpolation="linear", axis=0): aware = self._holder(res_blk.values.ravel(), dtype=self.dtype) return self.make_block_same_class(aware, ndim=res_blk.ndim) + def _check_ndim(self, values, ndim): + """ + ndim inference and validation. + + This is overriden by the DatetimeTZBlock to check the case of 2D + data (values.ndim == 2), which should only be allowed if ndim is + also 2. + """ + if ndim is None: + ndim = values.ndim + + if values.ndim > ndim: + raise ValueError( + "Wrong number of dimensions. " + f"values.ndim != ndim [{values.ndim} != {ndim}]" + ) + return ndim + class TimeDeltaBlock(DatetimeLikeBlockMixin): __slots__ = () diff --git a/pandas/core/series.py b/pandas/core/series.py index 56c95ca8b238f..86de34b87ffeb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -901,7 +901,7 @@ def _get_values_tuple(self, key): def _get_values(self, indexer): try: return self._constructor(self._mgr.get_slice(indexer)).__finalize__(self) - except (ValueError, AssertionError): + except ValueError: # mpl compat if we look up e.g. ser[:, np.newaxis]; # see tests.series.timeseries.test_mpl_compat_hack # the asarray is needed to avoid returning a 2D DatetimeArray From 95df80a1e072397e0a4e04dbd369bb945ee731c3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 27 Nov 2020 21:10:25 +0100 Subject: [PATCH 3/7] clarify comment --- pandas/core/internals/blocks.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4a3cf78f5d11e..e6d67ccd0c0b7 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2401,6 +2401,10 @@ def _check_ndim(self, values, ndim): This is overriden by the DatetimeTZBlock to check the case of 2D data (values.ndim == 2), which should only be allowed if ndim is also 2. + The case of 1D array is still allowed with both ndim of 1 or 2, as + if the case for other EAs. Therefore, we are only checking + `values.ndim > ndim` instead of `values.ndim != ndim` as for + consolidated blocks. """ if ndim is None: ndim = values.ndim From df0f6a24787c66e9aa263dbf84f50d563cf3933b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 30 Nov 2020 10:38:30 +0100 Subject: [PATCH 4/7] update test --- pandas/tests/series/indexing/test_getitem.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 714afb0956c19..482a938b9246c 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -398,10 +398,13 @@ def test_getitem_generator(string_series): ], ) def test_getitem_ndim_deprecated(series): - with tm.assert_produces_warning(FutureWarning): - res = series[:, None] + with tm.assert_produces_warning( + FutureWarning, match="Support for multi-dimensional indexing" + ): + result = series[:, None] - assert isinstance(res, np.ndarray) + assert isinstance(result, np.ndarray) + tm.assert_numpy_array_equal(result, np.asarray(series)[:, None]) def test_getitem_multilevel_scalar_slice_not_implemented( From 61e7731db962b330c3f488a7fcf2b3ed12399dd8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 30 Nov 2020 20:25:14 +0100 Subject: [PATCH 5/7] fixup --- pandas/tests/series/indexing/test_getitem.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index 482a938b9246c..b4c30cb6d4cd2 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -403,8 +403,8 @@ def test_getitem_ndim_deprecated(series): ): result = series[:, None] - assert isinstance(result, np.ndarray) - tm.assert_numpy_array_equal(result, np.asarray(series)[:, None]) + expected = np.asarray(series)[:, None] + tm.assert_numpy_array_equal(result, expected) def test_getitem_multilevel_scalar_slice_not_implemented( From 6391f54276a2ae305a1330dcac21344f73da241b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 30 Nov 2020 20:29:16 +0100 Subject: [PATCH 6/7] add whatsnew --- doc/source/whatsnew/v1.1.5.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index d0a935bfb4e32..53e7db492d8bb 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -24,6 +24,7 @@ Fixed regressions - Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`) - Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`). - Fixed performance regression for :meth:`DataFrame.__setitem__` with list-like indexers (:issue:`37954`) +- Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`) - Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`) .. --------------------------------------------------------------------------- From e44bf5340d032908f751b68b2bc7765652caa198 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 30 Nov 2020 20:31:55 +0100 Subject: [PATCH 7/7] Revert "add whatsnew" - wrong PR This reverts commit 6391f54276a2ae305a1330dcac21344f73da241b. --- doc/source/whatsnew/v1.1.5.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index 53e7db492d8bb..d0a935bfb4e32 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -24,7 +24,6 @@ Fixed regressions - Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`) - Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`). - Fixed performance regression for :meth:`DataFrame.__setitem__` with list-like indexers (:issue:`37954`) -- Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`) - Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`) .. ---------------------------------------------------------------------------