Skip to content

Commit

Permalink
Backport PR #52850 on branch 2.0.x (REGR: SeriesGroupBy.agg with multiple categoricals, as_index=False, and a list fails) (#52854)
Browse files Browse the repository at this point in the history

* REGR: SeriesGroupBy.agg with multiple categoricals, as_index=False, and a list fails (#52850)

(cherry picked from commit 2ac0da4)

* Fixup for 2.0.x

---------

Co-authored-by: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com>
  • Loading branch information
phofl and rhshadrach committed Apr 22, 2023
1 parent d220466 commit 3af68dc
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 3 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrame.sort_values` not resetting index when :class:`DataFrame` is already sorted and ``ignore_index=True`` (:issue:`52553`)
- Fixed regression in :meth:`MultiIndex.isin` raising ``TypeError`` for ``Generator`` (:issue:`52568`)
- Fixed regression in :meth:`Series.describe` showing ``RuntimeWarning`` for extension dtype :class:`Series` with one element (:issue:`52515`)
- Fixed regression in :meth:`SeriesGroupBy.agg` failing when grouping with categorical data, multiple groupings, ``as_index=False``, and a list of aggregations (:issue:`52760`)

.. ---------------------------------------------------------------------------
.. _whatsnew_201.bug_fixes:
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,7 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
assert columns is not None # for mypy
ret.columns = columns
if not self.as_index:
ret = self._insert_inaxis_grouper(ret)
ret.index = default_index(len(ret))
ret = ret.reset_index()
return ret

else:
Expand Down Expand Up @@ -328,7 +327,6 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
output = self.obj._constructor_expanddim(indexed_output, index=None)
output.columns = Index(key.label for key in results)

output = self._reindex_output(output)
return output

def _wrap_applied_output(
Expand Down
48 changes: 48 additions & 0 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
qcut,
)
import pandas._testing as tm
from pandas.core.groupby.generic import SeriesGroupBy
from pandas.tests.groupby import get_groupby_method_args


Expand Down Expand Up @@ -2007,3 +2008,50 @@ def test_many_categories(as_index, sort, index_kind, ordered):
expected = DataFrame({"a": Series(index), "b": data})

tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("test_series", [True, False])
@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
def test_agg_list(request, as_index, observed, reduction_func, test_series, keys):
    # GH#52760
    # Aggregating with a one-element list of reductions must give the same
    # values as calling the reduction directly, once the directly-computed
    # result has been reshaped to the frame layout that ``agg`` produces.

    # Decide up front whether this parameter combination is skipped or
    # expected to fail.
    xfail_reason = None
    if reduction_func == "corrwith":
        if test_series:
            assert not hasattr(SeriesGroupBy, "corrwith")
            pytest.skip("corrwith not implemented for SeriesGroupBy")
        xfail_reason = "GH#32293: attempts to call SeriesGroupBy.corrwith"
    elif reduction_func == "nunique" and not (
        test_series or len(keys) == 1 or observed or as_index
    ):
        xfail_reason = "GH#52848 - raises a ValueError"
    if xfail_reason is not None:
        request.node.add_marker(pytest.mark.xfail(reason=xfail_reason))

    # Two categorical key columns plus one value column; the second key is
    # dropped when it is not part of the grouping.
    frame = DataFrame({"a1": [0, 0, 1], "a2": [2, 3, 3], "b": [4, 5, 6]}).astype(
        {"a1": "category", "a2": "category"}
    )
    if "a2" not in keys:
        frame = frame.drop(columns="a2")

    grouped = frame.groupby(by=keys, as_index=as_index, observed=observed)
    if test_series:
        grouped = grouped["b"]
    extra_args = get_groupby_method_args(reduction_func, frame)

    result = grouped.agg([reduction_func], *extra_args)
    reducer = getattr(grouped, reduction_func)
    expected = reducer(*extra_args)

    if test_series:
        if as_index:
            expected = expected.to_frame(reduction_func)
        else:
            expected.columns = [*keys, reduction_func]
    else:
        if not as_index:
            # TODO: GH#52849 - as_index=False is not respected
            expected = expected.set_index(keys)
        elif reduction_func == "size":
            expected = expected.to_frame(reduction_func)
        expected.columns = MultiIndex(
            levels=[["b"], [reduction_func]], codes=[[0], [0]]
        )

    tm.assert_equal(result, expected)

0 comments on commit 3af68dc

Please sign in to comment.