Skip to content

Commit

Permalink
Allow split_out to be None, which then defaults to 1 in `groupby().aggregate()` (#9491)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ian Rose committed Sep 14, 2022
1 parent 6c15102 commit 982376e
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 0 deletions.
7 changes: 7 additions & 0 deletions dask/dataframe/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1681,6 +1681,13 @@ def get_group(self, key):

@_aggregate_docstring()
def aggregate(self, arg, split_every=None, split_out=1, shuffle=None):
if split_out is None:
warnings.warn(
"split_out=None is deprecated, please use a positive integer, "
"or allow the default of 1",
category=FutureWarning,
)
split_out = 1
if shuffle is None:
if split_out > 1:
shuffle = shuffle or config.get("shuffle", None) or "tasks"
Expand Down
7 changes: 7 additions & 0 deletions dask/dataframe/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2941,3 +2941,10 @@ def test_groupby_iter_fails():
ddf = dd.from_pandas(df, npartitions=1)
with pytest.raises(NotImplementedError, match="computing the groups"):
list(ddf.groupby("A"))


def test_groupby_None_split_out_warns():
    """Passing ``split_out=None`` to a groupby ``agg`` must emit a FutureWarning."""
    frame = dd.from_pandas(
        pd.DataFrame({"a": [1, 1, 2], "b": [2, 3, 4]}),
        npartitions=1,
    )
    agg_spec = {"b": "max"}
    # Only the warning is checked here; the aggregation result is not inspected.
    with pytest.warns(FutureWarning, match="split_out=None"):
        grouped = frame.groupby("a")
        grouped.agg(agg_spec, split_out=None)

0 comments on commit 982376e

Please sign in to comment.