Skip to content

Commit

Permalink
REGR: groupby.transform producing segfault (#46585)
Browse files Browse the repository at this point in the history
  • Loading branch information
rhshadrach committed Mar 31, 2022
1 parent 2555468 commit 382aefc
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 13 deletions.
2 changes: 1 addition & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1106,7 +1106,7 @@ def _set_result_index_ordered(
# set the result index on the passed values object and
# return the new object, xref 8046

if self.grouper.is_monotonic:
if self.grouper.is_monotonic and not self.grouper.has_dropped_na:
# shortcut if we have an already ordered grouper
result.set_axis(self.obj._get_axis(self.axis), axis=self.axis, inplace=True)
return result
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -818,7 +818,10 @@ def result_ilocs(self) -> npt.NDArray[np.intp]:
# Original indices are where group_index would go via sorting.
# But when dropna is true, we need to remove null values while accounting for
# any gaps that then occur because of them.
group_index = get_group_index(self.codes, self.shape, sort=False, xnull=True)
group_index = get_group_index(
self.codes, self.shape, sort=self._sort, xnull=True
)
group_index, _ = compress_group_index(group_index, sort=self._sort)

if self.has_dropped_na:
mask = np.where(group_index >= 0)
Expand Down
33 changes: 22 additions & 11 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -1303,23 +1303,34 @@ def test_transform_cumcount():
tm.assert_series_equal(result, expected)


def test_null_group_lambda_self(sort, dropna):
@pytest.mark.parametrize("keys", [["A1"], ["A1", "A2"]])
def test_null_group_lambda_self(request, sort, dropna, keys):
# GH 17093
np.random.seed(0)
keys = np.random.randint(0, 5, size=50).astype(float)
nulls = np.random.choice([0, 1], keys.shape).astype(bool)
keys[nulls] = np.nan
values = np.random.randint(0, 5, size=keys.shape)
df = DataFrame({"A": keys, "B": values})
if not sort and not dropna:
msg = "GH#46584: null values get sorted when sort=False"
request.node.add_marker(pytest.mark.xfail(reason=msg, strict=False))

size = 50
nulls1 = np.random.choice([False, True], size)
nulls2 = np.random.choice([False, True], size)
# Row-wise boolean mask: True where any of the grouping key columns is null
nulls_grouper = nulls1 if len(keys) == 1 else nulls1 | nulls2

a1 = np.random.randint(0, 5, size=size).astype(float)
a1[nulls1] = np.nan
a2 = np.random.randint(0, 5, size=size).astype(float)
a2[nulls2] = np.nan
values = np.random.randint(0, 5, size=a1.shape)
df = DataFrame({"A1": a1, "A2": a2, "B": values})

expected_values = values
if dropna and nulls.any():
if dropna and nulls_grouper.any():
expected_values = expected_values.astype(float)
expected_values[nulls] = np.nan
expected_values[nulls_grouper] = np.nan
expected = DataFrame(expected_values, columns=["B"])

gb = df.groupby("A", dropna=dropna, sort=sort)
result = gb.transform(lambda x: x)
gb = df.groupby(keys, dropna=dropna, sort=sort)
result = gb[["B"]].transform(lambda x: x)
tm.assert_frame_equal(result, expected)


Expand Down

0 comments on commit 382aefc

Please sign in to comment.