From 46a80a75bceadb3ebb36a7d4754681e2c4d3e933 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 6 Dec 2020 11:03:09 -0500 Subject: [PATCH 1/4] BUG: Groupby first/last/nth treats None as an observation --- pandas/_libs/groupby.pyx | 12 ++++-------- pandas/tests/groupby/test_nth.py | 29 +++++++++++++++++++++-------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 24156c88f0d76..5c4ba3b2729e3 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -928,9 +928,7 @@ def group_last(rank_t[:, :] out, for j in range(K): val = values[i, j] - # None should not be treated like other NA-like - # so that it won't be converted to nan - if not checknull(val) or val is None: + if not checknull(val): # NB: use _treat_as_na here once # conditional-nogil is available. nobs[lab, j] += 1 @@ -939,7 +937,7 @@ def group_last(rank_t[:, :] out, for i in range(ncounts): for j in range(K): if nobs[i, j] < min_count: - out[i, j] = NAN + out[i, j] = None else: out[i, j] = resx[i, j] else: @@ -1023,9 +1021,7 @@ def group_nth(rank_t[:, :] out, for j in range(K): val = values[i, j] - # None should not be treated like other NA-like - # so that it won't be converted to nan - if not checknull(val) or val is None: + if not checknull(val): # NB: use _treat_as_na here once # conditional-nogil is available. nobs[lab, j] += 1 @@ -1035,7 +1031,7 @@ def group_nth(rank_t[:, :] out, for i in range(ncounts): for j in range(K): if nobs[i, j] < min_count: - out[i, j] = NAN + out[i, j] = None else: out[i, j] = resx[i, j] diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 699cd88b5c53c..25b220f06633a 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -91,14 +91,27 @@ def test_nth_with_na_object(index, nulls_fixture): @pytest.mark.parametrize("method", ["first", "last"]) -def test_first_last_with_None(method): - # https://github.com/pandas-dev/pandas/issues/32800 - # None should be preserved as object dtype - df = DataFrame.from_dict({"id": ["a"], "value": [None]}) - groups = df.groupby("id", as_index=False) - result = getattr(groups, method)() - - tm.assert_frame_equal(result, df) +@pytest.mark.parametrize( + "df, expected", + [ + ( + DataFrame({"id": ["a"], "value": [None]}), + DataFrame({"value": [None]}, index=Index(["a"], name="id")), + ), + ( + DataFrame({"id": "a", "value": [None, "foo", np.nan]}), + DataFrame({"value": ["foo"]}, index=Index(["a"], name="id")), + ), + ( + DataFrame({"id": "a", "value": [np.nan]}, dtype=object), + DataFrame({"value": [None]}, index=Index(["a"], name="id")), + ), + ], +) +def test_first_last_with_none(method, df, expected): + # GH 32800, 38286 + result = getattr(df.groupby("id"), method)() + tm.assert_frame_equal(result, expected) def test_first_last_nth_dtypes(df_mixed_floats): From f93a8bd14afac7dd92984851b7bf0b3e35884498 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 6 Dec 2020 11:55:14 -0500 Subject: [PATCH 2/4] Reverted test changes, whatsnew --- doc/source/whatsnew/v1.1.5.rst | 1 + pandas/tests/groupby/test_nth.py | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index fbb12cb38448a..0c3175ac927a7 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -27,6 +27,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`). - Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`) - Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`) +- Fixed regression in :meth:`.DataFrameGroupBy.first`, :meth:`.DataFrameGroupBy.last`, :meth:`.DataFrameGroupBy.nth` where ``None`` was considered an observation (:issue:`38286`) .. --------------------------------------------------------------------------- diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 25b220f06633a..26b3af4234be1 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -90,14 +90,21 @@ def test_nth_with_na_object(index, nulls_fixture): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("method", ["first", "last"]) +def test_first_last_with_None(method): + # https://github.com/pandas-dev/pandas/issues/32800 + # None should be preserved as object dtype + df = DataFrame.from_dict({"id": ["a"], "value": [None]}) + groups = df.groupby("id", as_index=False) + result = getattr(groups, method)() + + tm.assert_frame_equal(result, df) + + @pytest.mark.parametrize("method", ["first", "last"]) @pytest.mark.parametrize( "df, expected", [ - ( - DataFrame({"id": ["a"], "value": [None]}), - DataFrame({"value": [None]}, index=Index(["a"], name="id")), - ), ( DataFrame({"id": "a", "value": [None, "foo", np.nan]}), DataFrame({"value": ["foo"]}, index=Index(["a"], name="id")), @@ -108,7 +115,7 @@ def test_nth_with_na_object(index, nulls_fixture): ), ], ) -def test_first_last_with_none(method, df, expected): +def test_first_last_with_None_expanded(method, df, expected): # GH 32800, 38286 result = getattr(df.groupby("id"), method)() tm.assert_frame_equal(result, expected) From 7753f396812d5b7d8f3984723af06895b024c5b3 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 6 Dec 2020 12:05:19 -0500 Subject: [PATCH 3/4] Reverted test changes, whatsnew --- doc/source/whatsnew/v1.1.5.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index 0c3175ac927a7..e993e7054dd3f 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -27,7 +27,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`). - Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`) - Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`) -- Fixed regression in :meth:`.DataFrameGroupBy.first`, :meth:`.DataFrameGroupBy.last`, :meth:`.DataFrameGroupBy.nth` where ``None`` was considered an observation (:issue:`38286`) +- Fixed regression in :meth:`.GroupBy.first`, :meth:`.GroupBy.last`, and :meth:`.GroupBy.nth` where ``None`` was considered a non-NA value (:issue:`38286`) .. --------------------------------------------------------------------------- From 54c1a0bbf4acc2ee9b814682bce182a4415eb159 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 6 Dec 2020 12:18:29 -0500 Subject: [PATCH 4/4] Remove nth from whatsnew --- doc/source/whatsnew/v1.1.5.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index e993e7054dd3f..7164830392f35 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -27,7 +27,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`). - Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`) - Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`) -- Fixed regression in :meth:`.GroupBy.first`, :meth:`.GroupBy.last`, and :meth:`.GroupBy.nth` where ``None`` was considered a non-NA value (:issue:`38286`) +- Fixed regression in :meth:`.GroupBy.first` and :meth:`.GroupBy.last` where ``None`` was considered a non-NA value (:issue:`38286`) .. ---------------------------------------------------------------------------