From 462134d52ac989023c53a8359a28d94708332d7e Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Sun, 17 Jul 2022 18:30:54 -0500 Subject: [PATCH 1/7] fix 47649 --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/indexing.py | 6 ++++- pandas/tests/frame/methods/test_fillna.py | 28 ++++++++++++++++++++ pandas/tests/indexing/multiindex/test_loc.py | 19 +++++++++++++ 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 22a5f2a08362f..c1355b1150ec4 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -917,6 +917,7 @@ Missing - Bug in :meth:`DataFrame.interpolate` with object-dtype column not returning a copy with ``inplace=False`` (:issue:`45791`) - Bug in :meth:`DataFrame.dropna` allows to set both ``how`` and ``thresh`` incompatible arguments (:issue:`46575`) - Bug in :meth:`DataFrame.fillna` ignored ``axis`` when :class:`DataFrame` is single block (:issue:`47713`) +- Bug in :meth:`DataFrame.fillna` not working on multiindexed DataFrame (:issue:`47649`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fa1ad7ce3c874..0c9c5a987eace 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1936,7 +1936,11 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str else: for loc in ilocs: - item = self.obj.columns[loc] + level_diff = self.obj.columns.nlevels - value.columns.nlevels + if multiindex_indexer and level_diff > 0 and value.columns.nlevels != 0: + item = self.obj.columns.get_level_values(level_diff)[loc] + else: + item = self.obj.columns[loc] if item in value: sub_indexer[1] = item val = self._align_series( diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index d86c1b2aedcac..29d6de0625e16 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -708,6 +708,34 @@ 
def test_single_block_df_with_horizontal_axis(self): ) tm.assert_frame_equal(result, expected) + def test_fillna_with_multi_index_frame(self): + # GH 47649 + pdf = DataFrame( + { + ("x", "a"): [np.nan, 2.0, 3.0, 4.0, np.nan, 6.0], + ("x", "b"): [1.0, 2.0, np.nan, 4.0, np.nan, np.nan], + ("y", "c"): [1.0, 2.0, 3.0, 4.0, np.nan, np.nan], + } + ) + expected = DataFrame( + { + ("x", "a"): [-1.0, 2.0, 3.0, 4.0, -1.0, 6.0], + ("x", "b"): [1.0, 2.0, -1.0, 4.0, -1.0, -1.0], + ("y", "c"): [1.0, 2.0, 3.0, 4.0, np.nan, np.nan], + } + ) + tm.assert_frame_equal(pdf.fillna({"x": -1}), expected) + tm.assert_frame_equal(pdf.fillna({"x": -1, ("x", "b"): -2}), expected) + + expected = DataFrame( + { + ("x", "a"): [-1.0, 2.0, 3.0, 4.0, -1.0, 6.0], + ("x", "b"): [1.0, 2.0, -2.0, 4.0, -2.0, -2.0], + ("y", "c"): [1.0, 2.0, 3.0, 4.0, np.nan, np.nan], + } + ) + tm.assert_frame_equal(pdf.fillna({("x", "b"): -2, "x": -1}), expected) + def test_fillna_nonconsolidated_frame(): # https://github.com/pandas-dev/pandas/issues/36495 diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index d4354766a203b..7b286917e39bb 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -416,6 +416,25 @@ def test_loc_no_second_level_index(self): ) tm.assert_frame_equal(res, expected) + def test_loc_setitem_with_multi_index(self): + # GH 47649 + result = DataFrame( + { + ("x", "a"): np.arange(6), + ("x", "b"): np.arange(6), + ("y", "c"): np.arange(6), + }, + ) + expected = DataFrame( + { + ("x", "a"): np.arange(6, 0, -1), + ("x", "b"): np.arange(6), + ("y", "c"): np.arange(6), + }, + ) + result.loc[:, "x"] = expected["x"] + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "indexer, pos", From 1bcdaef90c5e47368ac3ebcda5ae9252fa6ea3ae Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Wed, 20 Jul 2022 10:45:43 -0500 Subject: [PATCH 2/7] fix fillna ignore underlying issue --- 
pandas/core/generic.py | 2 +- pandas/core/indexing.py | 6 +----- pandas/tests/indexing/multiindex/test_loc.py | 19 ------------------- 3 files changed, 2 insertions(+), 25 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0a439faed0896..383b977faf77d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6676,7 +6676,7 @@ def fillna( downcast_k = downcast if not is_dict else downcast.get(k) result.loc[:, k] = result[k].fillna( v, limit=limit, downcast=downcast_k - ) + ).values return result if not inplace else None elif not is_list_like(value): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0c9c5a987eace..fa1ad7ce3c874 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1936,11 +1936,7 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str else: for loc in ilocs: - level_diff = self.obj.columns.nlevels - value.columns.nlevels - if multiindex_indexer and level_diff > 0 and value.columns.nlevels != 0: - item = self.obj.columns.get_level_values(level_diff)[loc] - else: - item = self.obj.columns[loc] + item = self.obj.columns[loc] if item in value: sub_indexer[1] = item val = self._align_series( diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py index 7b286917e39bb..d4354766a203b 100644 --- a/pandas/tests/indexing/multiindex/test_loc.py +++ b/pandas/tests/indexing/multiindex/test_loc.py @@ -416,25 +416,6 @@ def test_loc_no_second_level_index(self): ) tm.assert_frame_equal(res, expected) - def test_loc_setitem_with_multi_index(self): - # GH 47649 - result = DataFrame( - { - ("x", "a"): np.arange(6), - ("x", "b"): np.arange(6), - ("y", "c"): np.arange(6), - }, - ) - expected = DataFrame( - { - ("x", "a"): np.arange(6, 0, -1), - ("x", "b"): np.arange(6), - ("y", "c"): np.arange(6), - }, - ) - result.loc[:, "x"] = expected["x"] - tm.assert_frame_equal(result, expected) - @pytest.mark.parametrize( "indexer, 
pos", From 5409961f68de9b7c1b7a8d66a2d697907c064dea Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Wed, 20 Jul 2022 10:56:54 -0500 Subject: [PATCH 3/7] fix pre commit --- pandas/core/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 383b977faf77d..df235268c0f01 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6674,9 +6674,9 @@ def fillna( if k not in result: continue downcast_k = downcast if not is_dict else downcast.get(k) - result.loc[:, k] = result[k].fillna( - v, limit=limit, downcast=downcast_k - ).values + result.loc[:, k] = ( + result[k].fillna(v, limit=limit, downcast=downcast_k).values + ) return result if not inplace else None elif not is_list_like(value): From 764f0abc066510cc1beec2425785fcd7dbde6ce9 Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Wed, 3 Aug 2022 14:59:17 -0500 Subject: [PATCH 4/7] add TODO --- pandas/core/generic.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index df235268c0f01..1e18abab41e45 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6674,9 +6674,12 @@ def fillna( if k not in result: continue downcast_k = downcast if not is_dict else downcast.get(k) + # GH47649 result.loc[:, k] = ( result[k].fillna(v, limit=limit, downcast=downcast_k).values ) + # TODO: Revert to result.loc[:, k] = result[k].fillna(...) 
+ # when issue GH45751 is fixed return result if not inplace else None elif not is_list_like(value): From f3d79a92488a46581901719920bcfa546b8bede5 Mon Sep 17 00:00:00 2001 From: xr-chen <826010519@qq.com> Date: Wed, 3 Aug 2022 17:13:23 -0500 Subject: [PATCH 5/7] update TODO --- pandas/core/generic.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1e18abab41e45..8ea8b71147534 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6678,8 +6678,10 @@ def fillna( result.loc[:, k] = ( result[k].fillna(v, limit=limit, downcast=downcast_k).values ) - # TODO: Revert to result.loc[:, k] = result[k].fillna(...) - # when issue GH45751 is fixed + # TODO: result.loc[:, k] = result.loc[:, k].fillna( + # v, limit=limit, downcast=downcast_k + # ) + # Revert when GH45751 is fixed return result if not inplace else None elif not is_list_like(value): From e384fd6819e1829fd50a8aea67153b9b7343c1e9 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Thu, 18 Aug 2022 23:39:55 +0200 Subject: [PATCH 6/7] Add small changes --- doc/source/whatsnew/v1.4.4.rst | 1 + doc/source/whatsnew/v1.5.0.rst | 1 - pandas/tests/frame/methods/test_fillna.py | 18 +++++++++--------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index 6ee140f59e096..924dd9aae1c63 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -14,6 +14,7 @@ including other versions of pandas. 
Fixed regressions ~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`DataFrame.fillna` not working :class:`DataFrame` with :class:`MultiIndex` and ``inplace=True`` (:issue:`47649`) - Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`) - diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index c1355b1150ec4..22a5f2a08362f 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -917,7 +917,6 @@ Missing - Bug in :meth:`DataFrame.interpolate` with object-dtype column not returning a copy with ``inplace=False`` (:issue:`45791`) - Bug in :meth:`DataFrame.dropna` allows to set both ``how`` and ``thresh`` incompatible arguments (:issue:`46575`) - Bug in :meth:`DataFrame.fillna` ignored ``axis`` when :class:`DataFrame` is single block (:issue:`47713`) -- Bug in :meth:`DataFrame.fillna` not working on multiindexed DataFrame (:issue:`47649`) MultiIndex ^^^^^^^^^^ diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 29d6de0625e16..b7cf1a3369b1d 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -712,16 +712,16 @@ def test_fillna_with_multi_index_frame(self): # GH 47649 pdf = DataFrame( { - ("x", "a"): [np.nan, 2.0, 3.0, 4.0, np.nan, 6.0], - ("x", "b"): [1.0, 2.0, np.nan, 4.0, np.nan, np.nan], - ("y", "c"): [1.0, 2.0, 3.0, 4.0, np.nan, np.nan], + ("x", "a"): [np.nan, 2.0, 3.0], + ("x", "b"): [1.0, 2.0, np.nan], + ("y", "c"): [1.0, 2.0, np.nan], } ) expected = DataFrame( { - ("x", "a"): [-1.0, 2.0, 3.0, 4.0, -1.0, 6.0], - ("x", "b"): [1.0, 2.0, -1.0, 4.0, -1.0, -1.0], - ("y", "c"): [1.0, 2.0, 3.0, 4.0, np.nan, np.nan], + ("x", "a"): [-1.0, 2.0, 3.0], + ("x", "b"): [1.0, 2.0, -1.0], + ("y", "c"): [1.0, 2.0, np.nan], } ) tm.assert_frame_equal(pdf.fillna({"x": -1}), expected) @@ -729,9 +729,9 @@ def test_fillna_with_multi_index_frame(self): 
expected = DataFrame( { - ("x", "a"): [-1.0, 2.0, 3.0, 4.0, -1.0, 6.0], - ("x", "b"): [1.0, 2.0, -2.0, 4.0, -2.0, -2.0], - ("y", "c"): [1.0, 2.0, 3.0, 4.0, np.nan, np.nan], + ("x", "a"): [-1.0, 2.0, 3.0], + ("x", "b"): [1.0, 2.0, -2.0], + ("y", "c"): [1.0, 2.0, np.nan], } ) tm.assert_frame_equal(pdf.fillna({("x", "b"): -2, "x": -1}), expected) From 15db9abd1eaf813d7d3bffb3cba7ebc2a41f1fa8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 23 Aug 2022 12:15:03 +0200 Subject: [PATCH 7/7] Update doc/source/whatsnew/v1.4.4.rst --- doc/source/whatsnew/v1.4.4.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst index 509bc3f9ca7f0..3d0a6e01826f8 100644 --- a/doc/source/whatsnew/v1.4.4.rst +++ b/doc/source/whatsnew/v1.4.4.rst @@ -14,7 +14,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- Fixed regression in :meth:`DataFrame.fillna` not working :class:`DataFrame` with :class:`MultiIndex` and ``inplace=True`` (:issue:`47649`) +- Fixed regression in :meth:`DataFrame.fillna` not working on a :class:`DataFrame` with a :class:`MultiIndex` (:issue:`47649`) - Fixed regression in taking NULL :class:`objects` from a :class:`DataFrame` causing a segmentation violation. These NULL values are created by :meth:`numpy.empty_like` (:issue:`46848`) - Fixed regression in :func:`concat` materializing :class:`Index` during sorting even if :class:`Index` was already sorted (:issue:`47501`) - Fixed regression in calling bitwise numpy ufuncs (for example, ``np.bitwise_and``) on Index objects (:issue:`46769`)