From f6e9b1a4643affbc0d5ce9f363bb37c5ace35bc4 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 23 Aug 2022 16:58:28 +0200
Subject: [PATCH] REGR: ensure DataFrame.select_dtypes() returns a copy (#48176)

---
 doc/source/whatsnew/v1.4.4.rst                   |  1 +
 pandas/core/frame.py                             |  2 +-
 pandas/tests/copy_view/test_methods.py           | 12 +++++-------
 pandas/tests/frame/methods/test_select_dtypes.py |  9 +++++++++
 4 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v1.4.4.rst b/doc/source/whatsnew/v1.4.4.rst
index 71f3db9af02ff..deff6e194c3bd 100644
--- a/doc/source/whatsnew/v1.4.4.rst
+++ b/doc/source/whatsnew/v1.4.4.rst
@@ -26,6 +26,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.loc` setting a length-1 array like value to a single value in the DataFrame (:issue:`46268`)
 - Fixed regression when slicing with :meth:`DataFrame.loc` with :class:`DateOffset`-index (:issue:`46671`)
 - Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
+- Fixed regression in :meth:`DataFrame.select_dtypes` returning a view on the original DataFrame (:issue:`48090`)
 - Fixed regression using custom Index subclasses (for example, used in xarray) with :meth:`~DataFrame.reset_index` or :meth:`Index.insert` (:issue:`47071`)
 - Fixed regression in :meth:`DatetimeIndex.intersection` when the :class:`DatetimeIndex` has dates crossing daylight savings time (:issue:`46702`)
 - Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 93a2c20cd0b74..4302b14da6418 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4740,7 +4740,7 @@ def predicate(arr: ArrayLike) -> bool:
 
             return True
 
-        mgr = self._mgr._get_data_subset(predicate)
+        mgr = self._mgr._get_data_subset(predicate).copy(deep=None)
         return type(self)(mgr).__finalize__(self)
 
     def insert(
diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
index cc4c219e6c5d9..df723808ce06b 100644
--- a/pandas/tests/copy_view/test_methods.py
+++ b/pandas/tests/copy_view/test_methods.py
@@ -139,18 +139,16 @@ def test_select_dtypes(using_copy_on_write):
     df2 = df.select_dtypes("int64")
     df2._mgr._verify_integrity()
 
-    # currently this always returns a "view"
-    assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
 
     # mutating df2 triggers a copy-on-write for that column/block
     df2.iloc[0, 0] = 0
     if using_copy_on_write:
         assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-        tm.assert_frame_equal(df, df_orig)
-    else:
-        # but currently select_dtypes() actually returns a view -> mutates parent
-        df_orig.iloc[0, 0] = 0
-        tm.assert_frame_equal(df, df_orig)
+    tm.assert_frame_equal(df, df_orig)
 
 
 def test_to_frame(using_copy_on_write):
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
index 6ff5a41b67ec2..9284e0c0cced6 100644
--- a/pandas/tests/frame/methods/test_select_dtypes.py
+++ b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -456,3 +456,12 @@ def test_np_bool_ea_boolean_include_number(self):
         result = df.select_dtypes(include="number")
         expected = DataFrame({"a": [1, 2, 3]})
         tm.assert_frame_equal(result, expected)
+
+    def test_select_dtypes_no_view(self):
+        # https://github.com/pandas-dev/pandas/issues/48090
+        # result of this method is not a view on the original dataframe
+        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
+        df_orig = df.copy()
+        result = df.select_dtypes(include=["number"])
+        result.iloc[0, 0] = 0
+        tm.assert_frame_equal(df, df_orig)