Skip to content

Commit

Permalink
REGR: ensure DataFrame.select_dtypes() returns a copy (#48176)
Browse files Browse the repository at this point in the history
  • Loading branch information
jorisvandenbossche committed Aug 23, 2022
1 parent 53d3c45 commit f6e9b1a
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 8 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.4.rst
Expand Up @@ -26,6 +26,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrame.loc` setting a length-1 array-like value to a single value in the DataFrame (:issue:`46268`)
- Fixed regression when slicing with :meth:`DataFrame.loc` with :class:`DateOffset`-index (:issue:`46671`)
- Fixed regression in setting ``None`` or non-string value into a ``string``-dtype Series using a mask (:issue:`47628`)
- Fixed regression in :meth:`DataFrame.select_dtypes` returning a view on the original DataFrame (:issue:`48090`)
- Fixed regression using custom Index subclasses (for example, used in xarray) with :meth:`~DataFrame.reset_index` or :meth:`Index.insert` (:issue:`47071`)
- Fixed regression in :meth:`DatetimeIndex.intersection` when the :class:`DatetimeIndex` has dates crossing daylight saving time (:issue:`46702`)
- Fixed regression in :func:`merge` throwing an error when passing a :class:`Series` with a multi-level name (:issue:`47946`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Expand Up @@ -4740,7 +4740,7 @@ def predicate(arr: ArrayLike) -> bool:

return True

- mgr = self._mgr._get_data_subset(predicate)
+ mgr = self._mgr._get_data_subset(predicate).copy(deep=None)
return type(self)(mgr).__finalize__(self)

def insert(
Expand Down
12 changes: 5 additions & 7 deletions pandas/tests/copy_view/test_methods.py
Expand Up @@ -139,18 +139,16 @@ def test_select_dtypes(using_copy_on_write):
     df2 = df.select_dtypes("int64")
     df2._mgr._verify_integrity()

-    # currently this always returns a "view"
-    assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    if using_copy_on_write:
+        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+    else:
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))

     # mutating df2 triggers a copy-on-write for that column/block
     df2.iloc[0, 0] = 0
     if using_copy_on_write:
         assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
-        tm.assert_frame_equal(df, df_orig)
-    else:
-        # but currently select_dtypes() actually returns a view -> mutates parent
-        df_orig.iloc[0, 0] = 0
-        tm.assert_frame_equal(df, df_orig)
+    tm.assert_frame_equal(df, df_orig)


def test_to_frame(using_copy_on_write):
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/frame/methods/test_select_dtypes.py
Expand Up @@ -456,3 +456,12 @@ def test_np_bool_ea_boolean_include_number(self):
result = df.select_dtypes(include="number")
expected = DataFrame({"a": [1, 2, 3]})
tm.assert_frame_equal(result, expected)

def test_select_dtypes_no_view(self):
# https://github.com/pandas-dev/pandas/issues/48090
# result of this method is not a view on the original dataframe
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
df_orig = df.copy()
result = df.select_dtypes(include=["number"])
result.iloc[0, 0] = 0
tm.assert_frame_equal(df, df_orig)

0 comments on commit f6e9b1a

Please sign in to comment.