Skip to content

Commit

Permalink
ENH: Use lazy copy for dropna (#50429)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl committed Jan 13, 2023
1 parent 7afbdf1 commit b37589a
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 4 deletions.
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6346,7 +6346,7 @@ def dropna(
raise ValueError(f"invalid how option: {how}")

if np.all(mask):
result = self.copy()
result = self.copy(deep=None)
else:
result = self.loc(axis=axis)[mask]

Expand Down
10 changes: 9 additions & 1 deletion pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1955,9 +1955,17 @@ def _blklocs(self):
"""compat with BlockManager"""
return None

def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockManager:
def getitem_mgr(self, indexer: slice | np.ndarray) -> SingleBlockManager:
# similar to get_slice, but not restricted to slice indexer
blk = self._block
if (
using_copy_on_write()
and isinstance(indexer, np.ndarray)
and len(indexer) > 0
and com.is_bool_indexer(indexer)
and indexer.all()
):
return type(self)(blk, self.index, [weakref.ref(blk)], parent=self)
array = blk._slice(indexer)
if array.ndim > 1:
# This will be caught by Series._get_values
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,7 +921,7 @@ def _ixs(self, i: int, axis: AxisInt = 0) -> Any:
"""
return self._values[i]

def _slice(self, slobj: slice, axis: Axis = 0) -> Series:
def _slice(self, slobj: slice | np.ndarray, axis: Axis = 0) -> Series:
# axis kwarg is retained for compat with NDFrame method
# _slice is *always* positional
return self._get_values(slobj)
Expand Down Expand Up @@ -5583,7 +5583,7 @@ def dropna(
return result
else:
if not inplace:
return self.copy()
return self.copy(deep=None)
return None

# ----------------------------------------------------------------------
Expand Down
34 changes: 34 additions & 0 deletions pandas/tests/copy_view/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,40 @@ def test_add_suffix(using_copy_on_write):
tm.assert_frame_equal(df, df_orig)


@pytest.mark.parametrize("axis, val", [(0, 5.5), (1, np.nan)])
def test_dropna(using_copy_on_write, axis, val):
    """Check memory sharing between a frame and its ``dropna`` result.

    Two cases are parametrized: with ``axis=0`` the frame holds no missing
    value, and with ``axis=1`` column "b" holds a NaN. Column "a" never
    contains a missing value, so it is the column inspected in both cases.
    """
    frame = DataFrame({"a": [1, 2, 3], "b": [4, val, 6], "c": "d"})
    snapshot = frame.copy()
    result = frame.dropna(axis=axis)

    def column_a_is_shared():
        # Re-fetch both arrays on every call; the write below may replace
        # the buffer backing the result.
        return np.shares_memory(get_array(result, "a"), get_array(frame, "a"))

    # Right after dropna: shared when copy-on-write is enabled, otherwise not.
    if using_copy_on_write:
        assert column_a_is_shared()
    else:
        assert not column_a_is_shared()

    # Write into the result; afterwards the buffers must no longer be shared.
    result.iloc[0, 0] = 0
    if using_copy_on_write:
        assert not column_a_is_shared()
    # The source frame must still match the snapshot taken before dropna.
    tm.assert_frame_equal(frame, snapshot)


@pytest.mark.parametrize("val", [5, 5.5])
def test_dropna_series(using_copy_on_write, val):
    """Check memory sharing between a Series and its ``dropna`` result.

    Neither parametrized value is missing, so the Series passed to
    ``dropna`` contains no NaN in either case.
    """
    series = Series([1, val, 4])
    snapshot = series.copy()
    result = series.dropna()

    def values_are_shared():
        # Re-read ``.values`` on every call; the write below may replace
        # the buffer backing the result.
        return np.shares_memory(result.values, series.values)

    # Right after dropna: shared when copy-on-write is enabled, otherwise not.
    if using_copy_on_write:
        assert values_are_shared()
    else:
        assert not values_are_shared()

    # Write into the result; afterwards the buffers must no longer be shared.
    result.iloc[0] = 0
    if using_copy_on_write:
        assert not values_are_shared()
    # The source Series must still match the snapshot taken before dropna.
    tm.assert_series_equal(series, snapshot)


@pytest.mark.parametrize(
"method",
[
Expand Down

0 comments on commit b37589a

Please sign in to comment.