Skip to content

Commit

Permalink
DEPR: DataFrame.median/mean with numeric_only=None and dt64 columns (p…
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and noatamir committed Nov 9, 2022
1 parent 9616d84 commit 0448f17
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 39 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,7 @@ Removal of prior version deprecations/changes
- Removed deprecated :meth:`Timedelta.delta`, :meth:`Timedelta.is_populated`, and :attr:`Timedelta.freq` (:issue:`46430`, :issue:`46476`)
- Removed deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`)
- Removed the ``numeric_only`` keyword from :meth:`Categorical.min` and :meth:`Categorical.max` in favor of ``skipna`` (:issue:`48821`)
- Changed behavior of :meth:`DataFrame.median` and :meth:`DataFrame.mean` with ``numeric_only=None`` to not exclude datetime-like columns. THIS NOTE WILL BE IRRELEVANT ONCE THE ``numeric_only=None`` DEPRECATION IS ENFORCED (:issue:`29941`)
- Removed :func:`is_extension_type` in favor of :func:`is_extension_array_dtype` (:issue:`29457`)
- Removed ``.ExponentialMovingWindow.vol`` (:issue:`39220`)
- Removed :meth:`Index.get_value` and :meth:`Index.set_value` (:issue:`33907`, :issue:`28621`)
Expand Down
24 changes: 0 additions & 24 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@
is_1d_only_ea_dtype,
is_bool_dtype,
is_dataclass,
is_datetime64_any_dtype,
is_dict_like,
is_dtype_equal,
is_extension_array_dtype,
Expand Down Expand Up @@ -10739,29 +10738,6 @@ def _reduce(
assert filter_type is None or filter_type == "bool", filter_type
out_dtype = "bool" if filter_type == "bool" else None

if numeric_only is None and name in ["mean", "median"]:
own_dtypes = [arr.dtype for arr in self._mgr.arrays]

dtype_is_dt = np.array(
[is_datetime64_any_dtype(dtype) for dtype in own_dtypes],
dtype=bool,
)
if dtype_is_dt.any():
warnings.warn(
"DataFrame.mean and DataFrame.median with numeric_only=None "
"will include datetime64 and datetime64tz columns in a "
"future version.",
FutureWarning,
stacklevel=find_stack_level(),
)
# Non-copy equivalent to
# dt64_cols = self.dtypes.apply(is_datetime64_any_dtype)
# cols = self.columns[~dt64_cols]
# self = self[cols]
predicate = lambda x: not is_datetime64_any_dtype(x.dtype)
mgr = self._mgr._get_data_subset(predicate)
self = type(self)(mgr)

# TODO: Make other agg func handle axis=None properly GH#21597
axis = self._get_axis_number(axis)
labels = self._get_agg_axis(axis)
Expand Down
27 changes: 12 additions & 15 deletions pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,13 @@ def assert_stat_op_calc(
f = getattr(frame, opname)

if check_dates:
expected_warning = FutureWarning if opname in ["mean", "median"] else None
df = DataFrame({"b": date_range("1/1/2001", periods=2)})
with tm.assert_produces_warning(expected_warning):
with tm.assert_produces_warning(None):
result = getattr(df, opname)()
assert isinstance(result, Series)

df["a"] = range(len(df))
with tm.assert_produces_warning(expected_warning):
with tm.assert_produces_warning(None):
result = getattr(df, opname)()
assert isinstance(result, Series)
assert len(result)
Expand Down Expand Up @@ -384,21 +383,19 @@ def test_nunique(self):
def test_mean_mixed_datetime_numeric(self, tz):
# https://github.com/pandas-dev/pandas/issues/24752
df = DataFrame({"A": [1, 1], "B": [Timestamp("2000", tz=tz)] * 2})
with tm.assert_produces_warning(FutureWarning):
result = df.mean()
expected = Series([1.0], index=["A"])
result = df.mean()
expected = Series([1.0, Timestamp("2000", tz=tz)], index=["A", "B"])
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("tz", [None, "UTC"])
def test_mean_excludes_datetimes(self, tz):
def test_mean_includes_datetimes(self, tz):
# https://github.com/pandas-dev/pandas/issues/24752
# Our long-term desired behavior is unclear, but the behavior in
# 0.24.0rc1 was buggy.
# Behavior in 0.24.0rc1 was buggy.
# As of 2.0 with numeric_only=None we do *not* drop datetime columns
df = DataFrame({"A": [Timestamp("2000", tz=tz)] * 2})
with tm.assert_produces_warning(FutureWarning):
result = df.mean()
result = df.mean()

expected = Series(dtype=np.float64)
expected = Series([Timestamp("2000", tz=tz)], index=["A"])
tm.assert_series_equal(result, expected)

def test_mean_mixed_string_decimal(self):
Expand Down Expand Up @@ -851,6 +848,7 @@ def test_mean_corner(self, float_frame, float_string_frame):
def test_mean_datetimelike(self):
# GH#24757 check that datetimelike are handled correctly with
# numeric_only=True
# As of 2.0, datetimelike are *not* excluded with numeric_only=None

df = DataFrame(
{
Expand All @@ -864,10 +862,9 @@ def test_mean_datetimelike(self):
expected = Series({"A": 1.0})
tm.assert_series_equal(result, expected)

with tm.assert_produces_warning(FutureWarning):
# in the future datetime columns will be included
with tm.assert_produces_warning(FutureWarning, match="Select only valid"):
result = df.mean()
expected = Series({"A": 1.0, "C": df.loc[1, "C"]})
expected = Series({"A": 1.0, "B": df.loc[1, "B"], "C": df.loc[1, "C"]})
tm.assert_series_equal(result, expected)

def test_mean_datetimelike_numeric_only_false(self):
Expand Down

0 comments on commit 0448f17

Please sign in to comment.