Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

DEPR: Change numeric_only default to False in remaining groupby methods #49951

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ Removal of prior version deprecations/changes
- Changed default of ``numeric_only`` to ``False`` in all DataFrame methods with that argument (:issue:`46096`, :issue:`46906`)
- Changed default of ``numeric_only`` to ``False`` in :meth:`Series.rank` (:issue:`47561`)
- Enforced deprecation of silently dropping nuisance columns in groupby and resample operations when ``numeric_only=False`` (:issue:`41475`)
- Changed default of ``numeric_only`` to ``False`` in various :class:`.DataFrameGroupBy` methods (:issue:`46072`)
- Changed default of ``numeric_only`` in various :class:`.DataFrameGroupBy` methods; all methods now default to ``numeric_only=False`` (:issue:`46072`)
- Changed default of ``numeric_only`` to ``False`` in :class:`.Resampler` methods (:issue:`47177`)
-

Expand Down
31 changes: 9 additions & 22 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@
_agg_template,
_apply_docs,
_transform_template,
warn_dropping_nuisance_columns_deprecated,
)
from pandas.core.groupby.grouper import get_grouper
from pandas.core.indexes.api import (
Expand Down Expand Up @@ -438,7 +437,7 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs):
)

def _cython_transform(
self, how: str, numeric_only: bool = True, axis: AxisInt = 0, **kwargs
self, how: str, numeric_only: bool = False, axis: AxisInt = 0, **kwargs
):
assert axis == 0 # handled by caller

Expand Down Expand Up @@ -1333,22 +1332,20 @@ def _wrap_applied_output_series(
def _cython_transform(
self,
how: str,
numeric_only: bool | lib.NoDefault = lib.no_default,
numeric_only: bool = False,
axis: AxisInt = 0,
**kwargs,
) -> DataFrame:
assert axis == 0 # handled by caller
# TODO: no tests with self.ndim == 1 for DataFrameGroupBy
numeric_only_bool = self._resolve_numeric_only(how, numeric_only, axis)

# With self.axis == 0, we have multi-block tests
# e.g. test_rank_min_int, test_cython_transform_frame
# test_transform_numeric_ret
# With self.axis == 1, _get_data_to_aggregate does a transpose
# so we always have a single block.
mgr: Manager2D = self._get_data_to_aggregate()
orig_mgr_len = len(mgr)
if numeric_only_bool:
if numeric_only:
mgr = mgr.get_numeric_data(copy=False)

def arr_func(bvalues: ArrayLike) -> ArrayLike:
Expand All @@ -1358,12 +1355,9 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike:

# We could use `mgr.apply` here and not have to set_axis, but
# we would have to do shape gymnastics for ArrayManager compat
res_mgr = mgr.grouped_reduce(arr_func, ignore_failures=False)
res_mgr = mgr.grouped_reduce(arr_func)
res_mgr.set_axis(1, mgr.axes[1])

if len(res_mgr) < orig_mgr_len:
warn_dropping_nuisance_columns_deprecated(type(self), how, numeric_only)

res_df = self.obj._constructor(res_mgr)
if self.axis == 1:
res_df = res_df.T
Expand Down Expand Up @@ -1493,15 +1487,8 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame:
output = {}
inds = []
for i, (colname, sgb) in enumerate(self._iterate_column_groupbys(obj)):
try:
output[i] = sgb.transform(wrapper)
except TypeError:
# e.g. trying to call nanmean with string values
warn_dropping_nuisance_columns_deprecated(
type(self), "transform", numeric_only=False
)
else:
inds.append(i)
output[i] = sgb.transform(wrapper)
inds.append(i)

if not output:
raise TypeError("Transform function invalid for data types")
Expand Down Expand Up @@ -2243,7 +2230,7 @@ def corr(
self,
method: str | Callable[[np.ndarray, np.ndarray], float] = "pearson",
min_periods: int = 1,
numeric_only: bool | lib.NoDefault = lib.no_default,
numeric_only: bool = False,
) -> DataFrame:
result = self._op_via_apply(
"corr", method=method, min_periods=min_periods, numeric_only=numeric_only
Expand All @@ -2255,7 +2242,7 @@ def cov(
self,
min_periods: int | None = None,
ddof: int | None = 1,
numeric_only: bool | lib.NoDefault = lib.no_default,
numeric_only: bool = False,
) -> DataFrame:
result = self._op_via_apply(
"cov", min_periods=min_periods, ddof=ddof, numeric_only=numeric_only
Expand Down Expand Up @@ -2316,7 +2303,7 @@ def corrwith(
axis: Axis = 0,
drop: bool = False,
method: CorrelationMethod = "pearson",
numeric_only: bool | lib.NoDefault = lib.no_default,
numeric_only: bool = False,
) -> DataFrame:
result = self._op_via_apply(
"corrwith",
Expand Down