From ac648eeaf5c27ab957e8cd284eb7e49a45232f00 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Thu, 15 Sep 2022 19:19:49 -0400 Subject: [PATCH] DOC: Add SeriesGroupBy and DataFrameGroupBy reference pages (#48500) * WIP * DOC: Add SeriesGroupBy and DataFrameGroupBy reference pages * whatnews, fix docstring of filter * Reorder docstring sections --- doc/source/reference/groupby.rst | 156 +++++++++++-------- doc/source/user_guide/10min.rst | 4 +- doc/source/user_guide/groupby.rst | 2 +- doc/source/whatsnew/v1.0.0.rst | 6 +- doc/source/whatsnew/v1.0.1.rst | 2 +- doc/source/whatsnew/v1.0.2.rst | 6 +- doc/source/whatsnew/v1.0.4.rst | 6 +- doc/source/whatsnew/v1.1.0.rst | 8 +- doc/source/whatsnew/v1.1.4.rst | 2 +- doc/source/whatsnew/v1.1.5.rst | 2 +- doc/source/whatsnew/v1.2.0.rst | 4 +- doc/source/whatsnew/v1.2.1.rst | 2 +- doc/source/whatsnew/v1.3.0.rst | 50 +++--- doc/source/whatsnew/v1.3.3.rst | 6 +- doc/source/whatsnew/v1.3.4.rst | 6 +- doc/source/whatsnew/v1.3.5.rst | 2 +- doc/source/whatsnew/v1.4.0.rst | 46 +++--- doc/source/whatsnew/v1.5.0.rst | 46 +++--- doc/source/whatsnew/v1.6.0.rst | 6 +- pandas/core/generic.py | 3 +- pandas/core/groupby/generic.py | 30 ++-- pandas/core/groupby/groupby.py | 12 +- scripts/validate_rst_title_capitalization.py | 2 + 23 files changed, 224 insertions(+), 185 deletions(-) diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst index 63d7c19b7841d..138018eacef49 100644 --- a/doc/source/reference/groupby.rst +++ b/doc/source/reference/groupby.rst @@ -14,10 +14,14 @@ Indexing, iteration .. autosummary:: :toctree: api/ - GroupBy.__iter__ - GroupBy.groups - GroupBy.indices - GroupBy.get_group + DataFrameGroupBy.__iter__ + SeriesGroupBy.__iter__ + DataFrameGroupBy.groups + SeriesGroupBy.groups + DataFrameGroupBy.indices + SeriesGroupBy.indices + DataFrameGroupBy.get_group + SeriesGroupBy.get_group .. 
currentmodule:: pandas @@ -41,57 +45,21 @@ Function application .. autosummary:: :toctree: api/ - GroupBy.apply - GroupBy.agg + SeriesGroupBy.apply + DataFrameGroupBy.apply + SeriesGroupBy.agg + DataFrameGroupBy.agg SeriesGroupBy.aggregate DataFrameGroupBy.aggregate SeriesGroupBy.transform DataFrameGroupBy.transform - GroupBy.pipe - -Computations / descriptive stats --------------------------------- -.. autosummary:: - :toctree: api/ - - GroupBy.all - GroupBy.any - GroupBy.bfill - GroupBy.backfill - GroupBy.count - GroupBy.cumcount - GroupBy.cummax - GroupBy.cummin - GroupBy.cumprod - GroupBy.cumsum - GroupBy.ffill - GroupBy.first - GroupBy.head - GroupBy.last - GroupBy.max - GroupBy.mean - GroupBy.median - GroupBy.min - GroupBy.ngroup - GroupBy.nth - GroupBy.ohlc - GroupBy.pad - GroupBy.prod - GroupBy.rank - GroupBy.pct_change - GroupBy.size - GroupBy.sem - GroupBy.std - GroupBy.sum - GroupBy.var - GroupBy.tail - -The following methods are available in both ``SeriesGroupBy`` and -``DataFrameGroupBy`` objects, but may differ slightly, usually in that -the ``DataFrameGroupBy`` version usually permits the specification of an -axis argument, and often an argument indicating whether to restrict -application to columns of a specific data type. + SeriesGroupBy.pipe + DataFrameGroupBy.pipe + DataFrameGroupBy.filter + SeriesGroupBy.filter +``DataFrameGroupBy`` computations / descriptive stats +----------------------------------------------------- .. autosummary:: :toctree: api/ @@ -100,6 +68,7 @@ application to columns of a specific data type. DataFrameGroupBy.backfill DataFrameGroupBy.bfill DataFrameGroupBy.corr + DataFrameGroupBy.corrwith DataFrameGroupBy.count DataFrameGroupBy.cov DataFrameGroupBy.cumcount @@ -111,42 +80,105 @@ application to columns of a specific data type. 
DataFrameGroupBy.diff DataFrameGroupBy.ffill DataFrameGroupBy.fillna - DataFrameGroupBy.filter - DataFrameGroupBy.hist + DataFrameGroupBy.first + DataFrameGroupBy.head DataFrameGroupBy.idxmax DataFrameGroupBy.idxmin + DataFrameGroupBy.last DataFrameGroupBy.mad + DataFrameGroupBy.max + DataFrameGroupBy.mean + DataFrameGroupBy.median + DataFrameGroupBy.min + DataFrameGroupBy.ngroup + DataFrameGroupBy.nth DataFrameGroupBy.nunique + DataFrameGroupBy.ohlc DataFrameGroupBy.pad DataFrameGroupBy.pct_change - DataFrameGroupBy.plot + DataFrameGroupBy.prod DataFrameGroupBy.quantile DataFrameGroupBy.rank DataFrameGroupBy.resample DataFrameGroupBy.sample + DataFrameGroupBy.sem DataFrameGroupBy.shift DataFrameGroupBy.size DataFrameGroupBy.skew + DataFrameGroupBy.std + DataFrameGroupBy.sum + DataFrameGroupBy.var + DataFrameGroupBy.tail DataFrameGroupBy.take DataFrameGroupBy.tshift DataFrameGroupBy.value_counts -The following methods are available only for ``SeriesGroupBy`` objects. - +``SeriesGroupBy`` computations / descriptive stats +-------------------------------------------------- .. 
autosummary:: :toctree: api/ - SeriesGroupBy.hist + SeriesGroupBy.all + SeriesGroupBy.any + SeriesGroupBy.backfill + SeriesGroupBy.bfill + SeriesGroupBy.corr + SeriesGroupBy.count + SeriesGroupBy.cov + SeriesGroupBy.cumcount + SeriesGroupBy.cummax + SeriesGroupBy.cummin + SeriesGroupBy.cumprod + SeriesGroupBy.cumsum + SeriesGroupBy.describe + SeriesGroupBy.diff + SeriesGroupBy.ffill + SeriesGroupBy.fillna + SeriesGroupBy.first + SeriesGroupBy.head + SeriesGroupBy.last + SeriesGroupBy.idxmax + SeriesGroupBy.idxmin + SeriesGroupBy.is_monotonic_increasing + SeriesGroupBy.is_monotonic_decreasing + SeriesGroupBy.mad + SeriesGroupBy.max + SeriesGroupBy.mean + SeriesGroupBy.median + SeriesGroupBy.min + SeriesGroupBy.ngroup SeriesGroupBy.nlargest SeriesGroupBy.nsmallest + SeriesGroupBy.nth + SeriesGroupBy.nunique SeriesGroupBy.unique - SeriesGroupBy.is_monotonic_increasing - SeriesGroupBy.is_monotonic_decreasing - -The following methods are available only for ``DataFrameGroupBy`` objects. - + SeriesGroupBy.ohlc + SeriesGroupBy.pad + SeriesGroupBy.pct_change + SeriesGroupBy.prod + SeriesGroupBy.quantile + SeriesGroupBy.rank + SeriesGroupBy.resample + SeriesGroupBy.sample + SeriesGroupBy.sem + SeriesGroupBy.shift + SeriesGroupBy.size + SeriesGroupBy.skew + SeriesGroupBy.std + SeriesGroupBy.sum + SeriesGroupBy.var + SeriesGroupBy.tail + SeriesGroupBy.take + SeriesGroupBy.tshift + SeriesGroupBy.value_counts + +Plotting and visualization +-------------------------- .. autosummary:: :toctree: api/ - DataFrameGroupBy.corrwith DataFrameGroupBy.boxplot + DataFrameGroupBy.hist + SeriesGroupBy.hist + DataFrameGroupBy.plot + SeriesGroupBy.plot diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index c767fb1ebef7f..06508e9af9660 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -528,7 +528,7 @@ See the :ref:`Grouping section `. 
) df -Grouping and then applying the :meth:`~pandas.core.groupby.GroupBy.sum` function to the resulting +Grouping and then applying the :meth:`~pandas.core.groupby.DataFrameGroupBy.sum` function to the resulting groups: .. ipython:: python @@ -536,7 +536,7 @@ groups: df.groupby("A")[["C", "D"]].sum() Grouping by multiple columns forms a hierarchical index, and again we can -apply the :meth:`~pandas.core.groupby.GroupBy.sum` function: +apply the :meth:`~pandas.core.groupby.DataFrameGroupBy.sum` function: .. ipython:: python diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 5d8ef7ce02097..f9b8b793bfde8 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -632,7 +632,7 @@ Named aggregation .. versionadded:: 0.25.0 To support column-specific aggregation *with control over the output column names*, pandas -accepts the special syntax in :meth:`GroupBy.agg`, known as "named aggregation", where +accepts the special syntax in :meth:`DataFrameGroupBy.agg` and :meth:`SeriesGroupBy.agg`, known as "named aggregation", where - The keywords are the *output* column names - The values are tuples whose first element is the column to select diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 2ab0af46cda88..e9aa5d60314a5 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -774,7 +774,7 @@ source, you should no longer need to install Cython into your build environment Other API changes ^^^^^^^^^^^^^^^^^ -- :class:`core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`) +- :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` now raise on invalid operation names (:issue:`27489`) - :meth:`pandas.api.types.infer_dtype` will now return "integer-na" for integer and ``np.nan`` mix (:issue:`27283`) - :meth:`MultiIndex.from_arrays` will no longer infer names from arrays if ``names=None`` is explicitly 
provided (:issue:`27292`) - In order to improve tab-completion, pandas does not include most deprecated attributes when introspecting a pandas object using ``dir`` (e.g. ``dir(df)``). @@ -1232,8 +1232,8 @@ GroupBy/resample/rolling - Bug in :meth:`core.groupby.DataFrameGroupBy.agg` with timezone-aware datetime64 column incorrectly casting results to the original dtype (:issue:`29641`) - Bug in :meth:`DataFrame.groupby` when using axis=1 and having a single level columns index (:issue:`30208`) - Bug in :meth:`DataFrame.groupby` when using nunique on axis=1 (:issue:`30253`) -- Bug in :meth:`GroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) -- Bug in :meth:`GroupBy.pct_change` and :meth:`core.groupby.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) +- Bug in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` with multiple list-like q value and integer column names (:issue:`30289`) +- Bug in :meth:`.DataFrameGroupBy.pct_change` and :meth:`.SeriesGroupBy.pct_change` causes ``TypeError`` when ``fill_method`` is ``None`` (:issue:`30463`) - Bug in :meth:`Rolling.count` and :meth:`Expanding.count` argument where ``min_periods`` was ignored (:issue:`26996`) Reshaping diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index c42aab6de4cc3..bceac69b4914a 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -21,7 +21,7 @@ Fixed regressions - Fixed regression in :class:`Series` multiplication when multiplying a numeric :class:`Series` with >10000 elements with a timedelta-like scalar (:issue:`31457`) - Fixed regression in ``.groupby().agg()`` raising an ``AssertionError`` for some reductions like ``min`` on object-dtype columns (:issue:`31522`) - Fixed regression in ``.groupby()`` aggregations with categorical dtype using Cythonized reduction functions (e.g. 
``first``) (:issue:`31450`) -- Fixed regression in :meth:`GroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. a list or numpy array) (:issue:`31441`) +- Fixed regression in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. a list or numpy array) (:issue:`31441`) - Fixed regression in :meth:`DataFrame.groupby` whereby taking the minimum or maximum of a column with period dtype would raise a ``TypeError``. (:issue:`31471`) - Fixed regression in :meth:`DataFrame.groupby` with an empty DataFrame grouping by a level of a MultiIndex (:issue:`31670`). - Fixed regression in :meth:`DataFrame.apply` with object dtype and non-reducing function (:issue:`31505`) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 3f7c6e85e14ca..340fa97a513ad 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -17,12 +17,12 @@ Fixed regressions **Groupby** -- Fixed regression in :meth:`groupby(..).agg() ` which was failing on frames with :class:`MultiIndex` columns and a custom function (:issue:`31777`) +- Fixed regression in :meth:`.DataFrameGroupBy.agg` and :meth:`.SeriesGroupBy.agg` which were failing on frames with :class:`MultiIndex` columns and a custom function (:issue:`31777`) - Fixed regression in ``groupby(..).rolling(..).apply()`` (``RollingGroupby``) where the ``raw`` parameter was ignored (:issue:`31754`) - Fixed regression in :meth:`rolling(..).corr() ` when using a time offset (:issue:`31789`) - Fixed regression in :meth:`groupby(..).nunique() ` which was modifying the original values if ``NaN`` values were present (:issue:`31950`) - Fixed regression in ``DataFrame.groupby`` raising a ``ValueError`` from an internal operation (:issue:`31802`) -- Fixed regression in :meth:`groupby(..).agg() ` calling a user-provided function an extra time on an empty input (:issue:`31760`) +- Fixed 
regression in :meth:`.DataFrameGroupBy.agg` and :meth:`.SeriesGroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) **I/O** @@ -104,7 +104,7 @@ Bug fixes - Fixed bug in :meth:`DataFrame.convert_dtypes` for series with mix of integers and strings (:issue:`32117`) - Fixed bug in :meth:`DataFrame.convert_dtypes` where ``BooleanDtype`` columns were converted to ``Int64`` (:issue:`32287`) - Fixed bug in setting values using a slice indexer with string dtype (:issue:`31772`) -- Fixed bug where :meth:`pandas.core.groupby.GroupBy.first` and :meth:`pandas.core.groupby.GroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) +- Fixed bug where :meth:`.DataFrameGroupBy.first`, :meth:`.SeriesGroupBy.first`, :meth:`.DataFrameGroupBy.last`, and :meth:`.SeriesGroupBy.last` would raise a ``TypeError`` when groups contained ``pd.NA`` in a column of object dtype (:issue:`32123`) - Fixed bug where :meth:`DataFrameGroupBy.mean`, :meth:`DataFrameGroupBy.median`, :meth:`DataFrameGroupBy.var`, and :meth:`DataFrameGroupBy.std` would raise a ``TypeError`` on ``Int64`` dtype columns (:issue:`32219`) **Strings** diff --git a/doc/source/whatsnew/v1.0.4.rst b/doc/source/whatsnew/v1.0.4.rst index 84b7e7d45e8b7..794990b027216 100644 --- a/doc/source/whatsnew/v1.0.4.rst +++ b/doc/source/whatsnew/v1.0.4.rst @@ -16,7 +16,7 @@ including other versions of pandas. 
Fixed regressions ~~~~~~~~~~~~~~~~~ - Fix regression where :meth:`Series.isna` and :meth:`DataFrame.isna` would raise for categorical dtype when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`33594`) -- Fix regression in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`) +- Fix regression in :meth:`.DataFrameGroupBy.first`, :meth:`.SeriesGroupBy.first`, :meth:`.DataFrameGroupBy.last`, and :meth:`.SeriesGroupBy.last` where None is not preserved in object dtype (:issue:`32800`) - Fix regression in DataFrame reductions using ``numeric_only=True`` and ExtensionArrays (:issue:`33256`). - Fix performance regression in ``memory_usage(deep=True)`` for object dtype (:issue:`33012`) - Fix regression where :meth:`Categorical.replace` would replace with ``NaN`` whenever the new value and replacement value were equal (:issue:`33288`) @@ -26,7 +26,7 @@ Fixed regressions - Fix regression in :meth:`DataFrame.describe` raising ``TypeError: unhashable type: 'dict'`` (:issue:`32409`) - Fix regression in :meth:`DataFrame.replace` casts columns to ``object`` dtype if items in ``to_replace`` not in values (:issue:`32988`) - Fix regression in :meth:`Series.groupby` would raise ``ValueError`` when grouping by :class:`PeriodIndex` level (:issue:`34010`) -- Fix regression in :meth:`GroupBy.rolling.apply` ignores args and kwargs parameters (:issue:`33433`) +- Fix regression in :meth:`DataFrameGroupBy.rolling.apply` and :meth:`SeriesGroupBy.rolling.apply` ignoring args and kwargs parameters (:issue:`33433`) - Fix regression in error message with ``np.min`` or ``np.max`` on unordered :class:`Categorical` (:issue:`33115`) - Fix regression in :meth:`DataFrame.loc` and :meth:`Series.loc` throwing an error when a ``datetime64[ns, tz]`` value is provided (:issue:`32395`) @@ -40,7 +40,7 @@ Bug fixes - Bug in :meth:`~DataFrame.to_csv` was silently failing when writing to an invalid s3 bucket. 
(:issue:`32486`) - Bug in :meth:`read_parquet` was raising a ``FileNotFoundError`` when passed an s3 directory path. (:issue:`26388`) - Bug in :meth:`~DataFrame.to_parquet` was throwing an ``AttributeError`` when writing a partitioned parquet file to s3 (:issue:`27596`) -- Bug in :meth:`GroupBy.quantile` causes the quantiles to be shifted when the ``by`` axis contains ``NaN`` (:issue:`33200`, :issue:`33569`) +- Bug in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` causes the quantiles to be shifted when the ``by`` axis contains ``NaN`` (:issue:`33200`, :issue:`33569`) Contributors ~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index e1f54c439ae9b..ab14891b40151 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1126,16 +1126,16 @@ GroupBy/resample/rolling - Using a :class:`pandas.api.indexers.BaseIndexer` with ``count``, ``min``, ``max``, ``median``, ``skew``, ``cov``, ``corr`` will now return correct results for any monotonic :class:`pandas.api.indexers.BaseIndexer` descendant (:issue:`32865`) - :meth:`DataFrameGroupby.mean` and :meth:`SeriesGroupby.mean` (and similarly for :meth:`~DataFrameGroupby.median`, :meth:`~DataFrameGroupby.std` and :meth:`~DataFrameGroupby.var`) now raise a ``TypeError`` if a non-accepted keyword argument is passed into it. 
Previously an ``UnsupportedFunctionCall`` was raised (``AssertionError`` if ``min_count`` passed into :meth:`~DataFrameGroupby.median`) (:issue:`31485`) -- Bug in :meth:`GroupBy.apply` raises ``ValueError`` when the ``by`` axis is not sorted, has duplicates, and the applied ``func`` does not mutate passed in objects (:issue:`30667`) +- Bug in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` raising ``ValueError`` when the ``by`` axis is not sorted, has duplicates, and the applied ``func`` does not mutate passed in objects (:issue:`30667`) - Bug in :meth:`DataFrameGroupBy.transform` produces an incorrect result with transformation functions (:issue:`30918`) -- Bug in :meth:`Groupby.transform` was returning the wrong result when grouping by multiple keys of which some were categorical and others not (:issue:`32494`) -- Bug in :meth:`GroupBy.count` causes segmentation fault when grouped-by columns contain NaNs (:issue:`32841`) +- Bug in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` returning the wrong result when grouping by multiple keys of which some were categorical and others not (:issue:`32494`) +- Bug in :meth:`.DataFrameGroupBy.count` and :meth:`.SeriesGroupBy.count` causing segmentation fault when grouped-by columns contain NaNs (:issue:`32841`) - Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby` produces inconsistent type when aggregating Boolean :class:`Series` (:issue:`32894`) - Bug in :meth:`DataFrameGroupBy.sum` and :meth:`SeriesGroupBy.sum` where a large negative number would be returned when the number of non-null values was below ``min_count`` for nullable integer dtypes (:issue:`32861`) - Bug in :meth:`SeriesGroupBy.quantile` was raising on nullable integers (:issue:`33136`) - Bug in :meth:`DataFrame.resample` where an ``AmbiguousTimeError`` would be raised when the resulting timezone aware :class:`DatetimeIndex` had a DST transition at midnight (:issue:`25758`) - Bug in 
:meth:`DataFrame.groupby` where a ``ValueError`` would be raised when grouping by a categorical column with read-only categories and ``sort=False`` (:issue:`33410`) -- Bug in :meth:`GroupBy.agg`, :meth:`GroupBy.transform`, and :meth:`GroupBy.resample` where subclasses are not preserved (:issue:`28330`) +- Bug in :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.resample`, and :meth:`.SeriesGroupBy.resample` where subclasses are not preserved (:issue:`28330`) - Bug in :meth:`SeriesGroupBy.agg` where any column name was accepted in the named aggregation of :class:`SeriesGroupBy` previously. The behaviour now allows only ``str`` and callables else would raise ``TypeError``. (:issue:`34422`) - Bug in :meth:`DataFrame.groupby` lost the name of the :class:`Index` when one of the ``agg`` keys referenced an empty list (:issue:`32580`) - Bug in :meth:`Rolling.apply` where ``center=True`` was ignored when ``engine='numba'`` was specified (:issue:`34784`) diff --git a/doc/source/whatsnew/v1.1.4.rst b/doc/source/whatsnew/v1.1.4.rst index 6353dbfafc9f1..1e5b3614895b4 100644 --- a/doc/source/whatsnew/v1.1.4.rst +++ b/doc/source/whatsnew/v1.1.4.rst @@ -41,7 +41,7 @@ Bug fixes ~~~~~~~~~ - Bug causing ``groupby(...).sum()`` and similar to not preserve metadata (:issue:`29442`) - Bug in :meth:`Series.isin` and :meth:`DataFrame.isin` raising a ``ValueError`` when the target was read-only (:issue:`37174`) -- Bug in :meth:`GroupBy.fillna` that introduced a performance regression after 1.0.5 (:issue:`36757`) +- Bug in :meth:`.DataFrameGroupBy.fillna` and :meth:`.SeriesGroupBy.fillna` that introduced a performance regression after 1.0.5 (:issue:`36757`) - Bug in :meth:`DataFrame.info` was raising a ``KeyError`` when the DataFrame has integer column names (:issue:`37245`) - Bug in :meth:`DataFrameGroupby.apply` would drop a :class:`CategoricalIndex` when grouped on (:issue:`35792`) 
diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index 002e1f85f4127..4c822ae049936 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -28,7 +28,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`). - Fixed performance regression in ``df.groupby(..).rolling(..)`` (:issue:`38038`) - Fixed regression in :meth:`MultiIndex.intersection` returning duplicates when at least one of the indexes had duplicates (:issue:`36915`) -- Fixed regression in :meth:`.GroupBy.first` and :meth:`.GroupBy.last` where ``None`` was considered a non-NA value (:issue:`38286`) +- Fixed regression in :meth:`.DataFrameGroupBy.first`, :meth:`.SeriesGroupBy.first`, :meth:`.DataFrameGroupBy.last`, and :meth:`.SeriesGroupBy.last` where ``None`` was considered a non-NA value (:issue:`38286`) .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 49f9abd99db53..c5f2dbe71cb3c 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -561,9 +561,9 @@ Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Performance improvements when creating DataFrame or Series with dtype ``str`` or :class:`StringDtype` from array with many string elements (:issue:`36304`, :issue:`36317`, :issue:`36325`, :issue:`36432`, :issue:`37371`) -- Performance improvement in :meth:`.GroupBy.agg` with the ``numba`` engine (:issue:`35759`) +- Performance improvement in :meth:`.DataFrameGroupBy.agg` and :meth:`.SeriesGroupBy.agg` with the ``numba`` engine (:issue:`35759`) - Performance improvements when creating :meth:`Series.map` from a huge dictionary (:issue:`34717`) -- Performance improvement in :meth:`.GroupBy.transform` with the ``numba`` engine (:issue:`36240`) +- Performance improvement in :meth:`.DataFrameGroupBy.transform` and 
:meth:`.SeriesGroupBy.transform` with the ``numba`` engine (:issue:`36240`) - :class:`.Styler` uuid method altered to compress data transmission over web whilst maintaining reasonably low table collision probability (:issue:`36345`) - Performance improvement in :func:`to_datetime` with non-ns time unit for ``float`` ``dtype`` columns (:issue:`20445`) - Performance improvement in setting values on an :class:`IntervalArray` (:issue:`36310`) diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst index 34e28eab6d4bf..25e616dcdf37f 100644 --- a/doc/source/whatsnew/v1.2.1.rst +++ b/doc/source/whatsnew/v1.2.1.rst @@ -23,7 +23,7 @@ Fixed regressions - Fixed regression in setting with :meth:`DataFrame.loc` raising ``ValueError`` when :class:`DataFrame` has unsorted :class:`MultiIndex` columns and indexer is a scalar (:issue:`38601`) - Fixed regression in setting with :meth:`DataFrame.loc` raising ``KeyError`` with :class:`MultiIndex` and list-like columns indexer enlarging :class:`DataFrame` (:issue:`39147`) - Fixed regression in :meth:`~DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`) -- Fixed regression in :meth:`.GroupBy.sem` where the presence of non-numeric columns would cause an error instead of being dropped (:issue:`38774`) +- Fixed regression in :meth:`.DataFrameGroupBy.sem` and :meth:`.SeriesGroupBy.sem` where the presence of non-numeric columns would cause an error instead of being dropped (:issue:`38774`) - Fixed regression in :meth:`.DataFrameGroupBy.diff` raising for ``int8`` and ``int16`` columns (:issue:`39050`) - Fixed regression in :meth:`DataFrame.groupby` when aggregating an ``ExtensionDType`` that could fail for non-numeric values (:issue:`38980`) - Fixed regression in :meth:`.Rolling.skew` and :meth:`.Rolling.kurt` modifying the object inplace (:issue:`38908`) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 
a392aeb5274c2..537463d293287 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -267,10 +267,10 @@ Other enhancements - :class:`RangeIndex` can now be constructed by passing a ``range`` object directly e.g. ``pd.RangeIndex(range(3))`` (:issue:`12067`) - :meth:`Series.round` and :meth:`DataFrame.round` now work with nullable integer and floating dtypes (:issue:`38844`) - :meth:`read_csv` and :meth:`read_json` expose the argument ``encoding_errors`` to control how encoding errors are handled (:issue:`39450`) -- :meth:`.GroupBy.any` and :meth:`.GroupBy.all` use Kleene logic with nullable data types (:issue:`37506`) -- :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`) -- :meth:`.GroupBy.any` and :meth:`.GroupBy.all` raising with ``object`` data containing ``pd.NA`` even when ``skipna=True`` (:issue:`37501`) -- :meth:`.GroupBy.rank` now supports object-dtype data (:issue:`38278`) +- :meth:`.DataFrameGroupBy.any`, :meth:`.SeriesGroupBy.any`, :meth:`.DataFrameGroupBy.all`, and :meth:`.SeriesGroupBy.all` use Kleene logic with nullable data types (:issue:`37506`) +- :meth:`.DataFrameGroupBy.any`, :meth:`.SeriesGroupBy.any`, :meth:`.DataFrameGroupBy.all`, and :meth:`.SeriesGroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`) +- :meth:`.DataFrameGroupBy.any`, :meth:`.SeriesGroupBy.any`, :meth:`.DataFrameGroupBy.all`, and :meth:`.SeriesGroupBy.all` raising with ``object`` data containing ``pd.NA`` even when ``skipna=True`` (:issue:`37501`) +- :meth:`.DataFrameGroupBy.rank` and :meth:`.SeriesGroupBy.rank` now support object-dtype data (:issue:`38278`) - Constructing a :class:`DataFrame` or :class:`Series` with the ``data`` argument being a Python iterable that is *not* a NumPy ``ndarray`` consisting of NumPy scalars will now result in a dtype with a precision the maximum of the NumPy scalars; this was already the case when 
``data`` is a NumPy ``ndarray`` (:issue:`40908`) - Add keyword ``sort`` to :func:`pivot_table` to allow non-sorting of the result (:issue:`39143`) - Add keyword ``dropna`` to :meth:`DataFrame.value_counts` to allow counting rows that include ``NA`` values (:issue:`41325`) @@ -391,8 +391,8 @@ values as measured by ``np.allclose``. Now no such casting occurs. .. _whatsnew_130.notable_bug_fixes.groupby_reductions_float_result: -``float`` result for :meth:`.GroupBy.mean`, :meth:`.GroupBy.median`, and :meth:`.GroupBy.var` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +``float`` result for :meth:`.DataFrameGroupBy.mean`, :meth:`.DataFrameGroupBy.median`, :meth:`.DataFrameGroupBy.var`, :meth:`.SeriesGroupBy.mean`, :meth:`.SeriesGroupBy.median`, and :meth:`.SeriesGroupBy.var` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Previously, these methods could result in different dtypes depending on the input values. Now, these methods will always return a float dtype. (:issue:`41137`) @@ -535,8 +535,8 @@ casts to ``dtype=object`` (:issue:`38709`) .. _whatsnew_130.notable_bug_fixes.rolling_groupby_column: -GroupBy.rolling no longer returns grouped-by column in values -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +DataFrameGroupBy.rolling and SeriesGroupBy.rolling no longer return grouped-by column in values +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The group-by column will now be dropped from the result of a ``groupby.rolling`` operation (:issue:`32262`) @@ -583,10 +583,10 @@ However, floating point artifacts may now exist in the results when rolling over .. 
_whatsnew_130.notable_bug_fixes.rolling_groupby_multiindex: -GroupBy.rolling with MultiIndex no longer drops levels in the result -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +DataFrameGroupBy.rolling and SeriesGroupBy.rolling with MultiIndex no longer drop levels in the result +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:meth:`GroupBy.rolling` will no longer drop levels of a :class:`DataFrame` +:meth:`DataFrameGroupBy.rolling` and :meth:`SeriesGroupBy.rolling` will no longer drop levels of a :class:`DataFrame` with a :class:`MultiIndex` in the result. This can lead to a perceived duplication of levels in the resulting :class:`MultiIndex`, but this change restores the behavior that was present in version 1.1.3 (:issue:`38787`, :issue:`38523`). @@ -891,10 +891,10 @@ Performance improvements - Performance improvement in :class:`.Styler` where render times are more than 50% reduced and now matches :meth:`DataFrame.to_html` (:issue:`39972` :issue:`39952`, :issue:`40425`) - The method :meth:`.Styler.set_td_classes` is now as performant as :meth:`.Styler.apply` and :meth:`.Styler.applymap`, and even more so in some cases (:issue:`40453`) - Performance improvement in :meth:`.ExponentialMovingWindow.mean` with ``times`` (:issue:`39784`) -- Performance improvement in :meth:`.GroupBy.apply` when requiring the Python fallback implementation (:issue:`40176`) +- Performance improvement in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` when requiring the Python fallback implementation (:issue:`40176`) - Performance improvement in the conversion of a PyArrow Boolean array to a pandas nullable Boolean array (:issue:`41051`) - Performance improvement for concatenation of data with type :class:`CategoricalDtype` (:issue:`40193`) -- Performance improvement in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable data types (:issue:`37493`) +- Performance improvement in 
:meth:`.DataFrameGroupBy.cummin`, :meth:`.SeriesGroupBy.cummin`, :meth:`.DataFrameGroupBy.cummax`, and :meth:`.SeriesGroupBy.cummax` with nullable data types (:issue:`37493`) - Performance improvement in :meth:`Series.nunique` with nan values (:issue:`40865`) - Performance improvement in :meth:`DataFrame.transpose`, :meth:`Series.unstack` with ``DatetimeTZDtype`` (:issue:`40149`) - Performance improvement in :meth:`Series.plot` and :meth:`DataFrame.plot` with entry point lazy loading (:issue:`41492`) @@ -953,11 +953,11 @@ Numeric - Bug in :meth:`DataFrame.mode` and :meth:`Series.mode` not keeping consistent integer :class:`Index` for empty input (:issue:`33321`) - Bug in :meth:`DataFrame.rank` when the DataFrame contained ``np.inf`` (:issue:`32593`) - Bug in :meth:`DataFrame.rank` with ``axis=0`` and columns holding incomparable types raising an ``IndexError`` (:issue:`38932`) -- Bug in :meth:`Series.rank`, :meth:`DataFrame.rank`, and :meth:`.GroupBy.rank` treating the most negative ``int64`` value as missing (:issue:`32859`) +- Bug in :meth:`Series.rank`, :meth:`DataFrame.rank`, :meth:`.DataFrameGroupBy.rank`, and :meth:`.SeriesGroupBy.rank` treating the most negative ``int64`` value as missing (:issue:`32859`) - Bug in :meth:`DataFrame.select_dtypes` different behavior between Windows and Linux with ``include="int"`` (:issue:`36596`) - Bug in :meth:`DataFrame.apply` and :meth:`DataFrame.agg` when passed the argument ``func="size"`` would operate on the entire ``DataFrame`` instead of rows or columns (:issue:`39934`) - Bug in :meth:`DataFrame.transform` would raise a ``SpecificationError`` when passed a dictionary and columns were missing; will now raise a ``KeyError`` instead (:issue:`40004`) -- Bug in :meth:`.GroupBy.rank` giving incorrect results with ``pct=True`` and equal values between consecutive groups (:issue:`40518`) +- Bug in :meth:`.DataFrameGroupBy.rank` and :meth:`.SeriesGroupBy.rank` giving incorrect results with ``pct=True`` and equal values 
between consecutive groups (:issue:`40518`) - Bug in :meth:`Series.count` would result in an ``int32`` result on 32-bit platforms when argument ``level=None`` (:issue:`40908`) - Bug in :class:`Series` and :class:`DataFrame` reductions with methods ``any`` and ``all`` not returning Boolean results for object data (:issue:`12863`, :issue:`35450`, :issue:`27709`) - Bug in :meth:`Series.clip` would fail if the Series contains NA values and has nullable int or float as a data type (:issue:`40851`) @@ -1120,21 +1120,21 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Bug in :meth:`.GroupBy.agg` with :class:`PeriodDtype` columns incorrectly casting results too aggressively (:issue:`38254`) +- Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.SeriesGroupBy.agg` with :class:`PeriodDtype` columns incorrectly casting results too aggressively (:issue:`38254`) - Bug in :meth:`.SeriesGroupBy.value_counts` where unobserved categories in a grouped categorical Series were not tallied (:issue:`38672`) - Bug in :meth:`.SeriesGroupBy.value_counts` where an error was raised on an empty Series (:issue:`39172`) - Bug in :meth:`.GroupBy.indices` would contain non-existent indices when null values were present in the groupby keys (:issue:`9304`) -- Fixed bug in :meth:`.GroupBy.sum` causing a loss of precision by now using Kahan summation (:issue:`38778`) -- Fixed bug in :meth:`.GroupBy.cumsum` and :meth:`.GroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`) +- Fixed bug in :meth:`.DataFrameGroupBy.sum` and :meth:`.SeriesGroupBy.sum` causing a loss of precision by now using Kahan summation (:issue:`38778`) +- Fixed bug in :meth:`.DataFrameGroupBy.cumsum`, :meth:`.SeriesGroupBy.cumsum`, :meth:`.DataFrameGroupBy.mean`, and :meth:`.SeriesGroupBy.mean` causing loss of precision through using Kahan summation (:issue:`38934`) - Bug in :meth:`.Resampler.aggregate` and :meth:`DataFrame.transform` raising a ``TypeError`` instead of 
``SpecificationError`` when missing keys had mixed dtypes (:issue:`39025`) - Bug in :meth:`.DataFrameGroupBy.idxmin` and :meth:`.DataFrameGroupBy.idxmax` with ``ExtensionDtype`` columns (:issue:`38733`) - Bug in :meth:`Series.resample` would raise when the index was a :class:`PeriodIndex` consisting of ``NaT`` (:issue:`39227`) - Bug in :meth:`.RollingGroupby.corr` and :meth:`.ExpandingGroupby.corr` where the groupby column would return ``0`` instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`) - Bug in :meth:`.ExpandingGroupby.corr` and :meth:`.ExpandingGroupby.cov` where ``1`` would be returned instead of ``np.nan`` when providing ``other`` that was longer than each group (:issue:`39591`) -- Bug in :meth:`.GroupBy.mean`, :meth:`.GroupBy.median` and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) +- Bug in :meth:`.DataFrameGroupBy.mean`, :meth:`.SeriesGroupBy.mean`, :meth:`.DataFrameGroupBy.median`, :meth:`.SeriesGroupBy.median`, and :meth:`DataFrame.pivot_table` not propagating metadata (:issue:`28283`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly when window is an offset and dates are in descending order (:issue:`40002`) - Bug in :meth:`Series.groupby` and :meth:`DataFrame.groupby` on an empty ``Series`` or ``DataFrame`` would lose index, columns, and/or data types when directly using the methods ``idxmax``, ``idxmin``, ``mad``, ``min``, ``max``, ``sum``, ``prod``, and ``skew`` or using them through ``apply``, ``aggregate``, or ``resample`` (:issue:`26411`) -- Bug in :meth:`.GroupBy.apply` where a :class:`MultiIndex` would be created instead of an :class:`Index` when used on a :class:`.RollingGroupby` object (:issue:`39732`) +- Bug in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` where a :class:`MultiIndex` would be created instead of an :class:`Index` when used on a :class:`.RollingGroupby` object (:issue:`39732`) - Bug 
in :meth:`.DataFrameGroupBy.sample` where an error was raised when ``weights`` was specified and the index was an :class:`Int64Index` (:issue:`39927`) - Bug in :meth:`.DataFrameGroupBy.aggregate` and :meth:`.Resampler.aggregate` would sometimes raise a ``SpecificationError`` when passed a dictionary and columns were missing; will now always raise a ``KeyError`` instead (:issue:`40004`) - Bug in :meth:`.DataFrameGroupBy.sample` where column selection was not applied before computing the result (:issue:`39928`) @@ -1148,14 +1148,14 @@ Groupby/resample/rolling - Bug in aggregation functions for :class:`DataFrame` not respecting ``numeric_only`` argument when ``level`` keyword was given (:issue:`40660`) - Bug in :meth:`.SeriesGroupBy.aggregate` where using a user-defined function to aggregate a Series with an object-typed :class:`Index` causes an incorrect :class:`Index` shape (:issue:`40014`) - Bug in :class:`.RollingGroupby` where ``as_index=False`` argument in ``groupby`` was ignored (:issue:`39433`) -- Bug in :meth:`.GroupBy.any` and :meth:`.GroupBy.all` raising a ``ValueError`` when using with nullable type columns holding ``NA`` even with ``skipna=True`` (:issue:`40585`) -- Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` incorrectly rounding integer values near the ``int64`` implementations bounds (:issue:`40767`) -- Bug in :meth:`.GroupBy.rank` with nullable dtypes incorrectly raising a ``TypeError`` (:issue:`41010`) -- Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` computing wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`) +- Bug in :meth:`.DataFrameGroupBy.any`, :meth:`.SeriesGroupBy.any`, :meth:`.DataFrameGroupBy.all` and :meth:`.SeriesGroupBy.all` raising a ``ValueError`` when using with nullable type columns holding ``NA`` even with ``skipna=True`` (:issue:`40585`) +- Bug in :meth:`.DataFrameGroupBy.cummin`, :meth:`.SeriesGroupBy.cummin`, :meth:`.DataFrameGroupBy.cummax` and 
:meth:`.SeriesGroupBy.cummax` incorrectly rounding integer values near the ``int64`` implementations bounds (:issue:`40767`) +- Bug in :meth:`.DataFrameGroupBy.rank` and :meth:`.SeriesGroupBy.rank` with nullable dtypes incorrectly raising a ``TypeError`` (:issue:`41010`) +- Bug in :meth:`.DataFrameGroupBy.cummin`, :meth:`.SeriesGroupBy.cummin`, :meth:`.DataFrameGroupBy.cummax` and :meth:`.SeriesGroupBy.cummax` computing wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`) - Bug in :meth:`DataFrame.rolling` returning mean zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) - Bug in :meth:`DataFrame.rolling` returning sum not zero for all ``NaN`` window with ``min_periods=0`` if calculation is not numerical stable (:issue:`41053`) - Bug in :meth:`.SeriesGroupBy.agg` failing to retain ordered :class:`CategoricalDtype` on order-preserving aggregations (:issue:`41147`) -- Bug in :meth:`.GroupBy.min` and :meth:`.GroupBy.max` with multiple object-dtype columns and ``numeric_only=False`` incorrectly raising a ``ValueError`` (:issue:`41111`) +- Bug in :meth:`.DataFrameGroupBy.min`, :meth:`.SeriesGroupBy.min`, :meth:`.DataFrameGroupBy.max` and :meth:`.SeriesGroupBy.max` with multiple object-dtype columns and ``numeric_only=False`` incorrectly raising a ``ValueError`` (:issue:`41111`) - Bug in :meth:`.DataFrameGroupBy.rank` with the GroupBy object's ``axis=0`` and the ``rank`` method's keyword ``axis=1`` (:issue:`41320`) - Bug in :meth:`DataFrameGroupBy.__getitem__` with non-unique columns incorrectly returning a malformed :class:`SeriesGroupBy` instead of :class:`DataFrameGroupBy` (:issue:`41427`) - Bug in :meth:`.DataFrameGroupBy.transform` with non-unique columns incorrectly raising an ``AttributeError`` (:issue:`41427`) diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index ecec6d975ccb7..5bb7bcd1372c7 100644 --- 
a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -15,9 +15,9 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :class:`DataFrame` constructor failing to broadcast for defined :class:`Index` and len one list of :class:`Timestamp` (:issue:`42810`) -- Fixed regression in :meth:`.GroupBy.agg` incorrectly raising in some cases (:issue:`42390`) -- Fixed regression in :meth:`.GroupBy.apply` where ``nan`` values were dropped even with ``dropna=False`` (:issue:`43205`) -- Fixed regression in :meth:`.GroupBy.quantile` which was failing with ``pandas.NA`` (:issue:`42849`) +- Fixed regression in :meth:`.DataFrameGroupBy.agg` and :meth:`.SeriesGroupBy.agg` incorrectly raising in some cases (:issue:`42390`) +- Fixed regression in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` where ``nan`` values were dropped even with ``dropna=False`` (:issue:`43205`) +- Fixed regression in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` which were failing with ``pandas.NA`` (:issue:`42849`) - Fixed regression in :meth:`merge` where ``on`` columns with ``ExtensionDtype`` or ``bool`` data types were cast to ``object`` in ``right`` and ``outer`` merge (:issue:`40073`) - Fixed regression in :meth:`RangeIndex.where` and :meth:`RangeIndex.putmask` raising ``AssertionError`` when result did not represent a :class:`RangeIndex` (:issue:`43240`) - Fixed regression in :meth:`read_parquet` where the ``fastparquet`` engine would not work properly with fastparquet 0.7.0 (:issue:`43075`) diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst index b46744d51d74d..4fc53b3f39dea 100644 --- a/doc/source/whatsnew/v1.3.4.rst +++ b/doc/source/whatsnew/v1.3.4.rst @@ -15,11 +15,11 @@ including other versions of pandas. 
Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`DataFrame.convert_dtypes` incorrectly converts byte strings to strings (:issue:`43183`) -- Fixed regression in :meth:`.GroupBy.agg` where it was failing silently with mixed data types along ``axis=1`` and :class:`MultiIndex` (:issue:`43209`) +- Fixed regression in :meth:`.DataFrameGroupBy.agg` and :meth:`.SeriesGroupBy.agg` where they were failing silently with mixed data types along ``axis=1`` and :class:`MultiIndex` (:issue:`43209`) - Fixed regression in :func:`merge` with integer and ``NaN`` keys failing with ``outer`` merge (:issue:`43550`) - Fixed regression in :meth:`DataFrame.corr` raising ``ValueError`` with ``method="spearman"`` on 32-bit platforms (:issue:`43588`) - Fixed performance regression in :meth:`MultiIndex.equals` (:issue:`43549`) -- Fixed performance regression in :meth:`.GroupBy.first` and :meth:`.GroupBy.last` with :class:`StringDtype` (:issue:`41596`) +- Fixed performance regression in :meth:`.DataFrameGroupBy.first`, :meth:`.SeriesGroupBy.first`, :meth:`.DataFrameGroupBy.last`, and :meth:`.SeriesGroupBy.last` with :class:`StringDtype` (:issue:`41596`) - Fixed regression in :meth:`Series.cat.reorder_categories` failing to update the categories on the ``Series`` (:issue:`43232`) - Fixed regression in :meth:`Series.cat.categories` setter failing to update the categories on the ``Series`` (:issue:`43334`) - Fixed regression in :func:`read_csv` raising ``UnicodeDecodeError`` exception when ``memory_map=True`` (:issue:`43540`) @@ -35,7 +35,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ - Fixed bug in :meth:`pandas.DataFrame.groupby.rolling` and :class:`pandas.api.indexers.FixedForwardWindowIndexer` leading to segfaults and window endpoints being mixed across groups (:issue:`43267`) -- Fixed bug in :meth:`.GroupBy.mean` with datetimelike values including ``NaT`` values returning incorrect results (:issue:`43132`) +- Fixed bug in :meth:`.DataFrameGroupBy.mean` and :meth:`.SeriesGroupBy.mean` with
datetimelike values including ``NaT`` values returning incorrect results (:issue:`43132`) - Fixed bug in :meth:`Series.aggregate` not passing the first ``args`` to the user supplied ``func`` in certain cases (:issue:`43357`) - Fixed memory leaks in :meth:`Series.rolling.quantile` and :meth:`Series.rolling.median` (:issue:`43339`) diff --git a/doc/source/whatsnew/v1.3.5.rst b/doc/source/whatsnew/v1.3.5.rst index 339bd7debf945..067d2214b1a5b 100644 --- a/doc/source/whatsnew/v1.3.5.rst +++ b/doc/source/whatsnew/v1.3.5.rst @@ -20,7 +20,7 @@ Fixed regressions - Fixed regression in creating a :class:`DataFrame` from a timezone-aware :class:`Timestamp` scalar near a Daylight Savings Time transition (:issue:`42505`) - Fixed performance regression in :func:`read_csv` (:issue:`44106`) - Fixed regression in :meth:`Series.duplicated` and :meth:`Series.drop_duplicates` when Series has :class:`Categorical` dtype with boolean categories (:issue:`44351`) -- Fixed regression in :meth:`.GroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`) +- Fixed regression in :meth:`.DataFrameGroupBy.sum` and :meth:`.SeriesGroupBy.sum` with ``timedelta64[ns]`` dtype containing ``NaT`` failing to treat that value as NA (:issue:`42659`) - Fixed regression in :meth:`.RollingGroupby.cov` and :meth:`.RollingGroupby.corr` when ``other`` had the same shape as each group would incorrectly return superfluous groups in the result (:issue:`42915`) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 697070e50a40a..5895a06792ffb 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -148,7 +148,7 @@ Groupby positional indexing It is now possible to specify positional ranges relative to the ends of each group. 
-Negative arguments for :meth:`.GroupBy.head` and :meth:`.GroupBy.tail` now work +Negative arguments for :meth:`.DataFrameGroupBy.head`, :meth:`.SeriesGroupBy.head`, :meth:`.DataFrameGroupBy.tail`, and :meth:`.SeriesGroupBy.tail` now work correctly and result in ranges relative to the end and start of each group, respectively. Previously, negative arguments returned empty frames. @@ -159,14 +159,14 @@ respectively. Previously, negative arguments returned empty frames. df.groupby("A").head(-1) -:meth:`.GroupBy.nth` now accepts a slice or list of integers and slices. +:meth:`.DataFrameGroupBy.nth` and :meth:`.SeriesGroupBy.nth` now accept a slice or list of integers and slices. .. ipython:: python df.groupby("A").nth(slice(1, -1)) df.groupby("A").nth([slice(None, 1), slice(-1, None)]) -:meth:`.GroupBy.nth` now accepts index notation. +:meth:`.DataFrameGroupBy.nth` and :meth:`.SeriesGroupBy.nth` now accept index notation. .. ipython:: python @@ -204,9 +204,9 @@ Other enhancements - :meth:`concat` will preserve the ``attrs`` when it is the same for all objects and discard the ``attrs`` when they are different (:issue:`41828`) - :class:`DataFrameGroupBy` operations with ``as_index=False`` now correctly retain ``ExtensionDtype`` dtypes for columns being grouped on (:issue:`41373`) - Add support for assigning values to ``by`` argument in :meth:`DataFrame.plot.hist` and :meth:`DataFrame.plot.box` (:issue:`15079`) -- :meth:`Series.sample`, :meth:`DataFrame.sample`, and :meth:`.GroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. A generator will be more performant, especially with ``replace=False`` (:issue:`38100`) +- :meth:`Series.sample`, :meth:`DataFrame.sample`, :meth:`.DataFrameGroupBy.sample`, and :meth:`.SeriesGroupBy.sample` now accept a ``np.random.Generator`` as input to ``random_state``. 
A generator will be more performant, especially with ``replace=False`` (:issue:`38100`) - :meth:`Series.ewm` and :meth:`DataFrame.ewm` now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`) -- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`) +- :meth:`.DataFrameGroupBy.cummin`, :meth:`.SeriesGroupBy.cummin`, :meth:`.DataFrameGroupBy.cummax`, and :meth:`.SeriesGroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`) - :meth:`read_table` now supports the argument ``storage_options`` (:issue:`39167`) - :meth:`DataFrame.to_stata` and :meth:`StataWriter` now accept the keyword only argument ``value_labels`` to save labels for non-categorical columns (:issue:`38454`) - Methods that relied on hashmap based algos such as :meth:`DataFrameGroupBy.value_counts`, :meth:`DataFrameGroupBy.count` and :func:`factorize` ignored imaginary component for complex numbers (:issue:`17927`) @@ -224,7 +224,7 @@ Other enhancements - :func:`read_csv` now accepts a ``callable`` function in ``on_bad_lines`` when ``engine="python"`` for custom handling of bad lines (:issue:`5686`) - :class:`ExcelWriter` argument ``if_sheet_exists="overlay"`` option added (:issue:`40231`) - :meth:`read_excel` now accepts a ``decimal`` argument that allow the user to specify the decimal point when parsing string columns to numeric (:issue:`14403`) -- :meth:`.GroupBy.mean`, :meth:`.GroupBy.std`, :meth:`.GroupBy.var`, and :meth:`.GroupBy.sum` now support `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`, :issue:`44862`, :issue:`44939`) +- :meth:`.DataFrameGroupBy.mean`, :meth:`.SeriesGroupBy.mean`, :meth:`.DataFrameGroupBy.std`, :meth:`.SeriesGroupBy.std`, :meth:`.DataFrameGroupBy.var`, :meth:`.SeriesGroupBy.var`, :meth:`.DataFrameGroupBy.sum`, and :meth:`.SeriesGroupBy.sum`
now support `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`43731`, :issue:`44862`, :issue:`44939`) - :meth:`Timestamp.isoformat` now handles the ``timespec`` argument from the base ``datetime`` class (:issue:`26131`) - :meth:`NaT.to_numpy` ``dtype`` argument is now respected, so ``np.timedelta64`` can be returned (:issue:`44460`) - New option ``display.max_dir_items`` customizes the number of columns added to :meth:`Dataframe.__dir__` and suggested for tab completion (:issue:`37996`) @@ -419,7 +419,7 @@ raise a ``ValueError`` if the operation could produce a result with more than groupby.apply consistent transform detection ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:meth:`.GroupBy.apply` is designed to be flexible, allowing users to perform +:meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` are designed to be flexible, allowing users to perform aggregations, transformations, filters, and use it with user-defined functions that might not fall into any of these categories.
As part of this, apply will attempt to detect when an operation is a transform, and in such a case, the @@ -753,13 +753,13 @@ Other Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`) +- Performance improvement in :meth:`.DataFrameGroupBy.sample` and :meth:`.SeriesGroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`) - Performance improvement when converting non-string arrays to string arrays (:issue:`34483`) -- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`) +- Performance improvement in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` for user-defined functions (:issue:`41598`) - Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`, :issue:`43142`, :issue:`43147`, :issue:`43307`, :issue:`43144`, :issue:`44826`) -- Performance improvement in :meth:`GroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`) +- Performance improvement in :meth:`.DataFrameGroupBy.shift` and :meth:`.SeriesGroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`) - Performance improvement in :meth:`DataFrame.corr` for ``method=pearson`` on data without missing values (:issue:`40956`) -- Performance improvement in some :meth:`GroupBy.apply` operations (:issue:`42992`, :issue:`43578`) +- Performance improvement in some :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` operations (:issue:`42992`, :issue:`43578`) - Performance improvement in :func:`read_stata` (:issue:`43059`, :issue:`43227`) - Performance improvement in :func:`read_sas` (:issue:`43333`) - Performance improvement in :meth:`to_datetime` with ``uint`` dtypes (:issue:`42606`) @@ -770,11 +770,11 @@ Performance improvements - Performance improvement in indexing with a non-unique :class:`Index` (:issue:`43792`) - Performance 
improvement in indexing with a listlike indexer on a :class:`MultiIndex` (:issue:`43370`) - Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:`43370`) -- Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`, :issue:`43725`) -- Performance improvement in :meth:`GroupBy.count` (:issue:`43730`, :issue:`43694`) -- Performance improvement in :meth:`GroupBy.any` and :meth:`GroupBy.all` (:issue:`43675`, :issue:`42841`) -- Performance improvement in :meth:`GroupBy.std` (:issue:`43115`, :issue:`43576`) -- Performance improvement in :meth:`GroupBy.cumsum` (:issue:`43309`) +- Performance improvement in :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` (:issue:`43469`, :issue:`43725`) +- Performance improvement in :meth:`.DataFrameGroupBy.count` and :meth:`.SeriesGroupBy.count` (:issue:`43730`, :issue:`43694`) +- Performance improvement in :meth:`.DataFrameGroupBy.any`, :meth:`.SeriesGroupBy.any`, :meth:`.DataFrameGroupBy.all`, and :meth:`.SeriesGroupBy.all` (:issue:`43675`, :issue:`42841`) +- Performance improvement in :meth:`.DataFrameGroupBy.std` and :meth:`.SeriesGroupBy.std` (:issue:`43115`, :issue:`43576`) +- Performance improvement in :meth:`.DataFrameGroupBy.cumsum` and :meth:`.SeriesGroupBy.cumsum` (:issue:`43309`) - :meth:`SparseArray.min` and :meth:`SparseArray.max` no longer require converting to a dense array (:issue:`43526`) - Indexing into a :class:`SparseArray` with a ``slice`` with ``step=1`` no longer requires converting to a dense array (:issue:`43777`) - Performance improvement in :meth:`SparseArray.take` with ``allow_fill=False`` (:issue:`43654`) @@ -1003,9 +1003,9 @@ Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`) - Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, 
:meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`) -- Bug in :meth:`GroupBy.max` and :meth:`GroupBy.min` with nullable integer dtypes losing precision (:issue:`41743`) +- Bug in :meth:`.DataFrameGroupBy.max`, :meth:`.SeriesGroupBy.max`, :meth:`.DataFrameGroupBy.min`, and :meth:`.SeriesGroupBy.min` with nullable integer dtypes losing precision (:issue:`41743`) - Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`) -- Bug in :meth:`GroupBy.shift` that would return the grouping columns if ``fill_value`` was not ``None`` (:issue:`41556`) +- Bug in :meth:`.DataFrameGroupBy.shift` and :meth:`.SeriesGroupBy.shift` that would return the grouping columns if ``fill_value`` was not ``None`` (:issue:`41556`) - Bug in :meth:`SeriesGroupBy.nlargest` and :meth:`SeriesGroupBy.nsmallest` would have an inconsistent index when the input :class:`Series` was sorted and ``n`` was greater than or equal to all group sizes (:issue:`15272`, :issue:`16345`, :issue:`29129`) - Bug in :meth:`pandas.DataFrame.ewm`, where non-float64 dtypes were silently failing (:issue:`42452`) - Bug in :meth:`pandas.DataFrame.rolling` operation along rows (``axis=1``) incorrectly omits columns containing ``float16`` and ``float32`` (:issue:`41779`) @@ -1013,16 +1013,16 @@ Groupby/resample/rolling - Bug in :meth:`Series.rolling` when the :class:`Series` ``dtype`` was ``Int64`` (:issue:`43016`) - Bug in :meth:`DataFrame.rolling.corr` when the :class:`DataFrame` columns was a :class:`MultiIndex` (:issue:`21157`) - Bug in :meth:`DataFrame.groupby.rolling` when specifying ``on`` and calling ``__getitem__`` would subsequently return incorrect results (:issue:`43355`) -- Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a 
``NaT`` (:issue:`43500`, :issue:`43515`) -- Bug in :meth:`GroupBy.mean` failing with ``complex`` dtype (:issue:`43701`) +- Bug in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`, :issue:`43515`) +- Bug in :meth:`.DataFrameGroupBy.mean` and :meth:`.SeriesGroupBy.mean` failing with ``complex`` dtype (:issue:`43701`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and index is decreasing (:issue:`43927`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` for centered datetimelike windows with uneven nanosecond (:issue:`43997`) -- Bug in :meth:`GroupBy.mean` raising ``KeyError`` when column was selected at least twice (:issue:`44924`) -- Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`) +- Bug in :meth:`.DataFrameGroupBy.mean` and :meth:`.SeriesGroupBy.mean` raising ``KeyError`` when column was selected at least twice (:issue:`44924`) +- Bug in :meth:`.DataFrameGroupBy.nth` and :meth:`.SeriesGroupBy.nth` failing on ``axis=1`` (:issue:`43926`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`3944`) - Bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` when using a :class:`pandas.api.indexers.BaseIndexer` subclass that returned unequal start and end arrays would segfault instead of raising a ``ValueError`` (:issue:`44470`) - Bug in :meth:`Groupby.nunique` not respecting ``observed=True`` for ``categorical`` grouping columns (:issue:`45128`) -- Bug in :meth:`GroupBy.head` and :meth:`GroupBy.tail` not dropping groups with ``NaN`` when ``dropna=True`` (:issue:`45089`) +- Bug in :meth:`.DataFrameGroupBy.head`, :meth:`.SeriesGroupBy.head`, 
:meth:`.DataFrameGroupBy.tail`, and :meth:`.SeriesGroupBy.tail` not dropping groups with ``NaN`` when ``dropna=True`` (:issue:`45089`) - Bug in :meth:`GroupBy.__iter__` after selecting a subset of columns in a :class:`GroupBy` object, which returned all columns instead of the chosen subset (:issue:`44821`) - Bug in :meth:`Groupby.rolling` when non-monotonic data passed, fails to correctly raise ``ValueError`` (:issue:`43909`) - Bug where grouping by a :class:`Series` that has a ``categorical`` data type and length unequal to the axis of grouping raised ``ValueError`` (:issue:`44179`) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index bb9b052cd6e00..61c1628ea14da 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -301,7 +301,7 @@ Other enhancements - :meth:`DataFrame.plot` will now allow the ``subplots`` parameter to be a list of iterables specifying column groups, so that columns may be grouped together in the same subplot (:issue:`29688`). 
- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`) - :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` now support the argument ``allow_duplicates`` (:issue:`44410`) -- :meth:`.GroupBy.min` and :meth:`.GroupBy.max` now supports `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`45428`) +- :meth:`.DataFrameGroupBy.min`, :meth:`.SeriesGroupBy.min`, :meth:`.DataFrameGroupBy.max`, and :meth:`.SeriesGroupBy.max` now support `Numba <https://numba.pydata.org/>`_ execution with the ``engine`` keyword (:issue:`45428`) - :func:`read_csv` now supports ``defaultdict`` as a ``dtype`` parameter (:issue:`41574`) - :meth:`DataFrame.rolling` and :meth:`Series.rolling` now support a ``step`` parameter with fixed-length windows (:issue:`15354`) - Implemented a ``bool``-dtype :class:`Index`, passing a bool-dtype array-like to ``pd.Index`` will now retain ``bool`` dtype instead of casting to ``object`` (:issue:`45061`) @@ -312,7 +312,7 @@ Other enhancements - :meth:`DataFrame.reset_index` now accepts a ``names`` argument which renames the index names (:issue:`6878`) - :func:`concat` now raises when ``levels`` is given but ``keys`` is None (:issue:`46653`) - :func:`concat` now raises when ``levels`` contains duplicate values (:issue:`46653`) -- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.GroupBy.var`, :meth:`.GroupBy.std`, :meth:`.GroupBy.sem`, and :meth:`.DataFrameGroupBy.quantile` (:issue:`46560`) +- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, :meth:`DataFrame.cov`, :meth:`DataFrame.idxmin`, :meth:`DataFrame.idxmax`, :meth:`.DataFrameGroupBy.idxmin`, :meth:`.DataFrameGroupBy.idxmax`, :meth:`.DataFrameGroupBy.var`, :meth:`.SeriesGroupBy.var`, :meth:`.DataFrameGroupBy.std`,
:meth:`.SeriesGroupBy.std`, :meth:`.DataFrameGroupBy.sem`, :meth:`.SeriesGroupBy.sem`, and :meth:`.DataFrameGroupBy.quantile` (:issue:`46560`) - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`, :issue:`46725`) - Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`) - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`) @@ -320,7 +320,7 @@ Other enhancements - ``times`` argument in :class:`.ExponentialMovingWindow` now accepts ``np.timedelta64`` (:issue:`47003`) - :class:`.DataError`, :class:`.SpecificationError`, :class:`.SettingWithCopyError`, :class:`.SettingWithCopyWarning`, :class:`.NumExprClobberingError`, :class:`.UndefinedVariableError`, :class:`.IndexingError`, :class:`.PyperclipException`, :class:`.PyperclipWindowsException`, :class:`.CSSWarning`, :class:`.PossibleDataLossError`, :class:`.ClosedFileError`, :class:`.IncompatibilityWarning`, :class:`.AttributeConflictWarning`, :class:`.DatabaseError`, :class:`.PossiblePrecisionLoss`, :class:`.ValueLabelTypeMismatch`, :class:`.InvalidColumnName`, and :class:`.CategoricalConversionWarning` are now exposed in ``pandas.errors`` (:issue:`27656`) - Added ``check_like`` argument to :func:`testing.assert_series_equal` (:issue:`47247`) -- Add support for :meth:`.GroupBy.ohlc` for extension array dtypes (:issue:`37493`) +- Add support for :meth:`.DataFrameGroupBy.ohlc` and :meth:`.SeriesGroupBy.ohlc` for extension array dtypes (:issue:`37493`) - Allow reading compressed SAS files with :func:`read_sas` (e.g., ``.sas7bdat.gz`` files) - :func:`pandas.read_html` now supports extracting links from table cells (:issue:`13141`) - :meth:`DatetimeIndex.astype` now supports casting timezone-naive indexes to ``datetime64[s]``, ``datetime64[ms]``, and ``datetime64[us]``, and 
timezone-aware indexes to the corresponding ``datetime64[unit, tzname]`` dtypes (:issue:`47579`) @@ -738,11 +738,11 @@ See the documentation of :class:`ExcelWriter` for further details. .. _whatsnew_150.deprecations.group_keys_in_apply: -Using ``group_keys`` with transformers in :meth:`.GroupBy.apply` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Using ``group_keys`` with transformers in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In previous versions of pandas, if it was inferred that the function passed to -:meth:`.GroupBy.apply` was a transformer (i.e. the resulting index was equal to +:meth:`.DataFrameGroupBy.apply` or :meth:`.SeriesGroupBy.apply` was a transformer (i.e. the resulting index was equal to the input index), the ``group_keys`` argument of :meth:`DataFrame.groupby` and :meth:`Series.groupby` was ignored and the group keys would never be added to the index of the result. In the future, the group keys will be added to the index @@ -879,9 +879,9 @@ gained the ``numeric_only`` argument. 
- :meth:`.DataFrameGroupBy.cummax` - :meth:`.DataFrameGroupBy.idxmin` - :meth:`.DataFrameGroupBy.idxmax` -- :meth:`.GroupBy.var` -- :meth:`.GroupBy.std` -- :meth:`.GroupBy.sem` +- :meth:`.DataFrameGroupBy.var` +- :meth:`.DataFrameGroupBy.std` +- :meth:`.DataFrameGroupBy.sem` - :meth:`.DataFrameGroupBy.quantile` - :meth:`.Resampler.mean` - :meth:`.Resampler.median` @@ -944,14 +944,14 @@ Other Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ - Performance improvement in :meth:`DataFrame.corrwith` for column-wise (axis=0) Pearson and Spearman correlation when other is a :class:`Series` (:issue:`46174`) -- Performance improvement in :meth:`.GroupBy.transform` for some user-defined DataFrame -> Series functions (:issue:`45387`) +- Performance improvement in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` for some user-defined DataFrame -> Series functions (:issue:`45387`) - Performance improvement in :meth:`DataFrame.duplicated` when subset consists of only one column (:issue:`45236`) -- Performance improvement in :meth:`.GroupBy.diff` (:issue:`16706`) -- Performance improvement in :meth:`.GroupBy.transform` when broadcasting values for user-defined functions (:issue:`45708`) -- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions when only a single group exists (:issue:`44977`) -- Performance improvement in :meth:`.GroupBy.apply` when grouping on a non-unique unsorted index (:issue:`46527`) +- Performance improvement in :meth:`.DataFrameGroupBy.diff` and :meth:`.SeriesGroupBy.diff` (:issue:`16706`) +- Performance improvement in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` when broadcasting values for user-defined functions (:issue:`45708`) +- Performance improvement in :meth:`.DataFrameGroupBy.transform` and :meth:`.SeriesGroupBy.transform` for user-defined functions when only a single group exists (:issue:`44977`) +- Performance improvement in :meth:`.DataFrameGroupBy.apply` 
and :meth:`.SeriesGroupBy.apply` when grouping on a non-unique unsorted index (:issue:`46527`) - Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`45681`, :issue:`46040`, :issue:`46330`) -- Performance improvement in :meth:`.GroupBy.var` with ``ddof`` other than one (:issue:`48152`) +- Performance improvement in :meth:`.DataFrameGroupBy.var` and :meth:`.SeriesGroupBy.var` with ``ddof`` other than one (:issue:`48152`) - Performance improvement in :meth:`DataFrame.to_records` when the index is a :class:`MultiIndex` (:issue:`47263`) - Performance improvement in :attr:`MultiIndex.values` when the MultiIndex contains levels of type DatetimeIndex, TimedeltaIndex or ExtensionDtypes (:issue:`46288`) - Performance improvement in :func:`merge` when left and/or right are empty (:issue:`45838`) @@ -1169,15 +1169,15 @@ Groupby/resample/rolling - Bug when using ``engine="numba"`` would return the same jitted function when modifying ``engine_kwargs`` (:issue:`46086`) - Bug in :meth:`.DataFrameGroupBy.transform` fails when ``axis=1`` and ``func`` is ``"first"`` or ``"last"`` (:issue:`45986`) - Bug in :meth:`DataFrameGroupBy.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`) -- Bug in :meth:`.GroupBy.sum`, :meth:`.GroupBy.prod` and :meth:`.GroupBy.cumsum` with integer dtypes losing precision (:issue:`37493`) -- Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) -- Bug in :meth:`.GroupBy.cumsum` with integer dtypes causing overflows when sum was bigger than maximum of dtype (:issue:`37493`) -- Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) +- Bug in :meth:`.DataFrameGroupBy.sum`, :meth:`.SeriesGroupBy.sum`, :meth:`.DataFrameGroupBy.prod`, :meth:`.SeriesGroupBy.prod`, :meth:`.DataFrameGroupBy.cumsum`, and
:meth:`.SeriesGroupBy.cumsum` with integer dtypes losing precision (:issue:`37493`) +- Bug in :meth:`.DataFrameGroupBy.cumsum` and :meth:`.SeriesGroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) +- Bug in :meth:`.DataFrameGroupBy.cumsum` and :meth:`.SeriesGroupBy.cumsum` with integer dtypes causing overflows when sum was bigger than maximum of dtype (:issue:`37493`) +- Bug in :meth:`.DataFrameGroupBy.cummin`, :meth:`.SeriesGroupBy.cummin`, :meth:`.DataFrameGroupBy.cummax` and :meth:`.SeriesGroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) - Bug in :meth:`DataFrame.groupby` raising error when ``None`` is in first level of :class:`MultiIndex` (:issue:`47348`) -- Bug in :meth:`.GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`) -- Bug in :meth:`.GroupBy.cumprod` ``NaN`` influences calculation in different columns with ``skipna=False`` (:issue:`48064`) -- Bug in :meth:`.GroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`) -- Bug in :meth:`.GroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`) +- Bug in :meth:`.DataFrameGroupBy.cummax` and :meth:`.SeriesGroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`) +- Bug in :meth:`.DataFrameGroupBy.cumprod` and :meth:`.SeriesGroupBy.cumprod` ``NaN`` influences calculation in different columns with ``skipna=False`` (:issue:`48064`) +- Bug in :meth:`.DataFrameGroupBy.max` and :meth:`.SeriesGroupBy.max` with empty groups and ``uint64`` dtype incorrectly raising ``RuntimeError`` (:issue:`46408`) +- Bug in :meth:`.DataFrameGroupBy.apply` and :meth:`.SeriesGroupBy.apply` would fail when ``func`` was a string and args or kwargs were supplied (:issue:`46479`) - Bug in :meth:`SeriesGroupBy.apply` would incorrectly name 
its result when there was a unique group (:issue:`46369`) - Bug in :meth:`.Rolling.sum` and :meth:`.Rolling.mean` would give incorrect result with window of same values (:issue:`42064`, :issue:`46431`) - Bug in :meth:`.Rolling.var` and :meth:`.Rolling.std` would give non-zero result with window of same values (:issue:`42064`) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 3effcdc4de83a..405b8cc0a5ded 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -28,7 +28,7 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ -- :meth:`.GroupBy.quantile` now preserving nullable dtypes instead of casting to numpy dtypes (:issue:`37493`) +- :meth:`.DataFrameGroupBy.quantile` and :meth:`.SeriesGroupBy.quantile` now preserve nullable dtypes instead of casting to numpy dtypes (:issue:`37493`) - :meth:`Series.add_suffix`, :meth:`DataFrame.add_suffix`, :meth:`Series.add_prefix` and :meth:`DataFrame.add_prefix` support an ``axis`` argument. 
If ``axis`` is set, the default behaviour of which axis to consider can be overwritten (:issue:`47819`) - :func:`assert_frame_equal` now shows the first element where the DataFrames differ, analogously to ``pytest``'s output (:issue:`47910`) - Added ``index`` parameter to :meth:`DataFrame.to_dict` (:issue:`46398`) @@ -103,10 +103,10 @@ Deprecations Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- Performance improvement in :meth:`.GroupBy.median` for nullable dtypes (:issue:`37493`) +- Performance improvement in :meth:`.DataFrameGroupBy.median` and :meth:`.SeriesGroupBy.median` for nullable dtypes (:issue:`37493`) - Performance improvement in :meth:`MultiIndex.argsort` and :meth:`MultiIndex.sort_values` (:issue:`48406`) - Performance improvement in :meth:`MultiIndex.union` without missing values and without duplicates (:issue:`48505`) -- Performance improvement in :meth:`.GroupBy.mean` and :meth:`.GroupBy.var` for extension array dtypes (:issue:`37493`) +- Performance improvement in :meth:`.DataFrameGroupBy.mean`, :meth:`.SeriesGroupBy.mean`, :meth:`.DataFrameGroupBy.var`, and :meth:`.SeriesGroupBy.var` for extension array dtypes (:issue:`37493`) - Performance improvement for :meth:`Series.value_counts` with nullable dtype (:issue:`48338`) - Performance improvement for :class:`Series` constructor passing integer numpy array with nullable dtype (:issue:`48338`) - Performance improvement in :meth:`DataFrame.loc` and :meth:`Series.loc` for tuple-based indexing of a :class:`MultiIndex` (:issue:`48384`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index aa9845a2abb78..358f81d7e3e07 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9087,7 +9087,8 @@ def rank( See Also -------- - core.groupby.GroupBy.rank : Rank of values within each group. + core.groupby.DataFrameGroupBy.rank : Rank of values within each group. + core.groupby.SeriesGroupBy.rank : Rank of values within each group. 
Examples -------- diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 33f3ffa34489e..3383b0480bf2b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -485,15 +485,22 @@ def _transform_general(self, func: Callable, *args, **kwargs) -> Series: def filter(self, func, dropna: bool = True, *args, **kwargs): """ - Return a copy of a Series excluding elements from groups that - do not satisfy the boolean criterion specified by func. + Filter elements from groups that don't satisfy a criterion. + + Elements from groups are filtered if they do not satisfy the + boolean criterion specified by func. Parameters ---------- func : function - To apply to each group. Should return True or False. - dropna : Drop groups that do not pass the filter. True by default; - if False, groups that evaluate False are filled with NaNs. + Criterion to apply to each group. Should return True or False. + dropna : bool + Drop groups that do not pass the filter. True by default; if False, + groups that evaluate False are filled with NaNs. + + Returns + ------- + filtered : Series Notes ----- @@ -513,10 +520,6 @@ def filter(self, func, dropna: bool = True, *args, **kwargs): 3 4 5 6 Name: B, dtype: int64 - - Returns - ------- - filtered : Series """ if isinstance(func, str): wrapper = lambda x: getattr(x, func)(*args, **kwargs) @@ -1483,7 +1486,7 @@ def _transform_item_by_item(self, obj: DataFrame, wrapper) -> DataFrame: def filter(self, func, dropna=True, *args, **kwargs): """ - Return a copy of a DataFrame excluding filtered elements. + Filter elements from groups that don't satisfy a criterion. Elements from groups are filtered if they do not satisfy the boolean criterion specified by func. @@ -1491,9 +1494,10 @@ def filter(self, func, dropna=True, *args, **kwargs): Parameters ---------- func : function - Function to apply to each subframe. Should return True or False. - dropna : Drop groups that do not pass the filter. 
True by default; - If False, groups that evaluate False are filled with NaNs. + Criterion to apply to each group. Should return True or False. + dropna : bool + Drop groups that do not pass the filter. True by default; if False, + groups that evaluate False are filled with NaNs. Returns ------- diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b963b85b93a31..6d9ef3c484971 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2511,9 +2511,9 @@ def first(self, numeric_only: bool = False, min_count: int = -1): -------- DataFrame.groupby : Apply a function groupby to each row or column of a DataFrame. - DataFrame.core.groupby.GroupBy.last : Compute the last non-null entry of each - column. - DataFrame.core.groupby.GroupBy.nth : Take the nth row from each group. + pandas.core.groupby.DataFrameGroupBy.last : Compute the last non-null entry + of each column. + pandas.core.groupby.DataFrameGroupBy.nth : Take the nth row from each group. Examples -------- @@ -2583,9 +2583,9 @@ def last(self, numeric_only: bool = False, min_count: int = -1): -------- DataFrame.groupby : Apply a function groupby to each row or column of a DataFrame. - DataFrame.core.groupby.GroupBy.first : Compute the first non-null entry of each - column. - DataFrame.core.groupby.GroupBy.nth : Take the nth row from each group. + pandas.core.groupby.DataFrameGroupBy.first : Compute the first non-null entry + of each column. + pandas.core.groupby.DataFrameGroupBy.nth : Take the nth row from each group. Examples -------- diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py index e7233484e16b6..d0490b53fa957 100755 --- a/scripts/validate_rst_title_capitalization.py +++ b/scripts/validate_rst_title_capitalization.py @@ -69,6 +69,8 @@ "CategoricalIndex", "Categorical", "GroupBy", + "DataFrameGroupBy", + "SeriesGroupBy", "SPSS", "ORC", "R",