From d9dec94ece852eb25b7c37c6fb48be0ab44aa8d7 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Tue, 17 Jan 2023 21:33:33 +0100 Subject: [PATCH] Backport PR #50682 on branch 1.5.x (BUG: pivot_table with nested elements and numpy 1.24) (#50792) Backport PR #50682: BUG: pivot_table with nested elements and numpy 1.24 Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v1.5.3.rst | 2 +- pandas/core/common.py | 12 +++++- pandas/tests/reshape/test_pivot.py | 69 ++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.3.rst b/doc/source/whatsnew/v1.5.3.rst index 489a6fda9ffab..6abbff9ed872c 100644 --- a/doc/source/whatsnew/v1.5.3.rst +++ b/doc/source/whatsnew/v1.5.3.rst @@ -32,8 +32,8 @@ Bug fixes - Bug in :meth:`Series.quantile` emitting warning from NumPy when :class:`Series` has only ``NA`` values (:issue:`50681`) - Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`) - Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`) +- Bug in :func:`pivot_table` with NumPy 1.24 or greater when the :class:`DataFrame` columns have nested elements (:issue:`50342`) - Bug in :func:`pandas.testing.assert_series_equal` (and equivalent ``assert_`` functions) when having nested data and using numpy >= 1.25 (:issue:`50360`) -- .. --------------------------------------------------------------------------- .. 
_whatsnew_153.other: diff --git a/pandas/core/common.py b/pandas/core/common.py index 980e7a79414ba..641ddba0222e9 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -242,7 +242,17 @@ def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLi if isinstance(values, list) and dtype in [np.object_, object]: return construct_1d_object_array_from_listlike(values) - result = np.asarray(values, dtype=dtype) + try: + with warnings.catch_warnings(): + # Can remove warning filter once NumPy 1.24 is min version + warnings.simplefilter("ignore", np.VisibleDeprecationWarning) + result = np.asarray(values, dtype=dtype) + except ValueError: + # Using try/except since it's more performant than checking is_list_like + # over each element + # error: Argument 1 to "construct_1d_object_array_from_listlike" + # has incompatible type "Iterable[Any]"; expected "Sized" + return construct_1d_object_array_from_listlike(values) # type: ignore[arg-type] if issubclass(result.dtype.type, str): result = np.asarray(values, dtype=object) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f7a00bbe9fef6..416ff104b4cae 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2276,6 +2276,75 @@ def test_pivot_table_datetime_warning(self): ) tm.assert_frame_equal(result, expected) + def test_pivot_table_with_mixed_nested_tuples(self, using_array_manager): + # GH 50342 + df = DataFrame( + { + "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"], + "B": ["one", "one", "one", "two", "two", "one", "one", "two", "two"], + "C": [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + "D": [1, 2, 2, 3, 3, 4, 5, 6, 7], + "E": [2, 4, 5, 5, 6, 6, 8, 9, 9], + ("col5",): [ + "foo", + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + ], + ("col6", 6): [ + "one", + "one", + "one", + "two", + "two", + "one", + "one", + 
"two", + "two", + ], + (7, "seven"): [ + "small", + "large", + "large", + "small", + "small", + "large", + "small", + "small", + "large", + ], + } + ) + result = pivot_table( + df, values="D", index=["A", "B"], columns=[(7, "seven")], aggfunc=np.sum + ) + expected = DataFrame( + [[4.0, 5.0], [7.0, 6.0], [4.0, 1.0], [np.nan, 6.0]], + columns=Index(["large", "small"], name=(7, "seven")), + index=MultiIndex.from_arrays( + [["bar", "bar", "foo", "foo"], ["one", "two"] * 2], names=["A", "B"] + ), + ) + if using_array_manager: + # INFO(ArrayManager) column without NaNs can preserve int dtype + expected["small"] = expected["small"].astype("int64") + tm.assert_frame_equal(result, expected) + class TestPivot: def test_pivot(self):