From 8d13d6e01879aa98fb1972c14323f10f8cdeb0f4 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 2 Dec 2020 18:12:53 +0000 Subject: [PATCH 1/2] REGR: unstack on 'int' dtype prevent fillna to work --- doc/source/whatsnew/v1.1.5.rst | 1 + pandas/core/internals/blocks.py | 2 +- pandas/tests/frame/test_stack_unstack.py | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index 0e2e510147603..fbb12cb38448a 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -20,6 +20,7 @@ Fixed regressions - Fixed regression in inplace operations on :class:`Series` with ``ExtensionDtype`` with NumPy dtyped operand (:issue:`37910`) - Fixed regression in metadata propagation for ``groupby`` iterator (:issue:`37343`) - Fixed regression in :class:`MultiIndex` constructed from a :class:`DatetimeIndex` not retaining frequency (:issue:`35563`) +- Fixed regression in :meth:`DataFrame.unstack` with columns with integer dtype (:issue:`37115`) - Fixed regression in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`) - Fixed regression in :meth:`DataFrame.groupby` aggregation with out-of-bounds datetime objects in an object-dtype column (:issue:`36003`) - Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1c08888aa85fd..fe07823a80783 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1542,7 +1542,7 @@ def _unstack(self, unstacker, fill_value, new_placement): new_values = new_values.T[mask] new_placement = new_placement[mask] - blocks = [self.make_block_same_class(new_values, placement=new_placement)] + blocks = [make_block(new_values, placement=new_placement)] return blocks, mask def quantile(self, qs, interpolation="linear", axis: int = 0): diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index c9e737a9dcb0f..66ed68b794c8b 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1880,3 +1880,24 @@ def test_unstack_group_index_overflow(self): s = Series(np.arange(1000), index=index) result = s.unstack(4) assert result.shape == (500, 2) + + def test_unstack_with_missing_int_cast_to_float(self): + # https://github.com/pandas-dev/pandas/issues/37115 + df = DataFrame( + { + "a": ["A", "A", "B"], + "b": ["ca", "cb", "cb"], + "v": [10] * 3, + } + ).set_index(["a", "b"]) + + # add another int column to get 2 blocks + df["is_"] = 1 + assert len(df._mgr.blocks) == 2 + + result = df.unstack("b") + result[("is_", "ca")] = result[("is_", "ca")].fillna(0) + + # This would raise `ValueError: Cannot convert non-finite values (NA or inf) + # to integer` if the fillna operation above fails + result[("is_", "ca")] = result[("is_", "ca")].astype("uint8") From 72b31a8507239af649bb919de55332bd81b70330 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 2 Dec 2020 18:52:12 +0000 Subject: [PATCH 2/2] create explicit expected --- pandas/tests/frame/test_stack_unstack.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 66ed68b794c8b..06e5169fc6016 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -1898,6 +1898,12 @@ def test_unstack_with_missing_int_cast_to_float(self): result = df.unstack("b") result[("is_", "ca")] = result[("is_", "ca")].fillna(0) - # This would raise `ValueError: Cannot convert non-finite values (NA or inf) - # to integer` if the fillna operation above fails - result[("is_", "ca")] = result[("is_", "ca")].astype("uint8") + expected = DataFrame( + [[10.0, 10.0, 1.0, 1.0], [np.nan, 10.0, 0.0, 1.0]], + index=Index(["A", "B"], dtype="object", name="a"), + columns=MultiIndex.from_tuples( + [("v", "ca"), ("v", "cb"), ("is_", "ca"), ("is_", "cb")], + names=[None, "b"], + ), + ) + tm.assert_frame_equal(result, expected)