From 4258f505e10b812034ca0cc47596a2016dab487d Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 26 Oct 2022 13:42:09 +0200 Subject: [PATCH 01/14] Backport PR #49322 on branch 1.5.x (DOC: Fix typo in DataFrame.rolling) (#49326) Backport PR #49322: DOC: Fix typo in DataFrame.rolling Co-authored-by: Xnot <28331593+Xnot@users.noreply.github.com> --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index c92c448304de2..1a71b41b0e317 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -994,7 +994,7 @@ class Window(BaseWindow): step : int, default None - ..versionadded:: 1.5.0 + .. versionadded:: 1.5.0 Evaluate the window at every ``step`` result, equivalent to slicing as ``[::step]``. ``window`` must be an integer. Using a step argument other From 722fbc370fabb603f6c6ddef73b60d66303f1a99 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 26 Oct 2022 15:04:15 +0200 Subject: [PATCH 02/14] Backport PR #49286 on branch 1.5.x (TST: update exception messages for lxml tests) (#49303) Backport PR #49286: TST: update exception messages for lxml tests Co-authored-by: jbrockmendel --- pandas/tests/io/xml/test_to_xml.py | 13 ++++++++++--- pandas/tests/io/xml/test_xml.py | 9 ++++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/xml/test_to_xml.py b/pandas/tests/io/xml/test_to_xml.py index d3247eb9dd47e..0f42c7e070c4a 100644 --- a/pandas/tests/io/xml/test_to_xml.py +++ b/pandas/tests/io/xml/test_to_xml.py @@ -1037,9 +1037,16 @@ def test_stylesheet_wrong_path(): def test_empty_string_stylesheet(val): from lxml.etree import XMLSyntaxError - with pytest.raises( - XMLSyntaxError, match=("Document is empty|Start tag expected, '<' not found") - ): + msg = "|".join( + [ + "Document is empty", + "Start tag expected, '<' not found", + # Seen on Mac with lxml 4.9.1 + r"None \(line 0\)", + ] + ) + + with pytest.raises(XMLSyntaxError, match=msg): geom_df.to_xml(stylesheet=val) diff --git a/pandas/tests/io/xml/test_xml.py b/pandas/tests/io/xml/test_xml.py index fd4ba87bd302c..33a98f57310c2 100644 --- a/pandas/tests/io/xml/test_xml.py +++ b/pandas/tests/io/xml/test_xml.py @@ -471,7 +471,14 @@ def test_file_handle_close(datapath, parser): def test_empty_string_lxml(val): from lxml.etree import XMLSyntaxError - with pytest.raises(XMLSyntaxError, match="Document is empty"): + msg = "|".join( + [ + "Document is empty", + # Seen on Mac with lxml 4.91 + r"None \(line 0\)", + ] + ) + with pytest.raises(XMLSyntaxError, match=msg): read_xml(val, parser="lxml") From ebcc47bceffb5a3bb5201943fb023e08a52c42a8 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 26 Oct 2022 16:26:25 +0200 Subject: [PATCH 03/14] =?UTF-8?q?Backport=20PR=20#48890=20on=20branch=201.?= =?UTF-8?q?5.x=20(ERR:=20Improve=20error=20message=20when=20assigning=20a?= =?UTF-8?q?=20complete=20row=20using=20'at'=20m=E2=80=A6)=20(#49310)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backport PR #48890: ERR: Improve error message when assigning a complete row using 'at' m… Co-authored-by: Ambuj Pawar --- pandas/core/frame.py | 8 ++++++++ pandas/tests/indexing/test_at.py | 18 ++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b525bf6f57e88..d3116f83d58cb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -86,6 +86,7 @@ function as nv, np_percentile_argname, ) +from pandas.errors import InvalidIndexError from pandas.util._decorators import ( Appender, Substitution, @@ -4220,6 +4221,13 @@ def _set_value( self.loc[index, col] = value self._item_cache.pop(col, None) + except InvalidIndexError as ii_err: + # GH48729: Seems like you are trying to assign a value to a + # row when only scalar options are permitted + raise InvalidIndexError( + f"You can only assign a scalar value not a {type(value)}" + ) from ii_err + def _ensure_valid_index(self, value) -> None: """ Ensure that if we don't have an index, that we can create one from the diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index 1e502ca70189a..adbc0e2f8127a 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -197,8 +197,12 @@ def test_at_frame_raises_key_error2(self, indexer_al): def test_at_frame_multiple_columns(self): # GH#48296 - at shouldn't modify multiple columns df = DataFrame({"a": [1, 2], "b": [3, 4]}) - with pytest.raises(InvalidIndexError, match=r"slice\(None, None, None\)"): - df.at[5] = [6, 7] + new_row = [6, 7] + with pytest.raises( + InvalidIndexError, + match=f"You can only assign a scalar value not a \\{type(new_row)}", + ): + df.at[5] = new_row def test_at_getitem_mixed_index_no_fallback(self): # GH#19860 @@ -220,3 +224,13 @@ def test_at_categorical_integers(self): for key in [0, 1]: with pytest.raises(KeyError, match=str(key)): df.at[key, key] + + def test_at_applied_for_rows(self): + # GH#48729 .at should raise InvalidIndexError when assigning rows + df = DataFrame(index=["a"], columns=["col1", "col2"]) + new_row = [123, 15] + with pytest.raises( + InvalidIndexError, + match=f"You can only assign a scalar value not a \\{type(new_row)}", + ): + df.at["a"] = new_row From 1396da45442f72a1903bcc9bbd2040f2ed2467ea Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 28 Oct 2022 17:30:23 +0200 Subject: [PATCH 04/14] Backport PR #49308 on branch 1.5.x (DOC: Added pre-commit link inside the guideline for developers.) (#49370) Backport PR #49308: DOC: Added pre-commit link inside the guideline for developers. Co-authored-by: Hatim Zahid <63000127+HatimZ@users.noreply.github.com> --- doc/source/development/contributing_environment.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/development/contributing_environment.rst b/doc/source/development/contributing_environment.rst index 4a70057cf18e3..afa0d0306f1af 100644 --- a/doc/source/development/contributing_environment.rst +++ b/doc/source/development/contributing_environment.rst @@ -10,7 +10,7 @@ To test out code changes, you'll need to build pandas from source, which requires a C/C++ compiler and Python environment. If you're making documentation changes, you can skip to :ref:`contributing to the documentation ` but if you skip creating the development environment you won't be able to build the documentation -locally before pushing your changes. +locally before pushing your changes. It's recommended to also install the :ref:`pre-commit hooks `. .. contents:: Table of contents: :local: From d1b898b9c25706eb0b2f8cff914148c323af4b30 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Mon, 31 Oct 2022 20:58:04 +0100 Subject: [PATCH 05/14] Backport PR #48234 on branch 1.5.x (REGR: Fix regression RecursionError when replacing numeric scalar with None) (#49414) Backport PR #48234: REGR: Fix regression RecursionError when replacing numeric scalar with None Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com> --- doc/source/whatsnew/v1.5.2.rst | 2 +- pandas/core/internals/blocks.py | 6 ++++-- pandas/tests/frame/methods/test_replace.py | 12 ++++++++++++ pandas/tests/series/methods/test_replace.py | 8 ++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.2.rst b/doc/source/whatsnew/v1.5.2.rst index aaf00804262bb..4f6274b9084da 100644 --- a/doc/source/whatsnew/v1.5.2.rst +++ b/doc/source/whatsnew/v1.5.2.rst @@ -13,7 +13,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ -- +- Fixed regression in :meth:`Series.replace` raising ``RecursionError`` with numeric dtype and when specifying ``value=None`` (:issue:`45725`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9c6b3e506b1d4..5e95f83ddfd08 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -569,7 +569,6 @@ def replace( # Note: the checks we do in NDFrame.replace ensure we never get # here with listlike to_replace or value, as those cases # go through replace_list - values = self.values if isinstance(values, Categorical): @@ -608,7 +607,10 @@ def replace( return blocks elif self.ndim == 1 or self.shape[0] == 1: - blk = self.coerce_to_target_dtype(value) + if value is None: + blk = self.astype(np.dtype(object)) + else: + blk = self.coerce_to_target_dtype(value) return blk.replace( to_replace=to_replace, value=value, diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 177f3ec1b4504..f4de685688b00 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1496,6 +1496,18 @@ def test_replace_list_with_mixed_type( result = obj.replace(box(to_replace), value) tm.assert_equal(result, expected) + @pytest.mark.parametrize("val", [2, np.nan, 2.0]) + def test_replace_value_none_dtype_numeric(self, val): + # GH#48231 + df = DataFrame({"a": [1, val]}) + result = df.replace(val, None) + expected = DataFrame({"a": [1, None]}, dtype=object) + tm.assert_frame_equal(result, expected) + + df = DataFrame({"a": [1, val]}) + result = df.replace({val: None}) + tm.assert_frame_equal(result, expected) + class TestDataFrameReplaceRegex: @pytest.mark.parametrize( diff --git a/pandas/tests/series/methods/test_replace.py b/pandas/tests/series/methods/test_replace.py index 77c9cf4013bd7..126a89503d636 100644 --- a/pandas/tests/series/methods/test_replace.py +++ b/pandas/tests/series/methods/test_replace.py @@ -667,3 +667,11 @@ def test_replace_different_int_types(self, any_int_numpy_dtype): result = labs.replace(map_dict) expected = labs.replace({0: 0, 2: 1, 1: 2}) tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize("val", [2, np.nan, 2.0]) + def test_replace_value_none_dtype_numeric(self, val): + # GH#48231 + ser = pd.Series([1, val]) + result = ser.replace(val, None) + expected = pd.Series([1, None], dtype=object) + tm.assert_series_equal(result, expected) From a2da6bd984fd381e85daa2cb105d1e7be94247ee Mon Sep 17 00:00:00 2001 From: KotlinIsland <65446343+KotlinIsland@users.noreply.github.com> Date: Wed, 2 Nov 2022 01:11:38 +1000 Subject: [PATCH 06/14] =?UTF-8?q?Backport=20PR=20#49320=20on=20branch=201.?= =?UTF-8?q?5.x=20((=F0=9F=93=9A)=20update=20docs=20to=20mention=203.11=20s?= =?UTF-8?q?upport)=20(#49440)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backport PR #49320: (📚) update docs to mention 3.11 support --- doc/source/getting_started/install.rst | 2 +- pyproject.toml | 2 +- setup.cfg | 2 ++ 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 00251854e3ffa..31eaa2367b683 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -20,7 +20,7 @@ Instructions for installing from source, Python version support ---------------------- -Officially Python 3.8, 3.9 and 3.10. +Officially Python 3.8, 3.9, 3.10 and 3.11. Installing pandas ----------------- diff --git a/pyproject.toml b/pyproject.toml index 67c56123a847c..54edbfb8ea938 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ requires = [ "setuptools>=51.0.0", "wheel", "Cython>=0.29.32,<3", # Note: sync with setup.py, environment.yml and asv.conf.json - "oldest-supported-numpy>=0.10" + "oldest-supported-numpy>=2022.8.16" ] # uncomment to enable pep517 after versioneer problem is fixed. # https://github.com/python-versioneer/python-versioneer/issues/193 diff --git a/setup.cfg b/setup.cfg index f2314316f7732..cda40dbdfbed0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -22,6 +22,7 @@ classifiers = Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 + Programming Language :: Python :: 3.11 Topic :: Scientific/Engineering project_urls = Bug Tracker = https://github.com/pandas-dev/pandas/issues @@ -33,6 +34,7 @@ packages = find: install_requires = numpy>=1.20.3; python_version<'3.10' numpy>=1.21.0; python_version>='3.10' + numpy>=1.23.2; python_version>='3.11' python-dateutil>=2.8.1 pytz>=2020.1 python_requires = >=3.8 From 250954b116da38f3e94d55ca5d0007d46dc57c83 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Tue, 1 Nov 2022 16:31:21 +0100 Subject: [PATCH 07/14] Backport PR #49437 on branch 1.5.x (CI: maybe fix docs build) (#49444) Backport PR #49437: CI: maybe fix docs build Co-authored-by: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> --- doc/source/whatsnew/v0.13.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst index 8265ad58f7ea3..44223bc694360 100644 --- a/doc/source/whatsnew/v0.13.0.rst +++ b/doc/source/whatsnew/v0.13.0.rst @@ -733,7 +733,7 @@ Enhancements .. _scipy: http://www.scipy.org .. _documentation: http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation -.. _guide: http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html +.. _guide: https://docs.scipy.org/doc/scipy/tutorial/interpolate.html - ``to_csv`` now takes a ``date_format`` keyword argument that specifies how output datetime objects should be formatted. Datetimes encountered in the From e8b037e451b97bd7bd4b11a17c2d2d8d905ffbff Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 2 Nov 2022 00:37:09 +0100 Subject: [PATCH 08/14] Backport PR #48996 on branch 1.5.x (BUG: CoW - correctly track references for chained operations) (#49451) Backport PR #48996: BUG: CoW - correctly track references for chained operations Co-authored-by: Joris Van den Bossche --- doc/source/whatsnew/v1.5.2.rst | 2 +- pandas/_libs/internals.pyx | 12 +- pandas/core/internals/managers.py | 55 ++++++-- pandas/tests/copy_view/test_indexing.py | 152 +++++++++++++++++++++++ pandas/tests/copy_view/test_internals.py | 19 +++ pandas/tests/copy_view/test_methods.py | 49 +++++++- 6 files changed, 270 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v1.5.2.rst b/doc/source/whatsnew/v1.5.2.rst index 4f6274b9084da..ff617e8bc5eef 100644 --- a/doc/source/whatsnew/v1.5.2.rst +++ b/doc/source/whatsnew/v1.5.2.rst @@ -21,7 +21,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Bug in the Copy-on-Write implementation losing track of views in certain chained indexing cases (:issue:`48996`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 94ae4a021da4d..ded161c70f121 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -676,8 +676,9 @@ cdef class BlockManager: public bint _known_consolidated, _is_consolidated public ndarray _blknos, _blklocs public list refs + public object parent - def __cinit__(self, blocks=None, axes=None, refs=None, verify_integrity=True): + def __cinit__(self, blocks=None, axes=None, refs=None, parent=None, verify_integrity=True): # None as defaults for unpickling GH#42345 if blocks is None: # This adds 1-2 microseconds to DataFrame(np.array([])) @@ -690,6 +691,7 @@ cdef class BlockManager: self.blocks = blocks self.axes = axes.copy() # copy to make sure we are not remotely-mutable self.refs = refs + self.parent = parent # Populate known_consolidate, blknos, and blklocs lazily self._known_consolidated = False @@ -805,7 +807,9 @@ cdef class BlockManager: nrefs.append(weakref.ref(blk)) new_axes = [self.axes[0], self.axes[1]._getitem_slice(slobj)] - mgr = type(self)(tuple(nbs), new_axes, nrefs, verify_integrity=False) + mgr = type(self)( + tuple(nbs), new_axes, nrefs, parent=self, verify_integrity=False + ) # We can avoid having to rebuild blklocs/blknos blklocs = self._blklocs @@ -827,4 +831,6 @@ cdef class BlockManager: new_axes = list(self.axes) new_axes[axis] = new_axes[axis]._getitem_slice(slobj) - return type(self)(tuple(new_blocks), new_axes, new_refs, verify_integrity=False) + return type(self)( + tuple(new_blocks), new_axes, new_refs, parent=self, verify_integrity=False + ) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 881cea45bdb34..f55fcead61fae 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -55,6 +55,7 @@ import pandas.core.algorithms as algos from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.arrays.sparse import SparseDtype +import pandas.core.common as com from pandas.core.construction import ( ensure_wrapped_if_datetimelike, extract_array, @@ -146,6 +147,7 @@ class BaseBlockManager(DataManager): blocks: tuple[Block, ...] axes: list[Index] refs: list[weakref.ref | None] | None + parent: object @property def ndim(self) -> int: @@ -163,6 +165,7 @@ def from_blocks( blocks: list[Block], axes: list[Index], refs: list[weakref.ref | None] | None = None, + parent: object = None, ) -> T: raise NotImplementedError @@ -262,6 +265,8 @@ def _clear_reference_block(self, blkno: int) -> None: """ if self.refs is not None: self.refs[blkno] = None + if com.all_none(*self.refs): + self.parent = None def get_dtypes(self): dtypes = np.array([blk.dtype for blk in self.blocks]) @@ -602,7 +607,9 @@ def _combine( axes[-1] = index axes[0] = self.items.take(indexer) - return type(self).from_blocks(new_blocks, axes, new_refs) + return type(self).from_blocks( + new_blocks, axes, new_refs, parent=None if copy else self + ) @property def nblocks(self) -> int: @@ -645,11 +652,14 @@ def copy_func(ax): new_refs: list[weakref.ref | None] | None if deep: new_refs = None + parent = None else: new_refs = [weakref.ref(blk) for blk in self.blocks] + parent = self res.axes = new_axes res.refs = new_refs + res.parent = parent if self.ndim > 1: # Avoid needing to re-compute these @@ -738,6 +748,7 @@ def reindex_indexer( only_slice=only_slice, use_na_proxy=use_na_proxy, ) + parent = None if com.all_none(*new_refs) else self else: new_blocks = [ blk.take_nd( @@ -750,11 +761,12 @@ def reindex_indexer( for blk in self.blocks ] new_refs = None + parent = None new_axes = list(self.axes) new_axes[axis] = new_axis - new_mgr = type(self).from_blocks(new_blocks, new_axes, new_refs) + new_mgr = type(self).from_blocks(new_blocks, new_axes, new_refs, parent=parent) if axis == 1: # We can avoid the need to rebuild these new_mgr._blknos = self.blknos.copy() @@ -989,6 +1001,7 @@ def __init__( blocks: Sequence[Block], axes: Sequence[Index], refs: list[weakref.ref | None] | None = None, + parent: object = None, verify_integrity: bool = True, ) -> None: @@ -1053,11 +1066,13 @@ def from_blocks( blocks: list[Block], axes: list[Index], refs: list[weakref.ref | None] | None = None, + parent: object = None, ) -> BlockManager: """ Constructor for BlockManager and SingleBlockManager with same signature. """ - return cls(blocks, axes, refs, verify_integrity=False) + parent = parent if _using_copy_on_write() else None + return cls(blocks, axes, refs, parent, verify_integrity=False) # ---------------------------------------------------------------- # Indexing @@ -1079,7 +1094,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager: block = new_block(result, placement=slice(0, len(result)), ndim=1) # in the case of a single block, the new block is a view ref = weakref.ref(self.blocks[0]) - return SingleBlockManager(block, self.axes[0], [ref]) + return SingleBlockManager(block, self.axes[0], [ref], parent=self) dtype = interleaved_dtype([blk.dtype for blk in self.blocks]) @@ -1113,7 +1128,7 @@ def fast_xs(self, loc: int) -> SingleBlockManager: block = new_block(result, placement=slice(0, len(result)), ndim=1) return SingleBlockManager(block, self.axes[0]) - def iget(self, i: int) -> SingleBlockManager: + def iget(self, i: int, track_ref: bool = True) -> SingleBlockManager: """ Return the data as a SingleBlockManager. """ @@ -1123,7 +1138,9 @@ def iget(self, i: int) -> SingleBlockManager: # shortcut for select a single-dim from a 2-dim BM bp = BlockPlacement(slice(0, len(values))) nb = type(block)(values, placement=bp, ndim=1) - return SingleBlockManager(nb, self.axes[1], [weakref.ref(block)]) + ref = weakref.ref(block) if track_ref else None + parent = self if track_ref else None + return SingleBlockManager(nb, self.axes[1], [ref], parent) def iget_values(self, i: int) -> ArrayLike: """ @@ -1365,7 +1382,9 @@ def column_setitem(self, loc: int, idx: int | slice | np.ndarray, value) -> None self.blocks = tuple(blocks) self._clear_reference_block(blkno) - col_mgr = self.iget(loc) + # this manager is only created temporarily to mutate the values in place + # so don't track references, otherwise the `setitem` would perform CoW again + col_mgr = self.iget(loc, track_ref=False) new_mgr = col_mgr.setitem((idx,), value) self.iset(loc, new_mgr._block.values, inplace=True) @@ -1463,7 +1482,9 @@ def idelete(self, indexer) -> BlockManager: nbs, new_refs = self._slice_take_blocks_ax0(taker, only_slice=True) new_columns = self.items[~is_deleted] axes = [new_columns, self.axes[1]] - return type(self)(tuple(nbs), axes, new_refs, verify_integrity=False) + # TODO this might not be needed (can a delete ever be done in chained manner?) + parent = None if com.all_none(*new_refs) else self + return type(self)(tuple(nbs), axes, new_refs, parent, verify_integrity=False) # ---------------------------------------------------------------- # Block-wise Operation @@ -1869,6 +1890,7 @@ def __init__( block: Block, axis: Index, refs: list[weakref.ref | None] | None = None, + parent: object = None, verify_integrity: bool = False, fastpath=lib.no_default, ) -> None: @@ -1887,6 +1909,7 @@ def __init__( self.axes = [axis] self.blocks = (block,) self.refs = refs + self.parent = parent if _using_copy_on_write() else None @classmethod def from_blocks( @@ -1894,6 +1917,7 @@ def from_blocks( blocks: list[Block], axes: list[Index], refs: list[weakref.ref | None] | None = None, + parent: object = None, ) -> SingleBlockManager: """ Constructor for BlockManager and SingleBlockManager with same signature. @@ -1902,7 +1926,7 @@ def from_blocks( assert len(axes) == 1 if refs is not None: assert len(refs) == 1 - return cls(blocks[0], axes[0], refs, verify_integrity=False) + return cls(blocks[0], axes[0], refs, parent, verify_integrity=False) @classmethod def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager: @@ -1922,7 +1946,10 @@ def to_2d_mgr(self, columns: Index) -> BlockManager: new_blk = type(blk)(arr, placement=bp, ndim=2) axes = [columns, self.axes[0]] refs: list[weakref.ref | None] = [weakref.ref(blk)] - return BlockManager([new_blk], axes=axes, refs=refs, verify_integrity=False) + parent = self if _using_copy_on_write() else None + return BlockManager( + [new_blk], axes=axes, refs=refs, parent=parent, verify_integrity=False + ) def _has_no_reference(self, i: int = 0) -> bool: """ @@ -2004,7 +2031,7 @@ def getitem_mgr(self, indexer: slice | npt.NDArray[np.bool_]) -> SingleBlockMana new_idx = self.index[indexer] # TODO(CoW) in theory only need to track reference if new_array is a view ref = weakref.ref(blk) - return type(self)(block, new_idx, [ref]) + return type(self)(block, new_idx, [ref], parent=self) def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: # Assertion disabled for performance @@ -2017,7 +2044,9 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: bp = BlockPlacement(slice(0, len(array))) block = type(blk)(array, placement=bp, ndim=1) new_index = self.index._getitem_slice(slobj) - return type(self)(block, new_index, [weakref.ref(blk)]) + # TODO this method is only used in groupby SeriesSplitter at the moment, + # so passing refs / parent is not yet covered by the tests + return type(self)(block, new_index, [weakref.ref(blk)], parent=self) @property def index(self) -> Index: @@ -2064,6 +2093,7 @@ def setitem_inplace(self, indexer, value) -> None: if _using_copy_on_write() and not self._has_no_reference(0): self.blocks = (self._block.copy(),) self.refs = None + self.parent = None self._cache.clear() super().setitem_inplace(indexer, value) @@ -2080,6 +2110,7 @@ def idelete(self, indexer) -> SingleBlockManager: self._cache.clear() # clear reference since delete always results in a new array self.refs = None + self.parent = None return self def fast_xs(self, loc): diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index d917a3c79aa97..444c6ff204b88 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -462,6 +462,158 @@ def test_subset_set_with_column_indexer( tm.assert_frame_equal(df, df_orig) +@pytest.mark.parametrize( + "method", + [ + lambda df: df[["a", "b"]][0:2], + lambda df: df[0:2][["a", "b"]], + lambda df: df[["a", "b"]].iloc[0:2], + lambda df: df[["a", "b"]].loc[0:1], + lambda df: df[0:2].iloc[:, 0:2], + lambda df: df[0:2].loc[:, "a":"b"], # type: ignore[misc] + ], + ids=[ + "row-getitem-slice", + "column-getitem", + "row-iloc-slice", + "row-loc-slice", + "column-iloc-slice", + "column-loc-slice", + ], +) +@pytest.mark.parametrize( + "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] +) +def test_subset_chained_getitem( + request, method, dtype, using_copy_on_write, using_array_manager +): + # Case: creating a subset using multiple, chained getitem calls using views + # still needs to guarantee proper CoW behaviour + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} + ) + df_orig = df.copy() + + # when not using CoW, it depends on whether we have a single block or not + # and whether we are slicing the columns -> in that case we have a view + subset_is_view = request.node.callspec.id in ( + "single-block-column-iloc-slice", + "single-block-column-loc-slice", + ) or ( + request.node.callspec.id + in ("mixed-block-column-iloc-slice", "mixed-block-column-loc-slice") + and using_array_manager + ) + + # modify subset -> don't modify parent + subset = method(df) + subset.iloc[0, 0] = 0 + if using_copy_on_write or (not subset_is_view): + tm.assert_frame_equal(df, df_orig) + else: + assert df.iloc[0, 0] == 0 + + # modify parent -> don't modify subset + subset = method(df) + df.iloc[0, 0] = 0 + expected = DataFrame({"a": [1, 2], "b": [4, 5]}) + if using_copy_on_write or not subset_is_view: + tm.assert_frame_equal(subset, expected) + else: + assert subset.iloc[0, 0] == 0 + + +@pytest.mark.parametrize( + "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"] +) +def test_subset_chained_getitem_column(dtype, using_copy_on_write): + # Case: creating a subset using multiple, chained getitem calls using views + # still needs to guarantee proper CoW behaviour + df = DataFrame( + {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)} + ) + df_orig = df.copy() + + # modify subset -> don't modify parent + subset = df[:]["a"][0:2] + df._clear_item_cache() + subset.iloc[0] = 0 + if using_copy_on_write: + tm.assert_frame_equal(df, df_orig) + else: + assert df.iloc[0, 0] == 0 + + # modify parent -> don't modify subset + subset = df[:]["a"][0:2] + df._clear_item_cache() + df.iloc[0, 0] = 0 + expected = Series([1, 2], name="a") + if using_copy_on_write: + tm.assert_series_equal(subset, expected) + else: + assert subset.iloc[0] == 0 + + +@pytest.mark.parametrize( + "method", + [ + lambda s: s["a":"c"]["a":"b"], # type: ignore[misc] + lambda s: s.iloc[0:3].iloc[0:2], + lambda s: s.loc["a":"c"].loc["a":"b"], # type: ignore[misc] + lambda s: s.loc["a":"c"] # type: ignore[misc] + .iloc[0:3] + .iloc[0:2] + .loc["a":"b"] # type: ignore[misc] + .iloc[0:1], + ], + ids=["getitem", "iloc", "loc", "long-chain"], +) +def test_subset_chained_getitem_series(method, using_copy_on_write): + # Case: creating a subset using multiple, chained getitem calls using views + # still needs to guarantee proper CoW behaviour + s = Series([1, 2, 3], index=["a", "b", "c"]) + s_orig = s.copy() + + # modify subset -> don't modify parent + subset = method(s) + subset.iloc[0] = 0 + if using_copy_on_write: + tm.assert_series_equal(s, s_orig) + else: + assert s.iloc[0] == 0 + + # modify parent -> don't modify subset + subset = s.iloc[0:3].iloc[0:2] + s.iloc[0] = 0 + expected = Series([1, 2], index=["a", "b"]) + if using_copy_on_write: + tm.assert_series_equal(subset, expected) + else: + assert subset.iloc[0] == 0 + + +def test_subset_chained_single_block_row(using_copy_on_write, using_array_manager): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) + df_orig = df.copy() + + # modify subset -> don't modify parent + subset = df[:].iloc[0].iloc[0:2] + subset.iloc[0] = 0 + if using_copy_on_write or using_array_manager: + tm.assert_frame_equal(df, df_orig) + else: + assert df.iloc[0, 0] == 0 + + # modify parent -> don't modify subset + subset = df[:].iloc[0].iloc[0:2] + df.iloc[0, 0] = 0 + expected = Series([1, 4], index=["a", "b"], name=0) + if using_copy_on_write or using_array_manager: + tm.assert_series_equal(subset, expected) + else: + assert subset.iloc[0] == 0 + + # TODO add more tests modifying the parent diff --git a/pandas/tests/copy_view/test_internals.py b/pandas/tests/copy_view/test_internals.py index 2191fc1b33218..edfa7f843f17f 100644 --- a/pandas/tests/copy_view/test_internals.py +++ b/pandas/tests/copy_view/test_internals.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas.util._test_decorators as td @@ -43,3 +44,21 @@ def test_consolidate(using_copy_on_write): subset.iloc[0, 1] = 0.0 assert df._mgr._has_no_reference(1) assert df.loc[0, "b"] == 0.1 + + +@td.skip_array_manager_invalid_test +def test_clear_parent(using_copy_on_write): + # ensure to clear parent reference if we are no longer viewing data from parent + if not using_copy_on_write: + pytest.skip("test only relevant when using copy-on-write") + + df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]}) + subset = df[:] + assert subset._mgr.parent is not None + + # replacing existing columns loses the references to the parent df + subset["a"] = 0 + assert subset._mgr.parent is not None + # when losing the last reference, also the parent should be reset + subset["b"] = 0 + assert subset._mgr.parent is None diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index df723808ce06b..956e2cf98c9b6 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -1,4 +1,5 @@ import numpy as np +import pytest from pandas import ( DataFrame, @@ -156,7 +157,7 @@ def test_to_frame(using_copy_on_write): ser = Series([1, 2, 3]) ser_orig = ser.copy() - df = ser.to_frame() + df = ser[:].to_frame() # currently this always returns a "view" assert np.shares_memory(ser.values, get_array(df, 0)) @@ -169,5 +170,47 @@ def test_to_frame(using_copy_on_write): tm.assert_series_equal(ser, ser_orig) else: # but currently select_dtypes() actually returns a view -> mutates parent - ser_orig.iloc[0] = 0 - tm.assert_series_equal(ser, ser_orig) + expected = ser_orig.copy() + expected.iloc[0] = 0 + tm.assert_series_equal(ser, expected) + + # modify original series -> don't modify dataframe + df = ser[:].to_frame() + ser.iloc[0] = 0 + + if using_copy_on_write: + tm.assert_frame_equal(df, ser_orig.to_frame()) + else: + expected = ser_orig.copy().to_frame() + expected.iloc[0, 0] = 0 + tm.assert_frame_equal(df, expected) + + +@pytest.mark.parametrize( + "method, idx", + [ + (lambda df: df.copy(deep=False).copy(deep=False), 0), + (lambda df: df.reset_index().reset_index(), 2), + (lambda df: df.rename(columns=str.upper).rename(columns=str.lower), 0), + (lambda df: df.copy(deep=False).select_dtypes(include="number"), 0), + ], + ids=["shallow-copy", "reset_index", "rename", "select_dtypes"], +) +def test_chained_methods(request, method, idx, using_copy_on_write): + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + + # when not using CoW, only the copy() variant actually gives a view + df2_is_view = not using_copy_on_write and request.node.callspec.id == "shallow-copy" + + # modify df2 -> don't modify df + df2 = method(df) + df2.iloc[0, idx] = 0 + if not df2_is_view: + tm.assert_frame_equal(df, df_orig) + + # modify df -> don't modify df2 + df2 = method(df) + df.iloc[0, 0] = 0 + if not df2_is_view: + tm.assert_frame_equal(df2.iloc[:, idx:], df_orig) From 1e2cab93f87cad434da1f2b98dd58e8cf5d42381 Mon Sep 17 00:00:00 2001 From: Zachary Moon Date: Wed, 2 Nov 2022 02:46:02 -0600 Subject: [PATCH 09/14] Backport PR #49377 on branch 1.5.x (BUG: Fix passing Colormap instance to plot methods with mpl >= 3.6) (#49453) Backport PR #49377: BUG: Fix passing `Colormap` instance to plot methods with mpl >= 3.6 --- doc/source/whatsnew/v1.5.2.rst | 2 ++ pandas/io/formats/style.py | 10 +++++++++- pandas/plotting/_matplotlib/core.py | 4 ++-- pandas/tests/io/formats/style/test_matplotlib.py | 14 ++++++++++++++ 4 files changed, 27 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.5.2.rst b/doc/source/whatsnew/v1.5.2.rst index ff617e8bc5eef..e65be3bcecd76 100644 --- a/doc/source/whatsnew/v1.5.2.rst +++ b/doc/source/whatsnew/v1.5.2.rst @@ -14,6 +14,8 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Fixed regression in :meth:`Series.replace` raising ``RecursionError`` with numeric dtype and when specifying ``value=None`` (:issue:`45725`) +- Fixed regression in :meth:`DataFrame.plot` preventing :class:`~matplotlib.colors.Colormap` instance + from being passed using the ``colormap`` argument if Matplotlib 3.6+ is used (:issue:`49374`) - .. --------------------------------------------------------------------------- diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 9c3e4f0bb02fb..d1baaebf6c204 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -3927,7 +3927,15 @@ def _background_gradient( rng = smax - smin # extend lower / upper bounds, compresses color range norm = mpl.colors.Normalize(smin - (rng * low), smax + (rng * high)) - rgbas = plt.cm.get_cmap(cmap)(norm(gmap)) + from pandas.plotting._matplotlib.compat import mpl_ge_3_6_0 + + if mpl_ge_3_6_0(): + if cmap is None: + rgbas = mpl.colormaps[mpl.rcParams["image.cmap"]](norm(gmap)) + else: + rgbas = mpl.colormaps.get_cmap(cmap)(norm(gmap)) + else: + rgbas = plt.cm.get_cmap(cmap)(norm(gmap)) def relative_luminance(rgba) -> float: """ diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 1302413916d58..af91a8ab83e12 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1222,7 +1222,7 @@ def _make_plot(self): if self.colormap is not None: if mpl_ge_3_6_0(): - cmap = mpl.colormaps[self.colormap] + cmap = mpl.colormaps.get_cmap(self.colormap) else: cmap = self.plt.cm.get_cmap(self.colormap) else: @@ -1302,7 +1302,7 @@ def _make_plot(self): # pandas uses colormap, matplotlib uses cmap. cmap = self.colormap or "BuGn" if mpl_ge_3_6_0(): - cmap = mpl.colormaps[cmap] + cmap = mpl.colormaps.get_cmap(cmap) else: cmap = self.plt.cm.get_cmap(cmap) cb = self.kwds.pop("colorbar", True) diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index 8d9f075d8674d..52fd5355e3302 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -284,3 +284,17 @@ def test_bar_color_raises(df): msg = "`color` and `cmap` cannot both be given" with pytest.raises(ValueError, match=msg): df.style.bar(color="something", cmap="something else").to_html() + + +@pytest.mark.parametrize( + "plot_method", + ["scatter", "hexbin"], +) +def test_pass_colormap_instance(df, plot_method): + # https://github.com/pandas-dev/pandas/issues/49374 + cmap = mpl.colors.ListedColormap([[1, 1, 1], [0, 0, 0]]) + df["c"] = df.A + df.B + kwargs = dict(x="A", y="B", c="c", colormap=cmap) + if plot_method == "hexbin": + kwargs["C"] = kwargs.pop("c") + getattr(df.plot, plot_method)(**kwargs) From 23358e310cf8f8b44f2c68d0d312af50beedb853 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Wed, 2 Nov 2022 15:06:44 +0100 Subject: [PATCH 10/14] =?UTF-8?q?Backport=20PR=20#49460=20on=20branch=201.?= =?UTF-8?q?5.x=20((=F0=9F=8E=81)=20add=20python=203.11=20to=20sdist.yml)?= =?UTF-8?q?=20(#49471)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backport PR #49460: (🎁) add python 3.11 to sdist.yml Co-authored-by: KotlinIsland <65446343+KotlinIsland@users.noreply.github.com> --- .github/workflows/sdist.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sdist.yml b/.github/workflows/sdist.yml index 14cede7bc1a39..7c20545105009 100644 --- a/.github/workflows/sdist.yml +++ b/.github/workflows/sdist.yml @@ -30,7 +30,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{matrix.python-version}}-sdist @@ -42,7 +42,7 @@ jobs: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v3 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -86,6 +86,8 @@ jobs: pip install numpy==1.20.3 ;; 3.10) pip install numpy==1.21.2 ;; + 3.11) + pip install numpy==1.23.2 ;; esac - name: Import pandas From 0aab994a24dc76f2ea00996ca54d4b3c6fd9cb70 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Thu, 3 Nov 2022 11:13:57 +0100 Subject: [PATCH 11/14] Backport PR #49488 on branch 1.5.x (CI: Remove GHA running on 1.4.x branch) (#49495) Backport PR #49488: CI: Remove GHA running on 1.4.x branch Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- .github/workflows/32-bit-linux.yml | 2 -- .github/workflows/code-checks.yml | 2 -- .github/workflows/docbuild-and-upload.yml | 2 -- .github/workflows/macos-windows.yml | 2 -- .github/workflows/python-dev.yml | 2 -- .github/workflows/sdist.yml | 2 -- .github/workflows/ubuntu.yml | 2 -- 7 files changed, 14 deletions(-) diff --git a/.github/workflows/32-bit-linux.yml b/.github/workflows/32-bit-linux.yml index 8c9f0b594f321..cf8a0fe0da91c 100644 --- a/.github/workflows/32-bit-linux.yml +++ b/.github/workflows/32-bit-linux.yml @@ -5,12 +5,10 @@ on: branches: - main - 1.5.x - - 1.4.x pull_request: branches: - main - 1.5.x - - 1.4.x paths-ignore: - "doc/**" diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 6aff77c708378..738263369409c 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -5,12 +5,10 @@ on: branches: - main - 1.5.x - - 1.4.x pull_request: branches: - main - 1.5.x - - 1.4.x env: ENV_FILE: environment.yml diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml index cfb4966847721..48a08d4febbaf 100644 --- a/.github/workflows/docbuild-and-upload.yml +++ b/.github/workflows/docbuild-and-upload.yml @@ -5,14 +5,12 @@ on: branches: - main - 1.5.x - - 1.4.x tags: - '*' pull_request: branches: - main - 1.5.x - - 1.4.x env: ENV_FILE: environment.yml diff --git a/.github/workflows/macos-windows.yml b/.github/workflows/macos-windows.yml index 8b3d69943bd9d..5da2d0d281edd 100644 --- a/.github/workflows/macos-windows.yml +++ b/.github/workflows/macos-windows.yml @@ -5,12 +5,10 @@ on: branches: - main - 1.5.x - - 1.4.x pull_request: branches: - main - 1.5.x - - 1.4.x paths-ignore: - "doc/**" diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml index 683e694069582..202efef87312b 100644 --- a/.github/workflows/python-dev.yml +++ b/.github/workflows/python-dev.yml @@ -25,12 +25,10 @@ on: branches: - main - 1.5.x - - 1.4.x pull_request: branches: - main - 1.5.x - - 1.4.x paths-ignore: - "doc/**" diff --git a/.github/workflows/sdist.yml b/.github/workflows/sdist.yml index 7c20545105009..9957fc72e9f51 100644 --- a/.github/workflows/sdist.yml +++ b/.github/workflows/sdist.yml @@ -5,12 +5,10 @@ on: branches: - main - 1.5.x - - 1.4.x pull_request: branches: - main - 1.5.x - - 1.4.x types: [labeled, opened, synchronize, reopened] paths-ignore: - "doc/**" diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index b7cddc6bb3d05..be07e85e01a25 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -5,12 +5,10 @@ on: branches: - main - 1.5.x - - 1.4.x pull_request: branches: - main - 1.5.x - - 1.4.x paths-ignore: - "doc/**" From c8018b5102fd979e64346a0599b5374d316e9854 Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 11 Nov 2022 00:05:22 +0100 Subject: [PATCH 12/14] Backport PR #49610 on branch 1.5.x (BUG: date_range with freq="C" (business days) return value changed on 1.5.0) (#49625) Backport PR #49610: BUG: date_range with freq="C" (business days) return value changed on 1.5.0 Co-authored-by: Douglas Lohmann --- doc/source/whatsnew/v1.5.2.rst | 1 + pandas/_libs/tslibs/offsets.pyx | 4 +++- .../tests/indexes/datetimes/test_date_range.py | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.2.rst b/doc/source/whatsnew/v1.5.2.rst index e65be3bcecd76..a46d081aea072 100644 --- a/doc/source/whatsnew/v1.5.2.rst +++ b/doc/source/whatsnew/v1.5.2.rst @@ -16,6 +16,7 @@ Fixed regressions - Fixed regression in :meth:`Series.replace` raising ``RecursionError`` with numeric dtype and when specifying ``value=None`` (:issue:`45725`) - Fixed regression in :meth:`DataFrame.plot` preventing :class:`~matplotlib.colors.Colormap` instance from being passed using the ``colormap`` argument if Matplotlib 3.6+ is used (:issue:`49374`) +- Fixed regression in :func:`date_range` returning an invalid set of periods for ``CustomBusinessDay`` frequency and ``start`` date with timezone (:issue:`49441`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 678ecf103b3d6..851488a89cd43 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -258,7 +258,9 @@ cdef _to_dt64D(dt): if getattr(dt, 'tzinfo', None) is not None: # Get the nanosecond timestamp, # equiv `Timestamp(dt).value` or `dt.timestamp() * 10**9` - naive = dt.astimezone(None) + # The `naive` must be the `dt` naive wall time + # instead of the naive absolute time (GH#49441) + naive = dt.replace(tzinfo=None) dt = np.datetime64(naive, "D") else: dt = np.datetime64(dt) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 377974a918ad9..07f57d3f9c3f4 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1126,6 +1126,24 @@ def test_range_with_millisecond_resolution(self, start_end): expected = DatetimeIndex([start]) tm.assert_index_equal(result, expected) + @pytest.mark.parametrize( + "start,period,expected", + [ + ("2022-07-23 00:00:00+02:00", 1, ["2022-07-25 00:00:00+02:00"]), + ("2022-07-22 00:00:00+02:00", 1, ["2022-07-22 00:00:00+02:00"]), + ( + "2022-07-22 00:00:00+02:00", + 2, + ["2022-07-22 00:00:00+02:00", "2022-07-25 00:00:00+02:00"], + ), + ], + ) + def test_range_with_timezone_and_custombusinessday(self, start, period, expected): + # GH49441 + result = date_range(start=start, periods=period, freq="C") + expected = DatetimeIndex(expected) + tm.assert_index_equal(result, expected) + def test_date_range_with_custom_holidays(): # GH 30593 From 08d02e2c684683a99e013427623fb728d18f17f2 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 11 Nov 2022 12:40:52 +0100 Subject: [PATCH 13/14] Backport PR #49284 on branch 1.5.x (REGR: MultiIndex.join does not work for ea dtypes) (#49627) REGR: MultiIndex.join does not work for ea dtypes (#49284) * REGR: MultiIndex.join does not work for ea dtypes * Update base.py (cherry picked from commit f82b1c6329a1c2de20e453e6bae5fbe0beb69360) --- doc/source/whatsnew/v1.5.2.rst | 1 + pandas/core/indexes/base.py | 6 ++-- pandas/tests/indexes/multi/test_join.py | 48 +++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.2.rst b/doc/source/whatsnew/v1.5.2.rst index a46d081aea072..446235d1656dc 100644 --- a/doc/source/whatsnew/v1.5.2.rst +++ b/doc/source/whatsnew/v1.5.2.rst @@ -13,6 +13,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ +- Fixed regression in :meth:`MultiIndex.join` for extension array dtypes (:issue:`49277`) - Fixed regression in :meth:`Series.replace` raising ``RecursionError`` with numeric dtype and when specifying ``value=None`` (:issue:`45725`) - Fixed regression in :meth:`DataFrame.plot` preventing :class:`~matplotlib.colors.Colormap` instance from being passed using the ``colormap`` argument if Matplotlib 3.6+ is used (:issue:`49374`) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5abd04c29e5d4..677e1dc1a559a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4701,8 +4701,10 @@ def join( return self._join_non_unique(other, how=how) elif not self.is_unique or not other.is_unique: if self.is_monotonic_increasing and other.is_monotonic_increasing: - if self._can_use_libjoin: + if not is_interval_dtype(self.dtype): # otherwise we will fall through to _join_via_get_indexer + # GH#39133 + # go through object dtype for ea till engine is supported properly return self._join_monotonic(other, how=how) else: return self._join_non_unique(other, how=how) @@ -5079,7 +5081,7 @@ def _wrap_joined_index(self: _IndexT, joined: ArrayLike, other: _IndexT) -> _Ind return self._constructor(joined, name=name) # type: ignore[return-value] else: name = get_op_result_name(self, other) - return self._constructor._with_infer(joined, name=name) + return self._constructor._with_infer(joined, name=name, dtype=self.dtype) @cache_readonly def _can_use_libjoin(self) -> bool: diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index e6bec97aedb38..7000724a6b271 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -5,6 +5,8 @@ Index, Interval, MultiIndex, + Series, + StringDtype, ) import pandas._testing as tm @@ -158,3 +160,49 @@ def test_join_overlapping_interval_level(): result = idx_1.join(idx_2, how="outer") tm.assert_index_equal(result, expected) + + +def test_join_midx_ea(): + # GH#49277 + midx = MultiIndex.from_arrays( + [Series([1, 1, 3], dtype="Int64"), Series([1, 2, 3], dtype="Int64")], + names=["a", "b"], + ) + midx2 = MultiIndex.from_arrays( + [Series([1], dtype="Int64"), Series([3], dtype="Int64")], names=["a", "c"] + ) + result = midx.join(midx2, how="inner") + expected = MultiIndex.from_arrays( + [ + Series([1, 1], dtype="Int64"), + Series([1, 2], dtype="Int64"), + Series([3, 3], dtype="Int64"), + ], + names=["a", "b", "c"], + ) + tm.assert_index_equal(result, expected) + + +def test_join_midx_string(): + # GH#49277 + midx = MultiIndex.from_arrays( + [ + Series(["a", "a", "c"], dtype=StringDtype()), + Series(["a", "b", "c"], dtype=StringDtype()), + ], + names=["a", "b"], + ) + midx2 = MultiIndex.from_arrays( + [Series(["a"], dtype=StringDtype()), Series(["c"], dtype=StringDtype())], + names=["a", "c"], + ) + result = midx.join(midx2, how="inner") + expected = MultiIndex.from_arrays( + [ + Series(["a", "a"], dtype=StringDtype()), + Series(["a", "b"], dtype=StringDtype()), + Series(["c", "c"], dtype=StringDtype()), + ], + names=["a", "b", "c"], + ) + tm.assert_index_equal(result, expected) From c9252cfd111fab8f8361613a8ca518de1207d51a Mon Sep 17 00:00:00 2001 From: MeeseeksMachine <39504233+meeseeksmachine@users.noreply.github.com> Date: Fri, 11 Nov 2022 12:41:03 +0100 Subject: [PATCH 14/14] Backport PR #49614 on branch 1.5.x (CI: Updating website sync to new server) (#49634) Backport PR #49614: CI: Updating website sync to new server Co-authored-by: Marc Garcia --- .github/workflows/docbuild-and-upload.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml index 48a08d4febbaf..1db8fb9a70254 100644 --- a/.github/workflows/docbuild-and-upload.yml +++ b/.github/workflows/docbuild-and-upload.yml @@ -64,22 +64,22 @@ jobs: mkdir -m 700 -p ~/.ssh echo "${{ secrets.server_ssh_key }}" > ~/.ssh/id_rsa chmod 600 ~/.ssh/id_rsa - echo "${{ secrets.server_ip }} ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBE1Kkopomm7FHG5enATf7SgnpICZ4W2bw+Ho+afqin+w7sMcrsa0je7sbztFAV8YchDkiBKnWTG4cRT+KZgZCaY=" > ~/.ssh/known_hosts + echo "${{ secrets.server_ip }} ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFjYkJBk7sos+r7yATODogQc3jUdW1aascGpyOD4bohj8dWjzwLJv/OJ/fyOQ5lmj81WKDk67tGtqNJYGL9acII=" > ~/.ssh/known_hosts if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')) - name: Copy cheatsheets into site directory run: cp doc/cheatsheet/Pandas_Cheat_Sheet* web/build/ - name: Upload web - run: rsync -az --delete --exclude='pandas-docs' --exclude='docs' web/build/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas + run: rsync -az --delete --exclude='pandas-docs' --exclude='docs' web/build/ web@${{ secrets.server_ip }}:/var/www/html if: github.event_name == 'push' && github.ref == 'refs/heads/main' - name: Upload dev docs - run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/dev + run: rsync -az --delete doc/build/html/ web@${{ secrets.server_ip }}:/var/www/html/pandas-docs/dev if: github.event_name == 'push' && github.ref == 'refs/heads/main' - name: Upload prod docs - run: rsync -az --delete doc/build/html/ docs@${{ secrets.server_ip }}:/usr/share/nginx/pandas/pandas-docs/version/${GITHUB_REF_NAME:1} + run: rsync -az --delete doc/build/html/ web@${{ secrets.server_ip }}:/var/www/html/pandas-docs/version/${GITHUB_REF_NAME:1} if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/') - name: Move docs into site directory