From b545bf2a4c7f5eb39ed42ec9c4a1b260a7e23af4 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Mon, 17 Oct 2022 09:21:27 +0100 Subject: [PATCH] Revert "PERF: faster corrwith method for pearson and spearman correlation when other is a Series and axis = 0 (column-wise) (#46174)" This reverts commit 5efb570ec3de616dfeb036d0ee622275955b7888. --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/core/frame.py | 42 +--------------------------------- 2 files changed, 2 insertions(+), 42 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 7f968694693f98..3d67f5c4818adb 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -654,7 +654,7 @@ Deprecations In the next major version release, 2.0, several larger API changes are being considered without a formal deprecation such as making the standard library `zoneinfo `_ the default timezone implementation instead of ``pytz``, having the :class:`Index` support all data types instead of having multiple subclasses (:class:`CategoricalIndex`, :class:`Int64Index`, etc.), and more. - The changes under consideration are logged in `this Github issue `_, and any + The changes under consideration are logged in `this GitHub issue `_, and any feedback or concerns are welcome. .. _whatsnew_150.deprecations.int_slicing_series: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b1e03c3c0d9e72..cf32a196ad6cf6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -162,7 +162,6 @@ from pandas.core.array_algos.take import take_2d_multi from pandas.core.arraylike import OpsMixin from pandas.core.arrays import ( - BaseMaskedArray, DatetimeArray, ExtensionArray, PeriodArray, @@ -10581,47 +10580,8 @@ def corrwith( if numeric_only is lib.no_default and len(this.columns) < len(self.columns): com.deprecate_numeric_only_default(type(self), "corrwith") - # GH46174: when other is a Series object and axis=0, we achieve a speedup over - # passing .corr() to .apply() by taking the columns as ndarrays and iterating - # over the transposition row-wise. Then we delegate the correlation coefficient - # computation and null-masking to np.corrcoef and np.isnan respectively, - # which are much faster. We exploit the fact that the Spearman correlation - # of two vectors is equal to the Pearson correlation of their ranks to use - # substantially the same method for Pearson and Spearman, - # just with intermediate argsorts on the latter. if isinstance(other, Series): - if axis == 0 and method in ["pearson", "spearman"]: - corrs = {} - if numeric_only: - cols = self.select_dtypes(include=np.number).columns - else: - cols = self.columns - k = other.values - k_mask = ~other.isna() - if isinstance(k, BaseMaskedArray): - k = k._data - if method == "pearson": - for col in cols: - val = self[col].values - nonnull_mask = ~self[col].isna() & k_mask - if isinstance(val, BaseMaskedArray): - val = val._data - corrs[col] = np.corrcoef(val[nonnull_mask], k[nonnull_mask])[ - 0, 1 - ] - else: - for col in cols: - val = self[col].values - nonnull_mask = ~self[col].isna() & k_mask - if isinstance(val, BaseMaskedArray): - val = val._data - corrs[col] = np.corrcoef( - libalgos.rank_1d(val[nonnull_mask]), - libalgos.rank_1d(k[nonnull_mask]), - )[0, 1] - return Series(corrs) - else: - return this.apply(lambda x: other.corr(x, method=method), axis=axis) + return this.apply(lambda x: other.corr(x, method=method), axis=axis) if numeric_only_bool: other = other._get_numeric_data()