From 08fa73f683c4a57aa7fe605b448c55c393d9d81a Mon Sep 17 00:00:00 2001
From: Yuanhao Geng <41546976+GYHHAHA@users.noreply.github.com>
Date: Mon, 10 Oct 2022 13:31:42 -0500
Subject: [PATCH 1/6] BUG: fix DataFrame.corrwith with method="spearman" on tied data (GH#48826)

---
 doc/source/whatsnew/v1.5.1.rst              |  1 +
 pandas/core/frame.py                        | 28 +++++++++++------
 pandas/tests/frame/methods/test_cov_corr.py | 35 ++++++++++++++++++++-
 3 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/doc/source/whatsnew/v1.5.1.rst b/doc/source/whatsnew/v1.5.1.rst
index 4518c6f544e48..d3a804ff9f400 100644
--- a/doc/source/whatsnew/v1.5.1.rst
+++ b/doc/source/whatsnew/v1.5.1.rst
@@ -88,6 +88,7 @@ Fixed regressions
 - Fixed regression in :meth:`Series.groupby` and :meth:`DataFrame.groupby` when the grouper is a nullable data type (e.g. :class:`Int64`) or a PyArrow-backed string array, contains null values, and ``dropna=False`` (:issue:`48794`)
 - Fixed regression in :meth:`DataFrame.to_parquet` raising when file name was specified as ``bytes`` (:issue:`48944`)
 - Fixed regression in :class:`ExcelWriter` where the ``book`` attribute could no longer be set; however setting this attribute is now deprecated and this ability will be removed in a future version of pandas (:issue:`48780`)
+- Fixed regression in :meth:`DataFrame.corrwith` when computing correlation on tied data with ``method="spearman"`` (:issue:`48826`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8b6235374bed0..0f2619dd2122f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -161,6 +161,7 @@
 from pandas.core.array_algos.take import take_2d_multi
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays import (
+    BaseMaskedArray,
     DatetimeArray,
     ExtensionArray,
     PeriodArray,
@@ -10590,23 +10591,30 @@ def corrwith(
                 corrs = {}
                 if numeric_only:
                     cols = self.select_dtypes(include=np.number).columns
-                    ndf = self[cols].values.transpose()
                 else:
                     cols = self.columns
-                    ndf = self.values.transpose()
                 k = other.values
+                k_mask = ~other.isna()
+                if isinstance(k, BaseMaskedArray):
+                    k = k._data
                 if method == "pearson":
-                    for i, r in enumerate(ndf):
-                        nonnull_mask = ~np.isnan(r) & ~np.isnan(k)
-                        corrs[cols[i]] = np.corrcoef(r[nonnull_mask], k[nonnull_mask])[
+                    for col in cols:
+                        val = self[col].values
+                        nonnull_mask = ~self[col].isna() & k_mask
+                        if isinstance(val, BaseMaskedArray):
+                            val = val._data
+                        corrs[col] = np.corrcoef(val[nonnull_mask], k[nonnull_mask])[
                             0, 1
                         ]
                 else:
-                    for i, r in enumerate(ndf):
-                        nonnull_mask = ~np.isnan(r) & ~np.isnan(k)
-                        corrs[cols[i]] = np.corrcoef(
-                            r[nonnull_mask].argsort().argsort(),
-                            k[nonnull_mask].argsort().argsort(),
+                    for col in cols:
+                        val = self[col].values
+                        nonnull_mask = ~self[col].isna() & k_mask
+                        if isinstance(val, BaseMaskedArray):
+                            val = val._data
+                        corrs[col] = np.corrcoef(
+                            libalgos.rank_1d(val[nonnull_mask]),
+                            libalgos.rank_1d(k[nonnull_mask]),
                         )[0, 1]
                 return Series(corrs)
             else:
diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index ee9af3f436943..2d3cc6ff815cf 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -355,7 +355,10 @@ def test_corrwith_mixed_dtypes(self, numeric_only):
             expected = Series(data=corrs, index=["a", "b"])
             tm.assert_series_equal(result, expected)
         else:
-            with pytest.raises(TypeError, match="not supported for the input types"):
+            with pytest.raises(
+                TypeError,
+                match=r"unsupported operand type\(s\) for /: 'str' and 'int'",
+            ):
                 df.corrwith(s, numeric_only=numeric_only)
 
     def test_corrwith_index_intersection(self):
@@ -406,3 +409,33 @@ def test_corrwith_kendall(self):
         result = df.corrwith(df**2, method="kendall")
         expected = Series(np.ones(len(result)))
         tm.assert_series_equal(result, expected)
+
+    def test_corrwith_spearman_with_tied_data(self):
+        # GH#21925
+        df = DataFrame(
+            {
+                "A": [2, 5, 8, 9],
+                "B": [2, np.nan, 8, 9],
+                "C": Series([2, np.nan, 8, 9], dtype="Int64"),
+                "D": [0, 1, 1, 0],
+                "E": [0, np.nan, 1, 0],
+                "F": Series([0, np.nan, 1, 0], dtype="Float64"),
+                "G": [False, True, True, False],
+                "H": Series([False, pd.NA, True, False], dtype="boolean"),
+            },
+        )
+        ser_list = [
+            Series([0, 1, 1, 0]),
+            Series([0.0, 1.0, 1.0, 0.0]),
+            Series([False, True, True, False]),
+            Series([0, pd.NA, 1, 0], dtype="Int64"),
+            Series([0, pd.NA, 1, 0], dtype="Float64"),
+            Series([False, pd.NA, True, False], dtype="boolean"),
+        ]
+        expected = Series(
+            [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+            index=["A", "B", "C", "D", "E", "F", "G", "H"],
+        )
+        for ser in ser_list:
+            result = df.corrwith(ser, method="spearman", numeric_only=False)
+            tm.assert_series_equal(result, expected)

From 7a219127ff6e47f9f1feb105dcfa456063d82f7d Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Mon, 17 Oct 2022 09:21:27 +0100
Subject: [PATCH 2/6] Revert "PERF: faster corrwith method for pearson and
 spearman correlation when other is a Series and axis = 0 (column-wise)
 (#46174)"

This reverts commit 5efb570ec3de616dfeb036d0ee622275955b7888.
---
 pandas/core/frame.py | 42 +-----------------------------------------
 1 file changed, 1 insertion(+), 41 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 0f2619dd2122f..95de613fd2752 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -161,7 +161,6 @@
 from pandas.core.array_algos.take import take_2d_multi
 from pandas.core.arraylike import OpsMixin
 from pandas.core.arrays import (
-    BaseMaskedArray,
     DatetimeArray,
     ExtensionArray,
     PeriodArray,
@@ -10578,47 +10577,8 @@ def corrwith(
         if numeric_only is lib.no_default and len(this.columns) < len(self.columns):
             com.deprecate_numeric_only_default(type(self), "corrwith")
 
-        # GH46174: when other is a Series object and axis=0, we achieve a speedup over
-        # passing .corr() to .apply() by taking the columns as ndarrays and iterating
-        # over the transposition row-wise. Then we delegate the correlation coefficient
-        # computation and null-masking to np.corrcoef and np.isnan respectively,
-        # which are much faster. We exploit the fact that the Spearman correlation
-        # of two vectors is equal to the Pearson correlation of their ranks to use
-        # substantially the same method for Pearson and Spearman,
-        # just with intermediate argsorts on the latter.
         if isinstance(other, Series):
-            if axis == 0 and method in ["pearson", "spearman"]:
-                corrs = {}
-                if numeric_only:
-                    cols = self.select_dtypes(include=np.number).columns
-                else:
-                    cols = self.columns
-                k = other.values
-                k_mask = ~other.isna()
-                if isinstance(k, BaseMaskedArray):
-                    k = k._data
-                if method == "pearson":
-                    for col in cols:
-                        val = self[col].values
-                        nonnull_mask = ~self[col].isna() & k_mask
-                        if isinstance(val, BaseMaskedArray):
-                            val = val._data
-                        corrs[col] = np.corrcoef(val[nonnull_mask], k[nonnull_mask])[
-                            0, 1
-                        ]
-                else:
-                    for col in cols:
-                        val = self[col].values
-                        nonnull_mask = ~self[col].isna() & k_mask
-                        if isinstance(val, BaseMaskedArray):
-                            val = val._data
-                        corrs[col] = np.corrcoef(
-                            libalgos.rank_1d(val[nonnull_mask]),
-                            libalgos.rank_1d(k[nonnull_mask]),
-                        )[0, 1]
-                return Series(corrs)
-            else:
-                return this.apply(lambda x: other.corr(x, method=method), axis=axis)
+            return this.apply(lambda x: other.corr(x, method=method), axis=axis)
 
         if numeric_only_bool:
             other = other._get_numeric_data()

From f60a87b1a8bd7abdc8487e857989b2ad2a8beb07 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Mon, 17 Oct 2022 09:27:42 +0100
Subject: [PATCH 3/6] fix GH issue number in test

---
 pandas/tests/frame/methods/test_cov_corr.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index 2d3cc6ff815cf..ac9066e06c913 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -411,7 +411,7 @@ def test_corrwith_kendall(self):
         tm.assert_series_equal(result, expected)
 
     def test_corrwith_spearman_with_tied_data(self):
-        # GH#21925
+        # GH#48826
         df = DataFrame(
             {
                 "A": [2, 5, 8, 9],

From b6bd6178f3b1628af093c347750ea849476f450c Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Mon, 17 Oct 2022 09:31:59 +0100
Subject: [PATCH 4/6] add test from original issue

---
 pandas/tests/frame/methods/test_cov_corr.py | 34 ++++++---------------
 1 file changed, 9 insertions(+), 25 deletions(-)

diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index ac9066e06c913..d8372e74e682a 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -412,30 +412,14 @@ def test_corrwith_kendall(self):
 
     def test_corrwith_spearman_with_tied_data(self):
         # GH#48826
-        df = DataFrame(
+        df1 = DataFrame(
             {
-                "A": [2, 5, 8, 9],
-                "B": [2, np.nan, 8, 9],
-                "C": Series([2, np.nan, 8, 9], dtype="Int64"),
-                "D": [0, 1, 1, 0],
-                "E": [0, np.nan, 1, 0],
-                "F": Series([0, np.nan, 1, 0], dtype="Float64"),
-                "G": [False, True, True, False],
-                "H": Series([False, pd.NA, True, False], dtype="boolean"),
-            },
-        )
-        ser_list = [
-            Series([0, 1, 1, 0]),
-            Series([0.0, 1.0, 1.0, 0.0]),
-            Series([False, True, True, False]),
-            Series([0, pd.NA, 1, 0], dtype="Int64"),
-            Series([0, pd.NA, 1, 0], dtype="Float64"),
-            Series([False, pd.NA, True, False], dtype="boolean"),
-        ]
-        expected = Series(
-            [0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0],
-            index=["A", "B", "C", "D", "E", "F", "G", "H"],
+                "A": [1, np.nan, 7, 8],
+                "B": [False, True, True, False],
+                "C": [10, 4, 9, 3],
+            }
         )
-        for ser in ser_list:
-            result = df.corrwith(ser, method="spearman", numeric_only=False)
-            tm.assert_series_equal(result, expected)
+        df2 = df1[["B", "C"]]
+        result = (df1 + 1).corrwith(df2.B, method="spearman")
+        expected = Series([0.0, 1.0, 0.0], index=["A", "B", "C"])
+        tm.assert_series_equal(result, expected)

From 5bb438dc6c099451b427ff3457f288ec5c0f715a Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Mon, 17 Oct 2022 10:25:18 +0100
Subject: [PATCH 5/6] skip if no scipy

---
 pandas/tests/frame/methods/test_cov_corr.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index d8372e74e682a..a5070aef0fa6b 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -410,6 +410,7 @@ def test_corrwith_kendall(self):
         expected = Series(np.ones(len(result)))
         tm.assert_series_equal(result, expected)
 
+    @td.skip_if_no_scipy
     def test_corrwith_spearman_with_tied_data(self):
         # GH#48826
         df1 = DataFrame(

From 3f33d3aef511e54eed93d6a5b4e36de92e70af61 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <>
Date: Mon, 17 Oct 2022 10:29:27 +0100
Subject: [PATCH 6/6] add extra test case

---
 pandas/tests/frame/methods/test_cov_corr.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
index a5070aef0fa6b..25ef49718fbe7 100644
--- a/pandas/tests/frame/methods/test_cov_corr.py
+++ b/pandas/tests/frame/methods/test_cov_corr.py
@@ -424,3 +424,11 @@ def test_corrwith_spearman_with_tied_data(self):
         result = (df1 + 1).corrwith(df2.B, method="spearman")
         expected = Series([0.0, 1.0, 0.0], index=["A", "B", "C"])
         tm.assert_series_equal(result, expected)
+
+        df_bool = DataFrame(
+            {"A": [True, True, False, False], "B": [True, False, False, True]}
+        )
+        ser_bool = Series([True, True, False, True])
+        result = df_bool.corrwith(ser_bool)
+        expected = Series([0.57735, 0.57735], index=["A", "B"])
+        tm.assert_series_equal(result, expected)