From 74cd050f6c1c6db1131283c1905f6478b136a8b3 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 10 Nov 2022 11:39:19 -0800
Subject: [PATCH] CLN: test_nanops.py (#49423)

---
 pandas/tests/test_nanops.py | 487 +++++++++++++++++++++++++-----------
 1 file changed, 334 insertions(+), 153 deletions(-)

diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py
index 0e64181bd46a7..ae8791a774ed5 100644
--- a/pandas/tests/test_nanops.py
+++ b/pandas/tests/test_nanops.py
@@ -29,6 +29,175 @@ def skipna(request):
     return request.param
 
 
+@pytest.fixture
+def disable_bottleneck(monkeypatch):
+    with monkeypatch.context() as m:
+        m.setattr(nanops, "_USE_BOTTLENECK", False)
+        yield
+
+
+@pytest.fixture
+def arr_shape():
+    return 11, 7
+
+
+@pytest.fixture
+def arr_float(arr_shape):
+    np.random.seed(11235)
+    return np.random.randn(*arr_shape)
+
+
+@pytest.fixture
+def arr_complex(arr_float):
+    return arr_float + arr_float * 1j
+
+
+@pytest.fixture
+def arr_int(arr_shape):
+    np.random.seed(11235)
+    return np.random.randint(-10, 10, arr_shape)
+
+
+@pytest.fixture
+def arr_bool(arr_shape):
+    np.random.seed(11235)
+    return np.random.randint(0, 2, arr_shape) == 0
+
+
+@pytest.fixture
+def arr_str(arr_float):
+    return np.abs(arr_float).astype("S")
+
+
+@pytest.fixture
+def arr_utf(arr_float):
+    return np.abs(arr_float).astype("U")
+
+
+@pytest.fixture
+def arr_date(arr_shape):
+    np.random.seed(11235)
+    return np.random.randint(0, 20000, arr_shape).astype("M8[ns]")
+
+
+@pytest.fixture
+def arr_tdelta(arr_shape):
+    np.random.seed(11235)
+    return np.random.randint(0, 20000, arr_shape).astype("m8[ns]")
+
+
+@pytest.fixture
+def arr_nan(arr_shape):
+    return np.tile(np.nan, arr_shape)
+
+
+@pytest.fixture
+def arr_float_nan(arr_float, arr_nan):
+    return np.vstack([arr_float, arr_nan])
+
+
+@pytest.fixture
+def arr_nan_float1(arr_nan, arr_float):
+    return np.vstack([arr_nan, arr_float])
+
+
+@pytest.fixture
+def arr_nan_nan(arr_nan):
+    return np.vstack([arr_nan, arr_nan])
+
+
+@pytest.fixture
+def arr_inf(arr_float):
+    return arr_float * np.inf
+
+
+@pytest.fixture
+def arr_float_inf(arr_float, arr_inf):
+    return np.vstack([arr_float, arr_inf])
+
+
+@pytest.fixture
+def arr_nan_inf(arr_nan, arr_inf):
+    return np.vstack([arr_nan, arr_inf])
+
+
+@pytest.fixture
+def arr_float_nan_inf(arr_float, arr_nan, arr_inf):
+    return np.vstack([arr_float, arr_nan, arr_inf])
+
+
+@pytest.fixture
+def arr_nan_nan_inf(arr_nan, arr_inf):
+    return np.vstack([arr_nan, arr_nan, arr_inf])
+
+
+@pytest.fixture
+def arr_obj(
+    arr_float, arr_int, arr_bool, arr_complex, arr_str, arr_utf, arr_date, arr_tdelta
+):
+    return np.vstack(
+        [
+            arr_float.astype("O"),
+            arr_int.astype("O"),
+            arr_bool.astype("O"),
+            arr_complex.astype("O"),
+            arr_str.astype("O"),
+            arr_utf.astype("O"),
+            arr_date.astype("O"),
+            arr_tdelta.astype("O"),
+        ]
+    )
+
+
+@pytest.fixture
+def arr_nan_nanj(arr_nan):
+    with np.errstate(invalid="ignore"):
+        return arr_nan + arr_nan * 1j
+
+
+@pytest.fixture
+def arr_complex_nan(arr_complex, arr_nan_nanj):
+    with np.errstate(invalid="ignore"):
+        return np.vstack([arr_complex, arr_nan_nanj])
+
+
+@pytest.fixture
+def arr_nan_infj(arr_inf):
+    with np.errstate(invalid="ignore"):
+        return arr_inf * 1j
+
+
+@pytest.fixture
+def arr_complex_nan_infj(arr_complex, arr_nan_infj):
+    with np.errstate(invalid="ignore"):
+        return np.vstack([arr_complex, arr_nan_infj])
+
+
+@pytest.fixture
+def arr_float_1d(arr_float):
+    return arr_float[:, 0]
+
+
+@pytest.fixture
+def arr_nan_1d(arr_nan):
+    return arr_nan[:, 0]
+
+
+@pytest.fixture
+def arr_float_nan_1d(arr_float_nan):
+    return arr_float_nan[:, 0]
+
+
+@pytest.fixture
+def arr_float1_nan_1d(arr_float_nan):
+    return arr_float_nan[:, 0]  # float1 fixtures reuse arr_float (cf. arr_nan_float1)
+
+
+@pytest.fixture
+def arr_nan_float1_1d(arr_nan_float1):
+    return arr_nan_float1[:, 0]
+
+
 class TestnanopsDataFrame:
     def setup_method(self):
         np.random.seed(11235)
@@ -299,45 +468,6 @@ def test_nanmean(self, skipna):
             nanops.nanmean, np.mean, skipna, allow_obj=False, allow_date=False
         )
 
-    @pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532])
-    def test_nanmean_overflow(self, val):
-        # GH 10155
-        # In the previous implementation mean can overflow for int dtypes, it
-        # is now consistent with numpy
-
-        ser = Series(val, index=range(500), dtype=np.int64)
-        result = ser.mean()
-        np_result = ser.values.mean()
-        assert result == val
-        assert result == np_result
-        assert result.dtype == np.float64
-
-    @pytest.mark.parametrize(
-        "dtype",
-        [
-            np.int16,
-            np.int32,
-            np.int64,
-            np.float32,
-            np.float64,
-            getattr(np, "float128", None),
-        ],
-    )
-    def test_returned_dtype(self, dtype):
-        if dtype is None:
-            # no float128 available
-            return
-
-        ser = Series(range(10), dtype=dtype)
-        group_a = ["mean", "std", "var", "skew", "kurt"]
-        group_b = ["min", "max"]
-        for method in group_a + group_b:
-            result = getattr(ser, method)()
-            if is_integer_dtype(dtype) and method in group_a:
-                assert result.dtype == np.float64
-            else:
-                assert result.dtype == dtype
-
     def test_nanmedian(self, skipna):
         with warnings.catch_warnings(record=True):
             warnings.simplefilter("ignore", RuntimeWarning)
@@ -623,124 +753,137 @@ def test_nancov(self):
         targ1 = np.cov(self.arr_float_1d.flat, self.arr_float1_1d.flat)[0, 1]
         self.check_nancorr_nancov_1d(nanops.nancov, targ0, targ1)
 
-    @pytest.mark.parametrize(
-        "op,nanop",
-        [
-            (operator.eq, nanops.naneq),
-            (operator.ne, nanops.nanne),
-            (operator.gt, nanops.nangt),
-            (operator.ge, nanops.nange),
-            (operator.lt, nanops.nanlt),
-            (operator.le, nanops.nanle),
-        ],
-    )
-    def test_nan_comparison(self, op, nanop):
-        targ0 = op(self.arr_float, self.arr_float1)
-        arr_float = self.arr_float
-        arr_float1 = self.arr_float1
-        arr_nan = self.arr_nan
-        arr_nan_nan = self.arr_nan_nan
-        arr_float_nan = self.arr_float_nan
-        arr_float1_nan = self.arr_float1_nan
-        arr_nan_float1 = self.arr_nan_float1
-
-        while targ0.ndim:
-            res0 = nanop(arr_float, arr_float1)
-            tm.assert_almost_equal(targ0, res0)
-
-            if targ0.ndim > 1:
-                targ1 = np.vstack([targ0, arr_nan])
-            else:
-                targ1 = np.hstack([targ0, arr_nan])
-            res1 = nanop(arr_float_nan, arr_float1_nan)
-            tm.assert_numpy_array_equal(targ1, res1, check_dtype=False)
-
-            targ2 = arr_nan_nan
-            res2 = nanop(arr_float_nan, arr_nan_float1)
-            tm.assert_numpy_array_equal(targ2, res2, check_dtype=False)
-
-            # Lower dimension for next step in the loop
-            arr_float = np.take(arr_float, 0, axis=-1)
-            arr_float1 = np.take(arr_float1, 0, axis=-1)
-            arr_nan = np.take(arr_nan, 0, axis=-1)
-            arr_nan_nan = np.take(arr_nan_nan, 0, axis=-1)
-            arr_float_nan = np.take(arr_float_nan, 0, axis=-1)
-            arr_float1_nan = np.take(arr_float1_nan, 0, axis=-1)
-            arr_nan_float1 = np.take(arr_nan_float1, 0, axis=-1)
-            targ0 = np.take(targ0, 0, axis=-1)
 
-    @pytest.mark.parametrize(
-        "arr, correct",
-        [
-            ("arr_complex", False),
-            ("arr_int", False),
-            ("arr_bool", False),
-            ("arr_str", False),
-            ("arr_utf", False),
-            ("arr_complex", False),
-            ("arr_complex_nan", False),
-            ("arr_nan_nanj", False),
-            ("arr_nan_infj", True),
-            ("arr_complex_nan_infj", True),
-        ],
-    )
-    def test__has_infs_non_float(self, arr, correct):
-        val = getattr(self, arr)
-        while getattr(val, "ndim", True):
-            res0 = nanops._has_infs(val)
-            if correct:
-                assert res0
-            else:
-                assert not res0
+@pytest.mark.parametrize(
+    "op,nanop",
+    [
+        (operator.eq, nanops.naneq),
+        (operator.ne, nanops.nanne),
+        (operator.gt, nanops.nangt),
+        (operator.ge, nanops.nange),
+        (operator.lt, nanops.nanlt),
+        (operator.le, nanops.nanle),
+    ],
+)
+def test_nan_comparison(request, op, nanop, disable_bottleneck):
+    """Check each nanops comparison against its operator on 2-d, then 1-d, data."""
+    arr_float = request.getfixturevalue("arr_float")
+    # Second operand: drawn after the arr_float fixture has seeded the global
+    # RNG, so it differs from arr_float and the comparisons are non-trivial.
+    arr_float1 = np.random.randn(*arr_float.shape)
+    targ0 = op(arr_float, arr_float1)
+    arr_nan = request.getfixturevalue("arr_nan")
+    arr_nan_nan = request.getfixturevalue("arr_nan_nan")
+    arr_float_nan = request.getfixturevalue("arr_float_nan")
+    arr_float1_nan = np.vstack([arr_float1, arr_nan])
+    arr_nan_float1 = np.vstack([arr_nan, arr_float1])
+
+    while targ0.ndim:
+        tm.assert_almost_equal(targ0, nanop(arr_float, arr_float1))
+
+        # Expected: targ0 followed by NaN wherever the operands are NaN
+        stack = np.vstack if targ0.ndim > 1 else np.hstack
+        targ1 = stack([targ0, arr_nan])
+        res1 = nanop(arr_float_nan, arr_float1_nan)
+        tm.assert_numpy_array_equal(targ1, res1, check_dtype=False)
+
+        res2 = nanop(arr_float_nan, arr_nan_float1)
+        tm.assert_numpy_array_equal(arr_nan_nan, res2, check_dtype=False)
+
+        # Lower dimension for next step in the loop
+        arr_float = np.take(arr_float, 0, axis=-1)
+        arr_float1 = np.take(arr_float1, 0, axis=-1)
+        arr_nan = np.take(arr_nan, 0, axis=-1)
+        arr_nan_nan = np.take(arr_nan_nan, 0, axis=-1)
+        arr_float_nan = np.take(arr_float_nan, 0, axis=-1)
+        arr_float1_nan = np.take(arr_float1_nan, 0, axis=-1)
+        arr_nan_float1 = np.take(arr_nan_float1, 0, axis=-1)
+        targ0 = np.take(targ0, 0, axis=-1)
 
-            if not hasattr(val, "ndim"):
-                break
 
-            # Reduce dimension for next step in the loop
-            val = np.take(val, 0, axis=-1)
+@pytest.mark.parametrize(
+    "arr, correct",
+    [
+        ("arr_complex", False),
+        ("arr_int", False),
+        ("arr_bool", False),
+        ("arr_str", False),
+        ("arr_utf", False),
+        ("arr_complex_nan", False),
+        ("arr_nan_nanj", False),
+        ("arr_nan_infj", True),
+        ("arr_complex_nan_infj", True),
+    ],
+)
+def test_has_infs_non_float(request, arr, correct, disable_bottleneck):
+    """
+    Check ``nanops._has_infs`` on non-float input at each dimensionality.
+
+    ``arr`` names the fixture that supplies the array and ``correct`` is the
+    expected truthiness of the result.
+    """
+    val = request.getfixturevalue(arr)
+    # np.take on a 1-d array returns a 0-d numpy scalar, which ends the loop
+    while val.ndim:
+        res0 = nanops._has_infs(val)
+        assert bool(res0) == correct
+
+        # Reduce dimension for next step in the loop
+        val = np.take(val, 0, axis=-1)
 
-    @pytest.mark.parametrize(
-        "arr, correct",
-        [
-            ("arr_float", False),
-            ("arr_nan", False),
-            ("arr_float_nan", False),
-            ("arr_nan_nan", False),
-            ("arr_float_inf", True),
-            ("arr_inf", True),
-            ("arr_nan_inf", True),
-            ("arr_float_nan_inf", True),
-            ("arr_nan_nan_inf", True),
-        ],
-    )
-    @pytest.mark.parametrize("astype", [None, "f4", "f2"])
-    def test__has_infs_floats(self, arr, correct, astype):
-        val = getattr(self, arr)
-        if astype is not None:
-            val = val.astype(astype)
-        while getattr(val, "ndim", True):
-            res0 = nanops._has_infs(val)
-            if correct:
-                assert res0
-            else:
-                assert not res0
 
-            if not hasattr(val, "ndim"):
-                break
+@pytest.mark.parametrize(
+    "arr, correct",
+    [
+        ("arr_float", False),
+        ("arr_nan", False),
+        ("arr_float_nan", False),
+        ("arr_nan_nan", False),
+        ("arr_float_inf", True),
+        ("arr_inf", True),
+        ("arr_nan_inf", True),
+        ("arr_float_nan_inf", True),
+        ("arr_nan_nan_inf", True),
+    ],
+)
+@pytest.mark.parametrize("astype", [None, "f4", "f2"])
+def test_has_infs_floats(request, arr, correct, astype, disable_bottleneck):
+    val = request.getfixturevalue(arr)
+    if astype is not None:
+        val = val.astype(astype)
+    while getattr(val, "ndim", True):
+        res0 = nanops._has_infs(val)
+        if correct:
+            assert res0
+        else:
+            assert not res0
 
-            # Reduce dimension for next step in the loop
-            val = np.take(val, 0, axis=-1)
+        if not hasattr(val, "ndim"):
+            break
 
-    def test__bn_ok_dtype(self):
-        assert nanops._bn_ok_dtype(self.arr_float.dtype, "test")
-        assert nanops._bn_ok_dtype(self.arr_complex.dtype, "test")
-        assert nanops._bn_ok_dtype(self.arr_int.dtype, "test")
-        assert nanops._bn_ok_dtype(self.arr_bool.dtype, "test")
-        assert nanops._bn_ok_dtype(self.arr_str.dtype, "test")
-        assert nanops._bn_ok_dtype(self.arr_utf.dtype, "test")
-        assert not nanops._bn_ok_dtype(self.arr_date.dtype, "test")
-        assert not nanops._bn_ok_dtype(self.arr_tdelta.dtype, "test")
-        assert not nanops._bn_ok_dtype(self.arr_obj.dtype, "test")
+        # Reduce dimension for next step in the loop
+        val = np.take(val, 0, axis=-1)
+
+
+@pytest.mark.parametrize(
+    "fixture", ["arr_float", "arr_complex", "arr_int", "arr_bool", "arr_str", "arr_utf"]
+)
+def test_bn_ok_dtype(fixture, request, disable_bottleneck):
+    """``nanops._bn_ok_dtype`` accepts the dtype of each listed fixture array."""
+    assert nanops._bn_ok_dtype(request.getfixturevalue(fixture).dtype, "test")
+
+
+@pytest.mark.parametrize("fixture", ["arr_date", "arr_tdelta", "arr_obj"])
+def test_bn_not_ok_dtype(fixture, request, disable_bottleneck):
+    """
+    ``nanops._bn_ok_dtype`` rejects the dtype of each listed fixture array.
+
+    The fixtures cover datetime64, timedelta64 and object arrays.
+    """
+    arr = request.getfixturevalue(fixture)
+    dtype_ok = nanops._bn_ok_dtype(arr.dtype, "test")
+
+    assert not dtype_ok
 
 
 class TestEnsureNumeric:
@@ -1111,7 +1254,7 @@ def test_nanops_independent_of_mask_param(operation):
 
 
 @pytest.mark.parametrize("min_count", [-1, 0])
-def test_check_below_min_count__negative_or_zero_min_count(min_count):
+def test_check_below_min_count_negative_or_zero_min_count(min_count):
     # GH35227
     result = nanops.check_below_min_count((21, 37), None, min_count)
     expected_result = False
@@ -1122,7 +1265,7 @@ def test_check_below_min_count__negative_or_zero_min_count(min_count):
     "mask", [None, np.array([False, False, True]), np.array([True] + 9 * [False])]
 )
 @pytest.mark.parametrize("min_count, expected_result", [(1, False), (101, True)])
-def test_check_below_min_count__positive_min_count(mask, min_count, expected_result):
+def test_check_below_min_count_positive_min_count(mask, min_count, expected_result):
     # GH35227
     shape = (10, 10)
     result = nanops.check_below_min_count(shape, mask, min_count)
@@ -1132,7 +1275,7 @@ def test_check_below_min_count__positive_min_count(mask, min_count, expected_res
 @td.skip_if_windows
 @td.skip_if_32bit
 @pytest.mark.parametrize("min_count, expected_result", [(1, False), (2812191852, True)])
-def test_check_below_min_count__large_shape(min_count, expected_result):
+def test_check_below_min_count_large_shape(min_count, expected_result):
     # GH35227 large shape used to show that the issue is fixed
     shape = (2244367, 1253)
     result = nanops.check_below_min_count(shape, mask=None, min_count=min_count)
@@ -1143,3 +1286,41 @@ def test_check_below_min_count__large_shape(min_count, expected_result):
 def test_check_bottleneck_disallow(any_real_numpy_dtype, func):
     # GH 42878 bottleneck sometimes produces unreliable results for mean and sum
     assert not nanops._bn_ok_dtype(np.dtype(any_real_numpy_dtype).type, func)
+
+
+@pytest.mark.parametrize("val", [2**55, -(2**55), 20150515061816532])
+def test_nanmean_overflow(disable_bottleneck, val):
+    # GH 10155
+    # Series.mean on large int64 values previously could overflow; the result
+    # must now equal both the constant itself and numpy's own mean.
+    constant = Series(val, index=range(500), dtype=np.int64)
+    expected = constant.values.mean()
+
+    result = constant.mean()
+    assert result == val
+    assert result == expected
+    assert result.dtype == np.float64
+
+
+@pytest.mark.parametrize(
+    "dtype",
+    [
+        np.int16,
+        np.int32,
+        np.int64,
+        np.float32,
+        np.float64,
+        getattr(np, "float128", None),
+    ],
+)
+@pytest.mark.parametrize("method", ["mean", "std", "var", "skew", "kurt", "min", "max"])
+def test_returned_dtype(disable_bottleneck, dtype, method):
+    """Reductions keep the input dtype, except non-min/max on integers -> float64."""
+    if dtype is None:
+        pytest.skip("np.float128 not available")
+
+    # min/max preserve integer dtypes; the other reductions return float64
+    keeps_dtype = method in ["min", "max"] or not is_integer_dtype(dtype)
+    expected_dtype = dtype if keeps_dtype else np.float64
+
+    assert getattr(Series(range(10), dtype=dtype), method)().dtype == expected_dtype