MAINT: stats.monte_carlo_test: used biased estimate of p-value (scipy…

…#16721)
tylerjereddy · Jul 28, 2022 · ee9b834 · ee9b834
1 parent 7ecca8d
commit ee9b834
Show file tree

Hide file tree

Showing 2 changed files with 19 additions and 3 deletions.
diff --git a/scipy/stats/_resampling.py b/scipy/stats/_resampling.py
@@ -607,6 +607,13 @@ def monte_carlo_test(sample, rvs, statistic, *, vectorized=False,
     null_distribution : ndarray
         The values of the test statistic generated under the null hypothesis.
 
+    References
+    ----------
+
+    .. [1] B. Phipson and G. K. Smyth. "Permutation P-values Should Never Be
+       Zero: Calculating Exact P-values When Permutations Are Randomly Drawn."
+       Statistical Applications in Genetics and Molecular Biology 9.1 (2010).
+
     Examples
     --------
 
@@ -642,7 +649,7 @@ def monte_carlo_test(sample, rvs, statistic, *, vectorized=False,
     >>> print(res.statistic)
     0.12457412450240658
     >>> print(res.pvalue)
-    0.701070107010701
+    0.7012
 
     The probability of obtaining a test statistic less than or equal to the
     observed value under the null hypothesis is ~70%. This is greater than
@@ -695,12 +702,12 @@ def monte_carlo_test(sample, rvs, statistic, *, vectorized=False,
 
     def less(null_distribution, observed):
         cmps = null_distribution <= observed
-        pvalues = cmps.sum(axis=0) / n_resamples
+        pvalues = (cmps.sum(axis=0) + 1) / (n_resamples + 1)  # see [1]
         return pvalues
 
     def greater(null_distribution, observed):
         cmps = null_distribution >= observed
-        pvalues = cmps.sum(axis=0) / n_resamples
+        pvalues = (cmps.sum(axis=0) + 1) / (n_resamples + 1)  # see [1]
         return pvalues
 
     def two_sided(null_distribution, observed):

diff --git a/scipy/stats/tests/test_resampling.py b/scipy/stats/tests/test_resampling.py
@@ -760,6 +760,15 @@ def statistic1d(x):
         assert_allclose(res.statistic, expected_stat)
         assert_allclose(res.pvalue, expected_p, atol=2*self.atol)
 
+    def test_p_never_zero(self):
+        # Use biased estimate of p-value to ensure that p-value is never zero
+        # per monte_carlo_test reference [1]
+        rng = np.random.default_rng(2190176673029737545)
+        x = np.zeros(100)
+        res = monte_carlo_test(x, rng.random, np.mean,
+                               vectorized=True, alternative='less')
+        assert res.pvalue == 0.0001
+
 
 class TestPermutationTest: