Skip to content

Commit

Permalink
MAINT: stats.kendalltau: avoid overflow (#18193)
Browse files Browse the repository at this point in the history
  • Loading branch information
mdhaber committed Mar 24, 2023
1 parent d799631 commit 9562a4a
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 4 deletions.
9 changes: 5 additions & 4 deletions scipy/stats/_stats_py.py
Original file line number Diff line number Diff line change
Expand Up @@ -5820,9 +5820,10 @@ def kendalltau(x, y, initial_lexsort=None, nan_policy='propagate',
def count_rank_tie(ranks):
    """Return tie statistics for an array of non-negative integer ranks.

    Parameters
    ----------
    ranks : array_like of non-negative ints
        Rank labels; each distinct value is counted with `np.bincount`.

    Returns
    -------
    tuple of (int, int, int)
        With ``n`` ranging over the sizes of groups that contain more than
        one element:

        - sum of ``n * (n - 1) // 2``
        - sum of ``n * (n - 1) * (n - 2)``
        - sum of ``n * (n - 1) * (2*n + 5)``

        All three are built-in Python ints, not NumPy scalars.
    """
    cnt = np.bincount(ranks).astype('int64', copy=False)
    # Only groups with at least two members contribute to the tie sums.
    cnt = cnt[cnt > 1]
    # Python ints to avoid overflow down the line
    # NOTE(review): the `1.` makes the two cubic products evaluate in
    # floating point -- presumably so the product itself cannot wrap in
    # int64; confirm before changing it to integer arithmetic.
    return (int((cnt * (cnt - 1) // 2).sum()),
            int((cnt * (cnt - 1.) * (cnt - 2)).sum()),
            int((cnt * (cnt - 1.) * (2*cnt + 5)).sum()))

size = x.size
perm = np.argsort(y) # sort on y and convert y to dense ranks
Expand All @@ -5839,7 +5840,7 @@ def count_rank_tie(ranks):
obs = np.r_[True, (x[1:] != x[:-1]) | (y[1:] != y[:-1]), True]
cnt = np.diff(np.nonzero(obs)[0]).astype('int64', copy=False)

ntie = (cnt * (cnt - 1) // 2).sum() # joint ties
ntie = int((cnt * (cnt - 1) // 2).sum()) # joint ties
xtie, x0, x1 = count_rank_tie(x) # ties in x, stats
ytie, y0, y1 = count_rank_tie(y) # ties in y, stats

Expand Down
18 changes: 18 additions & 0 deletions scipy/stats/tests/test_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -1368,6 +1368,24 @@ def test_kendalltau_dep_initial_lexsort():
stats.kendalltau([], [], initial_lexsort=True)


def test_kendalltau_gh18139_overflow():
    """Regression test for the `kendalltau` overflow reported in gh-18139."""
    # gh-18139 reported an overflow in `kendalltau` that appeared after
    # SciPy 0.15.1. Check that this particular overflow does not occur.
    # (Test would fail if warning were emitted.)
    import random
    # Use a private generator rather than reseeding the module-level one, so
    # this test does not disturb global `random` state for other tests. It
    # produces the same sample stream as `random.seed(6272161)` followed by
    # module-level `random.choices` calls, so the references below still hold.
    rng = random.Random(6272161)
    classes = [1, 2, 3, 4, 5, 6, 7]
    n_samples = 2 * 10 ** 5
    x = rng.choices(classes, k=n_samples)
    y = rng.choices(classes, k=n_samples)
    res = stats.kendalltau(x, y)
    # Reference value from SciPy 0.15.1
    assert_allclose(res.statistic, 0.0011816493905730343)
    # Reference p-value from `permutation_test` w/ n_resamples=9999 (default).
    # Expected to be accurate to at least two digits.
    assert_allclose(res.pvalue, 0.4894, atol=2e-3)


class TestKendallTauAlternative:
def test_kendalltau_alternative_asymptotic(self):
# Test alternative parameter, asymptotic method (due to tie)
Expand Down

0 comments on commit 9562a4a

Please sign in to comment.