ENH: stats.gaussian_kde: replace use of inv_cov in pdf (#16692)
* Remove more uses of inv() and replace with cho_solve()

* ENH: stats.gaussian_kde: try to use cholesky

* Still not working?

* ENH: stats.gaussian_kde: finish replacing use of inv_cov in pdf

* Apply suggestions from code review

* Apply suggestions from code review

[skip azp] [skip circleci]

* MAINT: stats.kde: move dtype assurance

* Update scipy/stats/_kde.py

Co-authored-by: Damon McDougall <damon.mcdougall@gmail.com>
mdhaber and dmcdougall committed Aug 26, 2022
1 parent 2dc7a02 commit 00315a5
Showing 3 changed files with 40 additions and 34 deletions.
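For context, the change rests on a standard identity: if the kernel covariance C has a Cholesky-style factorization C = L @ L.T, then the exponent (x - mu)^T C^-1 (x - mu) equals the squared norm of the triangular solve L^-1 (x - mu), so no explicit inverse is needed. A minimal sketch of that identity (illustrative only; C, L, and x are synthetic names, not part of the diff):

import numpy as np
from scipy import linalg

rng = np.random.default_rng(0)
A = rng.standard_normal((3, 3))
C = A @ A.T + 3 * np.eye(3)          # synthetic SPD "covariance"
x = rng.standard_normal(3)           # synthetic residual (x - mu)

# Old style: explicit inverse, as self.inv_cov used to provide.
quad_inv = x @ linalg.inv(C) @ x

# New style: Cholesky factor plus a triangular solve.
L = linalg.cholesky(C, lower=True)   # C = L @ L.T
z = linalg.solve_triangular(L, x, lower=True)
quad_chol = z @ z

print(np.allclose(quad_inv, quad_chol))  # True

Working from the factor avoids forming and multiplying by the inverse, which is generally the better-conditioned route when the covariance is close to singular.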
32 changes: 24 additions & 8 deletions scipy/stats/_kde.py
@@ -266,8 +266,11 @@ def evaluate(self, points):
             else:
                 raise TypeError('%s has unexpected item size %d' %
                                 (output_dtype, itemsize))
-        result = gaussian_kernel_estimate[spec](self.dataset.T, self.weights[:, None],
-                                                points.T, self.inv_cov, output_dtype)
+
+        result = gaussian_kernel_estimate[spec](
+            self.dataset.T, self.weights[:, None],
+            points.T, self.cho_cov, output_dtype)
+
         return result[:, 0]
 
     __call__ = evaluate
@@ -574,17 +577,30 @@ def _compute_covariance(self):
         covariance_factor().
         """
         self.factor = self.covariance_factor()
-        # Cache covariance and inverse covariance of the data
-        if not hasattr(self, '_data_inv_cov'):
+        # Cache covariance and permuted cholesky decomp of permuted covariance
+        if not hasattr(self, '_data_cho_cov'):
             self._data_covariance = atleast_2d(cov(self.dataset, rowvar=1,
                                                bias=False,
                                                aweights=self.weights))
-            self._data_inv_cov = linalg.inv(self._data_covariance)
+            self._data_cho_cov = linalg.cholesky(
+                self._data_covariance[::-1, ::-1]).T[::-1, ::-1]
 
         self.covariance = self._data_covariance * self.factor**2
-        self.inv_cov = (self._data_inv_cov / self.factor**2).astype(np.float64)
-        L = linalg.cholesky(self.covariance*2*pi)
-        self.log_det = 2*np.log(np.diag(L)).sum()
+        self.cho_cov = (self._data_cho_cov * self.factor).astype(np.float64)
+        self.log_det = 2*np.log(np.diag(self.cho_cov
+                                        * np.sqrt(2*pi))).sum()
+
+    @property
+    def inv_cov(self):
+        # Re-compute from scratch each time because I'm not sure how this is
+        # used in the wild. (Perhaps users change the `dataset`, since it's
+        # not a private attribute?) `_compute_covariance` used to recalculate
+        # all these, so we'll recalculate everything now that this is a
+        # property.
+        self.factor = self.covariance_factor()
+        self._data_covariance = atleast_2d(cov(self.dataset, rowvar=1,
+                                               bias=False, aweights=self.weights))
+        return linalg.inv(self._data_covariance) / self.factor**2
 
     def pdf(self, x):
         """
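The construction cached as _data_cho_cov above looks unusual; here is a small sketch (C and R are illustrative names, not from the diff) of what the "permuted cholesky decomp of permuted covariance" yields: an upper-triangular factor R with C = R @ R.T, whose scaled diagonal reproduces the log|2*pi*C| stored in self.log_det:

import numpy as np
from scipy import linalg

rng = np.random.default_rng(0)
A = rng.standard_normal((4, 4))
C = A @ A.T + 4 * np.eye(4)                        # synthetic SPD covariance

# Same construction as _compute_covariance above.
R = linalg.cholesky(C[::-1, ::-1]).T[::-1, ::-1]

print(np.allclose(R, np.triu(R)))                  # upper triangular: True
print(np.allclose(R @ R.T, C))                     # factors C: True

# Diagonal gives the log-determinant used for self.log_det.
log_det = 2 * np.log(np.diag(R * np.sqrt(2 * np.pi))).sum()
print(np.allclose(log_det, np.log(linalg.det(2 * np.pi * C))))  # True

Note that inv_cov remains a public attribute via the new property, recomputed from the dataset on each access, so existing user code that reads it keeps working.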
26 changes: 14 additions & 12 deletions scipy/stats/_stats.pyx
@@ -9,6 +9,7 @@ from numpy cimport ndarray, int64_t, float64_t, intp_t
 import warnings
 import numpy as np
 import scipy.stats, scipy.special
+from scipy.linalg import solve_triangular
 cimport scipy.special.cython_special as cs
 
 np.import_array()
@@ -706,10 +707,9 @@ ctypedef fused real:
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def gaussian_kernel_estimate(points, values, xi, precision, dtype, real _=0):
+def gaussian_kernel_estimate(points, values, xi, cho_cov, dtype,
+                             real _=0):
     """
-    def gaussian_kernel_estimate(points, real[:, :] values, xi, precision)
-
     Evaluate a multivariate Gaussian kernel estimate.
 
     Parameters
@@ -720,16 +720,16 @@ def gaussian_kernel_estimate(points, values, xi, precision, dtype, real _=0):
         Multivariate values associated with the data points.
     xi : array_like with shape (m, d)
         Coordinates to evaluate the estimate at in d dimensions.
-    precision : array_like with shape (d, d)
-        Precision matrix for the Gaussian kernel.
+    cho_cov : array_like with shape (d, d)
+        Permuted Cholesky factor of permuted covariance of the data.
 
     Returns
     -------
     estimate : double[:, :] with shape (m, p)
         Multivariate Gaussian kernel estimate evaluated at the input coordinates.
     """
     cdef:
-        real[:, :] points_, xi_, values_, estimate, whitening
+        real[:, :] points_, xi_, values_, estimate
         int i, j, k
         int n, d, m, p
         real arg, residual, norm
@@ -741,19 +741,21 @@ def gaussian_kernel_estimate(points, values, xi, precision, dtype, real _=0):
 
     if xi.shape[1] != d:
         raise ValueError("points and xi must have same trailing dim")
-    if precision.shape[0] != d or precision.shape[1] != d:
-        raise ValueError("precision matrix must match data dims")
+    if cho_cov.shape[0] != d or cho_cov.shape[1] != d:
+        raise ValueError("Covariance matrix must match data dims")
 
     # Rescale the data
-    whitening = np.linalg.cholesky(precision).astype(dtype, copy=False)
-    points_ = np.dot(points, whitening).astype(dtype, copy=False)
-    xi_ = np.dot(xi, whitening).astype(dtype, copy=False)
+    cho_cov = cho_cov.astype(dtype, copy=False)
+    points_ = np.asarray(solve_triangular(cho_cov, points.T, lower=False).T,
+                         dtype=dtype)
+    xi_ = np.asarray(solve_triangular(cho_cov, xi.T, lower=False).T,
+                     dtype=dtype)
     values_ = values.astype(dtype, copy=False)
 
     # Evaluate the normalisation
     norm = math.pow((2 * PI) ,(- d / 2))
     for i in range(d):
-        norm *= whitening[i, i]
+        norm /= cho_cov[i, i]
 
     # Create the result array and evaluate the weighted sum
     estimate = np.zeros((m, p), dtype)
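To see that the new whitening matches the old one, here is a small sketch (C, R, and pts are illustrative, not from the diff): the old path multiplied the data by a Cholesky factor of the precision matrix, the new path triangular-solves against the upper factor R of C = R @ R.T, and both produce the same Mahalanobis distances, which is all the kernel sum uses:

import numpy as np
from scipy import linalg
from scipy.linalg import solve_triangular

rng = np.random.default_rng(0)
A = rng.standard_normal((3, 3))
C = A @ A.T + 3 * np.eye(3)                        # synthetic covariance
pts = rng.standard_normal((5, 3))                  # synthetic points

# Old path: whiten by multiplying with a Cholesky factor of the precision.
whitening = np.linalg.cholesky(linalg.inv(C))
old = np.dot(pts, whitening)

# New path: triangular solve against the upper factor of C = R @ R.T.
R = linalg.cholesky(C[::-1, ::-1]).T[::-1, ::-1]
new = solve_triangular(R, pts.T, lower=False).T

# Pairwise squared distances (the `arg` inside exp(-arg/2)) agree.
d_old = ((old[:, None, :] - old[None, :, :]) ** 2).sum(-1)
d_new = ((new[:, None, :] - new[None, :, :]) ** 2).sum(-1)
print(np.allclose(d_old, d_new))                   # True

The normalisation changes in the same spirit: instead of multiplying by the diagonal of the whitening matrix, the loop now divides by the diagonal of cho_cov, and both amount to the factor det(C)**-0.5.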
16 changes: 2 additions & 14 deletions scipy/stats/tests/test_kdeoth.py
@@ -218,16 +218,6 @@ def __init__(self, dataset):
         super().__init__(dataset)
 
 
-class _kde_subclass3(stats.gaussian_kde):
-    def __init__(self, dataset, covariance):
-        self.covariance = covariance
-        stats.gaussian_kde.__init__(self, dataset)
-
-    def _compute_covariance(self):
-        self.inv_cov = np.linalg.inv(self.covariance)
-        self._norm_factor = np.sqrt(np.linalg.det(2 * np.pi * self.covariance))
-
-
 class _kde_subclass4(stats.gaussian_kde):
     def covariance_factor(self):
         return 0.5 * self.silverman_factor()
@@ -251,10 +241,8 @@ def test_gaussian_kde_subclassing():
     y2 = kde2(xs)
     assert_array_almost_equal_nulp(ys, y2, nulp=10)
 
-    # subclass 3
-    kde3 = _kde_subclass3(x1, kde.covariance)
-    y3 = kde3(xs)
-    assert_array_almost_equal_nulp(ys, y3, nulp=10)
+    # subclass 3 was removed because we have no obligation to maintain support
+    # for user invocation of private methods
 
     # subclass 4
     kde4 = _kde_subclass4(x1)
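The dropped _kde_subclass3 overrode the private _compute_covariance and set inv_cov directly, which is exactly the internal detail this commit replaces. A sketch (the class name is made up) of the supported customization path, mirroring _kde_subclass4 above: override only the public covariance_factor hook.

import numpy as np
from scipy import stats

class HalfSilvermanKDE(stats.gaussian_kde):
    def covariance_factor(self):
        # Half the Silverman bandwidth factor; any positive scalar works.
        return 0.5 * self.silverman_factor()

rng = np.random.default_rng(0)
data = rng.standard_normal(200)
kde = HalfSilvermanKDE(data)
print(kde(np.linspace(-3, 3, 5)))    # evaluates the estimate at five points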
