Skip to content

Commit

Permalink
FIX PLSRegression.coef_ takes X and Y variances into account when sca…
Browse files Browse the repository at this point in the history
…le=True (#28612)

Co-authored-by: Jérémie du Boisberranger <jeremie@probabl.ai>
  • Loading branch information
glemaitre and jeremiedbb committed May 2, 2024
1 parent 2bafd7b commit e19d8c2
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 6 deletions.
7 changes: 6 additions & 1 deletion doc/whats_new/v1.5.rst
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,12 @@ Changelog
:class:`cross_decomposition.CCA`,
and :class:`cross_decomposition.PLSSVD`.
`Y` will be removed in version 1.7.
:pr:`28604` by :user:`David Leon <davidleon123>`
:pr:`28604` by :user:`David Leon <davidleon123>`.

- |Fix| The `coef_` fitted attribute of :class:`cross_decomposition.PLSRegression`
now takes into account both the scale of `X` and `Y` when `scale=True`. Note that
the previous predicted values were not affected by this bug.
:pr:`28612` by :user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.datasets`
.......................
Expand Down
5 changes: 2 additions & 3 deletions sklearn/cross_decomposition/_pls.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ def fit(self, X, y=None, Y=None):
pinv2(np.dot(self.y_loadings_.T, self.y_weights_), check_finite=False),
)
self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
self.coef_ = (self.coef_ * self._y_std).T
self.coef_ = (self.coef_ * self._y_std).T / self._x_std
self.intercept_ = self._y_mean
self._n_features_out = self.x_rotations_.shape[1]
return self
Expand Down Expand Up @@ -517,9 +517,8 @@ def predict(self, X, copy=True):
"""
check_is_fitted(self)
X = self._validate_data(X, copy=copy, dtype=FLOAT_DTYPES, reset=False)
# Normalize
# Only center X but do not scale it since the coefficients are already scaled
X -= self._x_mean
X /= self._x_std
Ypred = X @ self.coef_.T + self.intercept_
return Ypred.ravel() if self._predict_1d else Ypred

Expand Down
24 changes: 22 additions & 2 deletions sklearn/cross_decomposition/tests/test_pls.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,8 +589,6 @@ def test_pls_prediction(PLSEstimator, scale):

y_mean = Y.mean(axis=0)
X_trans = X - X.mean(axis=0)
if scale:
X_trans /= X.std(axis=0, ddof=1)

assert_allclose(pls.intercept_, y_mean)
assert_allclose(Y_pred, X_trans @ pls.coef_.T + pls.intercept_)
Expand Down Expand Up @@ -646,6 +644,28 @@ def test_pls_regression_fit_1d_y():
assert_allclose(y_pred, expected)


def test_pls_regression_scaling_coef():
"""Check that when using `scale=True`, the coefficients are using the std. dev. from
both `X` and `Y`.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/27964
"""
# handcrafted data where we can predict Y from X with an additional scaling factor
rng = np.random.RandomState(0)
coef = rng.uniform(size=(3, 5))
X = rng.normal(scale=10, size=(30, 5)) # add a std of 10
Y = X @ coef.T

# we need to make sure that the dimension of the latent space is large enough to
# perfectly predict `Y` from `X` (no information loss)
pls = PLSRegression(n_components=5, scale=True).fit(X, Y)
assert_allclose(pls.coef_, coef)

# we therefore should be able to predict `Y` from `X`
assert_allclose(pls.predict(X), Y)


# TODO(1.7): Remove
@pytest.mark.parametrize("Klass", [PLSRegression, CCA, PLSSVD, PLSCanonical])
def test_pls_fit_warning_on_deprecated_Y_argument(Klass):
Expand Down

0 comments on commit e19d8c2

Please sign in to comment.