Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MRG] ENH: Make StackingRegressor support Multioutput #27704

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
19 changes: 14 additions & 5 deletions sklearn/ensemble/_stacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,7 @@ def fit(self, X, y, sample_weight=None):
Training vectors, where `n_samples` is the number of samples and
`n_features` is the number of features.

y : array-like of shape (n_samples,)
y : array-like of shape (n_samples,) or (n_samples, n_outputs)
Target values.

sample_weight : array-like of shape (n_samples,), default=None
Expand All @@ -967,7 +967,12 @@ def fit(self, X, y, sample_weight=None):
Returns a fitted instance.
"""
_raise_for_unsupported_routing(self, "fit", sample_weight=sample_weight)
y = column_or_1d(y, warn=True)
try:
# Single Target
y = column_or_1d(y, warn=True)
except ValueError:
# Multioutput target.
y = self._validate_data(X="no_validation", y=y, multi_output=True)
OmarManzoor marked this conversation as resolved.
Show resolved Hide resolved
return super().fit(X, y, sample_weight)
OmarManzoor marked this conversation as resolved.
Show resolved Hide resolved

def transform(self, X):
Expand All @@ -981,8 +986,10 @@ def transform(self, X):

Returns
-------
y_preds : ndarray of shape (n_samples, n_estimators)
y_preds : ndarray of shape
(n_samples, n_estimators) or (n_samples, n_estimators * n_outputs).
Prediction outputs for each estimator.
If passthrough=True, the number of columns increases by n_features.
"""
return self._transform(X)

Expand All @@ -995,7 +1002,7 @@ def fit_transform(self, X, y, sample_weight=None):
Training vectors, where `n_samples` is the number of samples and
`n_features` is the number of features.

y : array-like of shape (n_samples,)
y : array-like of shape (n_samples,) or (n_samples, n_outputs)
Target values.

sample_weight : array-like of shape (n_samples,), default=None
Expand All @@ -1005,8 +1012,10 @@ def fit_transform(self, X, y, sample_weight=None):

Returns
-------
y_preds : ndarray of shape (n_samples, n_estimators)
y_preds : ndarray of shape
(n_samples, n_estimators) or (n_samples, n_estimators * n_outputs).
Prediction outputs for each estimator.
If passthrough=True, the number of columns increases by n_features.
"""
return super().fit_transform(X, y, sample_weight=sample_weight)

Expand Down
85 changes: 85 additions & 0 deletions sklearn/ensemble/tests/test_stacking.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
RidgeClassifier,
)
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import scale
Expand Down Expand Up @@ -861,6 +862,90 @@ def test_stacking_classifier_base_regressor():
assert clf.score(X_test, y_test) > 0.8


def test_stacking_regressor_multioutput():
    """Check that a stacking regressor works with multioutput."""
    cv = 2
    acceptable_relative_tolerance = 1e-10
    acceptable_absolute_tolerance = 1e-10

    # Repeat the same 5 points once per CV fold so every fold sees the full
    # (noise-free) linear relationship.
    X_train = np.hstack([np.arange(5)] * cv).reshape(-1, 1)
    # Two linearly-dependent targets -> a genuine multioutput problem.
    y_train = np.hstack([2 * X_train + 1, 3 * X_train - 2])
    assert y_train.ndim > 1

    # One informative base estimator and one constant (zero) estimator, so the
    # final estimator only needs to pass the first block of predictions through.
    estimator1 = LinearRegression(fit_intercept=True)
    estimator2 = MultiOutputRegressor(DummyRegressor(strategy="constant", constant=0))
    final_estimator = Ridge(alpha=1e-12, fit_intercept=False, random_state=42)

    reg = StackingRegressor(
        estimators=[("lr", estimator1), ("dr", estimator2)],
        final_estimator=final_estimator,
        cv=KFold(n_splits=cv, shuffle=False),
        passthrough=False,
    )

    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_train)
    # NOTE: In this case the estimator can predict almost exactly the target.
    assert_allclose(
        y_pred,
        y_train,
        rtol=acceptable_relative_tolerance,
        atol=acceptable_absolute_tolerance,
    )

    X_trans = reg.transform(X_train)
    # NOTE: The result of transform is the horizontal stack of the predictions:
    # the linear model recovers y exactly, the dummy model predicts all zeros.
    assert_allclose(
        X_trans,
        np.hstack([y_train, np.zeros(y_train.shape)]),
        rtol=acceptable_relative_tolerance,
        atol=acceptable_absolute_tolerance,
    )


def test_stacking_regressor_multioutput_with_passthrough():
    """Check that a stacking regressor with passthrough works with multioutput."""
    cv = 2
    acceptable_relative_tolerance = 1e-10
    acceptable_absolute_tolerance = 1e-10

    # Repeat the same 5 points once per CV fold so every fold sees the full
    # (noise-free) linear relationship.
    X_train = np.hstack([np.arange(5)] * cv).reshape(-1, 1)
    # Two linearly-dependent targets -> a genuine multioutput problem.
    y_train = np.hstack([2 * X_train + 1, 3 * X_train - 2])
    assert y_train.ndim > 1

    # One informative base estimator and one constant (zero) estimator, so the
    # final estimator only needs to pass the first block of predictions through.
    estimator1 = LinearRegression(fit_intercept=True)
    estimator2 = MultiOutputRegressor(DummyRegressor(strategy="constant", constant=0))
    final_estimator = Ridge(alpha=1e-12, fit_intercept=False, random_state=42)

    reg = StackingRegressor(
        estimators=[("lr", estimator1), ("dr", estimator2)],
        final_estimator=final_estimator,
        cv=KFold(n_splits=cv, shuffle=False),
        passthrough=True,
    )

    reg.fit(X_train, y_train)

    y_pred = reg.predict(X_train)
    # NOTE: In this case, the estimator can predict almost exactly the target.
    assert_allclose(
        y_pred,
        y_train,
        rtol=acceptable_relative_tolerance,
        atol=acceptable_absolute_tolerance,
    )

    X_trans = reg.transform(X_train)
    # NOTE: With passthrough=True, X_trans is the horizontal stack of the
    # predictions followed by the original features X_train.
    assert_allclose(
        X_trans,
        np.hstack([y_train, np.zeros(y_train.shape), X_train]),
        rtol=acceptable_relative_tolerance,
        atol=acceptable_absolute_tolerance,
    )


def test_stacking_final_estimator_attribute_error():
"""Check that we raise the proper AttributeError when the final estimator
does not implement the `decision_function` method, which is decorated with
Expand Down