diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 759bbf411fe19..5b9c046758ede 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -328,6 +328,12 @@ Changelog as value to specify monotonicity constraints for each feature. :pr:`24855` by :user:`Olivier Grisel `. +- |Enhancement| Interaction constraints for + :class:`ensemble.HistGradientBoostingClassifier` + and :class:`ensemble.HistGradientBoostingRegressor` can now be specified + as strings for two common cases: "no_interactions" and "pairwise" interactions. + :pr:`24849` by :user:`Tim Head `. + - |Fix| Fixed the issue where :class:`ensemble.AdaBoostClassifier` outputs NaN in feature importance when fitted with very small sample weight. :pr:`20415` by :user:`Zhehao Liu `. diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py index a7ef29edef183..5b85e6edbc151 100644 --- a/examples/inspection/plot_partial_dependence.py +++ b/examples/inspection/plot_partial_dependence.py @@ -270,9 +270,7 @@ print("Training interaction constraint HistGradientBoostingRegressor...") tic = time() -est_no_interactions = HistGradientBoostingRegressor( - interaction_cst=[[i] for i in range(X_train.shape[1])] -) +est_no_interactions = HistGradientBoostingRegressor(interaction_cst="no_interactions") est_no_interactions.fit(X_train, y_train) print(f"done in {time() - tic:.3f}s") diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 8a9a4b876a178..af9225933100c 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -3,6 +3,7 @@ from abc import ABC, abstractmethod from functools import partial +import itertools from numbers import Real, Integral import warnings @@ -92,7 +93,12 @@ class BaseHistGradientBoosting(BaseEstimator, ABC): "min_samples_leaf": [Interval(Integral, 1, None, 
closed="left")], "l2_regularization": [Interval(Real, 0, None, closed="left")], "monotonic_cst": ["array-like", dict, None], - "interaction_cst": [list, tuple, None], + "interaction_cst": [ + list, + tuple, + StrOptions({"pairwise", "no_interactions"}), + None, + ], "n_iter_no_change": [Interval(Integral, 1, None, closed="left")], "validation_fraction": [ Interval(Real, 0, 1, closed="neither"), @@ -288,8 +294,15 @@ def _check_interaction_cst(self, n_features): if self.interaction_cst is None: return None + if self.interaction_cst == "no_interactions": + interaction_cst = [[i] for i in range(n_features)] + elif self.interaction_cst == "pairwise": + interaction_cst = itertools.combinations(range(n_features), 2) + else: + interaction_cst = self.interaction_cst + try: - constraints = [set(group) for group in self.interaction_cst] + constraints = [set(group) for group in interaction_cst] except TypeError: raise ValueError( "Interaction constraints must be a sequence of tuples or lists, got:" @@ -1275,7 +1288,8 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): .. versionchanged:: 1.2 Accept dict of constraints with feature names as keys. - interaction_cst : sequence of lists/tuples/sets of int, default=None + interaction_cst : {"pairwise", "no_interactions"} or sequence of lists/tuples/sets \ + of int, default=None Specify interaction constraints, the sets of features which can interact with each other in child node splits. @@ -1284,6 +1298,9 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): specified in these constraints, they are treated as if they were specified as an additional set. + The strings "pairwise" and "no_interactions" are shorthands for + allowing only pairwise or no interactions, respectively.
+ For instance, with 5 features in total, `interaction_cst=[{0, 1}]` is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`, and specifies that each branch of a tree will either only split @@ -1623,7 +1640,8 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting): .. versionchanged:: 1.2 Accept dict of constraints with feature names as keys. - interaction_cst : sequence of lists/tuples/sets of int, default=None + interaction_cst : {"pairwise", "no_interactions"} or sequence of lists/tuples/sets \ + of int, default=None Specify interaction constraints, the sets of features which can interact with each other in child node splits. @@ -1632,6 +1650,9 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting): specified in these constraints, they are treated as if they were specified as an additional set. + The strings "pairwise" and "no_interactions" are shorthands for + allowing only pairwise or no interactions, respectively. + For instance, with 5 features in total, `interaction_cst=[{0, 1}]` is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`, and specifies that each branch of a tree will either only split diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 490c3a7509738..d1a8f56bbd479 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -1187,6 +1187,10 @@ def test_uint8_predict(Est): [ (None, 931, None), ([{0, 1}], 2, [{0, 1}]), + ("pairwise", 2, [{0, 1}]), + ("pairwise", 4, [{0, 1}, {0, 2}, {0, 3}, {1, 2}, {1, 3}, {2, 3}]), + ("no_interactions", 2, [{0}, {1}]), + ("no_interactions", 4, [{0}, {1}, {2}, {3}]), ([(1, 0), [5, 1]], 6, [{0, 1}, {1, 5}, {2, 3, 4}]), ], )