From 1e769b695bce0f0e651d6b01f68b2ace7466f014 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Mon, 7 Nov 2022 11:56:04 +0100 Subject: [PATCH 01/17] Add interaction constraint shortcuts Allow users to specify common interaction constraints as a string. --- .../gradient_boosting.py | 17 ++++++++-- .../tests/test_gradient_boosting.py | 34 +++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 8a9a4b876a178..6e2ba650df380 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -3,6 +3,7 @@ from abc import ABC, abstractmethod from functools import partial +import itertools from numbers import Real, Integral import warnings @@ -92,7 +93,7 @@ class BaseHistGradientBoosting(BaseEstimator, ABC): "min_samples_leaf": [Interval(Integral, 1, None, closed="left")], "l2_regularization": [Interval(Real, 0, None, closed="left")], "monotonic_cst": ["array-like", dict, None], - "interaction_cst": [list, tuple, None], + "interaction_cst": [list, tuple, str, None], "n_iter_no_change": [Interval(Integral, 1, None, closed="left")], "validation_fraction": [ Interval(Real, 0, 1, closed="neither"), @@ -288,8 +289,20 @@ def _check_interaction_cst(self, n_features): if self.interaction_cst is None: return None + if isinstance(self.interaction_cst, str): + if self.interaction_cst == "no interactions": + interaction_cst = [[i] for i in range(n_features)] + + elif self.interaction_cst == "pairwise": + interaction_cst = itertools.combinations(range(n_features), 2) + else: + raise ValueError( + f"'{self.interaction_cst}' is not a valid interaction constraint. " + "Use 'no interactions', 'pairwise' or specify them explicitly." 
+ ) + try: - constraints = [set(group) for group in self.interaction_cst] + constraints = [set(group) for group in interaction_cst] except TypeError: raise ValueError( "Interaction constraints must be a sequence of tuples or lists, got:" diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 490c3a7509738..e93dda2fd7338 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -1,3 +1,4 @@ +import itertools import warnings import re @@ -1187,6 +1188,10 @@ def test_uint8_predict(Est): [ (None, 931, None), ([{0, 1}], 2, [{0, 1}]), + ("pairwise", 2, [{0, 1}]), + ("pairwise", 4, [{0, 1}, {0, 2}, {0, 3}, {1, 2}, {1, 3}, {2, 3}]), + ("no interactions", 2, [{0}, {1}]), + ("no interactions", 4, [{0}, {1}, {2}, {3}]), ([(1, 0), [5, 1]], 6, [{0, 1}, {1, 5}, {2, 3, 4}]), ], ) @@ -1197,6 +1202,35 @@ def test_check_interaction_cst(interaction_cst, n_features, result): assert est._check_interaction_cst(n_features) == result +@pytest.mark.parametrize( + "Est, shortcut, expected_message", + [ + (combination[0], *combination[1]) + for combination in itertools.product( + (HistGradientBoostingRegressor, HistGradientBoostingClassifier), + ( + ("no interactions", None), + ("pairwise", None), + ("pairwiseS", "not a valid interaction constraint"), + ), + ) + ], +) +def test_interaction_cst_shortcuts(Est, shortcut, expected_message): + rng = np.random.RandomState(42) + X = rng.uniform(size=(20, 4)) + y = np.hstack((X, 5 * X[:, [0]] * X[:, [1]])).sum(axis=1, dtype=int) + + est = Est(interaction_cst=shortcut) + + if expected_message is not None: + with pytest.raises(ValueError, match=expected_message): + est.fit(X, y) + + else: + est.fit(X, y) + + def test_interaction_cst_numerically(): """Check that interaction constraints have no forbidden interactions.""" rng = 
np.random.RandomState(42) From 184e7cd0417f21f756afeaee26af1af8d83bd537 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Mon, 7 Nov 2022 13:38:13 +0100 Subject: [PATCH 02/17] Update doc strings with interaction cst shortcut --- .../_hist_gradient_boosting/gradient_boosting.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 6e2ba650df380..91d8a4a29c49b 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -1288,7 +1288,11 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): .. versionchanged:: 1.2 Accept dict of constraints with feature names as keys. - interaction_cst : sequence of lists/tuples/sets of int, default=None + interaction_cst : iterable of iterables of int, str, default=None + Specify interaction constraints, i.e. sets of features which can + only interact with each other in child nodes splits. + + interaction_cst : str or sequence of lists/tuples/sets of int, default=None Specify interaction constraints, the sets of features which can interact with each other in child node splits. @@ -1297,6 +1301,9 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): specified in these constraints, they are treated as if they were specified as an additional set. + Alternatively, "pairwise" or "no interactions" are shorthands for + allowing only pairwise/no interactions. + For instance, with 5 features in total, `interaction_cst=[{0, 1}]` is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`, and specifies that each branch of a tree will either only split @@ -1645,6 +1652,9 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting): specified in these constraints, they are treated as if they were specified as an additional set. 
+ Alternatively, "pairwise" or "no interactions" are shorthands for + allowing only pairwise/no interactions. + For instance, with 5 features in total, `interaction_cst=[{0, 1}]` is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`, and specifies that each branch of a tree will either only split From 8a54b7bfdce11e029c97b8660a41e0b600e3fa47 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Mon, 7 Nov 2022 13:48:50 +0100 Subject: [PATCH 03/17] Add whats new entry --- doc/whats_new/v1.2.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 759bbf411fe19..913c2abbb260b 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -299,6 +299,11 @@ Changelog Using interaction constraints also makes fitting faster. :pr:`24856` by :user:`Christian Lorentzen `. +- |Enhancement| Interaction constraints for :class:`~sklearn.ensemble.HistGradientBoostingClassifier` + and :class:`~sklearn.ensemble.HistGradientBoostingRegressor` can now be specified + as strings for two common cases: no interactions and pairwise interactions. + :pr:`24849` by :user:`Tim Head `. + - |Feature| Adds `class_weight` to :class:`ensemble.HistGradientBoostingClassifier`. :pr:`22014` by `Thomas Fan`_. From d0f14bcd2c26a56ca6f13a9099799127e8c25b88 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 11 Nov 2022 10:43:59 +0100 Subject: [PATCH 04/17] Switch from whitespace to underscore for consistency Renamed parameter value to use an underscore and use `StrOptions` to describe valid parameter values. 
--- .../_hist_gradient_boosting/gradient_boosting.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 91d8a4a29c49b..9efe79ca9321f 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -93,7 +93,7 @@ class BaseHistGradientBoosting(BaseEstimator, ABC): "min_samples_leaf": [Interval(Integral, 1, None, closed="left")], "l2_regularization": [Interval(Real, 0, None, closed="left")], "monotonic_cst": ["array-like", dict, None], - "interaction_cst": [list, tuple, str, None], + "interaction_cst": [list, tuple, StrOptions({"pairwise", "no_interactions"}), None], "n_iter_no_change": [Interval(Integral, 1, None, closed="left")], "validation_fraction": [ Interval(Real, 0, 1, closed="neither"), @@ -290,7 +290,7 @@ def _check_interaction_cst(self, n_features): return None if isinstance(self.interaction_cst, str): - if self.interaction_cst == "no interactions": + if self.interaction_cst == "no_interactions": interaction_cst = [[i] for i in range(n_features)] elif self.interaction_cst == "pairwise": @@ -298,7 +298,7 @@ def _check_interaction_cst(self, n_features): else: raise ValueError( f"'{self.interaction_cst}' is not a valid interaction constraint. " - "Use 'no interactions', 'pairwise' or specify them explicitly." + "Use 'no_interactions', 'pairwise' or specify them explicitly." ) try: @@ -1301,7 +1301,7 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): specified in these constraints, they are treated as if they were specified as an additional set. - Alternatively, "pairwise" or "no interactions" are shorthands for + Alternatively, "pairwise" or "no_interactions" are shorthands for allowing only pairwise/no interactions. 
For instance, with 5 features in total, `interaction_cst=[{0, 1}]` @@ -1652,7 +1652,7 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting): specified in these constraints, they are treated as if they were specified as an additional set. - Alternatively, "pairwise" or "no interactions" are shorthands for + Alternatively, "pairwise" or "no_interactions" are shorthands for allowing only pairwise/no interactions. For instance, with 5 features in total, `interaction_cst=[{0, 1}]` From efb78a3deeda85d503fd21798b410f2de55b9a3d Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 11 Nov 2022 13:46:07 +0100 Subject: [PATCH 05/17] Update test to use new parameter value --- .../_hist_gradient_boosting/tests/test_gradient_boosting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index e93dda2fd7338..6d8e01500a550 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -1190,8 +1190,8 @@ def test_uint8_predict(Est): ([{0, 1}], 2, [{0, 1}]), ("pairwise", 2, [{0, 1}]), ("pairwise", 4, [{0, 1}, {0, 2}, {0, 3}, {1, 2}, {1, 3}, {2, 3}]), - ("no interactions", 2, [{0}, {1}]), - ("no interactions", 4, [{0}, {1}, {2}, {3}]), + ("no_interactions", 2, [{0}, {1}]), + ("no_interactions", 4, [{0}, {1}, {2}, {3}]), ([(1, 0), [5, 1]], 6, [{0, 1}, {1, 5}, {2, 3, 4}]), ], ) From 60b99383ab34e7498fae23e5d92b8fec7c6d09aa Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 11 Nov 2022 13:51:11 +0100 Subject: [PATCH 06/17] Remove explicit parameter validation --- .../_hist_gradient_boosting/gradient_boosting.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py 
b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 9efe79ca9321f..e60dedb9f92ac 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -289,17 +289,11 @@ def _check_interaction_cst(self, n_features): if self.interaction_cst is None: return None - if isinstance(self.interaction_cst, str): - if self.interaction_cst == "no_interactions": - interaction_cst = [[i] for i in range(n_features)] + if self.interaction_cst == "no_interactions": + interaction_cst = [[i] for i in range(n_features)] - elif self.interaction_cst == "pairwise": - interaction_cst = itertools.combinations(range(n_features), 2) - else: - raise ValueError( - f"'{self.interaction_cst}' is not a valid interaction constraint. " - "Use 'no_interactions', 'pairwise' or specify them explicitly." - ) + elif self.interaction_cst == "pairwise": + interaction_cst = itertools.combinations(range(n_features), 2) try: constraints = [set(group) for group in interaction_cst] From 8bc96edf7ba17fb0be7820485fb651558c0121d1 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 11 Nov 2022 13:55:08 +0100 Subject: [PATCH 07/17] Update doc string --- .../ensemble/_hist_gradient_boosting/gradient_boosting.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index e60dedb9f92ac..0f464b980fcbf 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -1295,8 +1295,8 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): specified in these constraints, they are treated as if they were specified as an additional set. - Alternatively, "pairwise" or "no_interactions" are shorthands for - allowing only pairwise/no interactions. 
+ The strings "pairwise" and "no_interactions" are shorthands for + allowing only pairwise or no interactions, respectively. For instance, with 5 features in total, `interaction_cst=[{0, 1}]` is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`, @@ -1646,8 +1646,8 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting): specified in these constraints, they are treated as if they were specified as an additional set. - Alternatively, "pairwise" or "no_interactions" are shorthands for - allowing only pairwise/no interactions. + The strings "pairwise" and "no_interactions" are shorthands for + allowing only pairwise or no interactions, respectively. For instance, with 5 features in total, `interaction_cst=[{0, 1}]` is equivalent to `interaction_cst=[{0, 1}, {2, 3, 4}]`, From 45f403a3e858887994c5bd47322f0d83df66c49b Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 11 Nov 2022 15:46:04 +0100 Subject: [PATCH 08/17] Update PDP example to use "no_interactions" shortcut --- examples/inspection/plot_partial_dependence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py index a7ef29edef183..200a5f3c73c73 100644 --- a/examples/inspection/plot_partial_dependence.py +++ b/examples/inspection/plot_partial_dependence.py @@ -271,7 +271,7 @@ print("Training interaction constraint HistGradientBoostingRegressor...") tic = time() est_no_interactions = HistGradientBoostingRegressor( - interaction_cst=[[i] for i in range(X_train.shape[1])] + interaction_cst="no_interactions" ) est_no_interactions.fit(X_train, y_train) print(f"done in {time() - tic:.3f}s") From a868526c930ac868ffcaa1158899814c6c467dd1 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 11 Nov 2022 16:23:00 +0100 Subject: [PATCH 09/17] Update whats new --- doc/whats_new/v1.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new/v1.2.rst 
b/doc/whats_new/v1.2.rst index 913c2abbb260b..5d129d586bacd 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -301,7 +301,7 @@ Changelog - |Enhancement| Interaction constraints for :class:`~sklearn.ensemble.HistGradientBoostingClassifier` and :class:`~sklearn.ensemble.HistGradientBoostingRegressor` can now be specified - as strings for two common cases: no interactions and pairwise interactions. + as strings for two common cases: "no_interactions" and "pairwise" interactions. :pr:`24849` by :user:`Tim Head `. - |Feature| Adds `class_weight` to :class:`ensemble.HistGradientBoostingClassifier`. From 0211c2f8f0c5c10f345b0419c80d7075df35c9d0 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Mon, 14 Nov 2022 08:52:28 +0100 Subject: [PATCH 10/17] Fix formatting --- examples/inspection/plot_partial_dependence.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py index 200a5f3c73c73..5b85e6edbc151 100644 --- a/examples/inspection/plot_partial_dependence.py +++ b/examples/inspection/plot_partial_dependence.py @@ -270,9 +270,7 @@ print("Training interaction constraint HistGradientBoostingRegressor...") tic = time() -est_no_interactions = HistGradientBoostingRegressor( - interaction_cst="no_interactions" -) +est_no_interactions = HistGradientBoostingRegressor(interaction_cst="no_interactions") est_no_interactions.fit(X_train, y_train) print(f"done in {time() - tic:.3f}s") From b9905990e948cf307b5419313fc24c5f8ba3ca47 Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 25 Nov 2022 13:19:48 +0100 Subject: [PATCH 11/17] Fix doc string --- .../ensemble/_hist_gradient_boosting/gradient_boosting.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 0f464b980fcbf..66bd2b1f11d65 100644 --- 
a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -1282,10 +1282,6 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): .. versionchanged:: 1.2 Accept dict of constraints with feature names as keys. - interaction_cst : iterable of iterables of int, str, default=None - Specify interaction constraints, i.e. sets of features which can - only interact with each other in child nodes splits. - interaction_cst : str or sequence of lists/tuples/sets of int, default=None Specify interaction constraints, the sets of features which can interact with each other in child node splits. @@ -1637,7 +1633,7 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting): .. versionchanged:: 1.2 Accept dict of constraints with feature names as keys. - interaction_cst : sequence of lists/tuples/sets of int, default=None + interaction_cst : str or sequence of lists/tuples/sets of int, default=None Specify interaction constraints, the sets of features which can interact with each other in child node splits. 
From 152e622af3989a7a17d4d2bab7464aeb984952aa Mon Sep 17 00:00:00 2001 From: Tim Head Date: Fri, 25 Nov 2022 13:49:11 +0100 Subject: [PATCH 12/17] Small fixes --- .../_hist_gradient_boosting/gradient_boosting.py | 10 +++++++++- .../tests/test_gradient_boosting.py | 8 ++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 66bd2b1f11d65..ba44532077bc1 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -93,7 +93,12 @@ class BaseHistGradientBoosting(BaseEstimator, ABC): "min_samples_leaf": [Interval(Integral, 1, None, closed="left")], "l2_regularization": [Interval(Real, 0, None, closed="left")], "monotonic_cst": ["array-like", dict, None], - "interaction_cst": [list, tuple, StrOptions({"pairwise", "no_interactions"}), None], + "interaction_cst": [ + list, + tuple, + StrOptions({"pairwise", "no_interactions"}), + None, + ], "n_iter_no_change": [Interval(Integral, 1, None, closed="left")], "validation_fraction": [ Interval(Real, 0, 1, closed="neither"), @@ -295,6 +300,9 @@ def _check_interaction_cst(self, n_features): elif self.interaction_cst == "pairwise": interaction_cst = itertools.combinations(range(n_features), 2) + else: + interaction_cst = self.interaction_cst + try: constraints = [set(group) for group in interaction_cst] except TypeError: diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 6d8e01500a550..33c83de0959f7 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -1209,9 +1209,13 @@ def test_check_interaction_cst(interaction_cst, n_features, result): for combination in itertools.product( 
(HistGradientBoostingRegressor, HistGradientBoostingClassifier), ( - ("no interactions", None), + ("no_interactions", None), ("pairwise", None), - ("pairwiseS", "not a valid interaction constraint"), + ( + "pairwiseS", + "a str among {'pairwise', 'no_interactions'} or None. Got" + " 'pairwiseS' instead.", + ), ), ) ], From fe868decdd2552f5c44819b458f47355dcf53316 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 25 Nov 2022 15:55:26 +0100 Subject: [PATCH 13/17] right place in what's new --- doc/whats_new/v1.2.rst | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index 5d129d586bacd..5b9c046758ede 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -299,11 +299,6 @@ Changelog Using interaction constraints also makes fitting faster. :pr:`24856` by :user:`Christian Lorentzen `. -- |Enhancement| Interaction constraints for :class:`~sklearn.ensemble.HistGradientBoostingClassifier` - and :class:`~sklearn.ensemble.HistGradientBoostingRegressor` can now be specified - as strings for two common cases: "no_interactions" and "pairwise" interactions. - :pr:`24849` by :user:`Tim Head `. - - |Feature| Adds `class_weight` to :class:`ensemble.HistGradientBoostingClassifier`. :pr:`22014` by `Thomas Fan`_. @@ -333,6 +328,12 @@ Changelog as value to specify monotonicity constraints for each feature. :pr:`24855` by :user:`Olivier Grisel `. +- |Enhancement| Interaction constraints for + :class:`ensemble.HistGradientBoostingClassifier` + and :class:`ensemble.HistGradientBoostingRegressor` can now be specified + as strings for two common cases: "no_interactions" and "pairwise" interactions. + :pr:`24849` by :user:`Tim Head `. + - |Fix| Fixed the issue where :class:`ensemble.AdaBoostClassifier` outputs NaN in feature importance when fitted with very small sample weight. :pr:`20415` by :user:`Zhehao Liu `. 
From c5725af96b9e7e95622cbc43470b6d567d2fcf16 Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 25 Nov 2022 16:00:25 +0100 Subject: [PATCH 14/17] remove test for error because it's handled by the common test --- .../tests/test_gradient_boosting.py | 33 ------------------- 1 file changed, 33 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 33c83de0959f7..72e77fe73c9e4 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -1202,39 +1202,6 @@ def test_check_interaction_cst(interaction_cst, n_features, result): assert est._check_interaction_cst(n_features) == result -@pytest.mark.parametrize( - "Est, shortcut, expected_message", - [ - (combination[0], *combination[1]) - for combination in itertools.product( - (HistGradientBoostingRegressor, HistGradientBoostingClassifier), - ( - ("no_interactions", None), - ("pairwise", None), - ( - "pairwiseS", - "a str among {'pairwise', 'no_interactions'} or None. 
Got" - " 'pairwiseS' instead.", - ), - ), - ) - ], -) -def test_interaction_cst_shortcuts(Est, shortcut, expected_message): - rng = np.random.RandomState(42) - X = rng.uniform(size=(20, 4)) - y = np.hstack((X, 5 * X[:, [0]] * X[:, [1]])).sum(axis=1, dtype=int) - - est = Est(interaction_cst=shortcut) - - if expected_message is not None: - with pytest.raises(ValueError, match=expected_message): - est.fit(X, y) - - else: - est.fit(X, y) - - def test_interaction_cst_numerically(): """Check that interaction constraints have no forbidden interactions.""" rng = np.random.RandomState(42) From 34a5eaadd8465780d7cc0c3115d508f7969e78bc Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 25 Nov 2022 16:02:57 +0100 Subject: [PATCH 15/17] nit --- sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index ba44532077bc1..261366ce72192 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -296,10 +296,8 @@ def _check_interaction_cst(self, n_features): if self.interaction_cst == "no_interactions": interaction_cst = [[i] for i in range(n_features)] - elif self.interaction_cst == "pairwise": interaction_cst = itertools.combinations(range(n_features), 2) - else: interaction_cst = self.interaction_cst From 4f9ad0140c8787944a6ad38b787188d8e12eec5c Mon Sep 17 00:00:00 2001 From: jeremie du boisberranger Date: Fri, 25 Nov 2022 16:05:50 +0100 Subject: [PATCH 16/17] follow docstring conventions --- .../ensemble/_hist_gradient_boosting/gradient_boosting.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 261366ce72192..af9225933100c 100644 --- 
a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -1288,7 +1288,8 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): .. versionchanged:: 1.2 Accept dict of constraints with feature names as keys. - interaction_cst : str or sequence of lists/tuples/sets of int, default=None + interaction_cst : {"pairwise", "no_interactions"} or sequence of lists/tuples/sets \ + of int, default=None Specify interaction constraints, the sets of features which can interact with each other in child node splits. @@ -1639,7 +1640,8 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting): .. versionchanged:: 1.2 Accept dict of constraints with feature names as keys. - interaction_cst : str or sequence of lists/tuples/sets of int, default=None + interaction_cst : {"pairwise", "no_interactions"} or sequence of lists/tuples/sets \ + of int, default=None Specify interaction constraints, the sets of features which can interact with each other in child node splits. From b69b68f8d7e525c193ab3570e291a4ed9883e11a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= <34657725+jeremiedbb@users.noreply.github.com> Date: Fri, 25 Nov 2022 16:11:40 +0100 Subject: [PATCH 17/17] Update sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py --- .../_hist_gradient_boosting/tests/test_gradient_boosting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 72e77fe73c9e4..d1a8f56bbd479 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -1,4 +1,3 @@ -import itertools import warnings import re