From ebbf4ecf444a0d869703568016387914723c50c9 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Thu, 29 Sep 2022 13:59:27 +0200 Subject: [PATCH 1/3] TST Relax test_gradient_boosting_early_stopping --- sklearn/ensemble/tests/test_gradient_boosting.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index cbe56578e45c5..c2e75c34916e0 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -1122,8 +1122,14 @@ def test_gradient_boosting_early_stopping(): ) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) - # Check if early_stopping works as expected - for est, tol, early_stop_n_estimators in ( + # Check if early_stopping works as expected, that is empirically check that the + # number of estimators is increasing when the tolerance decreases. + + # Depending on platforms, the number of fitted estimators might slightly vary. + # Hence, we check for its inclusion in an interval centered in the expected + # number of fitted estimators rather than a strict equality. + delta_early_stop_n_estimators = 2 + for est, tol, expected_early_stop_n_estimators in ( (gbc, 1e-1, 28), (gbr, 1e-1, 13), (gbc, 1e-3, 70), @@ -1131,7 +1137,11 @@ def test_gradient_boosting_early_stopping(): ): est.set_params(tol=tol) est.fit(X_train, y_train) - assert est.n_estimators_ == early_stop_n_estimators + assert ( + expected_early_stop_n_estimators - delta_early_stop_n_estimators + <= est.n_estimators_ + <= expected_early_stop_n_estimators + delta_early_stop_n_estimators + ) assert est.score(X_test, y_test) > 0.7 # Without early stopping From c47bbbdc77fc0308c5126f05d67f56600303c538 Mon Sep 17 00:00:00 2001 From: Julien Jerphanion Date: Fri, 30 Sep 2022 11:14:35 +0200 Subject: [PATCH 2/3] TST Simplify test and split into two MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jérémie du Boisberranger --- .../ensemble/tests/test_gradient_boosting.py | 55 ++++++++++--------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index c2e75c34916e0..944c4dde4e070 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -1102,49 +1102,49 @@ def test_sparse_input(EstimatorClass, sparse_matrix): assert_array_almost_equal(res_sparse, res) -def test_gradient_boosting_early_stopping(): +@pytest.mark.parametrize( + "GradientBoostingEstimator", [GradientBoostingClassifier, GradientBoostingRegressor] +) +def test_gradient_boosting_early_stopping(GradientBoostingEstimator): + # Check if early stopping works as expected, that is empirically check that the + # number of trained estimators is increasing when the tolerance decreases. + X, y = make_classification(n_samples=1000, random_state=0) + n_estimators = 1000 - gbc = GradientBoostingClassifier( - n_estimators=1000, + gb_large_tol = GradientBoostingEstimator( + n_estimators=n_estimators, n_iter_no_change=10, learning_rate=0.1, max_depth=3, random_state=42, + tol=1e-1, ) - gbr = GradientBoostingRegressor( - n_estimators=1000, + gb_small_tol = GradientBoostingEstimator( + n_estimators=n_estimators, n_iter_no_change=10, learning_rate=0.1, max_depth=3, random_state=42, + tol=1e-3, ) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) - # Check if early_stopping works as expected, that is empirically check that the - # number of estimators is increasing when the tolerance decreases. - - # Depending on platforms, the number of fitted estimators might slightly vary. - # Hence, we check for its inclusion in an interval centered in the expected - # number of fitted estimators rather than a strict equality. - delta_early_stop_n_estimators = 2 - for est, tol, expected_early_stop_n_estimators in ( - (gbc, 1e-1, 28), - (gbr, 1e-1, 13), - (gbc, 1e-3, 70), - (gbr, 1e-3, 28), - ): - est.set_params(tol=tol) - est.fit(X_train, y_train) - assert ( - expected_early_stop_n_estimators - delta_early_stop_n_estimators - <= est.n_estimators_ - <= expected_early_stop_n_estimators + delta_early_stop_n_estimators - ) - assert est.score(X_test, y_test) > 0.7 + gb_large_tol.fit(X_train, y_train) + gb_small_tol.fit(X_train, y_train) + + assert gb_large_tol.n_estimators_ < gb_small_tol.n_estimators_ < n_estimators + + assert gb_large_tol.score(X_test, y_test) > 0.7 + assert gb_small_tol.score(X_test, y_test) > 0.7 + + +def test_gradient_boosting_without_early_stopping(): + # When early stopping is not used, the number of trained estimators + # must be the one specified. + X, y = make_classification(n_samples=1000, random_state=0) - # Without early stopping gbc = GradientBoostingClassifier( n_estimators=50, learning_rate=0.1, max_depth=3, random_state=42 ) @@ -1154,6 +1154,7 @@ def test_gradient_boosting_early_stopping(): ) gbr.fit(X, y) + # The number of trained estimators but be the one specified. assert gbc.n_estimators_ == 50 assert gbr.n_estimators_ == 30 From 321221f8af9d253b1427b12e61f1a5a7f44056e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20du=20Boisberranger?= <34657725+jeremiedbb@users.noreply.github.com> Date: Fri, 7 Oct 2022 10:12:52 +0200 Subject: [PATCH 3/3] Update sklearn/ensemble/tests/test_gradient_boosting.py --- sklearn/ensemble/tests/test_gradient_boosting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index 944c4dde4e070..4c355332b1b81 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -1154,7 +1154,7 @@ def test_gradient_boosting_without_early_stopping(): ) gbr.fit(X, y) - # The number of trained estimators but be the one specified. + # The number of trained estimators must be the one specified. assert gbc.n_estimators_ == 50 assert gbr.n_estimators_ == 30