Commit
Remove params 'evals_result' and 'early_stopping_rounds' in lightgbm version > 3.3.1 (#5206)

The parameter evals_result has been removed on lightgbm's master branch: microsoft/LightGBM#4882.
The parameter early_stopping_rounds has been removed on lightgbm's master branch: microsoft/LightGBM#4908.

We should also remove these params in our tests.

This PR also fixes the sagemaker test failure.

Signed-off-by: Liang Zhang <liang.zhang@databricks.com>
liangz1 committed Dec 31, 2021
1 parent 62bd02b commit c0e0571
Showing 3 changed files with 134 additions and 65 deletions.
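
For reference, the post-3.3.1 replacement for both removed parameters is the callback API. A minimal sketch; the data, objective, and stopping round below are illustrative, not taken from this PR:

    import numpy as np
    import lightgbm as lgb

    # Illustrative data; any train/valid split works here.
    X, y = np.random.rand(200, 5), np.random.randint(0, 2, 200)
    train_set = lgb.Dataset(X[:150], label=y[:150])
    valid_set = lgb.Dataset(X[150:], label=y[150:], reference=train_set)

    evals_result = {}
    booster = lgb.train(
        {"objective": "binary"},
        train_set,
        num_boost_round=10,
        valid_sets=[valid_set],
        valid_names=["valid"],
        callbacks=[
            # Replaces the removed `evals_result` parameter.
            lgb.record_evaluation(evals_result),
            # Replaces the removed `early_stopping_rounds` parameter.
            lgb.early_stopping(5),
        ],
    )
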
16 changes: 9 additions & 7 deletions mlflow/lightgbm.py
@@ -25,6 +25,7 @@
import logging
import functools
from copy import deepcopy
+ from packaging.version import Version

import mlflow
from mlflow import pyfunc
@@ -372,7 +373,8 @@ def autolog(
- parameters specified in `lightgbm.train`_.
- metrics on each iteration (if ``valid_sets`` specified).
- - metrics at the best iteration (if ``early_stopping_rounds`` specified).
+ - metrics at the best iteration (if ``early_stopping_rounds`` specified or ``early_stopping``
+   callback is set).
- feature importance (both "split" and "gain") as JSON files and plots.
- trained model, including:
- an example of valid input.
@@ -496,10 +498,13 @@ def log_feature_importance_plot(features, importance, importance_type):
"fobj",
"feval",
"init_model",
"evals_result",
"learning_rates",
"callbacks",
]
+ if Version(lightgbm.__version__) <= Version("3.3.1"):
+     # The parameter `evals_result` in `lightgbm.train` is removed in this PR:
+     # https://github.com/microsoft/LightGBM/pull/4882
+     unlogged_params.append("evals_result")

params_to_log_for_fn = get_mlflow_run_params_for_fn_args(
original, args, kwargs, unlogged_params
@@ -531,12 +536,9 @@ def log_feature_importance_plot(features, importance, importance_type):
# training model
model = original(*args, **kwargs)

- # If early_stopping_rounds is present, logging metrics at the best iteration
+ # If early stopping is activated, logging metrics at the best iteration
# as extra metrics with the max step + 1.
- early_stopping_index = all_arg_names.index("early_stopping_rounds")
- early_stopping = (
-     num_pos_args >= early_stopping_index + 1 or "early_stopping_rounds" in kwargs
- )
+ early_stopping = model.best_iteration > 0
if early_stopping:
extra_step = len(eval_results)
autologging_client.log_metrics(
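
The new check above relies on the Booster's own bookkeeping instead of inspecting call-site arguments. Continuing the sketch near the top (reusing `booster` from there):

    # The Booster only reports a positive `best_iteration` when early
    # stopping was active, so autologging can detect it without inspecting
    # `args`/`kwargs` at the call site.
    early_stopping = booster.best_iteration > 0
    if early_stopping:
        print(f"best iteration: {booster.best_iteration}")
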
175 changes: 123 additions & 52 deletions tests/lightgbm/test_lightgbm_autolog.py
@@ -102,10 +102,13 @@ def test_lgb_autolog_logs_default_params(bst_params, train_set):
"fobj",
"feval",
"init_model",
"evals_result",
"learning_rates",
"callbacks",
]
+ if Version(lgb.__version__) <= Version("3.3.1"):
+     # The parameter `evals_result` in `lightgbm.train` is removed in this PR:
+     # https://github.com/microsoft/LightGBM/pull/4882
+     unlogged_params.append("evals_result")

for param in unlogged_params:
assert param not in params
@@ -116,12 +119,14 @@ def test_lgb_autolog_logs_specified_params(bst_params, train_set):
mlflow.lightgbm.autolog()
expected_params = {
"num_boost_round": 10,
"early_stopping_rounds": 5,
}
if Version(lgb.__version__) <= Version("3.3.1"):
    # The parameter `verbose_eval` in `lightgbm.train` is removed in this PR:
    # https://github.com/microsoft/LightGBM/pull/4878
+     # The parameter `early_stopping_rounds` in `lightgbm.train` is removed in this PR:
+     # https://github.com/microsoft/LightGBM/pull/4908
    expected_params["verbose_eval"] = False
+     expected_params["early_stopping_rounds"] = 5
lgb.train(bst_params, train_set, valid_sets=[train_set], **expected_params)
run = get_latest_run()
params = run.data.params
@@ -140,10 +145,13 @@ def test_lgb_autolog_logs_specified_params(bst_params, train_set):
"fobj",
"feval",
"init_model",
"evals_result",
"learning_rates",
"callbacks",
]
+ if Version(lgb.__version__) <= Version("3.3.1"):
+     # The parameter `evals_result` in `lightgbm.train` is removed in this PR:
+     # https://github.com/microsoft/LightGBM/pull/4882
+     unlogged_params.append("evals_result")

for param in unlogged_params:
assert param not in params
@@ -153,14 +161,24 @@ def test_lgb_autolog_logs_specified_params(bst_params, train_set):
def test_lgb_autolog_logs_metrics_with_validation_data(bst_params, train_set):
mlflow.lightgbm.autolog()
evals_result = {}
- lgb.train(
-     bst_params,
-     train_set,
-     num_boost_round=10,
-     valid_sets=[train_set],
-     valid_names=["train"],
-     evals_result=evals_result,
- )
+ if Version(lgb.__version__) <= Version("3.3.1"):
+     lgb.train(
+         bst_params,
+         train_set,
+         num_boost_round=10,
+         valid_sets=[train_set],
+         valid_names=["train"],
+         evals_result=evals_result,
+     )
+ else:
+     lgb.train(
+         bst_params,
+         train_set,
+         num_boost_round=10,
+         valid_sets=[train_set],
+         valid_names=["train"],
+         callbacks=[lgb.record_evaluation(evals_result)],
+     )
run = get_latest_run()
data = run.data
client = mlflow.tracking.MlflowClient()
@@ -179,14 +197,24 @@ def test_lgb_autolog_logs_metrics_with_multi_validation_data(bst_params, train_s
# To avoid that, create a new Dataset object.
valid_sets = [train_set, lgb.Dataset(train_set.data)]
valid_names = ["train", "valid"]
- lgb.train(
-     bst_params,
-     train_set,
-     num_boost_round=10,
-     valid_sets=valid_sets,
-     valid_names=valid_names,
-     evals_result=evals_result,
- )
+ if Version(lgb.__version__) <= Version("3.3.1"):
+     lgb.train(
+         bst_params,
+         train_set,
+         num_boost_round=10,
+         valid_sets=valid_sets,
+         valid_names=valid_names,
+         evals_result=evals_result,
+     )
+ else:
+     lgb.train(
+         bst_params,
+         train_set,
+         num_boost_round=10,
+         valid_sets=valid_sets,
+         valid_names=valid_names,
+         callbacks=[lgb.record_evaluation(evals_result)],
+     )
run = get_latest_run()
data = run.data
client = mlflow.tracking.MlflowClient()
@@ -206,14 +234,24 @@ def test_lgb_autolog_logs_metrics_with_multi_metrics(bst_params, train_set):
params.update(bst_params)
valid_sets = [train_set]
valid_names = ["train"]
- lgb.train(
-     params,
-     train_set,
-     num_boost_round=10,
-     valid_sets=valid_sets,
-     valid_names=valid_names,
-     evals_result=evals_result,
- )
+ if Version(lgb.__version__) <= Version("3.3.1"):
+     lgb.train(
+         params,
+         train_set,
+         num_boost_round=10,
+         valid_sets=valid_sets,
+         valid_names=valid_names,
+         evals_result=evals_result,
+     )
+ else:
+     lgb.train(
+         params,
+         train_set,
+         num_boost_round=10,
+         valid_sets=valid_sets,
+         valid_names=valid_names,
+         callbacks=[lgb.record_evaluation(evals_result)],
+     )
run = get_latest_run()
data = run.data
client = mlflow.tracking.MlflowClient()
@@ -233,14 +271,24 @@ def test_lgb_autolog_logs_metrics_with_multi_validation_data_and_metrics(bst_par
params.update(bst_params)
valid_sets = [train_set, lgb.Dataset(train_set.data)]
valid_names = ["train", "valid"]
- lgb.train(
-     params,
-     train_set,
-     num_boost_round=10,
-     valid_sets=valid_sets,
-     valid_names=valid_names,
-     evals_result=evals_result,
- )
+ if Version(lgb.__version__) <= Version("3.3.1"):
+     lgb.train(
+         params,
+         train_set,
+         num_boost_round=10,
+         valid_sets=valid_sets,
+         valid_names=valid_names,
+         evals_result=evals_result,
+     )
+ else:
+     lgb.train(
+         params,
+         train_set,
+         num_boost_round=10,
+         valid_sets=valid_sets,
+         valid_names=valid_names,
+         callbacks=[lgb.record_evaluation(evals_result)],
+     )
run = get_latest_run()
data = run.data
client = mlflow.tracking.MlflowClient()
@@ -279,14 +327,24 @@ def record_metrics_side_effect(self, metrics, step=None):
params.update(bst_params)
valid_sets = [train_set, lgb.Dataset(train_set.data)]
valid_names = ["train", "valid"]
- lgb.train(
-     params,
-     train_set,
-     num_boost_round=10,
-     valid_sets=valid_sets,
-     valid_names=valid_names,
-     evals_result=evals_result,
- )
+ if Version(lgb.__version__) <= Version("3.3.1"):
+     lgb.train(
+         params,
+         train_set,
+         num_boost_round=10,
+         valid_sets=valid_sets,
+         valid_names=valid_names,
+         evals_result=evals_result,
+     )
+ else:
+     lgb.train(
+         params,
+         train_set,
+         num_boost_round=10,
+         valid_sets=valid_sets,
+         valid_names=valid_names,
+         callbacks=[lgb.record_evaluation(evals_result)],
+     )

run = get_latest_run()
original_metrics = run.data.metrics
@@ -307,15 +365,28 @@ def test_lgb_autolog_logs_metrics_with_early_stopping(bst_params, train_set):
params.update(bst_params)
valid_sets = [train_set, lgb.Dataset(train_set.data)]
valid_names = ["train", "valid"]
- model = lgb.train(
-     params,
-     train_set,
-     num_boost_round=10,
-     early_stopping_rounds=5,
-     valid_sets=valid_sets,
-     valid_names=valid_names,
-     evals_result=evals_result,
- )
+ if Version(lgb.__version__) <= Version("3.3.1"):
+     model = lgb.train(
+         params,
+         train_set,
+         num_boost_round=10,
+         early_stopping_rounds=5,
+         valid_sets=valid_sets,
+         valid_names=valid_names,
+         evals_result=evals_result,
+     )
+ else:
+     model = lgb.train(
+         params,
+         train_set,
+         num_boost_round=10,
+         valid_sets=valid_sets,
+         valid_names=valid_names,
+         callbacks=[
+             lgb.record_evaluation(evals_result),
+             lgb.early_stopping(5),
+         ],
+     )
run = get_latest_run()
data = run.data
client = mlflow.tracking.MlflowClient()
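
The same version gate now appears verbatim in several tests. If the duplication becomes a burden, it could be folded into a small helper; a hypothetical refactor, not part of this PR:

    from packaging.version import Version
    import lightgbm as lgb

    def train_recording_evals(params, train_set, evals_result, **kwargs):
        # Hypothetical test helper: route `evals_result` through whichever
        # API the installed LightGBM version supports.
        if Version(lgb.__version__) <= Version("3.3.1"):
            return lgb.train(params, train_set, evals_result=evals_result, **kwargs)
        callbacks = kwargs.pop("callbacks", [])
        return lgb.train(
            params,
            train_set,
            callbacks=[*callbacks, lgb.record_evaluation(evals_result)],
            **kwargs,
        )

Each test body would then shrink to a single call, e.g. train_recording_evals(bst_params, train_set, evals_result, num_boost_round=10, valid_sets=[train_set], valid_names=["train"]).
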
8 changes: 2 additions & 6 deletions tests/sagemaker/mock/__init__.py
@@ -5,7 +5,6 @@

from moto.core import BaseBackend, BaseModel
from moto.core.responses import BaseResponse
- from moto.ec2 import ec2_backends

from moto.iam.models import ACCOUNT_ID
from moto.core.models import base_decorator
@@ -1076,10 +1075,7 @@ def response_object(self):
return response


- # Create a SageMaker backend for each EC2 region
- sagemaker_backends = {}
- for region, ec2_backend in ec2_backends.items():
-     new_backend = SageMakerBackend()
-     sagemaker_backends[region] = new_backend
+ # Create a SageMaker backend for EC2 region: "us-west-2"
+ sagemaker_backends = {"us-west-2": SageMakerBackend()}

mock_sagemaker = base_decorator(sagemaker_backends)
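
With the backend dict narrowed to a single region, mocked clients have to target "us-west-2" explicitly. A hypothetical usage sketch (the test name is illustrative, and ListEndpoints support in the mock is assumed here):

    import boto3
    from tests.sagemaker.mock import mock_sagemaker

    @mock_sagemaker
    def test_sagemaker_backend_starts_empty():
        # Only "us-west-2" is registered above, so the mocked client must
        # target that region.
        client = boto3.client("sagemaker", region_name="us-west-2")
        assert client.list_endpoints()["Endpoints"] == []
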
