
Commit

Autologging functionality for scikit-learn integration with LightGBM (Part 2) (#5200)

* init commit, to-do: examples

Signed-off-by: Junwen Yao <jwyiao@gmail.com>

* add examples, update doc

Signed-off-by: Junwen Yao <jwyiao@gmail.com>

* re-start example test

Signed-off-by: Junwen Yao <jwyiao@gmail.com>

* update

Signed-off-by: Junwen Yao <jwyiao@gmail.com>

* check sagemaker

Signed-off-by: Junwen Yao <jwyiao@gmail.com>

* [resolve conflict] update

Signed-off-by: Junwen Yao <jwyiao@gmail.com>
jwyyy committed Jan 14, 2022
1 parent 0144d86 commit ee9532a
Showing 16 changed files with 228 additions and 46 deletions.
25 changes: 2 additions & 23 deletions examples/lightgbm/README.md
@@ -1,25 +1,4 @@
# LightGBM Example

This example trains a LightGBM classifier with the iris dataset and logs hyperparameters, metrics, and trained model.
# Examples for LightGBM Autologging

## Running the code

```
python train.py --colsample-bytree 0.8 --subsample 0.9
```
You can try experimenting with different parameter values like:
```
python train.py --learning-rate 0.4 --colsample-bytree 0.7 --subsample 0.8
```

Then you can open the MLflow UI to track the experiments and compare your runs via:
```
mlflow ui
```

## Running the code as a project

```
mlflow run . -P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9
```
LightGBM autologging is demonstrated through two examples. The first example, in the `lightgbm_native` folder, logs a Booster model trained via `lightgbm.train()`. The second example, in the `lightgbm_sklearn` folder, shows how autologging works for LightGBM scikit-learn models. Autologging for all LightGBM models is enabled via `mlflow.lightgbm.autolog()`.
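
For orientation, here is a minimal, hedged sketch of what enabling autologging looks like with the native API (the dataset and parameters below are illustrative and not taken from the example scripts):

```python
import lightgbm as lgb
import mlflow
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# A single call enables autologging for lightgbm.train() and, after this change,
# for the LightGBM scikit-learn estimators as well.
mlflow.lightgbm.autolog()

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

with mlflow.start_run():
    dtrain = lgb.Dataset(X_train, label=y_train)
    dvalid = lgb.Dataset(X_test, label=y_test, reference=dtrain)
    # Params, evaluation metrics, and the trained Booster are logged automatically.
    lgb.train(
        {"objective": "multiclass", "num_class": 3, "learning_rate": 0.1},
        dtrain,
        num_boost_round=10,
        valid_sets=[dvalid],
    )
```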
File renamed without changes.
25 changes: 25 additions & 0 deletions examples/lightgbm/lightgbm_native/README.md
@@ -0,0 +1,25 @@
# LightGBM Example

This example trains a LightGBM classifier with the iris dataset and logs hyperparameters, metrics, and trained model.

## Running the code

```
python train.py --colsample-bytree 0.8 --subsample 0.9
```
You can try experimenting with different parameter values like:
```
python train.py --learning-rate 0.4 --colsample-bytree 0.7 --subsample 0.8
```

Then you can open the MLflow UI to track the experiments and compare your runs via:
```
mlflow ui
```

## Running the code as a project

```
mlflow run . -P learning_rate=0.2 -P colsample_bytree=0.8 -P subsample=0.9
```
File renamed without changes.
File renamed without changes.
5 changes: 5 additions & 0 deletions examples/lightgbm/lightgbm_sklearn/MLproject
@@ -0,0 +1,5 @@
name: lightgbm-sklearn-example
conda_env: conda.yaml
entry_points:
  main:
    command: python train.py
11 changes: 11 additions & 0 deletions examples/lightgbm/lightgbm_sklearn/README.md
@@ -0,0 +1,11 @@
# LightGBM Scikit-learn Model Example

This example trains a [`lightgbm.LGBMClassifier`](https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.LGBMClassifier.html) on the iris dataset and logs hyperparameters, metrics, and the trained model.

Like the other LightGBM example, we enable autologging for LightGBM scikit-learn models via `mlflow.lightgbm.autolog()`. Saving and loading models with `mlflow.lightgbm.save_model()` / `mlflow.lightgbm.load_model()` also supports LightGBM scikit-learn models.

You can run this example using the following command:

```
python train.py
```
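
As a hedged sketch of the save / load support mentioned above (the model URI uses the standard `runs:/<run_id>/<artifact_path>` form; everything else is illustrative):

```python
import lightgbm as lgb
import mlflow
import mlflow.lightgbm
from sklearn.datasets import load_iris

X, y = load_iris(return_X_y=True)
model = lgb.LGBMClassifier(n_estimators=10).fit(X, y)

# Log the scikit-learn model under the LightGBM flavor ...
with mlflow.start_run() as run:
    mlflow.lightgbm.log_model(model, artifact_path="model")

# ... and load it back; after this change it comes back as the same
# scikit-learn estimator class rather than a raw Booster.
loaded = mlflow.lightgbm.load_model("runs:/{}/model".format(run.info.run_id))
print(type(loaded))
```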
11 changes: 11 additions & 0 deletions examples/lightgbm/lightgbm_sklearn/conda.yaml
@@ -0,0 +1,11 @@
name: lightgbm-example
channels:
- conda-forge
dependencies:
- python=3.6
- pip
- pip:
  - mlflow>=1.6.0
  - matplotlib
  - lightgbm
  - cloudpickle>=2.0.0
39 changes: 39 additions & 0 deletions examples/lightgbm/lightgbm_sklearn/train.py
@@ -0,0 +1,39 @@
from pprint import pprint

import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

import mlflow
import mlflow.lightgbm

from utils import fetch_logged_data


def main():
    # prepare example dataset
    X, y = load_iris(return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # enable auto logging
    # this includes lightgbm.sklearn estimators
    mlflow.lightgbm.autolog()

    with mlflow.start_run() as run:
        model = lgb.LGBMClassifier(n_estimators=20, reg_lambda=1.0)
        model.fit(X_train, y_train, eval_set=[(X_test, y_test)])
        y_pred = model.predict(X_test)
        f1 = f1_score(y_test, y_pred, average="micro")
        print("f1_score: {}".format(f1))
        run_id = run.info.run_id
        print("Logged data and model in run {}".format(run_id))

    # show logged data
    for key, data in fetch_logged_data(run.info.run_id).items():
        print("\n---------- logged {} ----------".format(key))
        pprint(data)


if __name__ == "__main__":
    main()
26 changes: 26 additions & 0 deletions examples/lightgbm/lightgbm_sklearn/utils.py
@@ -0,0 +1,26 @@
import mlflow


def yield_artifacts(run_id, path=None):
    """Yield all artifacts in the specified run"""
    client = mlflow.tracking.MlflowClient()
    for item in client.list_artifacts(run_id, path):
        if item.is_dir:
            yield from yield_artifacts(run_id, item.path)
        else:
            yield item.path


def fetch_logged_data(run_id):
    """Fetch params, metrics, tags, and artifacts in the specified run"""
    client = mlflow.tracking.MlflowClient()
    data = client.get_run(run_id).data
    # Exclude system tags: https://www.mlflow.org/docs/latest/tracking.html#system-tags
    tags = {k: v for k, v in data.tags.items() if not k.startswith("mlflow.")}
    artifacts = list(yield_artifacts(run_id))
    return {
        "params": data.params,
        "metrics": data.metrics,
        "tags": tags,
        "artifacts": artifacts,
    }
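
A quick usage sketch of this helper outside `train.py` (the run ID below is a placeholder for one printed by a previous run):

```python
from pprint import pprint

from utils import fetch_logged_data

# Replace the placeholder with a real run ID, e.g. the one printed by train.py.
run_id = "<run-id-printed-by-train.py>"
for key, data in fetch_logged_data(run_id).items():
    print("\n---------- logged {} ----------".format(key))
    pprint(data)
```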
43 changes: 35 additions & 8 deletions mlflow/lightgbm.py
@@ -103,8 +103,8 @@ def save_model(
"""
Save a LightGBM model to a path on the local file system.
:param lgb_model: LightGBM model (an instance of `lightgbm.Booster`_) to be saved.
Note that models that implement the `scikit-learn API`_ are not supported.
:param lgb_model: LightGBM model (an instance of `lightgbm.Booster`_) or
models that implement the `scikit-learn API`_ to be saved.
:param path: Local path where the model is to be saved.
:param conda_env: {{ conda_env }}
:param mlflow_model: :py:mod:`mlflow.models.Model` this flavor is being added to.
@@ -231,8 +231,8 @@ def log_model(
"""
Log a LightGBM model as an MLflow artifact for the current run.
:param lgb_model: LightGBM model (an instance of `lightgbm.Booster`_) to be saved.
Note that models that implement the `scikit-learn API`_ are not supported.
:param lgb_model: LightGBM model (an instance of `lightgbm.Booster`_) or
models that implement the `scikit-learn API`_ to be saved.
:param artifact_path: Run-relative artifact path.
:param conda_env: {{ conda_env }}
:param registered_model_name: If given, create a model version under
@@ -382,7 +382,7 @@ def autolog(
- an example of valid input.
- inferred signature of the inputs and outputs of the model.
Note that the `scikit-learn API`_ is not supported.
Note that the `scikit-learn API`_ is now supported.
:param log_input_examples: If ``True``, input examples from training datasets are collected and
logged along with LightGBM model artifacts during training. If
@@ -439,7 +439,7 @@ def __init__(original, self, *args, **kwargs):

original(self, *args, **kwargs)

def train(original, *args, **kwargs):
def train(_log_models, original, *args, **kwargs):
def record_eval_results(eval_results, metrics_logger):
"""
Create a callback function that records evaluation results.
@@ -602,7 +602,7 @@ def infer_model_signature(input_example):
return model_signature

# Whether to automatically log the trained model based on boolean flag.
if log_models:
if _log_models:
# Will only resolve `input_example` and `signature` if `log_models` is `True`.
input_example, signature = resolve_input_example_and_signature(
get_input_example,
@@ -625,5 +625,32 @@ def infer_model_signature(input_example):

return model

safe_patch(FLAVOR_NAME, lightgbm, "train", train, manage_run=True)
safe_patch(FLAVOR_NAME, lightgbm.Dataset, "__init__", __init__)
safe_patch(
FLAVOR_NAME, lightgbm, "train", functools.partial(train, log_models), manage_run=True
)
# The `train()` method logs LightGBM models as Booster objects. For LightGBM
# scikit-learn models, we want to save / log models as their model classes instead,
# so we turn off the log_models functionality in the `train()` method patched onto
# `lightgbm.sklearn`. Model logging is handled instead in
# `fit_mlflow_xgboost_and_lightgbm()` in `mlflow.sklearn._autolog()`, where models are
# logged as LightGBM scikit-learn models after the `fit()` method returns.
safe_patch(
FLAVOR_NAME, lightgbm.sklearn, "train", functools.partial(train, False), manage_run=True
)

# enable LightGBM scikit-learn estimators autologging
import mlflow.sklearn

mlflow.sklearn._autolog(
flavor_name=FLAVOR_NAME,
log_input_examples=log_input_examples,
log_model_signatures=log_model_signatures,
log_models=log_models,
disable=disable,
exclusive=exclusive,
disable_for_unsupported_versions=disable_for_unsupported_versions,
silent=silent,
max_tuning_runs=None,
log_post_training_metrics=True,
)
21 changes: 15 additions & 6 deletions mlflow/sklearn/__init__.py
@@ -1200,6 +1200,7 @@ def _autolog(
_is_supported_version,
_get_X_y_and_sample_weight,
_gen_xgboost_sklearn_estimators_to_patch,
_gen_lightgbm_sklearn_estimators_to_patch,
_log_estimator_content,
_all_estimators,
_get_estimator_info_tags,
@@ -1227,12 +1228,12 @@ def _autolog(
stacklevel=2,
)

def fit_mlflow_xgboost(original, self, *args, **kwargs):
def fit_mlflow_xgboost_and_lightgbm(original, self, *args, **kwargs):
"""
Autologging function for XGBoost scikit-learn models
Autologging function for XGBoost and LightGBM scikit-learn models
"""
# parameter, metric, and non-model artifact logging
# are done in `train()` in `mlflow.xgboost.autolog()`
# parameter, metric, and non-model artifact logging are done in
# `train()` in `mlflow.xgboost.autolog()` and `mlflow.lightgbm.autolog()`
fit_output = original(self, *args, **kwargs)
# log models after training
X = _get_X_y_and_sample_weight(self.fit, args, kwargs)[0]
@@ -1244,7 +1245,12 @@ def fit_mlflow_xgboost(original, self, *args, **kwargs):
log_model_signatures,
_logger,
)
mlflow.xgboost.log_model(
log_model_func = (
mlflow.xgboost.log_model
if flavor_name == mlflow.xgboost.FLAVOR_NAME
else mlflow.lightgbm.log_model
)
log_model_func(
self,
artifact_path="model",
signature=signature,
@@ -1611,7 +1617,10 @@ def out(*args, **kwargs):

if flavor_name == mlflow.xgboost.FLAVOR_NAME:
estimators_to_patch = _gen_xgboost_sklearn_estimators_to_patch()
patched_fit_impl = fit_mlflow_xgboost
patched_fit_impl = fit_mlflow_xgboost_and_lightgbm
elif flavor_name == mlflow.lightgbm.FLAVOR_NAME:
estimators_to_patch = _gen_lightgbm_sklearn_estimators_to_patch()
patched_fit_impl = fit_mlflow_xgboost_and_lightgbm
else:
estimators_to_patch = _gen_estimators_to_patch()
patched_fit_impl = fit_mlflow
20 changes: 19 additions & 1 deletion mlflow/sklearn/utils.py
@@ -47,6 +47,25 @@ def _gen_xgboost_sklearn_estimators_to_patch():
return sklearn_estimators


def _gen_lightgbm_sklearn_estimators_to_patch():
import mlflow.lightgbm
import lightgbm as lgb

all_classes = inspect.getmembers(lgb.sklearn, inspect.isclass)
base_class = lgb.sklearn._LGBMModelBase
sklearn_estimators = []
for _, class_object in all_classes:
package_name = class_object.__module__.split(".")[0]
if (
package_name == mlflow.lightgbm.FLAVOR_NAME
and issubclass(class_object, base_class)
and class_object != base_class
):
sklearn_estimators.append(class_object)

return sklearn_estimators


def _get_estimator_info_tags(estimator):
"""
:return: A dictionary of MLflow run tag keys and values
@@ -102,7 +121,6 @@ def _get_sample_weight(arg_names, args, kwargs):
return None

fit_arg_names = _get_arg_names(fit_func)

# In most cases, X_var_name and y_var_name become "X" and "y", respectively.
# However, certain sklearn models use different variable names for X and y.
# E.g., see: https://scikit-learn.org/stable/modules/generated/sklearn.multioutput.MultiOutputClassifier.html#sklearn.multioutput.MultiOutputClassifier.fit
13 changes: 7 additions & 6 deletions tests/autologging/test_autologging_behaviors_integration.py
@@ -90,17 +90,18 @@ def test_autologging_integrations_use_safe_patch_for_monkey_patching(integration
) as gorilla_mock, mock.patch(
integration.__name__ + ".safe_patch", wraps=safe_patch
) as safe_patch_mock:
# In `mlflow.xgboost.autolog()`, we enable autologging for XGBoost sklearn
# models using `mlflow.sklearn._autolog()`. So besides `safe_patch` calls in
# `mlflow.xgboost.autolog()`, we need to count additional `safe_patch` calls
# In `mlflow.xgboost.autolog()` and `mlflow.lightgbm.autolog()`,
# we enable autologging for XGBoost and LightGBM sklearn models
# using `mlflow.sklearn._autolog()`. So besides `safe_patch` calls in
# `autolog()`, we need to count additional `safe_patch` calls
# in sklearn autologging routine as well.
if integration.__name__ == "mlflow.xgboost":
if integration.__name__ in ["mlflow.xgboost", "mlflow.lightgbm"]:
with mock.patch(
"mlflow.sklearn.safe_patch", wraps=safe_patch
) as xgb_sklearn_safe_patch_mock:
) as sklearn_safe_patch_mock:
integration.autolog(disable=False)
safe_patch_call_count = (
safe_patch_mock.call_count + xgb_sklearn_safe_patch_mock.call_count
safe_patch_mock.call_count + sklearn_safe_patch_mock.call_count
)
else:
integration.autolog(disable=False)
6 changes: 4 additions & 2 deletions tests/examples/test_examples.py
@@ -82,9 +82,10 @@ def report_free_disk_space(capsys):
("hyperparam", ["-e", "gpyopt", "-P", "epochs=1"]),
("hyperparam", ["-e", "hyperopt", "-P", "epochs=1"]),
(
"lightgbm",
os.path.join("lightgbm", "lightgbm_native"),
["-P", "learning_rate=0.1", "-P", "colsample_bytree=0.8", "-P", "subsample=0.9"],
),
(os.path.join("lightgbm", "lightgbm_sklearn"), []),
("statsmodels", ["-P", "inverse_method=qr"]),
("pytorch", ["-P", "epochs=2"]),
("sklearn_logistic_regression", []),
@@ -140,7 +141,7 @@ def test_mlflow_run_example(directory, params, tmpdir):
("gluon", ["python", "train.py"]),
("keras", ["python", "train.py"]),
(
"lightgbm",
os.path.join("lightgbm", "lightgbm_native"),
[
"python",
"train.py",
@@ -152,6 +153,7 @@
"0.9",
],
),
(os.path.join("lightgbm", "lightgbm_sklearn"), ["python", "train.py"]),
("statsmodels", ["python", "train.py", "--inverse-method", "qr"]),
("quickstart", ["python", "mlflow_tracking.py"]),
("remote_store", ["python", "remote_server.py"]),
