Autologging functionality for scikit-learn integration with XGBoost (Part 2) #5078

Merged
merged 15 commits on Nov 29, 2021
48 changes: 48 additions & 0 deletions examples/xgboost_sklearn/train_sklearn.py
@@ -0,0 +1,48 @@
from pprint import pprint
jwyyy marked this conversation as resolved.

Collaborator:
Awesome example! Can we add a brief README to this directory explaining what this example covers? E.g. Usage of XGBoost's scikit-learn integration with MLflow Tracking, particularly autologging?

import pandas as pd
import xgboost as xgb
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import numpy as np
import mlflow
import mlflow.xgboost

from utils import fetch_logged_data


def main():
# prepare example dataset
wine = load_wine()
X = pd.DataFrame(wine.data, columns=wine.feature_names)
y = pd.Series(wine.target)
X_train, X_test, y_train, y_test = train_test_split(X, y)

    # enable autologging
    # this includes xgboost.sklearn estimators
mlflow.xgboost.autolog()

with mlflow.start_run() as run:

regressor = xgb.XGBRegressor(n_estimators=100, reg_lambda=1, gamma=0, max_depth=3)
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
run_id = run.info.run_id
print("Logged data and model in run {}".format(run_id))
mlflow.xgboost.log_model(regressor, artifact_path="log_model")

# show logged data
for key, data in fetch_logged_data(run.info.run_id).items():
print("\n---------- logged {} ----------".format(key))
pprint(data)

mlflow.xgboost.save_model(regressor, "trained_model/")
reload_model = mlflow.pyfunc.load_model("trained_model/")
np.testing.assert_array_almost_equal(y_pred, reload_model.predict(X_test))


if __name__ == "__main__":
main()
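Responding to the review comment above, a brief README for this directory might read along these lines (a sketch only; the final wording is the author's call):

Example: XGBoost scikit-learn integration with MLflow Tracking

This example shows MLflow autologging for XGBoost's scikit-learn estimators. train_sklearn.py enables mlflow.xgboost.autolog(), trains an xgboost.XGBRegressor on the wine dataset, prints the logged params, metrics, tags, and artifacts, and reloads the saved model with mlflow.pyfunc.load_model to verify that predictions round-trip. Run it with:

python train_sklearn.py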
26 changes: 26 additions & 0 deletions examples/xgboost_sklearn/utils.py
@@ -0,0 +1,26 @@
import mlflow


def yield_artifacts(run_id, path=None):
"""Yield all artifacts in the specified run"""
client = mlflow.tracking.MlflowClient()
for item in client.list_artifacts(run_id, path):
if item.is_dir:
yield from yield_artifacts(run_id, item.path)
else:
yield item.path


def fetch_logged_data(run_id):
"""Fetch params, metrics, tags, and artifacts in the specified run"""
client = mlflow.tracking.MlflowClient()
data = client.get_run(run_id).data
# Exclude system tags: https://www.mlflow.org/docs/latest/tracking.html#system-tags
tags = {k: v for k, v in data.tags.items() if not k.startswith("mlflow.")}
artifacts = list(yield_artifacts(run_id))
return {
"params": data.params,
"metrics": data.metrics,
"tags": tags,
"artifacts": artifacts,
}
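A quick usage sketch for these helpers (the param and metric values are hypothetical; note that MLflow stores params as strings):

import mlflow

from utils import fetch_logged_data

with mlflow.start_run() as run:
    mlflow.log_param("alpha", 0.5)
    mlflow.log_metric("rmse", 0.1)

logged = fetch_logged_data(run.info.run_id)
assert logged["params"] == {"alpha": "0.5"}  # params come back as strings
assert logged["metrics"] == {"rmse": 0.1}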
98 changes: 86 additions & 12 deletions mlflow/sklearn/__init__.py
@@ -371,7 +371,7 @@ def log_model(
# log model
mlflow.sklearn.log_model(sk_model, "sk_models")
"""
return Model.log(
Model.log(
artifact_path=artifact_path,
flavor=mlflow.sklearn,
sk_model=sk_model,
@@ -1152,6 +1152,40 @@ def fetch_logged_data(run_id):
``True``. See the `post training metrics`_ section for more
details.
"""
_autolog(
flavor_name=FLAVOR_NAME,
log_input_examples=log_input_examples,
log_model_signatures=log_model_signatures,
log_models=log_models,
disable=disable,
exclusive=exclusive,
disable_for_unsupported_versions=disable_for_unsupported_versions,
silent=silent,
max_tuning_runs=max_tuning_runs,
log_post_training_metrics=log_post_training_metrics,
)


def _autolog(
flavor_name=FLAVOR_NAME,
log_input_examples=False,
log_model_signatures=True,
log_models=True,
disable=False,
exclusive=False,
disable_for_unsupported_versions=False,
silent=False,
max_tuning_runs=5,
log_post_training_metrics=True,
): # pylint: disable=unused-argument
"""
Internal autologging function for scikit-learn models.
:param flavor_name: A string value. Enable a ``mlflow.sklearn`` autologging routine
for a flavor. By default it enables autologging for original
scikit-learn models, as ``mlflow.sklearn.autolog()`` does. If
the argument is `xgboost`, autologging for XGBoost scikit-learn
models is enabled.
"""
import pandas as pd
import sklearn
import sklearn.metrics
@@ -1162,6 +1196,7 @@ def fetch_logged_data(run_id):
_MIN_SKLEARN_VERSION,
_TRAINING_PREFIX,
_is_supported_version,
_gen_xgboost_sklearn_estimators_to_patch,
_get_args_for_metrics,
_log_estimator_content,
_all_estimators,
@@ -1190,6 +1225,38 @@ def fetch_logged_data(run_id):
stacklevel=2,
)

def fit_mlflow_xgboost(original, self, *args, **kwargs):
"""
Autologging function for XGBoost scikit-learn models
"""
autologging_client = MlflowAutologgingQueueingClient()
autologging_client.set_tags(
run_id=mlflow.active_run().info.run_id,
tags=_get_estimator_info_tags(self),
)
tags_logging = autologging_client.flush(synchronous=False)
# parameter, metric, and non-model artifact logging
# are done in `train()` in `mlflow.xgboost.autolog()`
fit_output = original(self, *args, **kwargs)
# log models after training
(X, _, _) = _get_args_for_metrics(self.fit, args, kwargs)
Member:

Does _get_args_for_metrics always return a tuple with 3 elements?

Member:

Never mind. It does:

def _get_args_for_metrics(fit_func, fit_args, fit_kwargs):

Member:

X = _get_args_for_metrics(self.fit, args, kwargs)[0] might be safer.

Member:

We might want to rename _get_args_for_metrics to something like _get_X_y_and_sample_weight. I'll take care of this.
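For context, a simplified sketch of what such a _get_X_y_and_sample_weight-style helper could look like (illustrative only; the real _get_args_for_metrics in mlflow/sklearn/utils.py handles more edge cases):

import inspect

def _get_X_y_and_sample_weight(fit_func, fit_args, fit_kwargs):
    # Bind the captured arguments to fit()'s signature, then pull out the
    # conventional X / y / sample_weight parameters when present.
    bound = inspect.signature(fit_func).bind_partial(*fit_args, **fit_kwargs)
    return (
        bound.arguments.get("X"),
        bound.arguments.get("y"),
        bound.arguments.get("sample_weight"),
    )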

if log_models:
input_example, signature = resolve_input_example_and_signature(
lambda: X[:INPUT_EXAMPLE_SAMPLE_ROWS],
lambda input_example: infer_signature(input_example, self.predict(input_example)),
log_input_examples,
log_model_signatures,
_logger,
)
mlflow.xgboost.log_model(
self,
artifact_path="model",
signature=signature,
input_example=input_example,
)
tags_logging.await_completion()
return fit_output

def fit_mlflow(original, self, *args, **kwargs):
"""
Autologging function that performs model training by executing the training method
@@ -1340,7 +1407,7 @@ def _log_model_with_except_handling(*args, **kwargs):
# Fetch environment-specific tags (e.g., user and source) to ensure that lineage
# information is consistent with the parent run
child_tags = context_registry.resolve_tags()
child_tags.update({MLFLOW_AUTOLOGGING: FLAVOR_NAME})
child_tags.update({MLFLOW_AUTOLOGGING: flavor_name})
_create_child_runs_for_parameter_search(
autologging_client=autologging_client,
cv_estimator=estimator,
@@ -1369,7 +1436,7 @@ def _log_model_with_except_handling(*args, **kwargs):
)
_logger.warning(msg)

def patched_fit(original, self, *args, **kwargs):
def patched_fit(fit_impl, original, self, *args, **kwargs):
"""
Autologging patch function to be applied to a sklearn model class that defines a `fit`
method and inherits from `BaseEstimator` (thereby defining the `get_params()` method)
@@ -1390,7 +1457,7 @@ def patched_fit(original, self, *args, **kwargs):
# In `fit_mlflow` call, it will also call metric API for computing training metrics
# so we need temporarily disable the post_training_metrics patching.
with _AUTOLOGGING_METRICS_MANAGER.disable_log_post_training_metrics():
result = fit_mlflow(original, self, *args, **kwargs)
result = fit_impl(original, self, *args, **kwargs)
if should_log_post_training_metrics:
_AUTOLOGGING_METRICS_MANAGER.register_model(
self, mlflow.active_run().info.run_id
@@ -1547,21 +1614,28 @@ def out(*args, **kwargs):

_apply_sklearn_descriptor_unbound_method_call_fix()

for class_def in _gen_estimators_to_patch():
if flavor_name == mlflow.xgboost.FLAVOR_NAME:
estimators_to_patch = _gen_xgboost_sklearn_estimators_to_patch()
patched_fit_impl = fit_mlflow_xgboost
else:
estimators_to_patch = _gen_estimators_to_patch()
patched_fit_impl = fit_mlflow

for class_def in estimators_to_patch:
# Patch fitting methods
for func_name in ["fit", "fit_transform", "fit_predict"]:
_patch_estimator_method_if_available(
FLAVOR_NAME,
flavor_name,
class_def,
func_name,
patched_fit,
functools.partial(patched_fit, patched_fit_impl),
manage_run=True,
)

# Patch inference methods
for func_name in ["predict", "predict_proba", "transform", "predict_log_proba"]:
_patch_estimator_method_if_available(
FLAVOR_NAME,
flavor_name,
class_def,
func_name,
patched_predict,
@@ -1570,7 +1644,7 @@ def out(*args, **kwargs):

# Patch scoring methods
_patch_estimator_method_if_available(
FLAVOR_NAME,
flavor_name,
class_def,
"score",
patched_model_score,
@@ -1580,19 +1654,19 @@ def out(*args, **kwargs):
if log_post_training_metrics:
for metric_name in _get_metric_name_list():
safe_patch(
FLAVOR_NAME, sklearn.metrics, metric_name, patched_metric_api, manage_run=False
flavor_name, sklearn.metrics, metric_name, patched_metric_api, manage_run=False
)

for scorer in sklearn.metrics.SCORERS.values():
safe_patch(FLAVOR_NAME, scorer, "_score_func", patched_metric_api, manage_run=False)
safe_patch(flavor_name, scorer, "_score_func", patched_metric_api, manage_run=False)

def patched_fn_with_autolog_disabled(original, *args, **kwargs):
with disable_autologging():
return original(*args, **kwargs)

for disable_autolog_func_name in _apis_autologging_disabled:
safe_patch(
FLAVOR_NAME,
flavor_name,
sklearn.model_selection,
disable_autolog_func_name,
patched_fn_with_autolog_disabled,
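The patching above relies on partial application: patched_fit now takes the concrete fit implementation as its first argument, and functools.partial(patched_fit, patched_fit_impl) pre-binds it so that safe_patch still sees a callable of shape (original, self, *args, **kwargs). A self-contained toy sketch of the pattern (names are illustrative, not MLflow code):

import functools

def patched(impl, original, *args, **kwargs):
    # impl is pre-bound via functools.partial; original is the unpatched callable
    return impl(original, *args, **kwargs)

def logging_impl(original, *args, **kwargs):
    print("before fit")
    result = original(*args, **kwargs)
    print("after fit")
    return result

wrapper = functools.partial(patched, logging_impl)
print(wrapper(sum, [1, 2, 3]))  # prints "before fit", "after fit", then 6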
13 changes: 13 additions & 0 deletions mlflow/sklearn/utils.py
@@ -34,6 +34,19 @@
_SklearnMetric = collections.namedtuple("_SklearnMetric", ["name", "function", "arguments"])


def _gen_xgboost_sklearn_estimators_to_patch():
import xgboost as xgb

all_classes = inspect.getmembers(xgb.sklearn, inspect.isclass)
base_class = xgb.sklearn.XGBModel
sklearn_estimators = []
for _, class_object in all_classes:
if issubclass(class_object, base_class) and class_object != base_class:
sklearn_estimators.append(class_object)

return sklearn_estimators


def _get_estimator_info_tags(estimator):
"""
:return: A dictionary of MLflow run tag keys and values
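For reference, a quick way to see what this discovery yields (a sketch; with a recent xgboost it should print classes such as XGBClassifier, XGBRegressor, XGBRanker, XGBRFClassifier, and XGBRFRegressor):

from mlflow.sklearn.utils import _gen_xgboost_sklearn_estimators_to_patch

for cls in _gen_xgboost_sklearn_estimators_to_patch():
    print(cls.__name__)  # every xgboost.sklearn subclass of XGBModel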
29 changes: 24 additions & 5 deletions mlflow/xgboost/__init__.py
@@ -381,9 +381,9 @@ def autolog(
autologging. If ``False``, show all events and warnings during XGBoost
autologging.
"""
import functools
import xgboost
import numpy as np
import functools

if importance_types is None:
importance_types = ["weight"]
@@ -412,7 +412,7 @@ def __init__(original, self, *args, **kwargs):

original(self, *args, **kwargs)

def train(original, *args, **kwargs):
def train(_log_models, original, *args, **kwargs):
def record_eval_results(eval_results, metrics_logger):
"""
Create a callback function that records evaluation results.
@@ -426,7 +426,7 @@ def record_eval_results(eval_results, metrics_logger):

# In xgboost >= 1.3.0, user-defined callbacks should inherit
# `xgboost.callback.TrainingCallback`:
# https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback # noqa
# https://xgboost.readthedocs.io/en/latest/python/callbacks.html#defining-your-own-callback
return AutologCallback(metrics_logger, eval_results)
else:
from mlflow.xgboost._autolog import autolog_callback
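For background, the callback contract referenced above (xgboost >= 1.3.0) looks roughly like this; a sketch of a minimal custom callback, not MLflow's actual AutologCallback:

import xgboost as xgb

class PrintLastEval(xgb.callback.TrainingCallback):
    def after_iteration(self, model, epoch, evals_log):
        # evals_log maps dataset name -> metric name -> list of per-iteration values
        for data_name, metrics in evals_log.items():
            for metric_name, values in metrics.items():
                print(epoch, data_name, metric_name, values[-1])
        return False  # returning True would stop training early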
@@ -645,7 +645,7 @@ def infer_model_signature(input_example):
return model_signature

# Only log the model if the autolog() param log_models is set to True.
if log_models:
if _log_models:
# Will only resolve `input_example` and `signature` if `log_models` is `True`.
input_example, signature = resolve_input_example_and_signature(
get_input_example,
@@ -668,5 +668,24 @@ def infer_model_signature(input_example):

return model

safe_patch(FLAVOR_NAME, xgboost, "train", train, manage_run=True)
safe_patch(FLAVOR_NAME, xgboost, "train", functools.partial(train, log_models), manage_run=True)
safe_patch(
FLAVOR_NAME, xgboost.sklearn, "train", functools.partial(train, False), manage_run=True
)
harupy marked this conversation as resolved.
safe_patch(FLAVOR_NAME, xgboost.DMatrix, "__init__", __init__)

# enable autologging for XGBoost scikit-learn estimators
import mlflow.sklearn

mlflow.sklearn._autolog(
flavor_name=FLAVOR_NAME,
log_input_examples=log_input_examples,
log_model_signatures=log_model_signatures,
log_models=log_models,
disable=disable,
exclusive=exclusive,
disable_for_unsupported_versions=disable_for_unsupported_versions,
silent=silent,
max_tuning_runs=None,
log_post_training_metrics=True,
)
21 changes: 18 additions & 3 deletions tests/autologging/test_autologging_behaviors_integration.py
@@ -90,14 +90,29 @@ def test_autologging_integrations_use_safe_patch_for_monkey_patching(integration
) as gorilla_mock, mock.patch(
integration.__name__ + ".safe_patch", wraps=safe_patch
) as safe_patch_mock:
integration.autolog(disable=False)
assert safe_patch_mock.call_count > 0
# In `mlflow.xgboost.autolog()`, we enable autologging for XGBoost sklearn
# models using `mlflow.sklearn._autolog()`. So besides `safe_patch` calls in
# `mlflow.xgboost.autolog()`, we need to count additional `safe_patch` calls
# in sklearn autologging routine as well.
if integration.__name__ == "mlflow.xgboost":
jwyyy marked this conversation as resolved.
with mock.patch(
"mlflow.sklearn.safe_patch", wraps=safe_patch
) as xgb_sklearn_safe_patch_mock:
integration.autolog(disable=False)
safe_patch_call_count = (
safe_patch_mock.call_count + xgb_sklearn_safe_patch_mock.call_count
)
else:
Collaborator:
On the subject of test coverage, can we add a test case to https://github.com/mlflow/mlflow/blob/master/tests/xgboost/test_xgboost_autolog.py ensuring that autologging works as expected for XGBoost scikit-learn models? Feel free to use code from your excellent example above.

integration.autolog(disable=False)
safe_patch_call_count = safe_patch_mock.call_count

assert safe_patch_call_count > 0
# `safe_patch` leverages `gorilla.apply` in its implementation. Accordingly, we expect
# that the total number of `gorilla.apply` calls to be equivalent to the number of
# `safe_patch` calls. This verifies that autologging integrations are leveraging
# `safe_patch`, rather than calling `gorilla.apply` directly (which does not provide
# exception safety properties)
assert safe_patch_mock.call_count == gorilla_mock.call_count
assert safe_patch_call_count == gorilla_mock.call_count


def test_autolog_respects_exclusive_flag(setup_sklearn_model):
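A minimal sketch of the test case requested in the review comment above (the function name is hypothetical; the real test would live in tests/xgboost/test_xgboost_autolog.py and could reuse that module's fixtures):

import mlflow
import xgboost as xgb
from sklearn.datasets import load_diabetes

def test_xgb_sklearn_autolog_logs_model():
    mlflow.xgboost.autolog()
    X, y = load_diabetes(return_X_y=True)
    with mlflow.start_run() as run:
        xgb.XGBRegressor(n_estimators=5, max_depth=2).fit(X, y)
    client = mlflow.tracking.MlflowClient()
    artifacts = [f.path for f in client.list_artifacts(run.info.run_id)]
    assert "model" in artifacts  # fit_mlflow_xgboost logs under artifact_path="model"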