From 0144d8679cdc4167b7262df0157f1ca731b421c4 Mon Sep 17 00:00:00 2001 From: WeichenXu Date: Fri, 14 Jan 2022 23:20:32 +0800 Subject: [PATCH] Evaluate Api examples (#5186) * init Signed-off-by: Weichen Xu * permlink Signed-off-by: Weichen Xu * update Signed-off-by: Weichen Xu * add readme Signed-off-by: Weichen Xu * fix lint Signed-off-by: Weichen Xu * update Signed-off-by: Weichen Xu * update Signed-off-by: Weichen Xu * update Signed-off-by: Weichen Xu * update Signed-off-by: Weichen Xu * update Signed-off-by: Weichen Xu * add artifact __repr__ Signed-off-by: Weichen Xu --- examples/evaluation/README.md | 27 ++++++++++++++++ .../evaluate_on_binary_classifier.py | 31 +++++++++++++++++++ .../evaluate_on_multiclass_classifier.py | 27 ++++++++++++++++ examples/evaluation/evaluate_on_regressor.py | 30 ++++++++++++++++++ mlflow/models/evaluation/base.py | 3 ++ mlflow/models/evaluation/lift_curve.py | 2 ++ 6 files changed, 120 insertions(+) create mode 100644 examples/evaluation/README.md create mode 100644 examples/evaluation/evaluate_on_binary_classifier.py create mode 100644 examples/evaluation/evaluate_on_multiclass_classifier.py create mode 100644 examples/evaluation/evaluate_on_regressor.py diff --git a/examples/evaluation/README.md b/examples/evaluation/README.md new file mode 100644 index 0000000000000..08ea3072815d3 --- /dev/null +++ b/examples/evaluation/README.md @@ -0,0 +1,27 @@ +### MLflow evaluation Examples + +The examples in this directory illustrate how you can use the `mlflow.evaluate` API to evaluate a PyFunc model on the +specified dataset using builtin default evaluator, and log resulting metrics & artifacts to MLflow Tracking. + +- Example `evaluate_on_binary_classifier.py` evaluates an xgboost `XGBClassifier` model on dataset loaded by + `shap.datasets.adult`. +- Example `evaluate_on_multiclass_classifier.py` evaluates a scikit-learn `LogisticRegression` model on dataset + generated by `sklearn.datasets.make_classification`. 
+- Example `evaluate_on_regressor.py` evaluates a scikit-learn `LinearRegression` model on dataset loaded by + `sklearn.datasets.fetch_california_housing` + +#### Prerequisites + +``` +pip install scikit-learn xgboost shap>=0.40 matplotlib +``` + +#### How to run the examples + +Run in this directory with Python. + +``` +python evaluate_on_binary_classifier.py +python evaluate_on_multiclass_classifier.py +python evaluate_on_regressor.py +``` diff --git a/examples/evaluation/evaluate_on_binary_classifier.py b/examples/evaluation/evaluate_on_binary_classifier.py new file mode 100644 index 0000000000000..7bac4a0beca2f --- /dev/null +++ b/examples/evaluation/evaluate_on_binary_classifier.py @@ -0,0 +1,31 @@ +import xgboost +import shap +import mlflow +from sklearn.model_selection import train_test_split + +# train XGBoost model +X, y = shap.datasets.adult() + +num_examples = len(X) + +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) + +model = xgboost.XGBClassifier().fit(X_train, y_train) + +eval_data = X_test +eval_data["label"] = y_test + +with mlflow.start_run() as run: + mlflow.sklearn.log_model(model, "model") + model_uri = mlflow.get_artifact_uri("model") + result = mlflow.evaluate( + model_uri, + eval_data, + targets="label", + model_type="classifier", + dataset_name="adult", + evaluators=["default"], + ) + +print(f"metrics:\n{result.metrics}") +print(f"artifacts:\n{result.artifacts}") diff --git a/examples/evaluation/evaluate_on_multiclass_classifier.py b/examples/evaluation/evaluate_on_multiclass_classifier.py new file mode 100644 index 0000000000000..4eab5cf86d0ee --- /dev/null +++ b/examples/evaluation/evaluate_on_multiclass_classifier.py @@ -0,0 +1,27 @@ +import mlflow +from sklearn.linear_model import LogisticRegression +from sklearn.datasets import make_classification +from sklearn.model_selection import train_test_split + +mlflow.sklearn.autolog() + +X, y = make_classification(n_samples=10000, n_classes=10, 
n_informative=5, random_state=1) + +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42) + +with mlflow.start_run() as run: + model = LogisticRegression(solver="liblinear").fit(X_train, y_train) + model_uri = mlflow.get_artifact_uri("model") + result = mlflow.evaluate( + model_uri, + X_test, + targets=y_test, + model_type="classifier", + dataset_name="multiclass-classification-dataset", + evaluators="default", + evaluator_config={"log_model_explainability": True, "explainability_nsamples": 1000}, + ) + +print(f"run_id={run.info.run_id}") +print(f"metrics:\n{result.metrics}") +print(f"artifacts:\n{result.artifacts}") diff --git a/examples/evaluation/evaluate_on_regressor.py b/examples/evaluation/evaluate_on_regressor.py new file mode 100644 index 0000000000000..538d8cf3ea8b1 --- /dev/null +++ b/examples/evaluation/evaluate_on_regressor.py @@ -0,0 +1,30 @@ +import mlflow +from sklearn.datasets import fetch_california_housing +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import train_test_split + +mlflow.sklearn.autolog() + +california_housing_data = fetch_california_housing() + +X_train, X_test, y_train, y_test = train_test_split( + california_housing_data.data, california_housing_data.target, test_size=0.33, random_state=42 +) + +with mlflow.start_run() as run: + model = LinearRegression().fit(X_train, y_train) + model_uri = mlflow.get_artifact_uri("model") + + result = mlflow.evaluate( + model_uri, + X_test, + targets=y_test, + model_type="regressor", + dataset_name="california_housing", + evaluators="default", + feature_names=california_housing_data.feature_names, + evaluator_config={"explainability_nsamples": 1000}, + ) + +print(f"metrics:\n{result.metrics}") +print(f"artifacts:\n{result.artifacts}") diff --git a/mlflow/models/evaluation/base.py b/mlflow/models/evaluation/base.py index 4cdb3ba91be59..98768f0cc6459 100644 --- a/mlflow/models/evaluation/base.py +++ 
b/mlflow/models/evaluation/base.py @@ -76,6 +76,9 @@ def uri(self) -> str: """ return self._uri + def __repr__(self): + return f"{self.__class__.__name__}(uri='{self.uri}')" + class EvaluationResult: """ diff --git a/mlflow/models/evaluation/lift_curve.py b/mlflow/models/evaluation/lift_curve.py index cbcba712a9329..d2df4ba453369 100644 --- a/mlflow/models/evaluation/lift_curve.py +++ b/mlflow/models/evaluation/lift_curve.py @@ -6,6 +6,7 @@ def _cumulative_gain_curve(y_true, y_score, pos_label=None): """ This method is copied from scikit-plot package. + See https://github.com/reiinakano/scikit-plot/blob/2dd3e6a76df77edcbd724c4db25575f70abb57cb/scikitplot/helpers.py#L157 This function generates the points necessary to plot the Cumulative Gain @@ -77,6 +78,7 @@ def plot_lift_curve( ): """ This method is copied from scikit-plot package. + See https://github.com/reiinakano/scikit-plot/blob/2dd3e6a76df77edcbd724c4db25575f70abb57cb/scikitplot/metrics.py#L1133 Generates the Lift Curve from labels and scores/probabilities