Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* init Signed-off-by: Weichen Xu <weichen.xu@databricks.com> * permlink Signed-off-by: Weichen Xu <weichen.xu@databricks.com> * update Signed-off-by: Weichen Xu <weichen.xu@databricks.com> * add readme Signed-off-by: Weichen Xu <weichen.xu@databricks.com> * fix lint Signed-off-by: Weichen Xu <weichen.xu@databricks.com> * update Signed-off-by: Weichen Xu <weichen.xu@databricks.com> * update Signed-off-by: Weichen Xu <weichen.xu@databricks.com> * update Signed-off-by: Weichen Xu <weichen.xu@databricks.com> * update Signed-off-by: Weichen Xu <weichen.xu@databricks.com> * update Signed-off-by: Weichen Xu <weichen.xu@databricks.com> * add artifact __repr__ Signed-off-by: Weichen Xu <weichen.xu@databricks.com>
- Loading branch information
1 parent
e365e69
commit 0144d86
Showing
6 changed files
with
120 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
### MLflow Evaluation Examples | ||
|
||
The examples in this directory illustrate how you can use the `mlflow.evaluate` API to evaluate a PyFunc model on the | ||
specified dataset using the built-in default evaluator, and log the resulting metrics & artifacts to MLflow Tracking. | ||
|
||
- Example `evaluate_on_binary_classifier.py` evaluates an xgboost `XGBClassifier` model on the dataset loaded by | ||
`shap.datasets.adult`. | ||
- Example `evaluate_on_multiclass_classifier.py` evaluates a scikit-learn `LogisticRegression` model on a dataset | ||
generated by `sklearn.datasets.make_classification`. | ||
- Example `evaluate_on_regressor.py` evaluates a scikit-learn `LinearRegression` model on the dataset loaded by | ||
`sklearn.datasets.fetch_california_housing`. | ||
|
||
#### Prerequisites | ||
|
||
``` | ||
pip install scikit-learn xgboost "shap>=0.40" matplotlib | ||
``` | ||
|
||
#### How to run the examples | ||
|
||
Run in this directory with Python. | ||
|
||
``` | ||
python evaluate_on_binary_classifier.py | ||
python evaluate_on_multiclass_classifier.py | ||
python evaluate_on_regressor.py | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import xgboost | ||
import shap | ||
import mlflow | ||
from sklearn.model_selection import train_test_split | ||
|
||
# Train an XGBoost binary classifier on the dataset returned by
# `shap.datasets.adult()`.
X, y = shap.datasets.adult()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

model = xgboost.XGBClassifier().fit(X_train, y_train)

# Build the evaluation frame as a *new* DataFrame holding the features plus a
# "label" column. `assign` returns a copy, so `X_test` itself is left
# untouched (assigning the column through an alias of `X_test` would mutate
# the split in place and can trigger pandas' SettingWithCopyWarning).
eval_data = X_test.assign(label=y_test)

with mlflow.start_run() as run:
    # Log the fitted model, then evaluate it by URI against the labelled
    # evaluation frame; `targets` names the column that holds the labels.
    mlflow.sklearn.log_model(model, "model")
    model_uri = mlflow.get_artifact_uri("model")
    result = mlflow.evaluate(
        model_uri,
        eval_data,
        targets="label",
        model_type="classifier",
        dataset_name="adult",
        evaluators=["default"],
    )

print(f"metrics:\n{result.metrics}")
print(f"artifacts:\n{result.artifacts}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import mlflow | ||
from sklearn.linear_model import LogisticRegression | ||
from sklearn.datasets import make_classification | ||
from sklearn.model_selection import train_test_split | ||
|
||
# Turn on scikit-learn autologging before any estimator is fitted.
mlflow.sklearn.autolog()

# Synthetic 10-class classification problem, split into train and test parts.
features, labels = make_classification(
    n_samples=10000, n_classes=10, n_informative=5, random_state=1
)
train_features, test_features, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.33, random_state=42
)

# Options handed to the evaluator through `evaluator_config`.
explainability_options = {"log_model_explainability": True, "explainability_nsamples": 1000}

with mlflow.start_run() as run:
    classifier = LogisticRegression(solver="liblinear")
    classifier.fit(train_features, train_labels)
    # NOTE(review): presumably autologging stores the fitted model under the
    # "model" artifact path, which this URI relies on -- confirm.
    logged_model_uri = mlflow.get_artifact_uri("model")
    result = mlflow.evaluate(
        logged_model_uri,
        test_features,
        targets=test_labels,
        model_type="classifier",
        dataset_name="multiclass-classification-dataset",
        evaluators="default",
        evaluator_config=explainability_options,
    )

print(f"run_id={run.info.run_id}")
print(f"metrics:\n{result.metrics}")
print(f"artifacts:\n{result.artifacts}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import mlflow | ||
from sklearn.datasets import fetch_california_housing | ||
from sklearn.linear_model import LinearRegression | ||
from sklearn.model_selection import train_test_split | ||
|
||
# Turn on scikit-learn autologging before any estimator is fitted.
mlflow.sklearn.autolog()

# Load the California housing data and split it into train and test parts.
housing = fetch_california_housing()
train_features, test_features, train_targets, test_targets = train_test_split(
    housing.data,
    housing.target,
    test_size=0.33,
    random_state=42,
)

# Options handed to the evaluator through `evaluator_config`.
evaluator_options = {"explainability_nsamples": 1000}

with mlflow.start_run() as run:
    regressor = LinearRegression()
    regressor.fit(train_features, train_targets)
    # NOTE(review): presumably autologging stores the fitted model under the
    # "model" artifact path, which this URI relies on -- confirm.
    logged_model_uri = mlflow.get_artifact_uri("model")

    result = mlflow.evaluate(
        logged_model_uri,
        test_features,
        targets=test_targets,
        model_type="regressor",
        dataset_name="california_housing",
        evaluators="default",
        feature_names=housing.feature_names,
        evaluator_config=evaluator_options,
    )

print(f"metrics:\n{result.metrics}")
print(f"artifacts:\n{result.artifacts}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters