Improve evaluation api #5256

Merged
14 commits merged on Jan 14, 2022
7 changes: 0 additions & 7 deletions docs/source/python_api/mlflow.models.evaluation.base.rst

This file was deleted.

6 changes: 4 additions & 2 deletions mlflow/models/__init__.py
@@ -24,14 +24,16 @@
 from .model import Model
 from .flavor_backend import FlavorBackend
 from ..utils.environment import infer_pip_requirements
-from .evaluation import evaluate, EvaluationDataset
+from .evaluation import evaluate, EvaluationArtifact, EvaluationResult, list_evaluators
 
 __all__ = [
     "Model",
     "FlavorBackend",
     "infer_pip_requirements",
     "evaluate",
-    "EvaluationDataset",
+    "EvaluationArtifact",
+    "EvaluationResult",
+    "list_evaluators",
 ]


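The hunk above removes EvaluationDataset from the public surface of mlflow.models and exposes evaluate, EvaluationArtifact, EvaluationResult, and list_evaluators instead. A minimal sketch of the reshaped call, pieced together from the updated tests further down; the toy sklearn model and diabetes data are illustrative, not from the PR:

    import mlflow
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import LinearRegression

    X, y = load_diabetes(return_X_y=True)
    model = LinearRegression().fit(X, y)

    with mlflow.start_run() as run:
        mlflow.sklearn.log_model(model, "model")
        # Raw data, targets, and a dataset name now go straight to evaluate();
        # callers no longer construct an EvaluationDataset themselves.
        result = mlflow.models.evaluate(
            f"runs:/{run.info.run_id}/model",
            X,
            model_type="regressor",
            targets=y,
            dataset_name="diabetes",
            evaluators="default",
        )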
4 changes: 0 additions & 4 deletions mlflow/models/evaluation/__init__.py
@@ -1,18 +1,14 @@
 from mlflow.models.evaluation.base import (
     ModelEvaluator,
-    EvaluationDataset,
     EvaluationResult,
-    EvaluationMetrics,
     EvaluationArtifact,
     evaluate,
     list_evaluators,
 )
 
 __all__ = [
     "ModelEvaluator",
-    "EvaluationDataset",
     "EvaluationResult",
-    "EvaluationMetrics",
     "EvaluationArtifact",
     "evaluate",
     "list_evaluators",
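With EvaluationDataset and EvaluationMetrics gone, this module's public names line up with the top-level mlflow.models exports. A one-line check of the registered evaluators (the printed output is an assumption; the built-in evaluator is named "default"):

    from mlflow.models import list_evaluators
    print(list_evaluators())  # e.g. ['default']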
249 changes: 138 additions & 111 deletions mlflow/models/evaluation/base.py

Large diffs are not rendered by default.

22 changes: 15 additions & 7 deletions mlflow/models/evaluation/default_evaluator.py
@@ -1,7 +1,6 @@
 import mlflow
 from mlflow.models.evaluation.base import (
     ModelEvaluator,
-    EvaluationMetrics,
     EvaluationResult,
 )
 from mlflow.entities.metric import Metric
@@ -326,22 +325,31 @@ def _log_model_explainability(self):
             # but spark model input dataframe contains Vector type feature column
             # which shap explainer does not support.
             # To support this, we need expand the Vector type feature column into
-            # multiple scaler feature columns and pass it to shap explainer.
+            # multiple scalar feature columns and pass it to shap explainer.
             _logger.warning(
                 "Logging model explainability insights is not currently supported for PySpark "
                 "models."
             )
             return
 
-        if self.model_type == "classifier" and not all(
-            [isinstance(label, (numbers.Number, np.bool_)) for label in self.label_list]
-        ):
+        if not (np.issubdtype(self.y.dtype, np.number) or self.y.dtype == np.bool_):
+            # Note: python bool type inherits number type but np.bool_ does not inherit np.number.
             _logger.warning(
                 "Skip logging model explainability insights because it requires all label "
-                "values to be Number type."
+                "values to be number type or bool type."
             )
             return
 
+        feature_dtypes = list(self.X.dtypes) if isinstance(self.X, pd.DataFrame) else [self.X.dtype]
+        for feature_dtype in feature_dtypes:
+            if not np.issubdtype(feature_dtype, np.number):
+                _logger.warning(
+                    "Skip logging model explainability insights because it requires all feature "
+                    "values to be number type, and each feature column must only contain scalar "
+                    "values."
+                )
+                return
 
         try:
             import shap
             import matplotlib.pyplot as pyplot
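The inline note about bool types explains why the new check spells out np.bool_: the old per-label isinstance test accepted Python bools because bool subclasses int, but NumPy's bool_ sits outside its numeric hierarchy, so np.issubdtype alone would reject boolean labels. A small standalone demonstration, not part of the PR:

    import numbers
    import numpy as np

    print(isinstance(True, numbers.Number))    # True: Python bool subclasses int
    print(np.issubdtype(np.bool_, np.number))  # False: np.bool_ is not a np.number

    # Hence the explicit two-part test used by the new label check:
    y = np.array([True, False, True])
    print(np.issubdtype(y.dtype, np.number) or y.dtype == np.bool_)  # True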
@@ -652,7 +660,7 @@ def evaluate(

         self.X = dataset.features_data
         self.y = dataset.labels_data
-        self.metrics = EvaluationMetrics()
+        self.metrics = dict()
         self.artifacts = {}
 
         infered_model_type = _infer_model_type_by_labels(self.y)
30 changes: 20 additions & 10 deletions tests/models/test_default_evaluator.py
@@ -40,9 +40,11 @@ def assert_dict_equal(d1, d2, rtol):
 def test_regressor_evaluation(linear_regressor_model_uri, diabetes_dataset):
     with mlflow.start_run() as run:
         result = evaluate(
-            model=linear_regressor_model_uri,
+            linear_regressor_model_uri,
+            diabetes_dataset._constructor_args["data"],
             model_type="regressor",
-            dataset=diabetes_dataset,
+            targets=diabetes_dataset._constructor_args["targets"],
+            dataset_name=diabetes_dataset.name,
             evaluators="default",
         )
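
The call sites now pass the model URI and the raw data positionally, with targets and dataset_name as keywords. The assertions that follow (collapsed in this view) inspect the returned EvaluationResult; a hypothetical inspection, assuming the metrics and artifacts attributes that base.py defines on it:

    print(result.metrics)  # dict of computed metrics, e.g. mean absolute error
    for name, artifact in result.artifacts.items():
        print(name, artifact.uri)  # each EvaluationArtifact records where it was logged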

@@ -81,9 +83,11 @@ def test_regressor_evaluation(linear_regressor_model_uri, diabetes_dataset):
 def test_multi_classifier_evaluation(multiclass_logistic_regressor_model_uri, iris_dataset):
     with mlflow.start_run() as run:
         result = evaluate(
-            model=multiclass_logistic_regressor_model_uri,
+            multiclass_logistic_regressor_model_uri,
+            iris_dataset._constructor_args["data"],
             model_type="classifier",
-            dataset=iris_dataset,
+            targets=iris_dataset._constructor_args["targets"],
+            dataset_name=iris_dataset.name,
             evaluators="default",
         )

@@ -132,9 +136,11 @@ def test_multi_classifier_evaluation(multiclass_logistic_regressor_model_uri, iris_dataset):
 def test_bin_classifier_evaluation(binary_logistic_regressor_model_uri, breast_cancer_dataset):
     with mlflow.start_run() as run:
         result = evaluate(
-            model=binary_logistic_regressor_model_uri,
+            binary_logistic_regressor_model_uri,
+            breast_cancer_dataset._constructor_args["data"],
             model_type="classifier",
-            dataset=breast_cancer_dataset,
+            targets=breast_cancer_dataset._constructor_args["targets"],
+            dataset_name=breast_cancer_dataset.name,
             evaluators="default",
         )

@@ -184,9 +190,11 @@ def test_bin_classifier_evaluation(binary_logistic_regressor_model_uri, breast_cancer_dataset):
 def test_spark_regressor_model_evaluation(spark_linear_regressor_model_uri, diabetes_spark_dataset):
     with mlflow.start_run() as run:
         result = evaluate(
-            model=spark_linear_regressor_model_uri,
+            spark_linear_regressor_model_uri,
+            diabetes_spark_dataset._constructor_args["data"],
             model_type="regressor",
-            dataset=diabetes_spark_dataset,
+            targets=diabetes_spark_dataset._constructor_args["targets"],
+            dataset_name=diabetes_spark_dataset.name,
             evaluators="default",
             evaluator_config={"log_model_explainability": True},
         )
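
This test opts in to explainability logging via evaluator_config, the same channel a caller would use to turn it off. A sketch, assuming the default evaluator's log_model_explainability key as used above; model_uri, data, and targets are placeholders:

    result = evaluate(
        model_uri,
        data,
        model_type="regressor",
        targets=targets,
        dataset_name="my_dataset",
        evaluators="default",
        evaluator_config={"log_model_explainability": False},  # skip SHAP-based insights
    )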
@@ -222,9 +230,11 @@ def test_spark_regressor_model_evaluation(spark_linear_regressor_model_uri, diabetes_spark_dataset):
 def test_svm_classifier_evaluation(svm_model_uri, breast_cancer_dataset):
     with mlflow.start_run() as run:
         result = evaluate(
-            model=svm_model_uri,
+            svm_model_uri,
+            breast_cancer_dataset._constructor_args["data"],
             model_type="classifier",
-            dataset=breast_cancer_dataset,
+            targets=breast_cancer_dataset._constructor_args["targets"],
+            dataset_name=breast_cancer_dataset.name,
             evaluators="default",
         )
