forked from mlflow/mlflow
-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_sklearn.py
49 lines (38 loc) · 1.49 KB
/
train_sklearn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import shutil
from pprint import pprint

import mlflow
import mlflow.xgboost
import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.datasets import load_wine
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from utils import fetch_logged_data
def main():
    """Train an XGBoost regressor on the wine dataset and track it with MLflow.

    The function splits the dataset, enables XGBoost autologging, then fits
    and evaluates an ``XGBRegressor`` inside a single MLflow run. The test
    MSE and the model are logged to that run, and everything returned by
    ``fetch_logged_data`` for the run is printed. Finally the model is saved
    to ``trained_model/`` and reloaded as a pyfunc model to check that it
    reproduces the in-memory predictions.

    Any existing ``trained_model/`` directory is deleted first so the script
    can be run repeatedly.

    Raises:
        AssertionError: If the reloaded model's predictions do not match the
            predictions made by the in-memory regressor.
    """
    # prepare example dataset
    wine = load_wine()
    X = pd.DataFrame(wine.data, columns=wine.feature_names)
    y = pd.Series(wine.target)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # enable auto logging
    # this includes xgboost.sklearn estimators
    mlflow.xgboost.autolog()

    with mlflow.start_run() as run:
        # Bind the run id once; it is reused after the run has ended.
        run_id = run.info.run_id

        regressor = xgb.XGBRegressor(n_estimators=100, reg_lambda=1, gamma=0, max_depth=3)
        regressor.fit(X_train, y_train)

        y_pred = regressor.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        mlflow.log_metrics({"mse": mse})
        print("Logged data and model in run {}".format(run_id))

        # Explicitly log the model under its own artifact path while the run
        # is still active, so it is attached to this run.
        mlflow.xgboost.log_model(regressor, artifact_path="log_model")

    # show logged data
    for key, data in fetch_logged_data(run_id).items():
        print("\n---------- logged {} ----------".format(key))
        pprint(data)

    # save_model refuses to write into an existing, non-empty directory, so
    # clear the output of any previous run before saving again.
    model_dir = "trained_model/"
    try:
        shutil.rmtree(model_dir)
    except FileNotFoundError:
        pass  # first run: nothing to clean up
    mlflow.xgboost.save_model(regressor, model_dir)

    # Round-trip check: the reloaded pyfunc model must give the same
    # predictions as the regressor that was just trained.
    reload_model = mlflow.pyfunc.load_model(model_dir)
    np.testing.assert_array_almost_equal(y_pred, reload_model.predict(X_test))
# Run the example only when this file is executed as a script, so that
# importing the module does not start a training run.
if __name__ == "__main__":
    main()