-
Notifications
You must be signed in to change notification settings - Fork 4k
/
fluent.py
1615 lines (1285 loc) · 61.6 KB
/
fluent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
Internal module implementing the fluent API, allowing management of an active
MLflow run. This module is exposed to users at the top-level :py:mod:`mlflow` module.
"""
import os
import atexit
import time
import logging
import inspect
from copy import deepcopy
from packaging.version import Version
from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING
from mlflow.entities import Experiment, Run, RunInfo, RunStatus, Param, RunTag, Metric, ViewType
from mlflow.entities.lifecycle_stage import LifecycleStage
from mlflow.exceptions import MlflowException
from mlflow.protos.databricks_pb2 import (
INVALID_PARAMETER_VALUE,
RESOURCE_DOES_NOT_EXIST,
)
from mlflow.tracking.client import MlflowClient
from mlflow.tracking import artifact_utils, _get_store
from mlflow.tracking.context import registry as context_registry
from mlflow.store.tracking import SEARCH_MAX_RESULTS_DEFAULT
from mlflow.utils import env
from mlflow.utils.autologging_utils import (
is_testing,
autologging_integration,
AUTOLOGGING_INTEGRATIONS,
autologging_is_disabled,
)
from mlflow.utils.databricks_utils import is_in_databricks_notebook, get_notebook_id
from mlflow.utils.import_hooks import register_post_import_hook
from mlflow.utils.mlflow_tags import MLFLOW_PARENT_RUN_ID, MLFLOW_RUN_NAME
from mlflow.utils.validation import _validate_run_id
if TYPE_CHECKING:
import pandas # pylint: disable=unused-import
import matplotlib # pylint: disable=unused-import
import plotly # pylint: disable=unused-import
import numpy # pylint: disable=unused-import
import PIL # pylint: disable=unused-import
# Names of the environment variables recognized by the fluent API.
# NOTE(review): the two experiment variables are presumably read by ``_get_experiment_id``,
# which is not defined in this part of the file -- confirm.
_EXPERIMENT_ID_ENV_VAR = "MLFLOW_EXPERIMENT_ID"
_EXPERIMENT_NAME_ENV_VAR = "MLFLOW_EXPERIMENT_NAME"
# Read (and removed from the environment) by ``start_run``; unset again by ``end_run``.
_RUN_ID_ENV_VAR = "MLFLOW_RUN_ID"
# Stack of ``ActiveRun`` objects pushed by ``start_run`` and popped by ``end_run``.
# The last element is the currently active run; nested runs sit above their parents.
_active_run_stack = []
# ID of the experiment activated through ``set_experiment``; ``None`` until it is called.
_active_experiment_id = None
# NOTE(review): presumably the result cap and page size for a pandas-based run search
# defined later in this module -- confirm against their users.
SEARCH_MAX_RESULTS_PANDAS = 100000
NUM_RUNS_PER_PAGE_PANDAS = 10000
# Module-level logger, named after this module.
_logger = logging.getLogger(__name__)
def set_experiment(
    experiment_name: Optional[str] = None, experiment_id: Optional[str] = None
) -> Experiment:
    """
    Set the given experiment as the active experiment. The experiment must either be specified by
    name via `experiment_name` or by ID via `experiment_id`. The experiment name and ID cannot
    both be specified.

    :param experiment_name: Case sensitive name of the experiment to be activated. If an experiment
                            with this name does not exist, a new experiment with this name is
                            created.
    :param experiment_id: ID of the experiment to be activated. If an experiment with this ID
                          does not exist, an exception is thrown.
    :return: An instance of :py:class:`mlflow.entities.Experiment` representing the new active
             experiment.
    :raises MlflowException: If neither or both of `experiment_name` and `experiment_id` are
                             given, if no experiment with `experiment_id` exists, or if the
                             resolved experiment is not in the active lifecycle stage.

    .. code-block:: python
        :caption: Example

        import mlflow

        # Set an experiment name, which must be unique and case sensitive.
        mlflow.set_experiment("Social NLP Experiments")

        # Get Experiment Details
        experiment = mlflow.get_experiment_by_name("Social NLP Experiments")
        print("Experiment_id: {}".format(experiment.experiment_id))
        print("Artifact Location: {}".format(experiment.artifact_location))
        print("Tags: {}".format(experiment.tags))
        print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))

    .. code-block:: text
        :caption: Output

        Experiment_id: 1
        Artifact Location: file:///.../mlruns/1
        Tags: {}
        Lifecycle_stage: active
    """
    # Exactly one of the two identifiers must be supplied: reject both "neither" and "both".
    if (experiment_name is None) == (experiment_id is None):
        raise MlflowException(
            message="Must specify exactly one of: `experiment_id` or `experiment_name`.",
            error_code=INVALID_PARAMETER_VALUE,
        )

    client = MlflowClient()
    if experiment_id is None:
        experiment = client.get_experiment_by_name(experiment_name)
        if not experiment:
            _logger.info(
                "Experiment with name '%s' does not exist. Creating a new experiment.",
                experiment_name,
            )
            # NB: If two simultaneous threads or processes attempt to set the same experiment
            # simultaneously, a race condition may be encountered here wherein experiment creation
            # fails
            experiment_id = client.create_experiment(experiment_name)
            experiment = client.get_experiment(experiment_id)
    else:
        experiment = client.get_experiment(experiment_id)
        if experiment is None:
            raise MlflowException(
                message=f"Experiment with ID '{experiment_id}' does not exist.",
                error_code=RESOURCE_DOES_NOT_EXIST,
            )

    if experiment.lifecycle_stage != LifecycleStage.ACTIVE:
        raise MlflowException(
            message=(
                "Cannot set a deleted experiment '%s' as the active experiment."
                " You can restore the experiment, or permanently delete the "
                " experiment to create a new one." % experiment.name
            ),
            error_code=INVALID_PARAMETER_VALUE,
        )

    # Remember the selection at module level so later ``start_run`` calls can consult it.
    global _active_experiment_id
    _active_experiment_id = experiment.experiment_id
    return experiment
class ActiveRun(Run):  # pylint: disable=W0223
    """
    :py:class:`mlflow.entities.Run` subclass that can be used in a ``with`` statement.

    Leaving the ``with`` block ends the run: it is marked ``FINISHED`` on a clean exit and
    ``FAILED`` when an exception escaped the block.
    """

    def __init__(self, run):
        # Copy the info/data of the wrapped run into this instance.
        Run.__init__(self, run.info, run.data)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        exited_cleanly = exc_type is None
        if exited_cleanly:
            final_status = RunStatus.FINISHED
        else:
            final_status = RunStatus.FAILED
        end_run(RunStatus.to_string(final_status))
        # A falsy return value lets an in-flight exception keep propagating.
        return exited_cleanly
def start_run(
    run_id: str = None,
    experiment_id: Optional[str] = None,
    run_name: Optional[str] = None,
    nested: bool = False,
    tags: Optional[Dict[str, Any]] = None,
) -> ActiveRun:
    """
    Start a new MLflow run, setting it as the active run under which metrics and parameters
    will be logged. The return value can be used as a context manager within a ``with`` block;
    otherwise, you must call ``end_run()`` to terminate the current run.

    If you pass a ``run_id`` or the ``MLFLOW_RUN_ID`` environment variable is set,
    ``start_run`` attempts to resume a run with the specified run ID and
    other parameters are ignored. ``run_id`` takes precedence over ``MLFLOW_RUN_ID``.

    If resuming an existing run, the run status is set to ``RunStatus.RUNNING``.

    MLflow sets a variety of default tags on the run, as defined in
    :ref:`MLflow system tags <system_tags>`.

    :param run_id: If specified, get the run with the specified UUID and log parameters
                     and metrics under that run. The run's end time is unset and its status
                     is set to running, but the run's other attributes (``source_version``,
                     ``source_type``, etc.) are not changed.
    :param experiment_id: ID of the experiment under which to create the current run (applicable
                          only when ``run_id`` is not specified). If ``experiment_id`` argument
                          is unspecified, will look for valid experiment in the following order:
                          activated using ``set_experiment``, ``MLFLOW_EXPERIMENT_NAME``
                          environment variable, ``MLFLOW_EXPERIMENT_ID`` environment variable,
                          or the default experiment as defined by the tracking server.
    :param run_name: Name of new run (stored as a ``mlflow.runName`` tag).
                     Used only when ``run_id`` is unspecified.
    :param nested: Controls whether run is nested in parent run. ``True`` creates a nested run.
    :param tags: An optional dictionary of string keys and values to set as tags on the run.
                 If a run is being resumed, these tags are set on the resumed run. If a new run is
                 being created, these tags are set on the new run.
    :return: :py:class:`mlflow.ActiveRun` object that acts as a context manager wrapping
             the run's state.

    .. code-block:: python
        :caption: Example

        import mlflow

        # Create nested runs
        with mlflow.start_run(run_name='PARENT_RUN') as parent_run:
            mlflow.log_param("parent", "yes")
            with mlflow.start_run(run_name='CHILD_RUN', nested=True) as child_run:
                mlflow.log_param("child", "yes")

        print("parent run_id: {}".format(parent_run.info.run_id))
        print("child run_id : {}".format(child_run.info.run_id))
        print("--")

        # Search all child runs with a parent id
        query = "tags.mlflow.parentRunId = '{}'".format(parent_run.info.run_id)
        results = mlflow.search_runs(filter_string=query)
        print(results[["run_id", "params.child", "tags.mlflow.runName"]])

    .. code-block:: text
        :caption: Output

        parent run_id: 5ec0e7ae18f54c2694ffb48c2fccf25c
        child run_id : 78b3b0d264b44cd29e8dc389749bb4be
        --
                                     run_id params.child tags.mlflow.runName
        0  78b3b0d264b44cd29e8dc389749bb4be          yes           CHILD_RUN
    """
    global _active_run_stack
    # back compat for int experiment_id
    if isinstance(experiment_id, int):
        experiment_id = str(experiment_id)

    # A second run may only be opened on top of an active one when nesting was requested.
    if _active_run_stack and not nested:
        raise Exception(
            (
                "Run with UUID {} is already active. To start a new run, first end the "
                + "current run with mlflow.end_run(). To start a nested "
                + "run, call start_run with nested=True"
            ).format(_active_run_stack[0].info.run_id)
        )

    client = MlflowClient()

    # Decide which run (if any) to resume: the explicit argument wins over the environment.
    if run_id:
        existing_run_id = run_id
    elif _RUN_ID_ENV_VAR in os.environ:
        # The variable is consumed: it is removed from the environment once read.
        existing_run_id = os.environ.pop(_RUN_ID_ENV_VAR)
    else:
        existing_run_id = None

    if existing_run_id:
        _validate_run_id(existing_run_id)
        resumed_run = client.get_run(existing_run_id)

        # Check to see if experiment_id from environment matches experiment_id from set_experiment()
        experiment_conflict = (
            _active_experiment_id is not None
            and _active_experiment_id != resumed_run.info.experiment_id
        )
        if experiment_conflict:
            raise MlflowException(
                "Cannot start run with ID {} because active run ID "
                "does not match environment run ID. Make sure --experiment-name "
                "or --experiment-id matches experiment set with "
                "set_experiment(), or just use command-line "
                "arguments".format(existing_run_id)
            )

        # Check to see if current run isn't deleted
        if resumed_run.info.lifecycle_stage == LifecycleStage.DELETED:
            raise MlflowException(
                "Cannot start run with ID {} because it is in the "
                "deleted state.".format(existing_run_id)
            )

        # Use previous end_time because a value is required for update_run_info
        previous_end_time = resumed_run.info.end_time
        _get_store().update_run_info(
            existing_run_id, run_status=RunStatus.RUNNING, end_time=previous_end_time
        )
        if tags:
            resumed_tags = [RunTag(tag_key, str(tag_value)) for tag_key, tag_value in tags.items()]
            client.log_batch(run_id=existing_run_id, tags=resumed_tags)
        # Re-fetch so the returned object reflects the status/tag updates made above.
        active_run_obj = client.get_run(existing_run_id)
    else:
        # A new run becomes a child of whatever run is currently on top of the stack.
        parent_run_id = _active_run_stack[-1].info.run_id if _active_run_stack else None

        if experiment_id is None:
            exp_id_for_run = _get_experiment_id()
        else:
            exp_id_for_run = experiment_id

        # Work on a copy so the caller's ``tags`` dictionary is never mutated.
        user_specified_tags = deepcopy(tags) or {}
        if parent_run_id is not None:
            user_specified_tags[MLFLOW_PARENT_RUN_ID] = parent_run_id
        if run_name is not None:
            user_specified_tags[MLFLOW_RUN_NAME] = run_name

        resolved_tags = context_registry.resolve_tags(user_specified_tags)
        active_run_obj = client.create_run(experiment_id=exp_id_for_run, tags=resolved_tags)

    new_active_run = ActiveRun(active_run_obj)
    _active_run_stack.append(new_active_run)
    return new_active_run
def end_run(status: str = RunStatus.to_string(RunStatus.FINISHED)) -> None:
    """
    End the active MLflow run, if there is one; otherwise do nothing.

    The most recently started run is removed from the active-run stack and marked as
    terminated with the given status.

    :param status: Status string to record for the run being ended. Defaults to the string
                   form of ``RunStatus.FINISHED``.

    .. code-block:: python
        :caption: Example

        import mlflow

        # Start run and get status
        mlflow.start_run()
        run = mlflow.active_run()
        print("run_id: {}; status: {}".format(run.info.run_id, run.info.status))

        # End run and get status
        mlflow.end_run()
        run = mlflow.get_run(run.info.run_id)
        print("run_id: {}; status: {}".format(run.info.run_id, run.info.status))
        print("--")

        # Check for any active runs
        print("Active run: {}".format(mlflow.active_run()))

    .. code-block:: text
        :caption: Output

        run_id: b47ee4563368419880b44ad8535f6371; status: RUNNING
        run_id: b47ee4563368419880b44ad8535f6371; status: FINISHED
        --
        Active run: None
    """
    global _active_run_stack
    if not _active_run_stack:
        return
    # Clear out the global existing run environment variable as well.
    env.unset_variable(_RUN_ID_ENV_VAR)
    finished_run = _active_run_stack.pop()
    MlflowClient().set_terminated(finished_run.info.run_id, status)


# Make sure a run that is still active when the interpreter exits gets ended.
atexit.register(end_run)
def active_run() -> Optional[ActiveRun]:
    """
    Return the currently active ``Run``, or ``None`` when no run is active.

    **Note**: You cannot access currently-active run attributes
    (parameters, metrics, etc.) through the run returned by ``mlflow.active_run``. In order
    to access such attributes, use the :py:class:`mlflow.tracking.MlflowClient`.

    .. code-block:: python
        :caption: Example

        import mlflow

        mlflow.start_run()
        run = mlflow.active_run()
        print("Active run_id: {}".format(run.info.run_id))
        mlflow.end_run()

    .. code-block:: text
        :caption: Output

        Active run_id: 6f252757005748708cd3aad75d1ff462
    """
    if _active_run_stack:
        # The top of the stack is the innermost (most recently started) run.
        return _active_run_stack[-1]
    return None
def get_run(run_id: str) -> Run:
    """
    Fetch the run from backend store. The resulting :py:class:`Run <mlflow.entities.Run>`
    contains a collection of run metadata -- :py:class:`RunInfo <mlflow.entities.RunInfo>`,
    as well as a collection of run parameters, tags, and metrics --
    :py:class:`RunData <mlflow.entities.RunData>`. In the case where multiple metrics with the
    same key are logged for the run, the :py:class:`RunData <mlflow.entities.RunData>` contains
    the most recently logged value at the largest step for each metric.

    :param run_id: Unique identifier for the run.
    :return: A single :py:class:`mlflow.entities.Run` object, if the run exists. Otherwise,
             raises an exception.

    .. code-block:: python
        :caption: Example

        import mlflow

        with mlflow.start_run() as run:
            mlflow.log_param("p", 0)

        run_id = run.info.run_id
        print("run_id: {}; lifecycle_stage: {}".format(run_id,
            mlflow.get_run(run_id).info.lifecycle_stage))

    .. code-block:: text
        :caption: Output

        run_id: 7472befefc754e388e8e922824a0cca5; lifecycle_stage: active
    """
    client = MlflowClient()
    return client.get_run(run_id)
def log_param(key: str, value: Any) -> None:
    """
    Log a parameter under the current run. If no run is active, this method will create
    a new active run.

    :param key: Parameter name (string). This string may only contain alphanumerics,
                underscores (_), dashes (-), periods (.), spaces ( ), and slashes (/).
                All backend stores will support keys up to length 250, but some may
                support larger keys.
    :param value: Parameter value (string, but will be string-ified if not).
                  All backend stores will support values up to length 5000, but some
                  may support larger values.

    .. code-block:: python
        :caption: Example

        import mlflow

        with mlflow.start_run():
            mlflow.log_param("learning_rate", 0.01)
    """
    target_run_id = _get_or_start_run().info.run_id
    client = MlflowClient()
    client.log_param(target_run_id, key, value)
def set_tag(key: str, value: Any) -> None:
    """
    Set a tag under the current run. If no run is active, this method will create a
    new active run.

    :param key: Tag name (string). This string may only contain alphanumerics, underscores
                (_), dashes (-), periods (.), spaces ( ), and slashes (/).
                All backend stores will support keys up to length 250, but some may
                support larger keys.
    :param value: Tag value (string, but will be string-ified if not).
                  All backend stores will support values up to length 5000, but some
                  may support larger values.

    .. code-block:: python
        :caption: Example

        import mlflow

        with mlflow.start_run():
            mlflow.set_tag("release.version", "2.2.0")
    """
    target_run_id = _get_or_start_run().info.run_id
    client = MlflowClient()
    client.set_tag(target_run_id, key, value)
def delete_tag(key: str) -> None:
    """
    Delete a tag from a run. This is irreversible. If no run is active, this method
    will create a new active run.

    :param key: Name of the tag

    .. code-block:: python
        :caption: Example

        import mlflow

        tags = {"engineering": "ML Platform",
                "engineering_remote": "ML Platform"}

        with mlflow.start_run() as run:
            mlflow.set_tags(tags)

        with mlflow.start_run(run_id=run.info.run_id):
            mlflow.delete_tag("engineering_remote")
    """
    target_run_id = _get_or_start_run().info.run_id
    client = MlflowClient()
    client.delete_tag(target_run_id, key)
def log_metric(key: str, value: float, step: Optional[int] = None) -> None:
    """
    Log a metric under the current run. If no run is active, this method will create
    a new active run. The metric is recorded with the current wall-clock time in
    milliseconds as its timestamp.

    :param key: Metric name (string). This string may only contain alphanumerics, underscores (_),
                dashes (-), periods (.), spaces ( ), and slashes (/).
                All backend stores will support keys up to length 250, but some may
                support larger keys.
    :param value: Metric value (float). Note that some special values such as +/- Infinity may be
                  replaced by other values depending on the store. For example, the
                  SQLAlchemy store replaces +/- Infinity with max / min float values.
    :param step: Metric step (int). Defaults to zero if unspecified.

    .. code-block:: python
        :caption: Example

        import mlflow

        with mlflow.start_run():
            mlflow.log_metric("mse", 2500.00)
    """
    target_run_id = _get_or_start_run().info.run_id
    client = MlflowClient()
    timestamp_ms = int(time.time() * 1000)
    # ``None`` (and any other falsy step) is logged as step zero.
    effective_step = step or 0
    client.log_metric(target_run_id, key, value, timestamp_ms, effective_step)
def log_metrics(metrics: Dict[str, float], step: Optional[int] = None) -> None:
    """
    Log multiple metrics for the current run. If no run is active, this method will create a new
    active run. All metrics in the batch share one timestamp, taken when the call is made.

    :param metrics: Dictionary of metric_name: String -> value: Float. Note that some special
                    values such as +/- Infinity may be replaced by other values depending on
                    the store. For example, sql based store may replace +/- Infinity with
                    max / min float values.
    :param step: A single integer step at which to log the specified
                 Metrics. If unspecified, each metric is logged at step zero.

    :returns: None

    .. code-block:: python
        :caption: Example

        import mlflow

        metrics = {"mse": 2500.00, "rmse": 50.00}

        # Log a batch of metrics
        with mlflow.start_run():
            mlflow.log_metrics(metrics)
    """
    target_run_id = _get_or_start_run().info.run_id
    timestamp_ms = int(time.time() * 1000)
    # ``None`` (and any other falsy step) is logged as step zero.
    effective_step = step or 0
    metric_entities = []
    for metric_name, metric_value in metrics.items():
        metric_entities.append(Metric(metric_name, metric_value, timestamp_ms, effective_step))
    MlflowClient().log_batch(run_id=target_run_id, metrics=metric_entities, params=[], tags=[])
def log_params(params: Dict[str, Any]) -> None:
    """
    Log a batch of params for the current run. If no run is active, this method will create a
    new active run.

    :param params: Dictionary of param_name: String -> value: (String, but will be string-ified if
                   not)
    :returns: None

    .. code-block:: python
        :caption: Example

        import mlflow

        params = {"learning_rate": 0.01, "n_estimators": 10}

        # Log a batch of parameters
        with mlflow.start_run():
            mlflow.log_params(params)
    """
    target_run_id = _get_or_start_run().info.run_id
    param_entities = []
    for param_name, param_value in params.items():
        # Values are stored as strings, whatever type was passed in.
        param_entities.append(Param(param_name, str(param_value)))
    MlflowClient().log_batch(run_id=target_run_id, metrics=[], params=param_entities, tags=[])
def set_tags(tags: Dict[str, Any]) -> None:
    """
    Log a batch of tags for the current run. If no run is active, this method will create a
    new active run.

    :param tags: Dictionary of tag_name: String -> value: (String, but will be string-ified if
                 not)
    :returns: None

    .. code-block:: python
        :caption: Example

        import mlflow

        tags = {"engineering": "ML Platform",
                "release.candidate": "RC1",
                "release.version": "2.2.0"}

        # Set a batch of tags
        with mlflow.start_run():
            mlflow.set_tags(tags)
    """
    target_run_id = _get_or_start_run().info.run_id
    tag_entities = []
    for tag_name, tag_value in tags.items():
        # Values are stored as strings, whatever type was passed in.
        tag_entities.append(RunTag(tag_name, str(tag_value)))
    MlflowClient().log_batch(run_id=target_run_id, metrics=[], params=[], tags=tag_entities)
def log_artifact(local_path: str, artifact_path: Optional[str] = None) -> None:
    """
    Log a local file or directory as an artifact of the currently active run. If no run is
    active, this method will create a new active run.

    :param local_path: Path to the file to write.
    :param artifact_path: If provided, the directory in ``artifact_uri`` to write to.

    .. code-block:: python
        :caption: Example

        import mlflow

        # Create a features.txt artifact file
        features = "rooms, zipcode, median_price, school_rating, transport"
        with open("features.txt", 'w') as f:
            f.write(features)

        # With artifact_path=None write features.txt under
        # root artifact_uri/artifacts directory
        with mlflow.start_run():
            mlflow.log_artifact("features.txt")
    """
    target_run_id = _get_or_start_run().info.run_id
    client = MlflowClient()
    client.log_artifact(target_run_id, local_path, artifact_path)
def log_artifacts(local_dir: str, artifact_path: Optional[str] = None) -> None:
    """
    Log all the contents of a local directory as artifacts of the run. If no run is active,
    this method will create a new active run.

    :param local_dir: Path to the directory of files to write.
    :param artifact_path: If provided, the directory in ``artifact_uri`` to write to.

    .. code-block:: python
        :caption: Example

        import json
        import os
        import mlflow

        # Create some files to preserve as artifacts
        features = "rooms, zipcode, median_price, school_rating, transport"
        data = {"state": "TX", "Available": 25, "Type": "Detached"}

        # Create couple of artifact files under the directory "data"
        os.makedirs("data", exist_ok=True)
        with open("data/data.json", 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        with open("data/features.txt", 'w') as f:
            f.write(features)

        # Write all files in "data" to root artifact_uri/states
        with mlflow.start_run():
            mlflow.log_artifacts("data", artifact_path="states")
    """
    target_run_id = _get_or_start_run().info.run_id
    client = MlflowClient()
    client.log_artifacts(target_run_id, local_dir, artifact_path)
def log_text(text: str, artifact_file: str) -> None:
    """
    Log text as an artifact of the current run; ``_get_or_start_run`` supplies the run.

    :param text: String containing text to log.
    :param artifact_file: The run-relative artifact file path in posixpath format to which
                          the text is saved (e.g. "dir/file.txt").

    .. code-block:: python
        :caption: Example

        import mlflow

        with mlflow.start_run():
            # Log text to a file under the run's root artifact directory
            mlflow.log_text("text1", "file1.txt")

            # Log text in a subdirectory of the run's root artifact directory
            mlflow.log_text("text2", "dir/file2.txt")

            # Log HTML text
            mlflow.log_text("<h1>header</h1>", "index.html")
    """
    target_run_id = _get_or_start_run().info.run_id
    client = MlflowClient()
    client.log_text(target_run_id, text, artifact_file)
def log_dict(dictionary: Any, artifact_file: str) -> None:
    """
    Log a JSON/YAML-serializable object (e.g. `dict`) as an artifact. The serialization
    format (JSON or YAML) is automatically inferred from the extension of `artifact_file`.
    If the file extension doesn't exist or match any of [".json", ".yml", ".yaml"],
    JSON format is used.

    :param dictionary: Dictionary to log.
    :param artifact_file: The run-relative artifact file path in posixpath format to which
                          the dictionary is saved (e.g. "dir/data.json").

    .. code-block:: python
        :caption: Example

        import mlflow

        dictionary = {"k": "v"}

        with mlflow.start_run():
            # Log a dictionary as a JSON file under the run's root artifact directory
            mlflow.log_dict(dictionary, "data.json")

            # Log a dictionary as a YAML file in a subdirectory of the run's root artifact directory
            mlflow.log_dict(dictionary, "dir/data.yml")

            # If the file extension doesn't exist or match any of [".json", ".yaml", ".yml"],
            # JSON format is used.
            mlflow.log_dict(dictionary, "data")
            mlflow.log_dict(dictionary, "data.txt")
    """
    target_run_id = _get_or_start_run().info.run_id
    client = MlflowClient()
    client.log_dict(target_run_id, dictionary, artifact_file)
def log_figure(
    figure: Union["matplotlib.figure.Figure", "plotly.graph_objects.Figure"], artifact_file: str
) -> None:
    """
    Log a figure as an artifact. The following figure objects are supported:

    - `matplotlib.figure.Figure`_
    - `plotly.graph_objects.Figure`_

    .. _matplotlib.figure.Figure:
        https://matplotlib.org/api/_as_gen/matplotlib.figure.Figure.html

    .. _plotly.graph_objects.Figure:
        https://plotly.com/python-api-reference/generated/plotly.graph_objects.Figure.html

    :param figure: Figure to log.
    :param artifact_file: The run-relative artifact file path in posixpath format to which
                          the figure is saved (e.g. "dir/file.png").

    .. code-block:: python
        :caption: Matplotlib Example

        import mlflow
        import matplotlib.pyplot as plt

        fig, ax = plt.subplots()
        ax.plot([0, 1], [2, 3])

        with mlflow.start_run():
            mlflow.log_figure(fig, "figure.png")

    .. code-block:: python
        :caption: Plotly Example

        import mlflow
        from plotly import graph_objects as go

        fig = go.Figure(go.Scatter(x=[0, 1], y=[2, 3]))

        with mlflow.start_run():
            mlflow.log_figure(fig, "figure.html")
    """
    target_run_id = _get_or_start_run().info.run_id
    client = MlflowClient()
    client.log_figure(target_run_id, figure, artifact_file)
def log_image(image: Union["numpy.ndarray", "PIL.Image.Image"], artifact_file: str) -> None:
    """
    Log an image as an artifact. The following image objects are supported:

    - `numpy.ndarray`_
    - `PIL.Image.Image`_

    .. _numpy.ndarray:
        https://numpy.org/doc/stable/reference/generated/numpy.ndarray.html

    .. _PIL.Image.Image:
        https://pillow.readthedocs.io/en/stable/reference/Image.html#PIL.Image.Image

    Numpy array support
        - data type (( ) represents a valid value range):

            - bool
            - integer (0 ~ 255)
            - unsigned integer (0 ~ 255)
            - float (0.0 ~ 1.0)

            .. warning::

                - Out-of-range integer values will be **clipped** to [0, 255].
                - Out-of-range float values will be **clipped** to [0, 1].

        - shape (H: height, W: width):

            - H x W (Grayscale)
            - H x W x 1 (Grayscale)
            - H x W x 3 (an RGB channel order is assumed)
            - H x W x 4 (an RGBA channel order is assumed)

    :param image: Image to log.
    :param artifact_file: The run-relative artifact file path in posixpath format to which
                          the image is saved (e.g. "dir/image.png").

    .. code-block:: python
        :caption: Numpy Example

        import mlflow
        import numpy as np

        image = np.random.randint(0, 256, size=(100, 100, 3), dtype=np.uint8)

        with mlflow.start_run():
            mlflow.log_image(image, "image.png")

    .. code-block:: python
        :caption: Pillow Example

        import mlflow
        from PIL import Image

        image = Image.new("RGB", (100, 100))

        with mlflow.start_run():
            mlflow.log_image(image, "image.png")
    """
    target_run_id = _get_or_start_run().info.run_id
    client = MlflowClient()
    client.log_image(target_run_id, image, artifact_file)
def _record_logged_model(mlflow_model):
    # Forward the model to the client's ``_record_logged_model`` for the run returned by
    # ``_get_or_start_run``.
    target_run_id = _get_or_start_run().info.run_id
    client = MlflowClient()
    client._record_logged_model(target_run_id, mlflow_model)
def get_experiment(experiment_id: str) -> Experiment:
    """
    Retrieve an experiment by experiment_id from the backend store

    :param experiment_id: The string-ified experiment ID returned from ``create_experiment``.
    :return: :py:class:`mlflow.entities.Experiment`

    .. code-block:: python
        :caption: Example

        import mlflow

        experiment = mlflow.get_experiment("0")
        print("Name: {}".format(experiment.name))
        print("Artifact Location: {}".format(experiment.artifact_location))
        print("Tags: {}".format(experiment.tags))
        print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))

    .. code-block:: text
        :caption: Output

        Name: Default
        Artifact Location: file:///.../mlruns/0
        Tags: {}
        Lifecycle_stage: active
    """
    client = MlflowClient()
    return client.get_experiment(experiment_id)
def get_experiment_by_name(name: str) -> Optional[Experiment]:
    """
    Retrieve an experiment by experiment name from the backend store

    :param name: The case sensitive experiment name.
    :return: An instance of :py:class:`mlflow.entities.Experiment`
             if an experiment with the specified name exists, otherwise None.

    .. code-block:: python
        :caption: Example

        import mlflow

        # Case sensitive name
        experiment = mlflow.get_experiment_by_name("Default")
        print("Experiment_id: {}".format(experiment.experiment_id))
        print("Artifact Location: {}".format(experiment.artifact_location))
        print("Tags: {}".format(experiment.tags))
        print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))

    .. code-block:: text
        :caption: Output

        Experiment_id: 0
        Artifact Location: file:///.../mlruns/0
        Tags: {}
        Lifecycle_stage: active
    """
    client = MlflowClient()
    return client.get_experiment_by_name(name)
def list_experiments(
    view_type: int = ViewType.ACTIVE_ONLY,
    max_results: Optional[int] = None,
) -> List[Experiment]:
    """
    List experiments, fetching them from the client through the ``_paginate`` helper.

    :param view_type: Qualify requested type of experiments.
    :param max_results: If passed, specifies the maximum number of experiments desired. If not
                        passed, all experiments will be returned.
    :return: A list of :py:class:`Experiment <mlflow.entities.Experiment>` objects.
    """

    def _fetch_page(number_to_get, next_page_token):
        # One client request; ``view_type`` is captured from the enclosing call.
        client = MlflowClient()
        return client.list_experiments(
            view_type=view_type,
            max_results=number_to_get,
            page_token=next_page_token,
        )

    return _paginate(_fetch_page, SEARCH_MAX_RESULTS_DEFAULT, max_results)
def create_experiment(
    name: str,
    artifact_location: Optional[str] = None,
    tags: Optional[Dict[str, Any]] = None,
) -> str:
    """
    Create an experiment.

    :param name: The experiment name, which must be unique and is case sensitive
    :param artifact_location: The location to store run artifacts.
                              If not provided, the server picks an appropriate default.
    :param tags: An optional dictionary of string keys and values to set as
                 tags on the experiment.
    :return: String ID of the created experiment.

    .. code-block:: python
        :caption: Example

        import mlflow

        # Create an experiment name, which must be unique and case sensitive
        experiment_id = mlflow.create_experiment("Social NLP Experiments")
        experiment = mlflow.get_experiment(experiment_id)
        print("Name: {}".format(experiment.name))
        print("Experiment_id: {}".format(experiment.experiment_id))
        print("Artifact Location: {}".format(experiment.artifact_location))
        print("Tags: {}".format(experiment.tags))
        print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))

    .. code-block:: text
        :caption: Output

        Name: Social NLP Experiments
        Experiment_id: 1
        Artifact Location: file:///.../mlruns/1
        Tags: {}
        Lifecycle_stage: active
    """
    client = MlflowClient()
    return client.create_experiment(name, artifact_location, tags)
def delete_experiment(experiment_id: str) -> None:
    """
    Delete an experiment from the backend store.

    :param experiment_id: The string-ified experiment ID returned from ``create_experiment``.

    .. code-block:: python
        :caption: Example

        import mlflow

        experiment_id = mlflow.create_experiment("New Experiment")
        mlflow.delete_experiment(experiment_id)

        # Examine the deleted experiment details.
        experiment = mlflow.get_experiment(experiment_id)
        print("Name: {}".format(experiment.name))
        print("Artifact Location: {}".format(experiment.artifact_location))
        print("Lifecycle_stage: {}".format(experiment.lifecycle_stage))

    .. code-block:: text
        :caption: Output

        Name: New Experiment
        Artifact Location: file:///.../mlruns/2
        Lifecycle_stage: deleted
    """
    client = MlflowClient()
    client.delete_experiment(experiment_id)
def delete_run(run_id: str) -> None:
    """
    Deletes a run with the given ID.

    :param run_id: Unique identifier for the run to delete.

    .. code-block:: python
        :caption: Example

        import mlflow

        with mlflow.start_run() as run:
            mlflow.log_param("p", 0)

        run_id = run.info.run_id
        mlflow.delete_run(run_id)

        print("run_id: {}; lifecycle_stage: {}".format(run_id,
            mlflow.get_run(run_id).info.lifecycle_stage))

    .. code-block:: text
        :caption: Output

        run_id: 45f4af3e6fd349e58579b27fcb0b8277; lifecycle_stage: deleted
    """
    client = MlflowClient()
    client.delete_run(run_id)
def get_artifact_uri(artifact_path: Optional[str] = None) -> str:
"""
Get the absolute URI of the specified artifact in the currently active run.
If `path` is not specified, the artifact root URI of the currently active