forked from mlrun/mlrun
/
httpdb.py
2790 lines (2460 loc) · 117 KB
/
httpdb.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright 2018 Iguazio
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import enum
import http
import os
import tempfile
import time
import traceback
import warnings
from datetime import datetime
from os import path, remove
from typing import Dict, List, Optional, Union
import kfp
import requests
import semver
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import mlrun
import mlrun.projects
from mlrun.api import schemas
from mlrun.errors import MLRunInvalidArgumentError
from ..api.schemas import ModelEndpoint
from ..config import config
from ..feature_store import FeatureSet, FeatureVector
from ..lists import ArtifactList, RunList
from ..runtimes import BaseRuntime
from ..utils import datetime_to_iso, dict_to_json, logger, new_pipe_meta, version
from .base import RunDBError, RunDBInterface
# Known artifact-dict field names.
# NOTE(review): not referenced anywhere in this chunk — presumably used by
# artifact (de)serialization helpers elsewhere in the file; confirm before removing.
_artifact_keys = [
    "format",
    "inline",
    "key",
    "src_path",
    "target_path",
    "viewer",
]
def bool2str(val):
    """Serialize a truthy/falsy value to the ``"yes"``/``"no"`` convention used by the API."""
    if val:
        return "yes"
    return "no"
# Shared adapter mounted on every HTTP session: retry up to 3 times with
# exponential backoff on transient server-side statuses (500/502/503/504).
http_adapter = HTTPAdapter(
    max_retries=Retry(
        total=3,
        backoff_factor=1,
        status_forcelist=[500, 502, 503, 504],
        # we want to retry but not to raise since we do want that last response (to parse details on the
        # error from response body) we'll handle raising ourselves
        raise_on_status=False,
    ),
)
class HTTPRunDB(RunDBInterface):
"""Interface for accessing and manipulating the :py:mod:`mlrun` persistent store, maintaining the full state
and catalog of objects that MLRun uses. The :py:class:`HTTPRunDB` class serves as a client-side proxy to the MLRun
API service which maintains the actual data-store, accesses the server through REST APIs.
The class provides functions for accessing and modifying the various objects that are used by MLRun in its
operation. The functions provided follow some standard guidelines, which are:
- Every object in MLRun exists in the context of a project (except projects themselves). When referencing an object
through any API, a project name must be provided. The default for most APIs is for an empty project name, which
will be replaced by the name of the default project (usually ``default``). Therefore, if performing an API to
list functions, for example, and not providing a project name - the result will not be functions from all
projects but rather from the ``default`` project.
- Many objects can be assigned labels, and listed/queried by label. The label parameter for query APIs allows for
listing objects that:
- Have a specific label, by asking for ``label="<label_name>"``. In this case the actual value of the label
doesn't matter and every object with that label will be returned
- Have a label with a specific value. This is done by specifying ``label="<label_name>=<label_value>"``. In this
case only objects whose label matches the value will be returned
- Most objects have a ``create`` method as well as a ``store`` method. Create can only be called when such an object
does not exist yet, while store allows for either creating a new object or overwriting an existing object.
- Some objects have a ``versioned`` option, in which case overwriting the same object with a different version of
it does not delete the previous version, but rather creates a new version of the object and keeps both versions.
Versioned objects usually have a ``uid`` property which is based on their content and allows to reference a
specific version of an object (other than tagging objects, which also allows for easy referencing).
- Many objects have both a ``store`` function and a ``patch`` function. These are used in the same way as the
corresponding REST verbs - a ``store`` is passed a full object and will basically perform a PUT operation,
replacing the full object (if it exists) while ``patch`` receives just a dictionary containing the differences to
be applied to the object, and will merge those changes to the existing object. The ``patch``
operation also has a strategy assigned to it which determines how the merge logic should behave.
The strategy can be either ``replace`` or ``additive``. For further details on those strategies, refer
to https://pypi.org/project/mergedeep/
"""
kind = "http"
    def __init__(self, base_url, user="", password="", token=""):
        """Create a client bound to the MLRun API at ``base_url``.

        :param base_url: URL of the MLRun API service.
        :param user:     Username for basic auth (optional).
        :param password: Password for basic auth (optional).
        :param token:    Bearer token or Iguazio session id (optional).
        """
        self.base_url = base_url
        self.user = user
        self.password = password
        self.token = token
        # populated by connect() from the server's client-spec
        self.server_version = ""
        # requests.Session, created lazily on first api_call()
        self.session = None
        # polling intervals (seconds) for the various wait loops
        self._wait_for_project_terminal_state_retry_interval = 3
        self._wait_for_background_task_terminal_state_retry_interval = 3
        self._wait_for_project_deletion_interval = 3
        self.client_version = version.Version().get()["version"]
def __repr__(self):
cls = self.__class__.__name__
return f"{cls}({self.base_url!r})"
@staticmethod
def get_api_path_prefix(version: str = None) -> str:
"""
:param version: API version to use, None (the default) will mean to use the default value from mlconf,
for un-versioned api set an empty string.
"""
if version is not None:
return f"api/{version}" if version else "api"
api_version_path = (
f"api/{config.api_base_version}" if config.api_base_version else "api"
)
return api_version_path
def get_base_api_url(self, path: str, version: str = None) -> str:
path_prefix = self.get_api_path_prefix(version)
url = f"{self.base_url}/{path_prefix}/{path}"
return url
    def api_call(
        self,
        method,
        path,
        error=None,
        params=None,
        body=None,
        json=None,
        headers=None,
        timeout=45,
        version=None,
    ):
        """Perform a direct REST API call on the :py:mod:`mlrun` API server.

        Caution:
            For advanced usage - prefer using the various APIs exposed through this class, rather than
            directly invoking REST calls.

        :param method: REST method (POST, GET, PUT...)
        :param path: Path to endpoint executed, for example ``"projects"``
        :param error: Error to return if API invocation fails
        :param params: Query parameters to pass in the call
        :param body: Payload to be passed in the call. If using JSON objects, prefer using the ``json`` param
        :param json: JSON payload to be passed in the call
        :param headers: REST headers, passed as a dictionary: ``{"<header-name>": "<header-value>"}``
        :param timeout: API call timeout
        :param version: API version to use, None (the default) will mean to use the default value from config,
            for un-versioned api set an empty string.

        :return: Python HTTP response object
        """
        url = self.get_base_api_url(path, version)
        # keep only the request kwargs that were actually provided
        kw = {
            key: value
            for key, value in (
                ("params", params),
                ("data", body),
                ("json", json),
                ("headers", headers),
            )
            if value is not None
        }
        # basic auth takes precedence over token auth
        if self.user:
            kw["auth"] = (self.user, self.password)
        elif self.token:
            # Iguazio auth doesn't support passing token through bearer, so use cookie instead
            if mlrun.platforms.iguazio.is_iguazio_session(self.token):
                session_cookie = f'j:{{"sid": "{self.token}"}}'
                cookies = {
                    "session": session_cookie,
                }
                kw["cookies"] = cookies
            else:
                if "Authorization" not in kw.setdefault("headers", {}):
                    kw["headers"].update({"Authorization": "Bearer " + self.token})
        # advertise the client version so the server can apply compatibility logic
        if mlrun.api.schemas.HeaderNames.client_version not in kw.setdefault(
            "headers", {}
        ):
            kw["headers"].update(
                {mlrun.api.schemas.HeaderNames.client_version: self.client_version}
            )
        # requests no longer supports header values to be enum (https://github.com/psf/requests/pull/6154)
        # convert to strings. Do the same for params for niceness
        for dict_ in [headers, params]:
            if dict_ is not None:
                for key in dict_.keys():
                    if isinstance(dict_[key], enum.Enum):
                        dict_[key] = dict_[key].value
        # lazily create the session and mount the shared retrying adapter
        if not self.session:
            self.session = requests.Session()
            self.session.mount("http://", http_adapter)
            self.session.mount("https://", http_adapter)
        try:
            response = self.session.request(
                method, url, timeout=timeout, verify=False, **kw
            )
        except requests.RequestException as exc:
            error = f"{str(exc)}: {error}" if error else str(exc)
            raise mlrun.errors.MLRunRuntimeError(error) from exc
        if not response.ok:
            if response.content:
                # try to enrich the error with details parsed from the response body
                try:
                    data = response.json()
                    error_details = data.get("detail", {})
                    if not error_details:
                        logger.warning("Failed parsing error response body", data=data)
                except Exception:
                    error_details = ""
                if error_details:
                    error_details = f"details: {error_details}"
                    error = f"{error} {error_details}" if error else error_details
                    mlrun.errors.raise_for_status(response, error)
            # no parsable details - raise with the plain error message
            mlrun.errors.raise_for_status(response, error)
        return response
def _path_of(self, prefix, project, uid):
project = project or config.default_project
return f"{prefix}/{project}/{uid}"
    def connect(self, secrets=None):
        """Connect to the MLRun API server. Must be called prior to executing any other method.
        The code utilizes the URL for the API server from the configuration - ``mlconf.dbpath``.

        For example::

            mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'
            db = get_run_db().connect()

        :param secrets: not read by this implementation (kept for interface compatibility).
        """
        # hack to allow unit tests to instantiate HTTPRunDB without a real server behind
        if "mock-server" in self.base_url:
            return
        resp = self.api_call("GET", "client-spec", timeout=5)
        try:
            server_cfg = resp.json()
            self.server_version = server_cfg["version"]
            self._validate_version_compatibility(self.server_version, config.version)
            config.namespace = config.namespace or server_cfg.get("namespace")
            if (
                "namespace" in server_cfg
                and server_cfg["namespace"] != config.namespace
            ):
                logger.warning(
                    f"warning!, server ({server_cfg['namespace']}) and client ({config.namespace})"
                    " namespace don't match"
                )
            # get defaults from remote server; local config wins where already set
            config.remote_host = config.remote_host or server_cfg.get("remote_host")
            config.mpijob_crd_version = config.mpijob_crd_version or server_cfg.get(
                "mpijob_crd_version"
            )
            config.ui.url = config.resolve_ui_url() or server_cfg.get("ui_url")
            config.artifact_path = config.artifact_path or server_cfg.get(
                "artifact_path"
            )
            config.spark_app_image = config.spark_app_image or server_cfg.get(
                "spark_app_image"
            )
            config.spark_app_image_tag = config.spark_app_image_tag or server_cfg.get(
                "spark_app_image_tag"
            )
            config.spark_history_server_path = (
                config.spark_history_server_path
                or server_cfg.get("spark_history_server_path")
            )
            config.httpdb.builder.docker_registry = (
                config.httpdb.builder.docker_registry
                or server_cfg.get("docker_registry")
            )
            config.httpdb.api_url = config.httpdb.api_url or server_cfg.get("api_url")
            config.nuclio_version = config.nuclio_version or server_cfg.get(
                "nuclio_version"
            )
            config.default_function_priority_class_name = (
                config.default_function_priority_class_name
                or server_cfg.get("default_function_priority_class_name")
            )
            config.valid_function_priority_class_names = (
                config.valid_function_priority_class_names
                or server_cfg.get("valid_function_priority_class_names")
            )
            # These have a default value, therefore local config will always have a value, prioritize the
            # API value first
            config.ui.projects_prefix = (
                server_cfg.get("ui_projects_prefix") or config.ui.projects_prefix
            )
            config.kfp_image = server_cfg.get("kfp_image") or config.kfp_image
            config.dask_kfp_image = (
                server_cfg.get("dask_kfp_image") or config.dask_kfp_image
            )
            # scrape_metrics is a boolean - "or" would discard a server value of False,
            # so fall back only when the server did not send it at all
            config.scrape_metrics = (
                server_cfg.get("scrape_metrics")
                if server_cfg.get("scrape_metrics") is not None
                else config.scrape_metrics
            )
            config.hub_url = server_cfg.get("hub_url") or config.hub_url
            config.default_function_node_selector = (
                server_cfg.get("default_function_node_selector")
                or config.default_function_node_selector
            )
            config.igz_version = server_cfg.get("igz_version") or config.igz_version
            config.storage.auto_mount_type = (
                server_cfg.get("auto_mount_type") or config.storage.auto_mount_type
            )
            config.storage.auto_mount_params = (
                server_cfg.get("auto_mount_params") or config.storage.auto_mount_params
            )
            config.spark_operator_version = (
                server_cfg.get("spark_operator_version")
                or config.spark_operator_version
            )
            config.default_tensorboard_logs_path = (
                server_cfg.get("default_tensorboard_logs_path")
                or config.default_tensorboard_logs_path
            )
            config.default_function_pod_resources = (
                server_cfg.get("default_function_pod_resources")
                or config.default_function_pod_resources
            )
            config.function_defaults.preemption_mode = (
                server_cfg.get("default_preemption_mode")
                or config.function_defaults.preemption_mode
            )
            config.preemptible_nodes.node_selector = (
                server_cfg.get("preemptible_nodes_node_selector")
                or config.preemptible_nodes.node_selector
            )
            config.preemptible_nodes.tolerations = (
                server_cfg.get("preemptible_nodes_tolerations")
                or config.preemptible_nodes.tolerations
            )
            config.force_run_local = config.force_run_local or server_cfg.get(
                "force_run_local"
            )
        except Exception as exc:
            # best-effort sync: connection still succeeds with local config only
            logger.warning(
                "Failed syncing config from server",
                exc=str(exc),
                traceback=traceback.format_exc(),
            )
        return self
def store_log(self, uid, project="", body=None, append=False):
"""Save a log persistently.
:param uid: Log unique ID
:param project: Project name for which this log belongs
:param body: The actual log to store
:param append: Whether to append the log provided in ``body`` to an existing log with the same ``uid`` or to
create a new log. If set to ``False``, an existing log with same ``uid`` will be overwritten
"""
if not body:
return
path = self._path_of("log", project, uid)
params = {"append": bool2str(append)}
error = f"store log {project}/{uid}"
self.api_call("POST", path, error, params, body)
def get_log(self, uid, project="", offset=0, size=-1):
"""Retrieve a log.
:param uid: Log unique ID
:param project: Project name for which the log belongs
:param offset: Retrieve partial log, get up to ``size`` bytes starting at offset ``offset``
from beginning of log
:param size: See ``offset``. If set to ``-1`` (the default) will retrieve all data to end of log.
:returns: The following objects:
- state - The state of the runtime object which generates this log, if it exists. In case no known state
exists, this will be ``unknown``.
- content - The actual log content.
"""
params = {"offset": offset, "size": size}
path = self._path_of("log", project, uid)
error = f"get log {project}/{uid}"
resp = self.api_call("GET", path, error, params=params)
if resp.headers:
state = resp.headers.get("x-mlrun-run-state", "")
return state.lower(), resp.content
return "unknown", resp.content
def watch_log(self, uid, project="", watch=True, offset=0):
"""Retrieve logs of a running process, and watch the progress of the execution until it completes. This
method will print out the logs and continue to periodically poll for, and print, new logs as long as the
state of the runtime which generates this log is either ``pending`` or ``running``.
:param uid: The uid of the log object to watch.
:param project: Project that the log belongs to.
:param watch: If set to ``True`` will continue tracking the log as described above. Otherwise this function
is practically equivalent to the :py:func:`~get_log` function.
:param offset: Minimal offset in the log to watch.
:returns: The final state of the log being watched.
"""
state, text = self.get_log(uid, project, offset=offset)
if text:
print(text.decode())
if watch:
nil_resp = 0
while state in ["pending", "running"]:
offset += len(text)
if nil_resp < 3:
time.sleep(3)
else:
time.sleep(10)
state, text = self.get_log(uid, project, offset=offset)
if text:
nil_resp = 0
print(text.decode(), end="")
else:
nil_resp += 1
return state
def store_run(self, struct, uid, project="", iter=0):
"""Store run details in the DB. This method is usually called from within other :py:mod:`mlrun` flows
and not called directly by the user."""
path = self._path_of("run", project, uid)
params = {"iter": iter}
error = f"store run {project}/{uid}"
body = _as_json(struct)
self.api_call("POST", path, error, params=params, body=body)
def update_run(self, updates: dict, uid, project="", iter=0):
"""Update the details of a stored run in the DB."""
path = self._path_of("run", project, uid)
params = {"iter": iter}
error = f"update run {project}/{uid}"
body = _as_json(updates)
self.api_call("PATCH", path, error, params=params, body=body)
def abort_run(self, uid, project="", iter=0):
"""
Abort a running run - will remove the run's runtime resources and mark its state as aborted
"""
self.update_run(
{"status.state": mlrun.runtimes.constants.RunStates.aborted},
uid,
project,
iter,
)
def read_run(self, uid, project="", iter=0):
"""Read the details of a stored run from the DB.
:param uid: The run's unique ID.
:param project: Project name.
:param iter: Iteration within a specific execution.
"""
path = self._path_of("run", project, uid)
params = {"iter": iter}
error = f"get run {project}/{uid}"
resp = self.api_call("GET", path, error, params=params)
return resp.json()["data"]
def del_run(self, uid, project="", iter=0):
"""Delete details of a specific run from DB.
:param uid: Unique ID for the specific run to delete.
:param project: Project that the run belongs to.
:param iter: Iteration within a specific task.
"""
path = self._path_of("run", project, uid)
params = {"iter": iter}
error = f"del run {project}/{uid}"
self.api_call("DELETE", path, error, params=params)
def list_runs(
self,
name=None,
uid=None,
project=None,
labels=None,
state=None,
sort=True,
last=0,
iter=False,
start_time_from: datetime = None,
start_time_to: datetime = None,
last_update_time_from: datetime = None,
last_update_time_to: datetime = None,
partition_by: Union[schemas.RunPartitionByField, str] = None,
rows_per_partition: int = 1,
partition_sort_by: Union[schemas.SortField, str] = None,
partition_order: Union[schemas.OrderType, str] = schemas.OrderType.desc,
max_partitions: int = 0,
) -> RunList:
"""Retrieve a list of runs, filtered by various options.
Example::
runs = db.list_runs(name='download', project='iris', labels='owner=admin')
# If running in Jupyter, can use the .show() function to display the results
db.list_runs(name='', project=project_name).show()
:param name: Name of the run to retrieve.
:param uid: Unique ID of the run.
:param project: Project that the runs belongs to.
:param labels: List runs that have a specific label assigned. Currently only a single label filter can be
applied, otherwise result will be empty.
:param state: List only runs whose state is specified.
:param sort: Whether to sort the result according to their start time. Otherwise, results will be
returned by their internal order in the DB (order will not be guaranteed).
:param last: Deprecated - currently not used.
:param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
:param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
:param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.
:param last_update_time_from: Filter by run last update time in ``(last_update_time_from,
last_update_time_to)``.
:param last_update_time_to: Filter by run last update time in ``(last_update_time_from, last_update_time_to)``.
:param partition_by: Field to group results by. Only allowed value is `name`. When `partition_by` is specified,
the `partition_sort_by` parameter must be provided as well.
:param rows_per_partition: How many top rows (per sorting defined by `partition_sort_by` and `partition_order`)
to return per group. Default value is 1.
:param partition_sort_by: What field to sort the results by, within each partition defined by `partition_by`.
Currently the only allowed values are `created` and `updated`.
:param partition_order: Order of sorting within partitions - `asc` or `desc`. Default is `desc`.
:param max_partitions: Maximal number of partitions to include in the result. Default is `0` which means no
limit.
"""
project = project or config.default_project
params = {
"name": name,
"uid": uid,
"project": project,
"label": labels or [],
"state": state,
"sort": bool2str(sort),
"iter": bool2str(iter),
"start_time_from": datetime_to_iso(start_time_from),
"start_time_to": datetime_to_iso(start_time_to),
"last_update_time_from": datetime_to_iso(last_update_time_from),
"last_update_time_to": datetime_to_iso(last_update_time_to),
}
if partition_by:
params.update(
self._generate_partition_by_params(
schemas.RunPartitionByField,
partition_by,
rows_per_partition,
partition_sort_by,
partition_order,
max_partitions,
)
)
error = "list runs"
resp = self.api_call("GET", "runs", error, params=params)
return RunList(resp.json()["runs"])
def del_runs(self, name=None, project=None, labels=None, state=None, days_ago=0):
"""Delete a group of runs identified by the parameters of the function.
Example::
db.del_runs(state='completed')
:param name: Name of the task which the runs belong to.
:param project: Project to which the runs belong.
:param labels: Filter runs that are labeled using these specific label values.
:param state: Filter only runs which are in this state.
:param days_ago: Filter runs whose start time is newer than this parameter.
"""
project = project or config.default_project
params = {
"name": name,
"project": project,
"label": labels or [],
"state": state,
"days_ago": str(days_ago),
}
error = "del runs"
self.api_call("DELETE", "runs", error, params=params)
def store_artifact(self, key, artifact, uid, iter=None, tag=None, project=""):
"""Store an artifact in the DB.
:param key: Identifying key of the artifact.
:param artifact: The actual artifact to store.
:param uid: A unique ID for this specific version of the artifact.
:param iter: The task iteration which generated this artifact. If ``iter`` is not ``None`` the iteration will
be added to the key provided to generate a unique key for the artifact of the specific iteration.
:param tag: Tag of the artifact.
:param project: Project that the artifact belongs to.
"""
path = self._path_of("artifact", project, uid) + "/" + key
params = {
"tag": tag,
}
if iter:
params["iter"] = str(iter)
error = f"store artifact {project}/{uid}/{key}"
body = _as_json(artifact)
self.api_call("POST", path, error, params=params, body=body)
def read_artifact(self, key, tag=None, iter=None, project=""):
"""Read an artifact, identified by its key, tag and iteration."""
project = project or config.default_project
tag = tag or "latest"
path = f"projects/{project}/artifact/{key}?tag={tag}"
error = f"read artifact {project}/{key}"
# The default is legacy format, need to override it.
params = {"format": schemas.ArtifactsFormat.full.value}
if iter:
params["iter"] = str(iter)
resp = self.api_call("GET", path, error, params=params)
return resp.json()["data"]
def del_artifact(self, key, tag=None, project=""):
"""Delete an artifact."""
path = self._path_of("artifact", project, key) # TODO: uid?
params = {
"key": key,
"tag": tag,
}
error = f"del artifact {project}/{key}"
self.api_call("DELETE", path, error, params=params)
def list_artifacts(
self,
name=None,
project=None,
tag=None,
labels=None,
since=None,
until=None,
iter: int = None,
best_iteration: bool = False,
kind: str = None,
category: Union[str, schemas.ArtifactCategories] = None,
) -> ArtifactList:
"""List artifacts filtered by various parameters.
Examples::
# Show latest version of all artifacts in project
latest_artifacts = db.list_artifacts('', tag='latest', project='iris')
# check different artifact versions for a specific artifact
result_versions = db.list_artifacts('results', tag='*', project='iris')
:param name: Name of artifacts to retrieve. Name is used as a like query, and is not case-sensitive. This means
that querying for ``name`` may return artifacts named ``my_Name_1`` or ``surname``.
:param project: Project name.
:param tag: Return artifacts assigned this tag.
:param labels: Return artifacts that have these labels.
:param since: Not in use in :py:class:`HTTPRunDB`.
:param until: Not in use in :py:class:`HTTPRunDB`.
:param iter: Return artifacts from a specific iteration (where ``iter=0`` means the root iteration). If
``None`` (default) return artifacts from all iterations.
:param best_iteration: Returns the artifact which belongs to the best iteration of a given run, in the case of
artifacts generated from a hyper-param run. If only a single iteration exists, will return the artifact
from that iteration. If using ``best_iter``, the ``iter`` parameter must not be used.
:param kind: Return artifacts of the requested kind.
:param category: Return artifacts of the requested category.
"""
project = project or config.default_project
params = {
"name": name,
"project": project,
"tag": tag,
"label": labels or [],
"iter": iter,
"best-iteration": best_iteration,
"kind": kind,
"category": category,
"format": schemas.ArtifactsFormat.full.value,
}
error = "list artifacts"
resp = self.api_call("GET", "artifacts", error, params=params)
values = ArtifactList(resp.json()["artifacts"])
values.tag = tag
return values
def del_artifacts(self, name=None, project=None, tag=None, labels=None, days_ago=0):
"""Delete artifacts referenced by the parameters.
:param name: Name of artifacts to delete. Note that this is a like query, and is case-insensitive. See
:py:func:`~list_artifacts` for more details.
:param project: Project that artifacts belong to.
:param tag: Choose artifacts who are assigned this tag.
:param labels: Choose artifacts which are labeled.
:param days_ago: This parameter is deprecated and not used.
"""
project = project or config.default_project
params = {
"name": name,
"project": project,
"tag": tag,
"label": labels or [],
"days_ago": str(days_ago),
}
error = "del artifacts"
self.api_call("DELETE", "artifacts", error, params=params)
def list_artifact_tags(self, project=None) -> List[str]:
"""Return a list of all the tags assigned to artifacts in the scope of the given project."""
project = project or config.default_project
error_message = f"Failed listing artifact tags. project={project}"
response = self.api_call(
"GET", f"projects/{project}/artifact-tags", error_message
)
return response.json()["tags"]
def store_function(self, function, name, project="", tag=None, versioned=False):
"""Store a function object. Function is identified by its name and tag, and can be versioned."""
params = {"tag": tag, "versioned": versioned}
project = project or config.default_project
path = self._path_of("func", project, name)
error = f"store function {project}/{name}"
resp = self.api_call(
"POST", path, error, params=params, body=dict_to_json(function)
)
# hash key optional to be backwards compatible to API v<0.4.10 in which it wasn't in the response
return resp.json().get("hash_key")
def get_function(self, name, project="", tag=None, hash_key=""):
"""Retrieve details of a specific function, identified by its name and potentially a tag or function hash."""
params = {"tag": tag, "hash_key": hash_key}
project = project or config.default_project
path = self._path_of("func", project, name)
error = f"get function {project}/{name}"
resp = self.api_call("GET", path, error, params=params)
return resp.json()["func"]
def delete_function(self, name: str, project: str = ""):
"""Delete a function belonging to a specific project."""
project = project or config.default_project
path = f"projects/{project}/functions/{name}"
error_message = f"Failed deleting function {project}/{name}"
self.api_call("DELETE", path, error_message)
def list_functions(self, name=None, project=None, tag=None, labels=None):
"""Retrieve a list of functions, filtered by specific criteria.
:param name: Return only functions with a specific name.
:param project: Return functions belonging to this project. If not specified, the default project is used.
:param tag: Return function versions with specific tags.
:param labels: Return functions that have specific labels assigned to them.
:returns: List of function objects (as dictionary).
"""
params = {
"project": project or config.default_project,
"name": name,
"tag": tag,
"label": labels or [],
}
error = "list functions"
resp = self.api_call("GET", "funcs", error, params=params)
return resp.json()["funcs"]
def list_runtime_resources(
    self,
    project: Optional[str] = None,
    label_selector: Optional[str] = None,
    kind: Optional[str] = None,
    object_id: Optional[str] = None,
    group_by: Optional[mlrun.api.schemas.ListRuntimeResourcesGroupByField] = None,
) -> Union[
    mlrun.api.schemas.RuntimeResourcesOutput,
    mlrun.api.schemas.GroupedByJobRuntimeResourcesOutput,
    mlrun.api.schemas.GroupedByProjectRuntimeResourcesOutput,
]:
    """List current runtime resources, which are usually (but not limited to) Kubernetes pods or CRDs.

    Function applies for runs of type ``['dask', 'job', 'spark', 'remote-spark', 'mpijob']``, and will return per
    runtime kind a list of the runtime resources (which may have already completed their execution).

    :param project: Get only runtime resources of a specific project, by default None, which will return only the
        projects you're authorized to see.
    :param label_selector: A label filter that will be passed to Kubernetes for filtering the results according
        to their labels.
    :param kind: The kind of runtime to query. May be one of ``['dask', 'job', 'spark', 'remote-spark', 'mpijob']``
    :param object_id: The identifier of the mlrun object to query its runtime resources. for most function runtimes,
        runtime resources are per Run, for which the identifier is the Run's UID. For dask runtime, the runtime
        resources are per Function, for which the identifier is the Function's name.
    :param group_by: Object to group results by. Allowed values are `job` and `project`.
    """
    response = self.api_call(
        "GET",
        # "*" queries across all (authorized) projects
        f"projects/{project if project else '*'}/runtime-resources",
        "Failed listing runtime resources",
        params={
            "label_selector": label_selector,
            "group-by": group_by,
            "kind": kind,
            "object-id": object_id,
        },
    )
    if group_by is None:
        # ungrouped response is a flat list of per-kind runtime resources
        return [
            mlrun.api.schemas.KindRuntimeResources(**kind_runtime_resources)
            for kind_runtime_resources in response.json()
        ]
    if group_by in (
        mlrun.api.schemas.ListRuntimeResourcesGroupByField.job,
        mlrun.api.schemas.ListRuntimeResourcesGroupByField.project,
    ):
        # both groupings share the same two-level shape:
        # {project: {job_id | kind: runtime resources}}
        structured_dict = {}
        for project_name, inner_map in response.json().items():
            for inner_key, runtime_resources in inner_map.items():
                structured_dict.setdefault(project_name, {})[
                    inner_key
                ] = mlrun.api.schemas.RuntimeResources(**runtime_resources)
        return structured_dict
    raise NotImplementedError(
        f"Provided group by field is not supported. group_by={group_by}"
    )
def list_runtimes(self, label_selector: str = None) -> List:
    """Deprecated - use :py:func:`~list_runtime_resources` instead.

    :param label_selector: Kubernetes label selector to filter the listed runtimes by.
    :returns: The API response body (list of runtimes) as parsed JSON.
    """
    warnings.warn(
        # fix: the two literals previously concatenated without a separator
        # ("...insteadThis will be removed...")
        "This method is deprecated, use list_runtime_resources instead. "
        "This will be removed in 0.9.0",
        # TODO: Remove in 0.9.0
        DeprecationWarning,
    )
    params = {"label_selector": label_selector}
    error = "list runtimes"
    resp = self.api_call("GET", "runtimes", error, params=params)
    return resp.json()
def get_runtime(self, kind: str, label_selector: str = None) -> Dict:
    """Deprecated - use :py:func:`~list_runtime_resources` (with kind filter) instead.

    :param kind: The runtime kind to retrieve.
    :param label_selector: Kubernetes label selector to filter the runtime's resources by.
    :returns: The API response body as parsed JSON.
    """
    warnings.warn(
        # fix: the two literals previously concatenated without a separator
        # ("...insteadThis will be removed...")
        "This method is deprecated, use list_runtime_resources (with kind filter) instead. "
        "This will be removed in 0.9.0",
        # TODO: Remove in 0.9.0
        DeprecationWarning,
    )
    params = {"label_selector": label_selector}
    path = f"runtimes/{kind}"
    error = f"get runtime {kind}"
    resp = self.api_call("GET", path, error, params=params)
    return resp.json()
def delete_runtime_resources(
    self,
    project: Optional[str] = None,
    label_selector: Optional[str] = None,
    kind: Optional[str] = None,
    object_id: Optional[str] = None,
    force: bool = False,
    grace_period: int = None,
) -> mlrun.api.schemas.GroupedByProjectRuntimeResourcesOutput:
    """Delete all runtime resources which are in terminal state.

    :param project: Delete only runtime resources of a specific project, by default None, which will delete only
        from the projects you're authorized to delete from.
    :param label_selector: Delete only runtime resources matching the label selector.
    :param kind: The kind of runtime to delete. May be one of ``['dask', 'job', 'spark', 'remote-spark', 'mpijob']``
    :param object_id: The identifier of the mlrun object to delete its runtime resources. for most function
        runtimes, runtime resources are per Run, for which the identifier is the Run's UID. For dask runtime, the
        runtime resources are per Function, for which the identifier is the Function's name.
    :param force: Force deletion - delete the runtime resource even if it's not in terminal state or if the grace
        period didn't pass.
    :param grace_period: Grace period given to the runtime resource before they are actually removed, counted from
        the moment they moved to terminal state.
    :returns: :py:class:`~mlrun.api.schemas.GroupedByProjectRuntimeResourcesOutput` listing the runtime resources
        that were removed.
    """
    # fall back to the configured grace period when the caller didn't give one
    if grace_period is None:
        grace_period = config.runtime_resources_deletion_grace_period
    response = self.api_call(
        "DELETE",
        # "*" deletes across all (authorized) projects
        f"projects/{project if project else '*'}/runtime-resources",
        "Failed deleting runtime resources",
        params={
            "label-selector": label_selector,
            "kind": kind,
            "object-id": object_id,
            "force": force,
            "grace-period": grace_period,
        },
    )
    # response maps project -> kind -> runtime resources; rebuild it with
    # structured RuntimeResources objects at the leaves
    removed = {}
    for project_name, kind_resources_map in response.json().items():
        for kind_name, runtime_resources in kind_resources_map.items():
            removed.setdefault(project_name, {})[
                kind_name
            ] = mlrun.api.schemas.RuntimeResources(**runtime_resources)
    return removed
def delete_runtimes(
    self,
    label_selector: str = None,
    force: bool = False,
    grace_period: int = None,
):
    """Deprecated - use :py:func:`~delete_runtime_resources` instead.

    :param label_selector: Delete only runtimes matching the label selector.
    :param force: Force deletion even if not in terminal state / grace period didn't pass.
    :param grace_period: Grace period (seconds) before removal; defaults to the configured
        ``runtime_resources_deletion_grace_period``.
    """
    warnings.warn(
        # fix: the two literals previously concatenated without a separator
        # ("...insteadThis will be removed...")
        "This method is deprecated, use delete_runtime_resources instead. "
        "This will be removed in 0.9.0",
        # TODO: Remove in 0.9.0
        DeprecationWarning,
    )
    if grace_period is None:
        grace_period = config.runtime_resources_deletion_grace_period
    params = {
        "label_selector": label_selector,
        "force": force,
        "grace_period": grace_period,
    }
    error = "delete runtimes"
    self.api_call("DELETE", "runtimes", error, params=params)
def delete_runtime(
    self,
    kind: str,
    label_selector: str = None,
    force: bool = False,
    grace_period: int = None,
):
    """Deprecated - use :py:func:`~delete_runtime_resources` (with kind filter) instead.

    :param kind: The runtime kind whose resources should be deleted.
    :param label_selector: Delete only resources matching the label selector.
    :param force: Force deletion even if not in terminal state / grace period didn't pass.
    :param grace_period: Grace period (seconds) before removal; defaults to the configured
        ``runtime_resources_deletion_grace_period``.
    """
    warnings.warn(
        # fix: the two literals previously concatenated without a separator
        # ("...insteadThis will be removed...")
        "This method is deprecated, use delete_runtime_resources (with kind filter) instead. "
        "This will be removed in 0.9.0",
        # TODO: Remove in 0.9.0
        DeprecationWarning,
    )
    if grace_period is None:
        grace_period = config.runtime_resources_deletion_grace_period
    params = {
        "label_selector": label_selector,
        "force": force,
        "grace_period": grace_period,
    }
    path = f"runtimes/{kind}"
    error = f"delete runtime {kind}"
    self.api_call("DELETE", path, error, params=params)
def delete_runtime_object(
self,
kind: str,