Add server option for serving only artifacts and proxied serving mode (#5045)

* Add --serve-artifacts and --artifacts-only options to mlflow server

Signed-off-by: Ben Wilson <benjamin.wilson@databricks.com>
BenWilson2 committed Dec 1, 2021
1 parent 85ae465 commit fb2972f
Showing 19 changed files with 326 additions and 72 deletions.
3 changes: 3 additions & 0 deletions examples/mlflow_artifacts/README.md
@@ -16,16 +16,19 @@ First, launch the tracking server with the artifacts service via `mlflow server`
```sh
# Launch a tracking server with the artifacts service
$ mlflow server \
--serve-artifacts \
--artifacts-destination ./mlartifacts \
--default-artifact-root http://localhost:5000/api/2.0/mlflow-artifacts/artifacts/experiments \
--gunicorn-opts "--log-level debug"
```

Notes:

- `--serve-artifacts` enables the MLflow Artifacts service endpoints so that artifacts are served by proxy through the tracking server's REST API
- `--artifacts-destination` specifies the base artifact location from which to resolve artifact upload/download/list requests. In this example, we're using a local directory `./mlartifacts`, but it can be changed to an S3 bucket or other remote storage supported by MLflow.
- `--default-artifact-root` points to the `experiments` directory of the artifacts service. Therefore, the default artifact location of a newly-created experiment is set to `./mlartifacts/experiments/<experiment_id>`.
- `--gunicorn-opts "--log-level debug"` is specified to print out request logs but can be omitted if unnecessary.
- `--artifacts-only` disables all other endpoints for the tracking server apart from those involved in listing, uploading, and downloading artifacts. This makes the MLflow server a single-purpose proxy for artifact handling only.
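
For a quick sanity check of this configuration, the snippet below is a minimal sketch (not part of this example project) of a client uploading and listing an artifact through the proxied endpoints. It assumes the server launched above is reachable at `http://localhost:5000` and that the `mlflow` client package is installed:

```python
import os

import mlflow
from mlflow.tracking import MlflowClient

# Point the client at the tracking server started above; with --serve-artifacts,
# artifact bytes flow through the server's mlflow-artifacts REST endpoints rather
# than going directly to the storage backend.
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5000"))

with mlflow.start_run() as run:
    with open("hello.txt", "w") as f:
        f.write("hello, proxied artifacts")
    mlflow.log_artifact("hello.txt")  # uploaded via the tracking server proxy

# Listing goes through the same proxied API; on the server side the file lands
# under ./mlartifacts/experiments/<experiment_id>/<run_id>/artifacts/.
client = MlflowClient()
print([info.path for info in client.list_artifacts(run.info.run_id)])
```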

Then, run `example.py` that performs upload, download, and list operations for artifacts:

2 changes: 2 additions & 0 deletions examples/mlflow_artifacts/docker-compose.yml
@@ -54,6 +54,8 @@ services:
--port 5500
--artifacts-destination s3://bucket
--gunicorn-opts "--log-level debug"
--serve-artifacts
--artifacts-only
postgres:
image: postgres
4 changes: 4 additions & 0 deletions examples/mlflow_artifacts/example.py
@@ -10,6 +10,10 @@ def save_text(path, text):
f.write(text)


# NOTE: ensure the tracking server has been started with --serve-artifacts to enable
# MLflow artifact serving functionality.


def main():
assert "MLFLOW_TRACKING_URI" in os.environ

4 changes: 2 additions & 2 deletions mlflow/azure/client.py
@@ -38,7 +38,7 @@ def put_block(sas_url, block_id, data, headers):
with rest_utils.cloud_storage_http_request(
"put", request_url, data=data, headers=request_headers
) as response:
response.raise_for_status()
rest_utils.augmented_raise_for_status(response)


def put_block_list(sas_url, block_list, headers):
@@ -66,7 +66,7 @@ def put_block_list(sas_url, block_list, headers):
with rest_utils.cloud_storage_http_request(
"put", request_url, data=data, headers=request_headers
) as response:
response.raise_for_status()
rest_utils.augmented_raise_for_status(response)


def _append_query_parameters(url, parameters):
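
Both hunks in this file (and a similar one in `mlflow/projects/utils.py` further down) swap `response.raise_for_status()` for `rest_utils.augmented_raise_for_status(response)`. The helper's body is not part of the hunks shown in this commit; as a rough, purely illustrative sketch of what such a wrapper typically does (details assumed, not taken from this diff):

```python
import requests


def augmented_raise_for_status(response: requests.Response) -> None:
    """Illustrative sketch only: re-raise HTTP errors with the response body attached,
    so server-side error details are not lost (plain raise_for_status() drops them)."""
    try:
        response.raise_for_status()
    except requests.HTTPError as e:
        raise requests.HTTPError(
            f"{e}. Response text: {response.text[:500]}", response=response
        ) from e
```
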
77 changes: 51 additions & 26 deletions mlflow/cli.py
@@ -13,14 +13,14 @@
import mlflow.runs
import mlflow.store.artifact.cli
from mlflow import tracking
from mlflow.store.tracking import DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH
from mlflow.store.tracking import DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH, DEFAULT_ARTIFACTS_URI
from mlflow.store.artifact.artifact_repository_registry import get_artifact_repository
from mlflow.tracking import _get_store
from mlflow.utils import cli_args
from mlflow.utils.annotations import experimental
from mlflow.utils.logging_utils import eprint
from mlflow.utils.process import ShellCommandException
from mlflow.utils.uri import is_local_uri
from mlflow.utils.uri import resolve_default_artifact_root
from mlflow.entities.lifecycle_stage import LifecycleStage
from mlflow.exceptions import MlflowException

@@ -233,20 +233,27 @@ def _validate_server_args(gunicorn_opts=None, workers=None, waitress_opts=None):
"SQLAlchemy-compatible database connection strings "
"(e.g. 'sqlite:///path/to/file.db') or local filesystem URIs "
"(e.g. 'file:///absolute/path/to/directory'). By default, data will be logged "
"to the ./mlruns directory.",
f"to {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}",
)
@click.option(
"--default-artifact-root",
metavar="URI",
default=None,
help="Path to local directory to store artifacts, for new experiments. "
"Note that this flag does not impact already-created experiments. "
"Default: " + DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH,
help="Directory in which to store artifacts for any new experiments created. For tracking "
"server backends that rely on SQL, this option is required in order to store artifacts. "
"Note that this flag does not impact already-created experiments with any previous "
"configuration of an MLflow server instance. "
"If the --serve-artifacts option is specified, the default artifact root is "
f"{DEFAULT_ARTIFACTS_URI}. Otherwise, the default artifact root is "
f"{DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}.",
)
@cli_args.SERVE_ARTIFACTS
@cli_args.ARTIFACTS_DESTINATION
@cli_args.PORT
@cli_args.HOST
def ui(backend_store_uri, default_artifact_root, artifacts_destination, port, host):
def ui(
backend_store_uri, default_artifact_root, serve_artifacts, artifacts_destination, port, host
):
"""
Launch the MLflow tracking UI for local viewing of run results. To launch a production
server, use the "mlflow server" command instead.
@@ -263,11 +270,9 @@ def ui(backend_store_uri, default_artifact_root, artifacts_destination, port, ho
if not backend_store_uri:
backend_store_uri = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH

if not default_artifact_root:
if is_local_uri(backend_store_uri):
default_artifact_root = backend_store_uri
else:
default_artifact_root = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH
default_artifact_root = resolve_default_artifact_root(
serve_artifacts, default_artifact_root, backend_store_uri, resolve_to_local=True
)

try:
initialize_backend_stores(backend_store_uri, default_artifact_root)
@@ -279,7 +284,15 @@ def ui(backend_store_uri, default_artifact_root, artifacts_destination, port, ho
# TODO: We eventually want to disable the write path in this version of the server.
try:
_run_server(
backend_store_uri, default_artifact_root, artifacts_destination, host, port, None, 1
backend_store_uri,
default_artifact_root,
serve_artifacts,
False,
artifacts_destination,
host,
port,
None,
1,
)
except ShellCommandException:
eprint("Running the mlflow server failed. Please see the logs above for details.")
@@ -315,10 +328,24 @@ def _validate_static_prefix(ctx, param, value):  # pylint: disable=unused-argume
"--default-artifact-root",
metavar="URI",
default=None,
help="Local or S3 URI to store artifacts, for new experiments. "
"Note that this flag does not impact already-created experiments. "
"Default: Within file store, if a file:/ URI is provided. If a sql backend is"
" used, then this option is required.",
help="Directory in which to store artifacts for any new experiments created. For tracking "
"server backends that rely on SQL, this option is required in order to store artifacts. "
"Note that this flag does not impact already-created experiments with any previous "
"configuration of an MLflow server instance. "
f"By default, data will be logged to the {DEFAULT_ARTIFACTS_URI} uri proxy if "
"the --serve-artifacts option is enabled. Otherwise, the default location will "
f"be {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}.",
)
@cli_args.SERVE_ARTIFACTS
@click.option(
"--artifacts-only",
is_flag=True,
default=False,
help="If specified, configures the mlflow server to be used only for proxied artifact serving. "
"With this mode enabled, functionality of the mlflow tracking service (e.g. run creation, "
"metric logging, and parameter logging) is disabled. The server will only expose "
"endpoints for uploading, downloading, and listing artifacts. "
"Default: False",
)
@cli_args.ARTIFACTS_DESTINATION
@cli_args.HOST
@@ -348,6 +375,8 @@ def _validate_static_prefix(ctx, param, value):  # pylint: disable=unused-argume
def server(
backend_store_uri,
default_artifact_root,
serve_artifacts,
artifacts_only,
artifacts_destination,
host,
port,
@@ -374,15 +403,9 @@ def server(
if not backend_store_uri:
backend_store_uri = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH

if not default_artifact_root:
if is_local_uri(backend_store_uri):
default_artifact_root = backend_store_uri
else:
eprint(
"Option 'default-artifact-root' is required, when backend store is not "
"local file based."
)
sys.exit(1)
default_artifact_root = resolve_default_artifact_root(
serve_artifacts, default_artifact_root, backend_store_uri
)

try:
initialize_backend_stores(backend_store_uri, default_artifact_root)
@@ -395,6 +418,8 @@ def server(
_run_server(
backend_store_uri,
default_artifact_root,
serve_artifacts,
artifacts_only,
artifacts_destination,
host,
port,
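
The inline default-artifact-root logic that previously lived in both `ui()` and `server()` is now centralized in `mlflow.utils.uri.resolve_default_artifact_root`, whose body is not shown in this diff. Pieced together from the removed code and the new help text, its decision rules look roughly like the sketch below (the constant values and the error path are assumptions, not taken from this commit):

```python
import sys

from mlflow.utils.uri import is_local_uri

# Assumed values for illustration; the real constants live in mlflow.store.tracking.
DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH = "./mlruns"
DEFAULT_ARTIFACTS_URI = "mlflow-artifacts:/"


def resolve_default_artifact_root_sketch(
    serve_artifacts, default_artifact_root, backend_store_uri, resolve_to_local=False
):
    if default_artifact_root:
        # An explicit --default-artifact-root always wins.
        return default_artifact_root
    if serve_artifacts:
        # Proxied serving: clients resolve artifacts through the tracking server.
        return DEFAULT_ARTIFACTS_URI
    if is_local_uri(backend_store_uri):
        # A file-based backend store doubles as the artifact root.
        return backend_store_uri
    if resolve_to_local:
        # `mlflow ui` falls back to the local default rather than erroring out.
        return DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH
    # `mlflow server` with a SQL backend still requires the option explicitly.
    sys.exit(
        "Option 'default-artifact-root' is required when the backend store is not "
        "local file based."
    )
```
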
4 changes: 2 additions & 2 deletions mlflow/projects/utils.py
@@ -28,7 +28,7 @@
MLFLOW_PROJECT_ENTRY_POINT,
MLFLOW_PARENT_RUN_ID,
)

from mlflow.utils.rest_utils import augmented_raise_for_status

# TODO: this should be restricted to just Git repos and not S3 and stuff like that
_GIT_URI_REGEX = re.compile(r"^[^/]*:")
@@ -214,7 +214,7 @@ def _fetch_zip_repo(uri):
# https://github.com/mlflow/mlflow/issues/763.
response = requests.get(uri)
try:
response.raise_for_status()
augmented_raise_for_status(response)
except requests.HTTPError as error:
raise ExecutionException("Unable to retrieve ZIP file. Reason: %s" % str(error))
return BytesIO(response.content)
8 changes: 8 additions & 0 deletions mlflow/server/__init__.py
@@ -20,6 +20,8 @@
ARTIFACT_ROOT_ENV_VAR = "_MLFLOW_SERVER_ARTIFACT_ROOT"
ARTIFACTS_DESTINATION_ENV_VAR = "_MLFLOW_SERVER_ARTIFACT_DESTINATION"
PROMETHEUS_EXPORTER_ENV_VAR = "prometheus_multiproc_dir"
SERVE_ARTIFACTS_ENV_VAR = "_MLFLOW_SERVER_SERVE_ARTIFACTS"
ARTIFACTS_ONLY_ENV_VAR = "_MLFLOW_SERVER_ARTIFACTS_ONLY"

REL_STATIC_DIR = "js/build"

@@ -106,6 +108,8 @@ def _build_gunicorn_command(gunicorn_opts, host, port, workers):
def _run_server(
file_store_path,
default_artifact_root,
serve_artifacts,
artifacts_only,
artifacts_destination,
host,
port,
@@ -126,6 +130,10 @@ def _run_server(
env_map[BACKEND_STORE_URI_ENV_VAR] = file_store_path
if default_artifact_root:
env_map[ARTIFACT_ROOT_ENV_VAR] = default_artifact_root
if serve_artifacts:
env_map[SERVE_ARTIFACTS_ENV_VAR] = "true"
if artifacts_only:
env_map[ARTIFACTS_ONLY_ENV_VAR] = "true"
if artifacts_destination:
env_map[ARTIFACTS_DESTINATION_ENV_VAR] = artifacts_destination
if static_prefix:
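
On the other side of the process boundary, the spawned server process has to turn these environment variables back into booleans. That consumption happens in the MLflow server application code, which is not part of the hunks above; a minimal sketch of the idea, assuming only what this hunk shows (the variable names and the literal `"true"` value), would be:

```python
import os

SERVE_ARTIFACTS_ENV_VAR = "_MLFLOW_SERVER_SERVE_ARTIFACTS"
ARTIFACTS_ONLY_ENV_VAR = "_MLFLOW_SERVER_ARTIFACTS_ONLY"


def _flag_enabled(env_var_name: str) -> bool:
    # _run_server sets these variables to the literal string "true" when the
    # corresponding CLI flag is passed; absence means the feature is off.
    return os.environ.get(env_var_name, "false").lower() == "true"


serve_artifacts = _flag_enabled(SERVE_ARTIFACTS_ENV_VAR)
artifacts_only = _flag_enabled(ARTIFACTS_ONLY_ENV_VAR)
```

With `artifacts_only` set, the application would register only the artifact upload/download/list endpoints, which matches the single-purpose proxy behavior described in the README note above.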
