Add server option for serving only artifacts and proxied serving mode (#5045)

* Add --serve-artifacts and --artifacts-only options to mlflow server

Signed-off-by: Ben Wilson <benjamin.wilson@databricks.com>
BenWilson2 committed Dec 1, 2021
1 parent 85ae465 commit fb2972f
Showing 19 changed files with 326 additions and 72 deletions.
3 changes: 3 additions & 0 deletions examples/mlflow_artifacts/README.md
@@ -16,16 +16,19 @@ First, launch the tracking server with the artifacts service via `mlflow server`
```sh
# Launch a tracking server with the artifacts service
$ mlflow server \
--serve-artifacts \
--artifacts-destination ./mlartifacts \
--default-artifact-root http://localhost:5000/api/2.0/mlflow-artifacts/artifacts/experiments \
--gunicorn-opts "--log-level debug"
```

Notes:

- `--serve-artifacts` enables the MLflow Artifacts service endpoints so that artifacts are served by proxy through the tracking server's REST API
- `--artifacts-destination` specifies the base artifact location from which to resolve artifact upload/download/list requests. In this example, we're using a local directory `./mlartifacts`, but it can be changed to an S3 bucket or other remote storage supported by MLflow.
- `--default-artifact-root` points to the `experiments` directory of the artifacts service. Therefore, the default artifact location of a newly-created experiment is set to `./mlartifacts/experiments/<experiment_id>`.
- `--gunicorn-opts "--log-level debug"` is specified to print out request logs but can be omitted if unnecessary.
- `--artifacts-only` disables all other endpoints for the tracking server apart from those involved in listing, uploading, and downloading artifacts. This makes the MLflow server a single-purpose proxy for artifact handling only.
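
For a quick sanity check of this configuration, the snippet below is a minimal sketch (not part of this example project) of a client uploading and listing an artifact through the proxied endpoints. It assumes the server launched above is reachable at `http://localhost:5000` and that the `mlflow` client package is installed:

```python
import os

import mlflow
from mlflow.tracking import MlflowClient

# Point the client at the tracking server started above; with --serve-artifacts,
# artifact bytes flow through the server's mlflow-artifacts REST endpoints rather
# than going directly to the storage backend.
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:5000"))

with mlflow.start_run() as run:
    with open("hello.txt", "w") as f:
        f.write("hello, proxied artifacts")
    mlflow.log_artifact("hello.txt")  # uploaded via the tracking server proxy

# Listing goes through the same proxied API; on the server side the file lands
# under ./mlartifacts/experiments/<experiment_id>/<run_id>/artifacts/.
client = MlflowClient()
print([info.path for info in client.list_artifacts(run.info.run_id)])
```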

Then, run `example.py` that performs upload, download, and list operations for artifacts:

2 changes: 2 additions & 0 deletions examples/mlflow_artifacts/docker-compose.yml
@@ -54,6 +54,8 @@ services:
--port 5500
--artifacts-destination s3://bucket
--gunicorn-opts "--log-level debug"
--serve-artifacts
--artifacts-only
postgres:
image: postgres
4 changes: 4 additions & 0 deletions examples/mlflow_artifacts/example.py
@@ -10,6 +10,10 @@ def save_text(path, text):
f.write(text)


# NOTE: ensure the tracking server has been started with --serve-artifacts to enable
# MLflow artifact serving functionality.


def main():
assert "MLFLOW_TRACKING_URI" in os.environ

4 changes: 2 additions & 2 deletions mlflow/azure/client.py
@@ -38,7 +38,7 @@ def put_block(sas_url, block_id, data, headers):
with rest_utils.cloud_storage_http_request(
"put", request_url, data=data, headers=request_headers
) as response:
response.raise_for_status()
rest_utils.augmented_raise_for_status(response)


def put_block_list(sas_url, block_list, headers):
@@ -66,7 +66,7 @@ def put_block_list(sas_url, block_list, headers):
with rest_utils.cloud_storage_http_request(
"put", request_url, data=data, headers=request_headers
) as response:
response.raise_for_status()
rest_utils.augmented_raise_for_status(response)


def _append_query_parameters(url, parameters):
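
Both hunks in this file (and a similar one in `mlflow/projects/utils.py` further down) swap `response.raise_for_status()` for `rest_utils.augmented_raise_for_status(response)`. The helper's body is not part of the hunks shown in this commit; as a rough, purely illustrative sketch of what such a wrapper typically does (details assumed, not taken from this diff):

```python
import requests


def augmented_raise_for_status(response: requests.Response) -> None:
    """Illustrative sketch only: re-raise HTTP errors with the response body attached,
    so server-side error details are not lost (plain raise_for_status() drops them)."""
    try:
        response.raise_for_status()
    except requests.HTTPError as e:
        raise requests.HTTPError(
            f"{e}. Response text: {response.text[:500]}", response=response
        ) from e
```
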
77 changes: 51 additions & 26 deletions mlflow/cli.py
@@ -13,14 +13,14 @@
import mlflow.runs
import mlflow.store.artifact.cli
from mlflow import tracking
from mlflow.store.tracking import DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH
from mlflow.store.tracking import DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH, DEFAULT_ARTIFACTS_URI
from mlflow.store.artifact.artifact_repository_registry import get_artifact_repository
from mlflow.tracking import _get_store
from mlflow.utils import cli_args
from mlflow.utils.annotations import experimental
from mlflow.utils.logging_utils import eprint
from mlflow.utils.process import ShellCommandException
from mlflow.utils.uri import is_local_uri
from mlflow.utils.uri import resolve_default_artifact_root
from mlflow.entities.lifecycle_stage import LifecycleStage
from mlflow.exceptions import MlflowException

@@ -233,20 +233,27 @@ def _validate_server_args(gunicorn_opts=None, workers=None, waitress_opts=None):
"SQLAlchemy-compatible database connection strings "
"(e.g. 'sqlite:///path/to/file.db') or local filesystem URIs "
"(e.g. 'file:///absolute/path/to/directory'). By default, data will be logged "
"to the ./mlruns directory.",
f"to {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}",
)
@click.option(
"--default-artifact-root",
metavar="URI",
default=None,
help="Path to local directory to store artifacts, for new experiments. "
"Note that this flag does not impact already-created experiments. "
"Default: " + DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH,
help="Directory in which to store artifacts for any new experiments created. For tracking "
"server backends that rely on SQL, this option is required in order to store artifacts. "
"Note that this flag does not impact already-created experiments with any previous "
"configuration of an MLflow server instance. "
"If the --serve-artifacts option is specified, the default artifact root is "
f"{DEFAULT_ARTIFACTS_URI}. Otherwise, the default artifact root is "
f"{DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}.",
)
@cli_args.SERVE_ARTIFACTS
@cli_args.ARTIFACTS_DESTINATION
@cli_args.PORT
@cli_args.HOST
def ui(backend_store_uri, default_artifact_root, artifacts_destination, port, host):
def ui(
backend_store_uri, default_artifact_root, serve_artifacts, artifacts_destination, port, host
):
"""
Launch the MLflow tracking UI for local viewing of run results. To launch a production
server, use the "mlflow server" command instead.
@@ -263,11 +270,9 @@ def ui(backend_store_uri, default_artifact_root, artifacts_destination, port, ho
if not backend_store_uri:
backend_store_uri = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH

if not default_artifact_root:
if is_local_uri(backend_store_uri):
default_artifact_root = backend_store_uri
else:
default_artifact_root = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH
default_artifact_root = resolve_default_artifact_root(
serve_artifacts, default_artifact_root, backend_store_uri, resolve_to_local=True
)

try:
initialize_backend_stores(backend_store_uri, default_artifact_root)
@@ -279,7 +284,15 @@ def ui(backend_store_uri, default_artifact_root, artifacts_destination, port, ho
# TODO: We eventually want to disable the write path in this version of the server.
try:
_run_server(
backend_store_uri, default_artifact_root, artifacts_destination, host, port, None, 1
backend_store_uri,
default_artifact_root,
serve_artifacts,
False,
artifacts_destination,
host,
port,
None,
1,
)
except ShellCommandException:
eprint("Running the mlflow server failed. Please see the logs above for details.")
@@ -315,10 +328,24 @@ def _validate_static_prefix(ctx, param, value):  # pylint: disable=unused-argume
"--default-artifact-root",
metavar="URI",
default=None,
help="Local or S3 URI to store artifacts, for new experiments. "
"Note that this flag does not impact already-created experiments. "
"Default: Within file store, if a file:/ URI is provided. If a sql backend is"
" used, then this option is required.",
help="Directory in which to store artifacts for any new experiments created. For tracking "
"server backends that rely on SQL, this option is required in order to store artifacts. "
"Note that this flag does not impact already-created experiments with any previous "
"configuration of an MLflow server instance. "
f"By default, data will be logged to the {DEFAULT_ARTIFACTS_URI} uri proxy if "
"the --serve-artifacts option is enabled. Otherwise, the default location will "
f"be {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}.",
)
@cli_args.SERVE_ARTIFACTS
@click.option(
"--artifacts-only",
is_flag=True,
default=False,
help="If specified, configures the mlflow server to be used only for proxied artifact serving. "
"With this mode enabled, functionality of the mlflow tracking service (e.g. run creation, "
"metric logging, and parameter logging) is disabled. The server will only expose "
"endpoints for uploading, downloading, and listing artifacts. "
"Default: False",
)
@cli_args.ARTIFACTS_DESTINATION
@cli_args.HOST
@@ -348,6 +375,8 @@ def _validate_static_prefix(ctx, param, value):  # pylint: disable=unused-argume
def server(
backend_store_uri,
default_artifact_root,
serve_artifacts,
artifacts_only,
artifacts_destination,
host,
port,
@@ -374,15 +403,9 @@ def server(
if not backend_store_uri:
backend_store_uri = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH

if not default_artifact_root:
if is_local_uri(backend_store_uri):
default_artifact_root = backend_store_uri
else:
eprint(
"Option 'default-artifact-root' is required, when backend store is not "
"local file based."
)
sys.exit(1)
default_artifact_root = resolve_default_artifact_root(
serve_artifacts, default_artifact_root, backend_store_uri
)

try:
initialize_backend_stores(backend_store_uri, default_artifact_root)
@@ -395,6 +418,8 @@ def server(
_run_server(
backend_store_uri,
default_artifact_root,
serve_artifacts,
artifacts_only,
artifacts_destination,
host,
port,
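
The inline default-artifact-root logic that previously lived in both `ui()` and `server()` is now centralized in `mlflow.utils.uri.resolve_default_artifact_root`, whose body is not shown in this diff. Pieced together from the removed code and the new help text, its decision rules look roughly like the sketch below (the constant values and the error path are assumptions, not taken from this commit):

```python
import sys

from mlflow.utils.uri import is_local_uri

# Assumed values for illustration; the real constants live in mlflow.store.tracking.
DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH = "./mlruns"
DEFAULT_ARTIFACTS_URI = "mlflow-artifacts:/"


def resolve_default_artifact_root_sketch(
    serve_artifacts, default_artifact_root, backend_store_uri, resolve_to_local=False
):
    if default_artifact_root:
        # An explicit --default-artifact-root always wins.
        return default_artifact_root
    if serve_artifacts:
        # Proxied serving: clients resolve artifacts through the tracking server.
        return DEFAULT_ARTIFACTS_URI
    if is_local_uri(backend_store_uri):
        # A file-based backend store doubles as the artifact root.
        return backend_store_uri
    if resolve_to_local:
        # `mlflow ui` falls back to the local default rather than erroring out.
        return DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH
    # `mlflow server` with a SQL backend still requires the option explicitly.
    sys.exit(
        "Option 'default-artifact-root' is required when the backend store is not "
        "local file based."
    )
```
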
4 changes: 2 additions & 2 deletions mlflow/projects/utils.py
@@ -28,7 +28,7 @@
MLFLOW_PROJECT_ENTRY_POINT,
MLFLOW_PARENT_RUN_ID,
)

from mlflow.utils.rest_utils import augmented_raise_for_status

# TODO: this should be restricted to just Git repos and not S3 and stuff like that
_GIT_URI_REGEX = re.compile(r"^[^/]*:")
@@ -214,7 +214,7 @@ def _fetch_zip_repo(uri):
# https://github.com/mlflow/mlflow/issues/763.
response = requests.get(uri)
try:
response.raise_for_status()
augmented_raise_for_status(response)
except requests.HTTPError as error:
raise ExecutionException("Unable to retrieve ZIP file. Reason: %s" % str(error))
return BytesIO(response.content)
8 changes: 8 additions & 0 deletions mlflow/server/__init__.py
@@ -20,6 +20,8 @@
ARTIFACT_ROOT_ENV_VAR = "_MLFLOW_SERVER_ARTIFACT_ROOT"
ARTIFACTS_DESTINATION_ENV_VAR = "_MLFLOW_SERVER_ARTIFACT_DESTINATION"
PROMETHEUS_EXPORTER_ENV_VAR = "prometheus_multiproc_dir"
SERVE_ARTIFACTS_ENV_VAR = "_MLFLOW_SERVER_SERVE_ARTIFACTS"
ARTIFACTS_ONLY_ENV_VAR = "_MLFLOW_SERVER_ARTIFACTS_ONLY"

REL_STATIC_DIR = "js/build"

@@ -106,6 +108,8 @@ def _build_gunicorn_command(gunicorn_opts, host, port, workers):
def _run_server(
file_store_path,
default_artifact_root,
serve_artifacts,
artifacts_only,
artifacts_destination,
host,
port,
@@ -126,6 +130,10 @@ def _run_server(
env_map[BACKEND_STORE_URI_ENV_VAR] = file_store_path
if default_artifact_root:
env_map[ARTIFACT_ROOT_ENV_VAR] = default_artifact_root
if serve_artifacts:
env_map[SERVE_ARTIFACTS_ENV_VAR] = "true"
if artifacts_only:
env_map[ARTIFACTS_ONLY_ENV_VAR] = "true"
if artifacts_destination:
env_map[ARTIFACTS_DESTINATION_ENV_VAR] = artifacts_destination
if static_prefix:
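
On the other side of the process boundary, the spawned server process has to turn these environment variables back into booleans. That consumption happens in the MLflow server application code, which is not part of the hunks above; a minimal sketch of the idea, assuming only what this hunk shows (the variable names and the literal `"true"` value), would be:

```python
import os

SERVE_ARTIFACTS_ENV_VAR = "_MLFLOW_SERVER_SERVE_ARTIFACTS"
ARTIFACTS_ONLY_ENV_VAR = "_MLFLOW_SERVER_ARTIFACTS_ONLY"


def _flag_enabled(env_var_name: str) -> bool:
    # _run_server sets these variables to the literal string "true" when the
    # corresponding CLI flag is passed; absence means the feature is off.
    return os.environ.get(env_var_name, "false").lower() == "true"


serve_artifacts = _flag_enabled(SERVE_ARTIFACTS_ENV_VAR)
artifacts_only = _flag_enabled(ARTIFACTS_ONLY_ENV_VAR)
```

With `artifacts_only` set, the application would register only the artifact upload/download/list endpoints, which matches the single-purpose proxy behavior described in the README note above.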
