Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add server option for serving only artifacts and proxied serving mode #5045

Merged
merged 22 commits into from Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
69a26b8
Add --serve-artifacts-opt and --artifacts-only options to mlflow server
BenWilson2 Nov 10, 2021
a649b6f
Merge branch 'master' of https://github.com/mlflow/mlflow into suppor…
BenWilson2 Nov 10, 2021
c44e62a
Update examples to show cli arguments in MLflow Artifacts
BenWilson2 Nov 10, 2021
74fa204
Update examples to turn on the REST API endpoints
BenWilson2 Nov 10, 2021
d1b986d
linting
BenWilson2 Nov 10, 2021
7dbd469
PR changes and adjust test for serve-artifact option flag
BenWilson2 Nov 11, 2021
4a0053a
Merge branch 'master' of https://github.com/mlflow/mlflow into suppor…
BenWilson2 Nov 11, 2021
e0a02fb
Merge branch 'master' of https://github.com/mlflow/mlflow into suppor…
BenWilson2 Nov 11, 2021
d6b2f32
PR changes
BenWilson2 Nov 11, 2021
b0987a5
PR updates
BenWilson2 Nov 11, 2021
45b9498
PR feedback changes
BenWilson2 Nov 12, 2021
3f0cbb0
lint
BenWilson2 Nov 12, 2021
05899d9
Add ui server support for proxied artifacts and update exception mess…
BenWilson2 Nov 15, 2021
d83c7a3
Rebase and discard formatting commits
BenWilson2 Nov 25, 2021
6d5641c
typos
BenWilson2 Nov 29, 2021
1370034
Merge branch 'master' of https://github.com/mlflow/mlflow into suppor…
BenWilson2 Nov 29, 2021
8fffe43
fix parsing of uri for trailing slash
BenWilson2 Nov 29, 2021
374e209
Test complexity reduction
BenWilson2 Nov 30, 2021
de2e78c
tracking uri validation checks and test simplification
BenWilson2 Nov 30, 2021
d530c50
Parameterize the uri resolution tests to aid in test debugging
BenWilson2 Nov 30, 2021
5db552b
Cleaner test syntax
BenWilson2 Nov 30, 2021
85d191c
Add and adjust pydoc strings
BenWilson2 Nov 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions examples/mlflow_artifacts/README.md
Expand Up @@ -16,16 +16,19 @@ First, launch the tracking server with the artifacts service via `mlflow server`
```sh
# Launch a tracking server with the artifacts service
$ mlflow server \
--serve-artifacts \
--artifacts-destination ./mlartifacts \
--default-artifact-root http://localhost:5000/api/2.0/mlflow-artifacts/artifacts/experiments \
--gunicorn-opts "--log-level debug"
```

Notes:

- `--serve-artifacts` enables the MLflow Artifacts service endpoints, so that artifacts are served by proxy through the REST API.
- `--artifacts-destination` specifies the base artifact location from which to resolve artifact upload/download/list requests. In this example, we're using a local directory `./mlartifacts`, but it can be changed to an S3 bucket or another supported artifact storage location.
- `--default-artifact-root` points to the `experiments` directory of the artifacts service. Therefore, the default artifact location of a newly-created experiment is set to `./mlartifacts/experiments/<experiment_id>`.
- `--gunicorn-opts "--log-level debug"` is specified to print out request logs but can be omitted if unnecessary.
- `--artifacts-only` disables all other endpoints for the tracking server apart from those involved in listing, uploading, and downloading artifacts. This makes the MLflow server a single-purpose proxy for artifact handling only.

Then, run `example.py` that performs upload, download, and list operations for artifacts:

Expand Down
2 changes: 2 additions & 0 deletions examples/mlflow_artifacts/docker-compose.yml
Expand Up @@ -54,6 +54,8 @@ services:
--port 5500
--artifacts-destination s3://bucket
--gunicorn-opts "--log-level debug"
--serve-artifacts
harupy marked this conversation as resolved.
Show resolved Hide resolved
--artifacts-only

postgres:
image: postgres
Expand Down
4 changes: 4 additions & 0 deletions examples/mlflow_artifacts/example.py
Expand Up @@ -10,6 +10,10 @@ def save_text(path, text):
f.write(text)


# NOTE: ensure the tracking server has been started with --serve-artifacts to enable
# MLflow artifact serving functionality.
harupy marked this conversation as resolved.
Show resolved Hide resolved


def main():
assert "MLFLOW_TRACKING_URI" in os.environ

Expand Down
4 changes: 2 additions & 2 deletions mlflow/azure/client.py
Expand Up @@ -38,7 +38,7 @@ def put_block(sas_url, block_id, data, headers):
with rest_utils.cloud_storage_http_request(
"put", request_url, data=data, headers=request_headers
) as response:
response.raise_for_status()
rest_utils.augmented_raise_for_status(response)


def put_block_list(sas_url, block_list, headers):
Expand Down Expand Up @@ -66,7 +66,7 @@ def put_block_list(sas_url, block_list, headers):
with rest_utils.cloud_storage_http_request(
"put", request_url, data=data, headers=request_headers
) as response:
response.raise_for_status()
rest_utils.augmented_raise_for_status(response)


def _append_query_parameters(url, parameters):
Expand Down
77 changes: 51 additions & 26 deletions mlflow/cli.py
Expand Up @@ -13,14 +13,14 @@
import mlflow.runs
import mlflow.store.artifact.cli
from mlflow import tracking
from mlflow.store.tracking import DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH
from mlflow.store.tracking import DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH, DEFAULT_ARTIFACTS_URI
from mlflow.store.artifact.artifact_repository_registry import get_artifact_repository
from mlflow.tracking import _get_store
from mlflow.utils import cli_args
from mlflow.utils.annotations import experimental
from mlflow.utils.logging_utils import eprint
from mlflow.utils.process import ShellCommandException
from mlflow.utils.uri import is_local_uri
from mlflow.utils.uri import resolve_default_artifact_root
from mlflow.entities.lifecycle_stage import LifecycleStage
from mlflow.exceptions import MlflowException

Expand Down Expand Up @@ -233,20 +233,27 @@ def _validate_server_args(gunicorn_opts=None, workers=None, waitress_opts=None):
"SQLAlchemy-compatible database connection strings "
"(e.g. 'sqlite:///path/to/file.db') or local filesystem URIs "
"(e.g. 'file:///absolute/path/to/directory'). By default, data will be logged "
"to the ./mlruns directory.",
f"to {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}",
)
@click.option(
"--default-artifact-root",
metavar="URI",
default=None,
help="Path to local directory to store artifacts, for new experiments. "
"Note that this flag does not impact already-created experiments. "
"Default: " + DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH,
help="Directory in which to store artifacts for any new experiments created. For tracking "
"server backends that rely on SQL, this option is required in order to store artifacts. "
"Note that this flag does not impact already-created experiments with any previous "
"configuration of an MLflow server instance. "
f"By default, data will be logged to the {DEFAULT_ARTIFACTS_URI} uri proxy if "
"the --serve-artifacts option is enabled. Otherwise, the default location will "
f"be {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}.",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
f"By default, data will be logged to the {DEFAULT_ARTIFACTS_URI} uri proxy if "
"the --serve-artifacts option is enabled. Otherwise, the default location will "
f"be {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}.",
f"If the --serve-artifacts option is specified, the default artifact root is {DEFAULT_ARTIFACTS_URI}. "
f"Otherwise, the default artifact root is {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}.",

)
@cli_args.SERVE_ARTIFACTS
@cli_args.ARTIFACTS_DESTINATION
@cli_args.PORT
@cli_args.HOST
def ui(backend_store_uri, default_artifact_root, artifacts_destination, port, host):
def ui(
backend_store_uri, default_artifact_root, serve_artifacts, artifacts_destination, port, host
):
"""
Launch the MLflow tracking UI for local viewing of run results. To launch a production
server, use the "mlflow server" command instead.
Expand All @@ -263,11 +270,9 @@ def ui(backend_store_uri, default_artifact_root, artifacts_destination, port, ho
if not backend_store_uri:
backend_store_uri = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH

if not default_artifact_root:
if is_local_uri(backend_store_uri):
default_artifact_root = backend_store_uri
else:
default_artifact_root = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH
default_artifact_root = resolve_default_artifact_root(
serve_artifacts, default_artifact_root, backend_store_uri, resolve_to_local=True
)

try:
initialize_backend_stores(backend_store_uri, default_artifact_root)
Expand All @@ -279,7 +284,15 @@ def ui(backend_store_uri, default_artifact_root, artifacts_destination, port, ho
# TODO: We eventually want to disable the write path in this version of the server.
try:
_run_server(
backend_store_uri, default_artifact_root, artifacts_destination, host, port, None, 1
backend_store_uri,
default_artifact_root,
serve_artifacts,
False,
artifacts_destination,
host,
port,
None,
1,
)
except ShellCommandException:
eprint("Running the mlflow server failed. Please see the logs above for details.")
Expand Down Expand Up @@ -315,10 +328,24 @@ def _validate_static_prefix(ctx, param, value): # pylint: disable=unused-argume
"--default-artifact-root",
metavar="URI",
default=None,
help="Local or S3 URI to store artifacts, for new experiments. "
"Note that this flag does not impact already-created experiments. "
"Default: Within file store, if a file:/ URI is provided. If a sql backend is"
" used, then this option is required.",
help="Directory in which to store artifacts for any new experiments created. For tracking "
"server backends that rely on SQL, this option is required in order to store artifacts. "
"Note that this flag does not impact already-created experiments with any previous "
"configuration of an MLflow server instance. "
f"By default, data will be logged to the {DEFAULT_ARTIFACTS_URI} uri proxy if "
"the --serve-artifacts option is enabled. Otherwise, the default location will "
f"be {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}.",
)
@cli_args.SERVE_ARTIFACTS
@click.option(
"--artifacts-only",
is_flag=True,
default=False,
help="If specified, configures the mlflow server to be used only for proxied artifact serving. "
"With this mode enabled, functionality of the mlflow tracking service (e.g. run creation, "
"metric logging, and parameter logging) is disabled. The server will only expose "
"endpoints for uploading, downloading, and listing artifacts. "
"Default: False",
)
@cli_args.ARTIFACTS_DESTINATION
@cli_args.HOST
Expand Down Expand Up @@ -348,6 +375,8 @@ def _validate_static_prefix(ctx, param, value): # pylint: disable=unused-argume
def server(
backend_store_uri,
default_artifact_root,
serve_artifacts,
artifacts_only,
artifacts_destination,
host,
port,
Expand All @@ -374,15 +403,9 @@ def server(
if not backend_store_uri:
backend_store_uri = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH

if not default_artifact_root:
if is_local_uri(backend_store_uri):
default_artifact_root = backend_store_uri
else:
eprint(
"Option 'default-artifact-root' is required, when backend store is not "
"local file based."
)
sys.exit(1)
default_artifact_root = resolve_default_artifact_root(
serve_artifacts, default_artifact_root, backend_store_uri
)

try:
initialize_backend_stores(backend_store_uri, default_artifact_root)
Expand All @@ -395,6 +418,8 @@ def server(
_run_server(
backend_store_uri,
default_artifact_root,
serve_artifacts,
artifacts_only,
artifacts_destination,
host,
port,
Expand Down
4 changes: 2 additions & 2 deletions mlflow/projects/utils.py
Expand Up @@ -28,7 +28,7 @@
MLFLOW_PROJECT_ENTRY_POINT,
MLFLOW_PARENT_RUN_ID,
)

from mlflow.utils.rest_utils import augmented_raise_for_status

# TODO: this should be restricted to just Git repos and not S3 and stuff like that
_GIT_URI_REGEX = re.compile(r"^[^/]*:")
Expand Down Expand Up @@ -209,7 +209,7 @@ def _fetch_zip_repo(uri):
# https://github.com/mlflow/mlflow/issues/763.
response = requests.get(uri)
try:
response.raise_for_status()
augmented_raise_for_status(response)
except requests.HTTPError as error:
raise ExecutionException("Unable to retrieve ZIP file. Reason: %s" % str(error))
return BytesIO(response.content)
Expand Down
8 changes: 8 additions & 0 deletions mlflow/server/__init__.py
Expand Up @@ -20,6 +20,8 @@
ARTIFACT_ROOT_ENV_VAR = "_MLFLOW_SERVER_ARTIFACT_ROOT"
ARTIFACTS_DESTINATION_ENV_VAR = "_MLFLOW_SERVER_ARTIFACT_DESTINATION"
PROMETHEUS_EXPORTER_ENV_VAR = "prometheus_multiproc_dir"
SERVE_ARTIFACTS_ENV_VAR = "_MLFLOW_SERVER_SERVE_ARTIFACTS"
ARTIFACTS_ONLY_ENV_VAR = "_MLFLOW_SERVER_ARTIFACTS_ONLY"

REL_STATIC_DIR = "js/build"

Expand Down Expand Up @@ -106,6 +108,8 @@ def _build_gunicorn_command(gunicorn_opts, host, port, workers):
def _run_server(
file_store_path,
default_artifact_root,
serve_artifacts,
artifacts_only,
artifacts_destination,
host,
port,
Expand All @@ -126,6 +130,10 @@ def _run_server(
env_map[BACKEND_STORE_URI_ENV_VAR] = file_store_path
if default_artifact_root:
env_map[ARTIFACT_ROOT_ENV_VAR] = default_artifact_root
if serve_artifacts:
env_map[SERVE_ARTIFACTS_ENV_VAR] = "true"
if artifacts_only:
env_map[ARTIFACTS_ONLY_ENV_VAR] = "true"
if artifacts_destination:
env_map[ARTIFACTS_DESTINATION_ENV_VAR] = artifacts_destination
if static_prefix:
Expand Down