From d83c7a32b1c928845b2e2dfb69e39c64cb5a9122 Mon Sep 17 00:00:00 2001 From: Ben Wilson Date: Wed, 24 Nov 2021 19:35:56 -0500 Subject: [PATCH] Rebase and discard formatting commits Signed-off-by: Ben Wilson --- mlflow/cli.py | 50 +++++++++++++++---------------- mlflow/store/tracking/__init__.py | 1 + mlflow/utils/cli_args.py | 1 + mlflow/utils/uri.py | 22 ++++++++++++++ 4 files changed, 49 insertions(+), 25 deletions(-) diff --git a/mlflow/cli.py b/mlflow/cli.py index f2ebf8ad6c9c9..30dfef1066cc3 100644 --- a/mlflow/cli.py +++ b/mlflow/cli.py @@ -13,14 +13,14 @@ import mlflow.runs import mlflow.store.artifact.cli from mlflow import tracking -from mlflow.store.tracking import DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH +from mlflow.store.tracking import DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH, DEFAULT_ARTIFACTS_URI from mlflow.store.artifact.artifact_repository_registry import get_artifact_repository from mlflow.tracking import _get_store from mlflow.utils import cli_args from mlflow.utils.annotations import experimental from mlflow.utils.logging_utils import eprint from mlflow.utils.process import ShellCommandException -from mlflow.utils.uri import is_local_uri +from mlflow.utils.uri import resolve_default_artifact_root from mlflow.entities.lifecycle_stage import LifecycleStage from mlflow.exceptions import MlflowException @@ -233,15 +233,19 @@ def _validate_server_args(gunicorn_opts=None, workers=None, waitress_opts=None): "SQLAlchemy-compatible database connection strings " "(e.g. 'sqlite:///path/to/file.db') or local filesystem URIs " "(e.g. 'file:///absolute/path/to/directory'). By default, data will be logged " - "to the ./mlruns directory.", + f"to {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}", ) @click.option( "--default-artifact-root", metavar="URI", default=None, - help="Path to local directory to store artifacts, for new experiments. " - "Note that this flag does not impact already-created experiments. " - "Default: " + DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH, + help="Directory in which to store artifacts for any new experiments created. For tracking " + "server backends that rely on SQL, this option is required in order to store artifacts. " + "Note that this flag does not impact already-created experiments with any previous " + "configuration of an MLflow server instance. " + f"By default, data will be logged to the {DEFAULT_ARTIFACTS_URI} uri proxy if " + "the --serve-artifacts option is enabled. Otherwise, the default location will " + f"be {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}.", ) @cli_args.SERVE_ARTIFACTS @cli_args.ARTIFACTS_DESTINATION @@ -266,11 +270,9 @@ def ui( if not backend_store_uri: backend_store_uri = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH - if not default_artifact_root: - if is_local_uri(backend_store_uri): - default_artifact_root = backend_store_uri - else: - default_artifact_root = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH + default_artifact_root = resolve_default_artifact_root( + serve_artifacts, default_artifact_root, backend_store_uri, True + ) try: initialize_backend_stores(backend_store_uri, default_artifact_root) @@ -326,18 +328,22 @@ def _validate_static_prefix(ctx, param, value): # pylint: disable=unused-argume "--default-artifact-root", metavar="URI", default=None, - help="Local or S3 URI to store artifacts, for new experiments. " - "Note that this flag does not impact already-created experiments. " - "Default: Within file store, if a file:/ URI is provided. If a sql backend is" - " used, then this option is required.", + help="Directory in which to store artifacts for any new experiments created. For tracking " + "server backends that rely on SQL, this option is required in order to store artifacts. " + "Note that this flag does not impact already-created experiments with any previous " + "configuration of an MLflow server instance. " + f"By default, data will be logged to the {DEFAULT_ARTIFACTS_URI} uri proxy if " + "the --serve-artifacts option is enabled. Otherwise, the default location will " + f"be {DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH}.", ) @cli_args.SERVE_ARTIFACTS @click.option( "--artifacts-only", is_flag=True, + default=False, help="If specified, configures the mlflow server to be used only for proxied artifact serving. " "With this mode enabled, functionality of the mlflow tracking service (e.g. run creation, " - "metric logging, and parameter logging are disabled. The server will only expose " + "metric logging, and parameter logging) is disabled. The server will only expose " "endpoints for uploading, downloading, and listing artifacts. " "Default: False", ) @@ -397,15 +403,9 @@ def server( if not backend_store_uri: backend_store_uri = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH - if not default_artifact_root: - if is_local_uri(backend_store_uri): - default_artifact_root = backend_store_uri - else: - eprint( - "Option 'default-artifact-root' is required, when backend store is not " - "local file based." - ) - sys.exit(1) + default_artifact_root = resolve_default_artifact_root( + serve_artifacts, default_artifact_root, backend_store_uri + ) try: initialize_backend_stores(backend_store_uri, default_artifact_root) diff --git a/mlflow/store/tracking/__init__.py b/mlflow/store/tracking/__init__.py index 889f5d7c43ec1..6127275bb2872 100644 --- a/mlflow/store/tracking/__init__.py +++ b/mlflow/store/tracking/__init__.py @@ -10,5 +10,6 @@ # Also used as default location for artifacts, when not provided, in non local file based backends # (eg MySQL) DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH = "./mlruns" +DEFAULT_ARTIFACTS_URI = "mlflow-artifacts:/" SEARCH_MAX_RESULTS_DEFAULT = 1000 SEARCH_MAX_RESULTS_THRESHOLD = 50000 diff --git a/mlflow/utils/cli_args.py b/mlflow/utils/cli_args.py index a24c962968435..30dc1fc793f8c 100644 --- a/mlflow/utils/cli_args.py +++ b/mlflow/utils/cli_args.py @@ -103,6 +103,7 @@ SERVE_ARTIFACTS = click.option( "--serve-artifacts", is_flag=True, + default=False, help="If specified, enables serving of artifact uploads, downloads, and list requests " "by routing these requests to the storage location that is specified by " "'--artifact-destination' directly through a proxy. The default location that " diff --git a/mlflow/utils/uri.py b/mlflow/utils/uri.py index 808585b74e7ae..55e346d1144f9 100644 --- a/mlflow/utils/uri.py +++ b/mlflow/utils/uri.py @@ -1,10 +1,13 @@ +import sys import posixpath import urllib.parse from mlflow.exceptions import MlflowException from mlflow.protos.databricks_pb2 import INVALID_PARAMETER_VALUE from mlflow.store.db.db_types import DATABASE_ENGINES +from mlflow.store.tracking import DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH, DEFAULT_ARTIFACTS_URI from mlflow.utils.validation import _validate_db_type_string +from mlflow.utils.logging_utils import eprint _INVALID_DB_URI_MSG = ( "Please refer to https://mlflow.org/docs/latest/tracking.html#storage for " @@ -294,3 +297,22 @@ def dbfs_hdfs_uri_to_fuse_path(dbfs_uri): ) return _DBFS_FUSE_PREFIX + dbfs_uri[len(_DBFS_HDFS_URI_PREFIX) :] + + +def resolve_default_artifact_root( + serve_artifacts, default_artifact_root, backend_store_uri, resolve_to_local=False +): + if serve_artifacts and not default_artifact_root: + default_artifact_root = DEFAULT_ARTIFACTS_URI + elif not serve_artifacts and not default_artifact_root: + if is_local_uri(backend_store_uri): + default_artifact_root = backend_store_uri + elif resolve_to_local: + default_artifact_root = DEFAULT_LOCAL_FILE_AND_ARTIFACT_PATH + else: + eprint( + "Option 'default-artifact-root' is required, when backend store is not " + "local file based." + ) + sys.exit(1) + return default_artifact_root