From fd77ae6e59c84c71d67f9a7513aeed8baca73896 Mon Sep 17 00:00:00 2001 From: Liang Zhang Date: Wed, 15 Dec 2021 16:44:32 -0800 Subject: [PATCH 1/5] init Signed-off-by: Liang Zhang --- mlflow/tracking/context/databricks_job_context.py | 6 ++++++ mlflow/utils/mlflow_tags.py | 2 ++ 2 files changed, 8 insertions(+) diff --git a/mlflow/tracking/context/databricks_job_context.py b/mlflow/tracking/context/databricks_job_context.py index 99d8664ef920c..02440c370974f 100644 --- a/mlflow/tracking/context/databricks_job_context.py +++ b/mlflow/tracking/context/databricks_job_context.py @@ -8,6 +8,8 @@ MLFLOW_DATABRICKS_JOB_ID, MLFLOW_DATABRICKS_JOB_RUN_ID, MLFLOW_DATABRICKS_JOB_TYPE, + MLFLOW_DATABRICKS_WORKSPACE_URL, + MLFLOW_DATABRICKS_WORKSPACE_ID, ) @@ -20,6 +22,7 @@ def tags(self): job_run_id = databricks_utils.get_job_run_id() job_type = databricks_utils.get_job_type() webapp_url = databricks_utils.get_webapp_url() + workspace_url, workspace_id = databricks_utils.get_workspace_info_from_dbutils() tags = { MLFLOW_SOURCE_NAME: ( "jobs/{job_id}/run/{job_run_id}".format(job_id=job_id, job_run_id=job_run_id) @@ -36,4 +39,7 @@ def tags(self): tags[MLFLOW_DATABRICKS_JOB_TYPE] = job_type if webapp_url is not None: tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url + if workspace_url is not None: + tags[MLFLOW_DATABRICKS_WORKSPACE_URL] = workspace_url + tags[MLFLOW_DATABRICKS_WORKSPACE_ID] = workspace_id return tags diff --git a/mlflow/utils/mlflow_tags.py b/mlflow/utils/mlflow_tags.py index f61b0bad9b163..e89cea93d90f0 100644 --- a/mlflow/utils/mlflow_tags.py +++ b/mlflow/utils/mlflow_tags.py @@ -27,6 +27,8 @@ MLFLOW_DATABRICKS_WEBAPP_URL = "mlflow.databricks.webappURL" MLFLOW_DATABRICKS_RUN_URL = "mlflow.databricks.runURL" MLFLOW_DATABRICKS_CLUSTER_ID = "mlflow.databricks.cluster.id" +MLFLOW_DATABRICKS_WORKSPACE_URL = "mlflow.databricks.workspaceURL" +MLFLOW_DATABRICKS_WORKSPACE_ID = "mlflow.databricks.workspaceID" # The unique ID of a command execution in a Databricks notebook MLFLOW_DATABRICKS_NOTEBOOK_COMMAND_ID = "mlflow.databricks.notebook.commandID" # The SHELL_JOB_ID and SHELL_JOB_RUN_ID tags are used for tracking the From 6a4c847e428183518ab698fcb072bf5801219ffd Mon Sep 17 00:00:00 2001 From: Liang Zhang Date: Tue, 21 Dec 2021 11:05:27 -0800 Subject: [PATCH 2/5] fix test Signed-off-by: Liang Zhang --- .../context/test_databricks_job_context.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/tests/tracking/context/test_databricks_job_context.py b/tests/tracking/context/test_databricks_job_context.py index cfbf502118556..2ebbd265a8bd2 100644 --- a/tests/tracking/context/test_databricks_job_context.py +++ b/tests/tracking/context/test_databricks_job_context.py @@ -8,6 +8,8 @@ MLFLOW_DATABRICKS_JOB_RUN_ID, MLFLOW_DATABRICKS_JOB_TYPE, MLFLOW_DATABRICKS_WEBAPP_URL, + MLFLOW_DATABRICKS_WORKSPACE_URL, + MLFLOW_DATABRICKS_WORKSPACE_ID, ) from mlflow.tracking.context.databricks_job_context import DatabricksJobRunContext from tests.helper_functions import multi_context @@ -23,12 +25,19 @@ def test_databricks_job_run_context_tags(): patch_job_run_id = mock.patch("mlflow.utils.databricks_utils.get_job_run_id") patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type") patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url") + patch_workspace_info = mock.patch( + "mlflow.utils.databricks_utils.get_workspace_info_from_dbutils", + return_value=("https://databricks.com", "123456"), + ) - with multi_context(patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url) as ( + with multi_context( + patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url, patch_workspace_info + ) as ( job_id_mock, job_run_id_mock, job_type_mock, webapp_url_mock, + workspace_info_mock, ): assert DatabricksJobRunContext().tags() == { MLFLOW_SOURCE_NAME: "jobs/{job_id}/run/{job_run_id}".format( @@ -39,6 +48,8 @@ def test_databricks_job_run_context_tags(): MLFLOW_DATABRICKS_JOB_RUN_ID: job_run_id_mock.return_value, MLFLOW_DATABRICKS_JOB_TYPE: job_type_mock.return_value, MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value, + MLFLOW_DATABRICKS_WORKSPACE_URL: workspace_info_mock.return_value[0], + MLFLOW_DATABRICKS_WORKSPACE_ID: workspace_info_mock.return_value[1], } @@ -47,8 +58,11 @@ def test_databricks_job_run_context_tags_nones(): patch_job_run_id = mock.patch("mlflow.utils.databricks_utils.get_job_run_id", return_value=None) patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type", return_value=None) patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url", return_value=None) + patch_workspace_info = mock.patch( + "mlflow.utils.databricks_utils.get_workspace_info_from_dbutils", return_value=(None, None) + ) - with patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url: + with patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url, patch_workspace_info: assert DatabricksJobRunContext().tags() == { MLFLOW_SOURCE_NAME: None, MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB), From 6a73ba473dcbce98a4845fe2d4532664310ae688 Mon Sep 17 00:00:00 2001 From: Liang Zhang Date: Wed, 22 Dec 2021 15:42:50 -0800 Subject: [PATCH 3/5] fix notebook source link for cross-workspace runs Signed-off-by: Liang Zhang --- .../context/databricks_notebook_context.py | 6 ++++++ .../test_databricks_notebook_context.py | 18 ++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/mlflow/tracking/context/databricks_notebook_context.py b/mlflow/tracking/context/databricks_notebook_context.py index d891a60a51dc9..8ca39848f1899 100644 --- a/mlflow/tracking/context/databricks_notebook_context.py +++ b/mlflow/tracking/context/databricks_notebook_context.py @@ -7,6 +7,8 @@ MLFLOW_DATABRICKS_WEBAPP_URL, MLFLOW_DATABRICKS_NOTEBOOK_PATH, MLFLOW_DATABRICKS_NOTEBOOK_ID, + MLFLOW_DATABRICKS_WORKSPACE_URL, + MLFLOW_DATABRICKS_WORKSPACE_ID, ) @@ -18,6 +20,7 @@ def tags(self): notebook_id = databricks_utils.get_notebook_id() notebook_path = databricks_utils.get_notebook_path() webapp_url = databricks_utils.get_webapp_url() + workspace_url, workspace_id = databricks_utils.get_workspace_info_from_dbutils() tags = { MLFLOW_SOURCE_NAME: notebook_path, MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK), @@ -28,4 +31,7 @@ def tags(self): tags[MLFLOW_DATABRICKS_NOTEBOOK_PATH] = notebook_path if webapp_url is not None: tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url + if workspace_url is not None: + tags[MLFLOW_DATABRICKS_WORKSPACE_URL] = workspace_url + tags[MLFLOW_DATABRICKS_WORKSPACE_ID] = workspace_id return tags diff --git a/tests/tracking/context/test_databricks_notebook_context.py b/tests/tracking/context/test_databricks_notebook_context.py index 15a8d9c185c8f..6464dde51f46a 100644 --- a/tests/tracking/context/test_databricks_notebook_context.py +++ b/tests/tracking/context/test_databricks_notebook_context.py @@ -7,6 +7,8 @@ MLFLOW_DATABRICKS_NOTEBOOK_ID, MLFLOW_DATABRICKS_NOTEBOOK_PATH, MLFLOW_DATABRICKS_WEBAPP_URL, + MLFLOW_DATABRICKS_WORKSPACE_URL, + MLFLOW_DATABRICKS_WORKSPACE_ID, ) from mlflow.tracking.context.databricks_notebook_context import DatabricksNotebookRunContext from tests.helper_functions import multi_context @@ -21,11 +23,18 @@ def test_databricks_notebook_run_context_tags(): patch_notebook_id = mock.patch("mlflow.utils.databricks_utils.get_notebook_id") patch_notebook_path = mock.patch("mlflow.utils.databricks_utils.get_notebook_path") patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url") + patch_workspace_info = mock.patch( + "mlflow.utils.databricks_utils.get_workspace_info_from_dbutils", + return_value=("https://databricks.com", "123456"), + ) - with multi_context(patch_notebook_id, patch_notebook_path, patch_webapp_url) as ( + with multi_context( + patch_notebook_id, patch_notebook_path, patch_webapp_url, patch_workspace_info + ) as ( notebook_id_mock, notebook_path_mock, webapp_url_mock, + workspace_info_mock, ): assert DatabricksNotebookRunContext().tags() == { MLFLOW_SOURCE_NAME: notebook_path_mock.return_value, @@ -33,6 +42,8 @@ def test_databricks_notebook_run_context_tags(): MLFLOW_DATABRICKS_NOTEBOOK_ID: notebook_id_mock.return_value, MLFLOW_DATABRICKS_NOTEBOOK_PATH: notebook_path_mock.return_value, MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value, + MLFLOW_DATABRICKS_WORKSPACE_URL: workspace_info_mock.return_value[0], + MLFLOW_DATABRICKS_WORKSPACE_ID: workspace_info_mock.return_value[1], } @@ -44,8 +55,11 @@ def test_databricks_notebook_run_context_tags_nones(): "mlflow.utils.databricks_utils.get_notebook_path", return_value=None ) patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url", return_value=None) + patch_workspace_info = mock.patch( + "mlflow.utils.databricks_utils.get_workspace_info_from_dbutils", return_value=(None, None) + ) - with patch_notebook_id, patch_notebook_path, patch_webapp_url: + with patch_notebook_id, patch_notebook_path, patch_webapp_url, patch_workspace_info: assert DatabricksNotebookRunContext().tags() == { MLFLOW_SOURCE_NAME: None, MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK), From c167b2ad521893ae7bed3d2c52a53eac13c29d58 Mon Sep 17 00:00:00 2001 From: Liang Zhang Date: Wed, 22 Dec 2021 16:32:06 -0800 Subject: [PATCH 4/5] fix test Signed-off-by: Liang Zhang --- tests/tracking/fluent/test_fluent.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/tracking/fluent/test_fluent.py b/tests/tracking/fluent/test_fluent.py index 9e4cf7dc55bf2..1bfb2a36f7ada 100644 --- a/tests/tracking/fluent/test_fluent.py +++ b/tests/tracking/fluent/test_fluent.py @@ -382,6 +382,10 @@ def test_start_run_defaults_databricks_notebook( webapp_url_patch = mock.patch( "mlflow.utils.databricks_utils.get_webapp_url", return_value=mock_webapp_url ) + workspace_info_patch = mock.patch( + "mlflow.utils.databricks_utils.get_workspace_info_from_dbutils", + return_value=("https://databricks.com", "123456"), + ) expected_tags = { mlflow_tags.MLFLOW_USER: mock_user, @@ -391,6 +395,8 @@ def test_start_run_defaults_databricks_notebook( mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_ID: mock_notebook_id, mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_PATH: mock_notebook_path, mlflow_tags.MLFLOW_DATABRICKS_WEBAPP_URL: mock_webapp_url, + mlflow_tags.MLFLOW_DATABRICKS_WORKSPACE_URL: "https://databricks.com", + mlflow_tags.MLFLOW_DATABRICKS_WORKSPACE_ID: "123456", } create_run_patch = mock.patch.object(MlflowClient, "create_run") @@ -403,6 +409,7 @@ def test_start_run_defaults_databricks_notebook( notebook_id_patch, notebook_path_patch, webapp_url_patch, + workspace_info_patch, create_run_patch, ): active_run = start_run() From 1e8051fad0009aaca5defc46f8c0ce6ea91c96cc Mon Sep 17 00:00:00 2001 From: Liang Zhang Date: Thu, 23 Dec 2021 12:53:45 -0800 Subject: [PATCH 5/5] fix nit Signed-off-by: Liang Zhang --- mlflow/tracking/context/databricks_job_context.py | 1 + mlflow/tracking/context/databricks_notebook_context.py | 1 + 2 files changed, 2 insertions(+) diff --git a/mlflow/tracking/context/databricks_job_context.py b/mlflow/tracking/context/databricks_job_context.py index 02440c370974f..7494118b8615d 100644 --- a/mlflow/tracking/context/databricks_job_context.py +++ b/mlflow/tracking/context/databricks_job_context.py @@ -41,5 +41,6 @@ def tags(self): tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url if workspace_url is not None: tags[MLFLOW_DATABRICKS_WORKSPACE_URL] = workspace_url + if workspace_id is not None: tags[MLFLOW_DATABRICKS_WORKSPACE_ID] = workspace_id return tags diff --git a/mlflow/tracking/context/databricks_notebook_context.py b/mlflow/tracking/context/databricks_notebook_context.py index 8ca39848f1899..797b420bd7d3d 100644 --- a/mlflow/tracking/context/databricks_notebook_context.py +++ b/mlflow/tracking/context/databricks_notebook_context.py @@ -33,5 +33,6 @@ def tags(self): tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url if workspace_url is not None: tags[MLFLOW_DATABRICKS_WORKSPACE_URL] = workspace_url + if workspace_id is not None: tags[MLFLOW_DATABRICKS_WORKSPACE_ID] = workspace_id return tags