Skip to content

Commit

Permalink
[ML-19342] Fix cross-workspace job source link on databricks (#5174)
Browse files Browse the repository at this point in the history
Cross-workspace job source link and notebook source link are incorrect because it does not use the workspace URL and workspace ID that the job/notebook is executed on, but uses it's own workspace URL and workspace ID. This PR fixes them by using the logged correct workspace URL and workspace ID.

Signed-off-by: Liang Zhang <liang.zhang@databricks.com>
  • Loading branch information
liangz1 committed Dec 24, 2021
1 parent c098669 commit cf6ad38
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 4 deletions.
7 changes: 7 additions & 0 deletions mlflow/tracking/context/databricks_job_context.py
Expand Up @@ -8,6 +8,8 @@
MLFLOW_DATABRICKS_JOB_ID,
MLFLOW_DATABRICKS_JOB_RUN_ID,
MLFLOW_DATABRICKS_JOB_TYPE,
MLFLOW_DATABRICKS_WORKSPACE_URL,
MLFLOW_DATABRICKS_WORKSPACE_ID,
)


Expand All @@ -20,6 +22,7 @@ def tags(self):
job_run_id = databricks_utils.get_job_run_id()
job_type = databricks_utils.get_job_type()
webapp_url = databricks_utils.get_webapp_url()
workspace_url, workspace_id = databricks_utils.get_workspace_info_from_dbutils()
tags = {
MLFLOW_SOURCE_NAME: (
"jobs/{job_id}/run/{job_run_id}".format(job_id=job_id, job_run_id=job_run_id)
Expand All @@ -36,4 +39,8 @@ def tags(self):
tags[MLFLOW_DATABRICKS_JOB_TYPE] = job_type
if webapp_url is not None:
tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url
if workspace_url is not None:
tags[MLFLOW_DATABRICKS_WORKSPACE_URL] = workspace_url
if workspace_id is not None:
tags[MLFLOW_DATABRICKS_WORKSPACE_ID] = workspace_id
return tags
7 changes: 7 additions & 0 deletions mlflow/tracking/context/databricks_notebook_context.py
Expand Up @@ -7,6 +7,8 @@
MLFLOW_DATABRICKS_WEBAPP_URL,
MLFLOW_DATABRICKS_NOTEBOOK_PATH,
MLFLOW_DATABRICKS_NOTEBOOK_ID,
MLFLOW_DATABRICKS_WORKSPACE_URL,
MLFLOW_DATABRICKS_WORKSPACE_ID,
)


Expand All @@ -18,6 +20,7 @@ def tags(self):
notebook_id = databricks_utils.get_notebook_id()
notebook_path = databricks_utils.get_notebook_path()
webapp_url = databricks_utils.get_webapp_url()
workspace_url, workspace_id = databricks_utils.get_workspace_info_from_dbutils()
tags = {
MLFLOW_SOURCE_NAME: notebook_path,
MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
Expand All @@ -28,4 +31,8 @@ def tags(self):
tags[MLFLOW_DATABRICKS_NOTEBOOK_PATH] = notebook_path
if webapp_url is not None:
tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url
if workspace_url is not None:
tags[MLFLOW_DATABRICKS_WORKSPACE_URL] = workspace_url
if workspace_id is not None:
tags[MLFLOW_DATABRICKS_WORKSPACE_ID] = workspace_id
return tags
2 changes: 2 additions & 0 deletions mlflow/utils/mlflow_tags.py
Expand Up @@ -27,6 +27,8 @@
MLFLOW_DATABRICKS_WEBAPP_URL = "mlflow.databricks.webappURL"
MLFLOW_DATABRICKS_RUN_URL = "mlflow.databricks.runURL"
MLFLOW_DATABRICKS_CLUSTER_ID = "mlflow.databricks.cluster.id"
MLFLOW_DATABRICKS_WORKSPACE_URL = "mlflow.databricks.workspaceURL"
MLFLOW_DATABRICKS_WORKSPACE_ID = "mlflow.databricks.workspaceID"
# The unique ID of a command execution in a Databricks notebook
MLFLOW_DATABRICKS_NOTEBOOK_COMMAND_ID = "mlflow.databricks.notebook.commandID"
# The SHELL_JOB_ID and SHELL_JOB_RUN_ID tags are used for tracking the
Expand Down
18 changes: 16 additions & 2 deletions tests/tracking/context/test_databricks_job_context.py
Expand Up @@ -8,6 +8,8 @@
MLFLOW_DATABRICKS_JOB_RUN_ID,
MLFLOW_DATABRICKS_JOB_TYPE,
MLFLOW_DATABRICKS_WEBAPP_URL,
MLFLOW_DATABRICKS_WORKSPACE_URL,
MLFLOW_DATABRICKS_WORKSPACE_ID,
)
from mlflow.tracking.context.databricks_job_context import DatabricksJobRunContext
from tests.helper_functions import multi_context
Expand All @@ -23,12 +25,19 @@ def test_databricks_job_run_context_tags():
patch_job_run_id = mock.patch("mlflow.utils.databricks_utils.get_job_run_id")
patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type")
patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url")
patch_workspace_info = mock.patch(
"mlflow.utils.databricks_utils.get_workspace_info_from_dbutils",
return_value=("https://databricks.com", "123456"),
)

with multi_context(patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url) as (
with multi_context(
patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url, patch_workspace_info
) as (
job_id_mock,
job_run_id_mock,
job_type_mock,
webapp_url_mock,
workspace_info_mock,
):
assert DatabricksJobRunContext().tags() == {
MLFLOW_SOURCE_NAME: "jobs/{job_id}/run/{job_run_id}".format(
Expand All @@ -39,6 +48,8 @@ def test_databricks_job_run_context_tags():
MLFLOW_DATABRICKS_JOB_RUN_ID: job_run_id_mock.return_value,
MLFLOW_DATABRICKS_JOB_TYPE: job_type_mock.return_value,
MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value,
MLFLOW_DATABRICKS_WORKSPACE_URL: workspace_info_mock.return_value[0],
MLFLOW_DATABRICKS_WORKSPACE_ID: workspace_info_mock.return_value[1],
}


Expand All @@ -47,8 +58,11 @@ def test_databricks_job_run_context_tags_nones():
patch_job_run_id = mock.patch("mlflow.utils.databricks_utils.get_job_run_id", return_value=None)
patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type", return_value=None)
patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url", return_value=None)
patch_workspace_info = mock.patch(
"mlflow.utils.databricks_utils.get_workspace_info_from_dbutils", return_value=(None, None)
)

with patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url:
with patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url, patch_workspace_info:
assert DatabricksJobRunContext().tags() == {
MLFLOW_SOURCE_NAME: None,
MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
Expand Down
18 changes: 16 additions & 2 deletions tests/tracking/context/test_databricks_notebook_context.py
Expand Up @@ -7,6 +7,8 @@
MLFLOW_DATABRICKS_NOTEBOOK_ID,
MLFLOW_DATABRICKS_NOTEBOOK_PATH,
MLFLOW_DATABRICKS_WEBAPP_URL,
MLFLOW_DATABRICKS_WORKSPACE_URL,
MLFLOW_DATABRICKS_WORKSPACE_ID,
)
from mlflow.tracking.context.databricks_notebook_context import DatabricksNotebookRunContext
from tests.helper_functions import multi_context
Expand All @@ -21,18 +23,27 @@ def test_databricks_notebook_run_context_tags():
patch_notebook_id = mock.patch("mlflow.utils.databricks_utils.get_notebook_id")
patch_notebook_path = mock.patch("mlflow.utils.databricks_utils.get_notebook_path")
patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url")
patch_workspace_info = mock.patch(
"mlflow.utils.databricks_utils.get_workspace_info_from_dbutils",
return_value=("https://databricks.com", "123456"),
)

with multi_context(patch_notebook_id, patch_notebook_path, patch_webapp_url) as (
with multi_context(
patch_notebook_id, patch_notebook_path, patch_webapp_url, patch_workspace_info
) as (
notebook_id_mock,
notebook_path_mock,
webapp_url_mock,
workspace_info_mock,
):
assert DatabricksNotebookRunContext().tags() == {
MLFLOW_SOURCE_NAME: notebook_path_mock.return_value,
MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
MLFLOW_DATABRICKS_NOTEBOOK_ID: notebook_id_mock.return_value,
MLFLOW_DATABRICKS_NOTEBOOK_PATH: notebook_path_mock.return_value,
MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value,
MLFLOW_DATABRICKS_WORKSPACE_URL: workspace_info_mock.return_value[0],
MLFLOW_DATABRICKS_WORKSPACE_ID: workspace_info_mock.return_value[1],
}


Expand All @@ -44,8 +55,11 @@ def test_databricks_notebook_run_context_tags_nones():
"mlflow.utils.databricks_utils.get_notebook_path", return_value=None
)
patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url", return_value=None)
patch_workspace_info = mock.patch(
"mlflow.utils.databricks_utils.get_workspace_info_from_dbutils", return_value=(None, None)
)

with patch_notebook_id, patch_notebook_path, patch_webapp_url:
with patch_notebook_id, patch_notebook_path, patch_webapp_url, patch_workspace_info:
assert DatabricksNotebookRunContext().tags() == {
MLFLOW_SOURCE_NAME: None,
MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
Expand Down
7 changes: 7 additions & 0 deletions tests/tracking/fluent/test_fluent.py
Expand Up @@ -382,6 +382,10 @@ def test_start_run_defaults_databricks_notebook(
webapp_url_patch = mock.patch(
"mlflow.utils.databricks_utils.get_webapp_url", return_value=mock_webapp_url
)
workspace_info_patch = mock.patch(
"mlflow.utils.databricks_utils.get_workspace_info_from_dbutils",
return_value=("https://databricks.com", "123456"),
)

expected_tags = {
mlflow_tags.MLFLOW_USER: mock_user,
Expand All @@ -391,6 +395,8 @@ def test_start_run_defaults_databricks_notebook(
mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_ID: mock_notebook_id,
mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_PATH: mock_notebook_path,
mlflow_tags.MLFLOW_DATABRICKS_WEBAPP_URL: mock_webapp_url,
mlflow_tags.MLFLOW_DATABRICKS_WORKSPACE_URL: "https://databricks.com",
mlflow_tags.MLFLOW_DATABRICKS_WORKSPACE_ID: "123456",
}

create_run_patch = mock.patch.object(MlflowClient, "create_run")
Expand All @@ -403,6 +409,7 @@ def test_start_run_defaults_databricks_notebook(
notebook_id_patch,
notebook_path_patch,
webapp_url_patch,
workspace_info_patch,
create_run_patch,
):
active_run = start_run()
Expand Down

0 comments on commit cf6ad38

Please sign in to comment.