Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML-19342] Fix cross-workspace job source link on databricks #5174

Merged
merged 5 commits into from Dec 24, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions mlflow/tracking/context/databricks_job_context.py
Expand Up @@ -8,6 +8,8 @@
MLFLOW_DATABRICKS_JOB_ID,
MLFLOW_DATABRICKS_JOB_RUN_ID,
MLFLOW_DATABRICKS_JOB_TYPE,
MLFLOW_DATABRICKS_WORKSPACE_URL,
MLFLOW_DATABRICKS_WORKSPACE_ID,
)


Expand All @@ -20,6 +22,7 @@ def tags(self):
job_run_id = databricks_utils.get_job_run_id()
job_type = databricks_utils.get_job_type()
webapp_url = databricks_utils.get_webapp_url()
workspace_url, workspace_id = databricks_utils.get_workspace_info_from_dbutils()
tags = {
MLFLOW_SOURCE_NAME: (
"jobs/{job_id}/run/{job_run_id}".format(job_id=job_id, job_run_id=job_run_id)
Expand All @@ -36,4 +39,7 @@ def tags(self):
tags[MLFLOW_DATABRICKS_JOB_TYPE] = job_type
if webapp_url is not None:
tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url
if workspace_url is not None:
tags[MLFLOW_DATABRICKS_WORKSPACE_URL] = workspace_url
tags[MLFLOW_DATABRICKS_WORKSPACE_ID] = workspace_id
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: What if workspace_url is None but workspace_id is not None, and vice versa?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it makes sense to not assume that they are dependent on each other.

return tags
6 changes: 6 additions & 0 deletions mlflow/tracking/context/databricks_notebook_context.py
Expand Up @@ -7,6 +7,8 @@
MLFLOW_DATABRICKS_WEBAPP_URL,
MLFLOW_DATABRICKS_NOTEBOOK_PATH,
MLFLOW_DATABRICKS_NOTEBOOK_ID,
MLFLOW_DATABRICKS_WORKSPACE_URL,
MLFLOW_DATABRICKS_WORKSPACE_ID,
)


Expand All @@ -18,6 +20,7 @@ def tags(self):
notebook_id = databricks_utils.get_notebook_id()
notebook_path = databricks_utils.get_notebook_path()
webapp_url = databricks_utils.get_webapp_url()
workspace_url, workspace_id = databricks_utils.get_workspace_info_from_dbutils()
tags = {
MLFLOW_SOURCE_NAME: notebook_path,
MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
Expand All @@ -28,4 +31,7 @@ def tags(self):
tags[MLFLOW_DATABRICKS_NOTEBOOK_PATH] = notebook_path
if webapp_url is not None:
tags[MLFLOW_DATABRICKS_WEBAPP_URL] = webapp_url
if workspace_url is not None:
tags[MLFLOW_DATABRICKS_WORKSPACE_URL] = workspace_url
tags[MLFLOW_DATABRICKS_WORKSPACE_ID] = workspace_id
return tags
2 changes: 2 additions & 0 deletions mlflow/utils/mlflow_tags.py
Expand Up @@ -27,6 +27,8 @@
MLFLOW_DATABRICKS_WEBAPP_URL = "mlflow.databricks.webappURL"
MLFLOW_DATABRICKS_RUN_URL = "mlflow.databricks.runURL"
MLFLOW_DATABRICKS_CLUSTER_ID = "mlflow.databricks.cluster.id"
MLFLOW_DATABRICKS_WORKSPACE_URL = "mlflow.databricks.workspaceURL"
MLFLOW_DATABRICKS_WORKSPACE_ID = "mlflow.databricks.workspaceID"
# The unique ID of a command execution in a Databricks notebook
MLFLOW_DATABRICKS_NOTEBOOK_COMMAND_ID = "mlflow.databricks.notebook.commandID"
# The SHELL_JOB_ID and SHELL_JOB_RUN_ID tags are used for tracking the
Expand Down
18 changes: 16 additions & 2 deletions tests/tracking/context/test_databricks_job_context.py
Expand Up @@ -8,6 +8,8 @@
MLFLOW_DATABRICKS_JOB_RUN_ID,
MLFLOW_DATABRICKS_JOB_TYPE,
MLFLOW_DATABRICKS_WEBAPP_URL,
MLFLOW_DATABRICKS_WORKSPACE_URL,
MLFLOW_DATABRICKS_WORKSPACE_ID,
)
from mlflow.tracking.context.databricks_job_context import DatabricksJobRunContext
from tests.helper_functions import multi_context
Expand All @@ -23,12 +25,19 @@ def test_databricks_job_run_context_tags():
patch_job_run_id = mock.patch("mlflow.utils.databricks_utils.get_job_run_id")
patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type")
patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url")
patch_workspace_info = mock.patch(
"mlflow.utils.databricks_utils.get_workspace_info_from_dbutils",
return_value=("https://databricks.com", "123456"),
)

with multi_context(patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url) as (
with multi_context(
patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url, patch_workspace_info
) as (
job_id_mock,
job_run_id_mock,
job_type_mock,
webapp_url_mock,
workspace_info_mock,
):
assert DatabricksJobRunContext().tags() == {
MLFLOW_SOURCE_NAME: "jobs/{job_id}/run/{job_run_id}".format(
Expand All @@ -39,6 +48,8 @@ def test_databricks_job_run_context_tags():
MLFLOW_DATABRICKS_JOB_RUN_ID: job_run_id_mock.return_value,
MLFLOW_DATABRICKS_JOB_TYPE: job_type_mock.return_value,
MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value,
MLFLOW_DATABRICKS_WORKSPACE_URL: workspace_info_mock.return_value[0],
MLFLOW_DATABRICKS_WORKSPACE_ID: workspace_info_mock.return_value[1],
}


Expand All @@ -47,8 +58,11 @@ def test_databricks_job_run_context_tags_nones():
patch_job_run_id = mock.patch("mlflow.utils.databricks_utils.get_job_run_id", return_value=None)
patch_job_type = mock.patch("mlflow.utils.databricks_utils.get_job_type", return_value=None)
patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url", return_value=None)
patch_workspace_info = mock.patch(
"mlflow.utils.databricks_utils.get_workspace_info_from_dbutils", return_value=(None, None)
)

with patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url:
with patch_job_id, patch_job_run_id, patch_job_type, patch_webapp_url, patch_workspace_info:
assert DatabricksJobRunContext().tags() == {
MLFLOW_SOURCE_NAME: None,
MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.JOB),
Expand Down
18 changes: 16 additions & 2 deletions tests/tracking/context/test_databricks_notebook_context.py
Expand Up @@ -7,6 +7,8 @@
MLFLOW_DATABRICKS_NOTEBOOK_ID,
MLFLOW_DATABRICKS_NOTEBOOK_PATH,
MLFLOW_DATABRICKS_WEBAPP_URL,
MLFLOW_DATABRICKS_WORKSPACE_URL,
MLFLOW_DATABRICKS_WORKSPACE_ID,
)
from mlflow.tracking.context.databricks_notebook_context import DatabricksNotebookRunContext
from tests.helper_functions import multi_context
Expand All @@ -21,18 +23,27 @@ def test_databricks_notebook_run_context_tags():
patch_notebook_id = mock.patch("mlflow.utils.databricks_utils.get_notebook_id")
patch_notebook_path = mock.patch("mlflow.utils.databricks_utils.get_notebook_path")
patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url")
patch_workspace_info = mock.patch(
"mlflow.utils.databricks_utils.get_workspace_info_from_dbutils",
return_value=("https://databricks.com", "123456"),
)

with multi_context(patch_notebook_id, patch_notebook_path, patch_webapp_url) as (
with multi_context(
patch_notebook_id, patch_notebook_path, patch_webapp_url, patch_workspace_info
) as (
notebook_id_mock,
notebook_path_mock,
webapp_url_mock,
workspace_info_mock,
):
assert DatabricksNotebookRunContext().tags() == {
MLFLOW_SOURCE_NAME: notebook_path_mock.return_value,
MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
MLFLOW_DATABRICKS_NOTEBOOK_ID: notebook_id_mock.return_value,
MLFLOW_DATABRICKS_NOTEBOOK_PATH: notebook_path_mock.return_value,
MLFLOW_DATABRICKS_WEBAPP_URL: webapp_url_mock.return_value,
MLFLOW_DATABRICKS_WORKSPACE_URL: workspace_info_mock.return_value[0],
MLFLOW_DATABRICKS_WORKSPACE_ID: workspace_info_mock.return_value[1],
}


Expand All @@ -44,8 +55,11 @@ def test_databricks_notebook_run_context_tags_nones():
"mlflow.utils.databricks_utils.get_notebook_path", return_value=None
)
patch_webapp_url = mock.patch("mlflow.utils.databricks_utils.get_webapp_url", return_value=None)
patch_workspace_info = mock.patch(
"mlflow.utils.databricks_utils.get_workspace_info_from_dbutils", return_value=(None, None)
)

with patch_notebook_id, patch_notebook_path, patch_webapp_url:
with patch_notebook_id, patch_notebook_path, patch_webapp_url, patch_workspace_info:
assert DatabricksNotebookRunContext().tags() == {
MLFLOW_SOURCE_NAME: None,
MLFLOW_SOURCE_TYPE: SourceType.to_string(SourceType.NOTEBOOK),
Expand Down
7 changes: 7 additions & 0 deletions tests/tracking/fluent/test_fluent.py
Expand Up @@ -382,6 +382,10 @@ def test_start_run_defaults_databricks_notebook(
webapp_url_patch = mock.patch(
"mlflow.utils.databricks_utils.get_webapp_url", return_value=mock_webapp_url
)
workspace_info_patch = mock.patch(
"mlflow.utils.databricks_utils.get_workspace_info_from_dbutils",
return_value=("https://databricks.com", "123456"),
)

expected_tags = {
mlflow_tags.MLFLOW_USER: mock_user,
Expand All @@ -391,6 +395,8 @@ def test_start_run_defaults_databricks_notebook(
mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_ID: mock_notebook_id,
mlflow_tags.MLFLOW_DATABRICKS_NOTEBOOK_PATH: mock_notebook_path,
mlflow_tags.MLFLOW_DATABRICKS_WEBAPP_URL: mock_webapp_url,
mlflow_tags.MLFLOW_DATABRICKS_WORKSPACE_URL: "https://databricks.com",
mlflow_tags.MLFLOW_DATABRICKS_WORKSPACE_ID: "123456",
}

create_run_patch = mock.patch.object(MlflowClient, "create_run")
Expand All @@ -403,6 +409,7 @@ def test_start_run_defaults_databricks_notebook(
notebook_id_patch,
notebook_path_patch,
webapp_url_patch,
workspace_info_patch,
create_run_patch,
):
active_run = start_run()
Expand Down