Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

[App] Fix bug where previously deleted apps cannot be re-run from the CLI #16082

Merged
merged 4 commits into from Dec 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/lightning_app/CHANGELOG.md
Expand Up @@ -34,6 +34,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed the debugger detection mechanism for lightning App in VSCode ([#16068](https://github.com/Lightning-AI/lightning/pull/16068))

- Fixed a bug where apps that had previously been deleted could not be run again from the CLI ([#16082](https://github.com/Lightning-AI/lightning/pull/16082))


## [1.8.4] - 2022-12-08

Expand Down
38 changes: 22 additions & 16 deletions src/lightning_app/runners/cloud.py
Expand Up @@ -320,52 +320,58 @@ def dispatch(
self._ensure_cluster_project_binding(project.project_id, cluster_id)

# Resolve the app name, instance, and cluster ID
existing_app = None
existing_instance = None
app_name = app_config.name

# List existing instances
# List existing apps
# TODO: Add pagination, otherwise this could break if users have a lot of apps.
find_instances_resp = self.backend.client.lightningapp_instance_service_list_lightningapp_instances(
all_apps = self.backend.client.lightningapp_v2_service_list_lightningapps_v2(
project_id=project.project_id
)
).lightningapps

# Seach for instances with the given name (possibly with some random characters appended)
# Seach for apps with the given name (possibly with some random characters appended)
pattern = re.escape(f"{app_name}-") + ".{4}"
instances = [
all_apps = [
lightningapp
for lightningapp in find_instances_resp.lightningapps
for lightningapp in all_apps
if lightningapp.name == app_name or (re.fullmatch(pattern, lightningapp.name) is not None)
]

# If instances exist and cluster is None, mimic cluster selection logic to choose a default
if cluster_id is None and len(instances) > 0:
# If apps exist and cluster is None, mimic cluster selection logic to choose a default
if cluster_id is None and len(all_apps) > 0:
# Determine the cluster ID
cluster_id = self._get_default_cluster(project.project_id)

# If an instance exists on the cluster with the same base name - restart it
for instance in instances:
if instance.spec.cluster_id == cluster_id:
existing_instance = instance
for app in all_apps:
instances = self.backend.client.lightningapp_instance_service_list_lightningapp_instances(
project_id=project.project_id,
app_id=app.id,
).lightningapps
if instances and instances[0].spec.cluster_id == cluster_id:
existing_app = app
existing_instance = instances[0]
break

# If instances exist but not on the cluster - choose a randomised name
if len(instances) > 0 and existing_instance is None:
# If apps exist but not on the cluster - choose a randomised name
if len(all_apps) > 0 and existing_app is None:
name_exists = True
while name_exists:
random_name = self._randomise_name(app_name)
name_exists = any([instance.name == random_name for instance in instances])
name_exists = any([app.name == random_name for app in all_apps])

app_name = random_name

# Create the app if it doesn't exist
if existing_instance is None:
if existing_app is None:
app_body = Body7(name=app_name, can_download_source_code=True)
lit_app = self.backend.client.lightningapp_v2_service_create_lightningapp_v2(
project_id=project.project_id, body=app_body
)
app_id = lit_app.id
else:
app_id = existing_instance.spec.app_id
app_id = existing_app.id

# check if user has sufficient credits to run an app
# if so set the desired state to running otherwise, create the app in stopped state,
Expand Down
6 changes: 5 additions & 1 deletion tests/tests_app/cli/test_cloud_cli.py
Expand Up @@ -11,6 +11,7 @@
from lightning_cloud.openapi import (
V1LightningappV2,
V1ListLightningappInstancesResponse,
V1ListLightningappsV2Response,
V1ListMembershipsResponse,
V1Membership,
)
Expand All @@ -36,6 +37,9 @@ class FakeResponse:


class FakeLightningClient:
def lightningapp_v2_service_list_lightningapps_v2(self, *args, **kwargs):
return V1ListLightningappsV2Response(lightningapps=[])

def lightningapp_instance_service_list_lightningapp_instances(self, *args, **kwargs):
return V1ListLightningappInstancesResponse(lightningapps=[])

Expand Down Expand Up @@ -182,7 +186,7 @@ def __init__(self, *args, message, **kwargs):
super().__init__()
self.message = message

def lightningapp_instance_service_list_lightningapp_instances(self, *args, **kwargs):
def lightningapp_v2_service_list_lightningapps_v2(self, *args, **kwargs):
raise ApiException(
http_resp=HttpHeaderDict(
data=self.message,
Expand Down
84 changes: 84 additions & 0 deletions tests/tests_app/runners/test_cloud.py
Expand Up @@ -31,6 +31,7 @@
V1LightningworkSpec,
V1ListClustersResponse,
V1ListLightningappInstancesResponse,
V1ListLightningappsV2Response,
V1ListMembershipsResponse,
V1ListProjectClusterBindingsResponse,
V1Membership,
Expand Down Expand Up @@ -210,6 +211,13 @@ def test_new_instance_on_different_cluster(self, cloud_backend, project_id, old_
app.flows = []
app.frontend = {}

existing_app = MagicMock()
existing_app.name = app_name
existing_app.id = "test-id"
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=[existing_app]
)

existing_instance = MagicMock()
existing_instance.name = app_name
existing_instance.status.phase = V1LightningappInstanceState.STOPPED
Expand All @@ -234,6 +242,67 @@ def test_new_instance_on_different_cluster(self, cloud_backend, project_id, old_
assert args[1]["body"].name.startswith(app_name)
assert args[1]["body"].cluster_id == new_cluster

def test_running_deleted_app(self, cloud_backend, project_id):
"""Deleted apps show up in list apps but not in list instances.

This tests that we don't try to reacreate a previously deleted app.
"""
app_name = "test-app"

mock_client = mock.MagicMock()
mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse(
memberships=[V1Membership(name="Default Project", project_id=project_id)]
)
mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease(
cluster_id=DEFAULT_CLUSTER
)

mock_client.cluster_service_list_clusters.return_value = V1ListClustersResponse(
[
Externalv1Cluster(id=DEFAULT_CLUSTER),
]
)

mock_client.projects_service_list_project_cluster_bindings.return_value = V1ListProjectClusterBindingsResponse(
clusters=[
V1ProjectClusterBinding(cluster_id=DEFAULT_CLUSTER),
]
)

# Mock all clusters as global clusters
mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse(
id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL)
)

cloud_backend.client = mock_client

app = mock.MagicMock()
app.flows = []
app.frontend = {}

existing_app = MagicMock()
existing_app.name = app_name
existing_app.id = "test-id"
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=[existing_app]
)

# Simulate the app as deleted so no instance to return
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=[])
)

cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file="entrypoint.py")
cloud_runtime._check_uploaded_folder = mock.MagicMock()

cloud_runtime.dispatch(name=app_name)

# Check that a new name was used which starts with and does not equal the old name
mock_client.lightningapp_v2_service_create_lightningapp_release_instance.assert_called_once()
args = mock_client.lightningapp_v2_service_create_lightningapp_release_instance.call_args
assert args[1]["body"].name != app_name
assert args[1]["body"].name.startswith(app_name)

@pytest.mark.parametrize("flow_cloud_compute", [None, CloudCompute(name="t2.medium")])
@mock.patch("lightning_app.runners.backends.cloud.LightningClient", mock.MagicMock())
def test_run_with_default_flow_compute_config(self, monkeypatch, flow_cloud_compute):
Expand Down Expand Up @@ -458,6 +527,9 @@ def test_call_with_work_app(self, lightningapps, start_with_flow, monkeypatch, t
lightningapps[0].name = "myapp"
lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED
lightningapps[0].spec.cluster_id = "test"
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=lightningapps
)
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=lightningapps)
)
Expand Down Expand Up @@ -632,6 +704,9 @@ def test_call_with_work_app_and_attached_drives(self, lightningapps, monkeypatch
lightningapps[0].name = "myapp"
lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED
lightningapps[0].spec.cluster_id = "test"
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=lightningapps
)
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=lightningapps)
)
Expand Down Expand Up @@ -786,6 +861,9 @@ def test_call_with_work_app_and_app_comment_command_execution_set(self, lightnin
mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse(
id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL)
)
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=lightningapps
)
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=lightningapps)
)
Expand Down Expand Up @@ -912,6 +990,9 @@ def test_call_with_work_app_and_multiple_attached_drives(self, lightningapps, mo
mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse(
id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL)
)
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=lightningapps
)
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=lightningapps)
)
Expand Down Expand Up @@ -1127,6 +1208,9 @@ def test_call_with_work_app_and_attached_mount_and_drive(self, lightningapps, mo
mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse(
id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL)
)
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=lightningapps
)
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=lightningapps)
)
Expand Down