Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We鈥檒l occasionally send you account related emails.

Already on GitHub? Sign in to your account

[App] Fix bug where previously deleted apps cannot be re-run from the CLI #16082

Merged
merged 4 commits into from Dec 16, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
38 changes: 22 additions & 16 deletions src/lightning_app/runners/cloud.py
Expand Up @@ -320,52 +320,58 @@ def dispatch(
self._ensure_cluster_project_binding(project.project_id, cluster_id)

# Resolve the app name, instance, and cluster ID
existing_app = None
existing_instance = None
app_name = app_config.name

# List existing instances
# List existing apps
# TODO: Add pagination, otherwise this could break if users have a lot of apps.
find_instances_resp = self.backend.client.lightningapp_instance_service_list_lightningapp_instances(
all_apps = self.backend.client.lightningapp_v2_service_list_lightningapps_v2(
project_id=project.project_id
)
).lightningapps

# Seach for instances with the given name (possibly with some random characters appended)
# Seach for apps with the given name (possibly with some random characters appended)
pattern = re.escape(f"{app_name}-") + ".{4}"
instances = [
all_apps = [
lightningapp
for lightningapp in find_instances_resp.lightningapps
for lightningapp in all_apps
if lightningapp.name == app_name or (re.fullmatch(pattern, lightningapp.name) is not None)
]

# If instances exist and cluster is None, mimic cluster selection logic to choose a default
if cluster_id is None and len(instances) > 0:
# If apps exist and cluster is None, mimic cluster selection logic to choose a default
if cluster_id is None and len(all_apps) > 0:
# Determine the cluster ID
cluster_id = self._get_default_cluster(project.project_id)

# If an instance exists on the cluster with the same base name - restart it
for instance in instances:
if instance.spec.cluster_id == cluster_id:
existing_instance = instance
for app in all_apps:
instances = self.backend.client.lightningapp_instance_service_list_lightningapp_instances(
project_id=project.project_id,
app_id=app.id,
).lightningapps
if instances and instances[0].spec.cluster_id == cluster_id:
existing_app = app
existing_instance = instances[0]
break

# If instances exist but not on the cluster - choose a randomised name
if len(instances) > 0 and existing_instance is None:
# If apps exist but not on the cluster - choose a randomised name
if len(all_apps) > 0 and existing_app is None:
name_exists = True
while name_exists:
random_name = self._randomise_name(app_name)
name_exists = any([instance.name == random_name for instance in instances])
name_exists = any([app.name == random_name for app in all_apps])

app_name = random_name

# Create the app if it doesn't exist
if existing_instance is None:
if existing_app is None:
app_body = Body7(name=app_name, can_download_source_code=True)
lit_app = self.backend.client.lightningapp_v2_service_create_lightningapp_v2(
project_id=project.project_id, body=app_body
)
app_id = lit_app.id
else:
app_id = existing_instance.spec.app_id
app_id = existing_app.id

# check if user has sufficient credits to run an app
# if so set the desired state to running otherwise, create the app in stopped state,
Expand Down
23 changes: 23 additions & 0 deletions tests/tests_app/runners/test_cloud.py
Expand Up @@ -31,6 +31,7 @@
V1LightningworkSpec,
V1ListClustersResponse,
V1ListLightningappInstancesResponse,
V1ListLightningappsV2Response,
V1ListMembershipsResponse,
V1ListProjectClusterBindingsResponse,
V1Membership,
Expand Down Expand Up @@ -210,6 +211,13 @@ def test_new_instance_on_different_cluster(self, cloud_backend, project_id, old_
app.flows = []
app.frontend = {}

existing_app = MagicMock()
existing_app.name = app_name
existing_app.id = "test-id"
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=[existing_app]
)

existing_instance = MagicMock()
existing_instance.name = app_name
existing_instance.status.phase = V1LightningappInstanceState.STOPPED
Expand Down Expand Up @@ -458,6 +466,9 @@ def test_call_with_work_app(self, lightningapps, start_with_flow, monkeypatch, t
lightningapps[0].name = "myapp"
lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED
lightningapps[0].spec.cluster_id = "test"
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=lightningapps
)
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=lightningapps)
)
Expand Down Expand Up @@ -632,6 +643,9 @@ def test_call_with_work_app_and_attached_drives(self, lightningapps, monkeypatch
lightningapps[0].name = "myapp"
lightningapps[0].status.phase = V1LightningappInstanceState.STOPPED
lightningapps[0].spec.cluster_id = "test"
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=lightningapps
)
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=lightningapps)
)
Expand Down Expand Up @@ -786,6 +800,9 @@ def test_call_with_work_app_and_app_comment_command_execution_set(self, lightnin
mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse(
id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL)
)
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=lightningapps
)
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=lightningapps)
)
Expand Down Expand Up @@ -912,6 +929,9 @@ def test_call_with_work_app_and_multiple_attached_drives(self, lightningapps, mo
mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse(
id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL)
)
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=lightningapps
)
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=lightningapps)
)
Expand Down Expand Up @@ -1127,6 +1147,9 @@ def test_call_with_work_app_and_attached_mount_and_drive(self, lightningapps, mo
mock_client.cluster_service_get_cluster.side_effect = lambda cluster_id: V1GetClusterResponse(
id=cluster_id, spec=V1ClusterSpec(cluster_type=V1ClusterType.GLOBAL)
)
mock_client.lightningapp_v2_service_list_lightningapps_v2.return_value = V1ListLightningappsV2Response(
lightningapps=lightningapps
)
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=lightningapps)
)
Expand Down