Skip to content

Commit

Permalink
Add guards to cluster deletion from cli (#16053)
Browse files Browse the repository at this point in the history
Adds guards to cluster deletion.
- If cluster has running apps -> throw an error
- If cluster has stopped apps -> confirm w/ user that apps and logs will be deleted

(cherry picked from commit 64d0ebb)
  • Loading branch information
luca3rd authored and Borda committed Dec 15, 2022
1 parent 29b5b23 commit a476a23
Show file tree
Hide file tree
Showing 2 changed files with 143 additions and 1 deletion.
54 changes: 54 additions & 0 deletions src/lightning_app/cli/cmd_clusters.py
Expand Up @@ -9,6 +9,7 @@
import lightning_cloud
from lightning_cloud.openapi import (
Externalv1Cluster,
Externalv1LightningappInstance,
V1AWSClusterDriverSpec,
V1ClusterDriver,
V1ClusterPerformanceProfile,
Expand All @@ -18,13 +19,17 @@
V1CreateClusterRequest,
V1GetClusterResponse,
V1KubernetesClusterDriver,
V1LightningappInstanceState,
V1ListLightningappInstancesResponse,
V1Membership,
)
from lightning_utilities.core.enums import StrEnum
from rich.console import Console
from rich.table import Table
from rich.text import Text

from lightning_app.cli.core import Formatable
from lightning_app.utilities.cloud import _get_project
from lightning_app.utilities.network import LightningClient
from lightning_app.utilities.openapi import create_openapi_object, string2dict

Expand Down Expand Up @@ -198,6 +203,33 @@ def delete(self, cluster_id: str, force: bool = False, do_async: bool = False) -
)
click.confirm("Do you want to continue?", abort=True)

else:
apps = _list_apps(self.api_client, cluster_id=cluster_id, phase_in=[V1LightningappInstanceState.RUNNING])
if apps:
raise click.ClickException(
dedent(
"""\
To delete the cluster, you must first delete the apps running on it.
Use the following commands to delete the apps, then delete the cluster again:
"""
)
+ "\n".join([f"\tlightning delete app {app.name} --cluster-id {cluster_id}" for app in apps])
)

if _list_apps(self.api_client, cluster_id=cluster_id, phase_not_in=[V1LightningappInstanceState.DELETED]):
click.echo(
dedent(
"""\
This cluster has stopped apps.
Deleting this cluster will delete those apps and their logs.
App artifacts aren't deleted and will still be available in the S3 bucket for the cluster.
"""
)
)
click.confirm("Are you sure you want to continue?", abort=True)

resp: V1GetClusterResponse = self.api_client.cluster_service_get_cluster(id=cluster_id)
bucket_name = resp.spec.driver.kubernetes.aws.bucket_name

Expand Down Expand Up @@ -226,6 +258,28 @@ def delete(self, cluster_id: str, force: bool = False, do_async: bool = False) -
click.echo(background_message)


def _list_apps(
api_client: LightningClient,
**filters: Any,
) -> List[Externalv1LightningappInstance]:
"""_list_apps is a thin wrapper around lightningapp_instance_service_list_lightningapp_instances.
Args:
api_client (LightningClient): Used for listing app instances
**filters: keyword arguments passed to the list method
Returns:
List[Externalv1LightningappInstance]: List of apps matching the filters
"""
project: V1Membership = _get_project(api_client)
resp: V1ListLightningappInstancesResponse = api_client.lightningapp_instance_service_list_lightningapp_instances(
project.project_id,
**filters,
)

return resp.lightningapps


def _wait_for_cluster_state(
api_client: LightningClient,
cluster_id: str,
Expand Down
90 changes: 89 additions & 1 deletion tests/tests_app/cli/test_cmd_clusters.py
Expand Up @@ -4,6 +4,7 @@
import click
import pytest
from lightning_cloud.openapi import (
Externalv1LightningappInstance,
V1AWSClusterDriverSpec,
V1ClusterDriver,
V1ClusterPerformanceProfile,
Expand All @@ -14,6 +15,11 @@
V1CreateClusterRequest,
V1GetClusterResponse,
V1KubernetesClusterDriver,
V1LightningappInstanceState,
V1LightningappInstanceStatus,
V1ListLightningappInstancesResponse,
V1ListMembershipsResponse,
V1Membership,
)

from lightning_app.cli import cmd_clusters
Expand Down Expand Up @@ -95,17 +101,99 @@ def test_list_clusters(api: mock.MagicMock):
api.assert_called_once_with(phase_not_in=[V1ClusterState.DELETED])


@pytest.fixture()
def fixture_list_instances_empty():
return V1ListLightningappInstancesResponse([])


@mock.patch("lightning_cloud.login.Auth.authenticate", MagicMock())
@mock.patch("lightning_app.utilities.network.LightningClient.projects_service_list_memberships")
@mock.patch("lightning_app.utilities.network.LightningClient.lightningapp_instance_service_list_lightningapp_instances")
@mock.patch("lightning_app.utilities.network.LightningClient.cluster_service_delete_cluster")
@mock.patch("lightning_app.utilities.network.LightningClient.cluster_service_get_cluster")
def test_delete_cluster_api(api_get: mock.MagicMock, api_delete: mock.MagicMock, async_or_interrupt, spec):
def test_delete_cluster_api(
api_get: mock.MagicMock,
api_delete: mock.MagicMock,
api_list_instances: mock.MagicMock,
api_list_memberships: mock.MagicMock,
async_or_interrupt,
spec,
fixture_list_instances_empty,
):
api_list_memberships.return_value = V1ListMembershipsResponse([V1Membership(project_id="test-project")])
api_list_instances.return_value = fixture_list_instances_empty
api_get.return_value = V1GetClusterResponse(spec=spec)
cluster_manager = AWSClusterManager()
cluster_manager.delete(cluster_id="test-7", do_async=async_or_interrupt)

api_delete.assert_called_once_with(id="test-7", force=False)


@mock.patch("click.confirm")
@mock.patch("lightning_cloud.login.Auth.authenticate", MagicMock())
@mock.patch("lightning_app.utilities.network.LightningClient.projects_service_list_memberships")
@mock.patch("lightning_app.utilities.network.LightningClient.lightningapp_instance_service_list_lightningapp_instances")
@mock.patch("lightning_app.utilities.network.LightningClient.cluster_service_delete_cluster")
@mock.patch("lightning_app.utilities.network.LightningClient.cluster_service_get_cluster")
def test_delete_cluster_with_stopped_apps(
api_get: mock.MagicMock,
api_delete: mock.MagicMock,
api_list_instances: mock.MagicMock,
api_list_memberships: mock.MagicMock,
click_mock: mock.MagicMock,
spec,
):
api_list_memberships.return_value = V1ListMembershipsResponse([V1Membership(project_id="test-project")])
api_list_instances.side_effect = [
# when querying for running apps
V1ListLightningappInstancesResponse([]),
# when querying for stopped apps
V1ListLightningappInstancesResponse(
lightningapps=[
Externalv1LightningappInstance(
status=V1LightningappInstanceStatus(
phase=V1LightningappInstanceState.STOPPED,
)
)
]
),
]
api_get.return_value = V1GetClusterResponse(spec=spec)
cluster_manager = AWSClusterManager()

cluster_manager.delete(cluster_id="test-7", do_async=True)
api_delete.assert_called_once_with(id="test-7", force=False)
click_mock.assert_called_once_with("Are you sure you want to continue?", abort=True)


@mock.patch("lightning_cloud.login.Auth.authenticate", MagicMock())
@mock.patch("lightning_app.utilities.network.LightningClient.projects_service_list_memberships")
@mock.patch("lightning_app.utilities.network.LightningClient.lightningapp_instance_service_list_lightningapp_instances")
@mock.patch("lightning_app.utilities.network.LightningClient.cluster_service_get_cluster")
def test_delete_cluster_with_running_apps(
api_get: mock.MagicMock,
api_list_instances: mock.MagicMock,
api_list_memberships: mock.MagicMock,
spec,
):
api_list_memberships.return_value = V1ListMembershipsResponse([V1Membership(project_id="test-project")])
api_list_instances.return_value = V1ListLightningappInstancesResponse(
lightningapps=[
Externalv1LightningappInstance(
status=V1LightningappInstanceStatus(
phase=V1LightningappInstanceState.RUNNING,
)
)
]
)
api_get.return_value = V1GetClusterResponse(spec=spec)
cluster_manager = AWSClusterManager()

with pytest.raises(click.ClickException) as exception:
cluster_manager.delete(cluster_id="test-7")
exception.match("apps running")


class Test_check_cluster_id_is_valid:
@pytest.mark.parametrize("name", ["test-7", "0wildgoat"])
def test_valid(self, name):
Expand Down

0 comments on commit a476a23

Please sign in to comment.