Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[App] Raise error when launching app on multiple clusters #15484

Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
32eb9fc
Error when running on multiple clusters
luca3rd Nov 2, 2022
5a60a9d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 2, 2022
d9bc71c
Revert this in separate PR: keep this focused
luca3rd Nov 5, 2022
cad231f
Improve testing
luca3rd Nov 5, 2022
0aa51d8
fixup! Improve testing
luca3rd Nov 6, 2022
9de897e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 6, 2022
18b8b70
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 7, 2022
ac9ccab
pass flake8
luca3rd Nov 7, 2022
97eb392
Update changelog
luca3rd Nov 7, 2022
17af0f8
Address PR feedback
luca3rd Nov 9, 2022
be6b745
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 9, 2022
2af76cd
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 9, 2022
58c733f
remove unused import
luca3rd Nov 9, 2022
234c782
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 10, 2022
d7bd429
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 21, 2022
7a093a8
Reword error message
luca3rd Nov 21, 2022
3474c5b
Error if running on cluster that doesn't exist
luca3rd Nov 21, 2022
9cd61e5
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 21, 2022
0701419
fixup! Error if running on cluster that doesn't exist
luca3rd Nov 21, 2022
165d4ba
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 21, 2022
74945de
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
Borda Nov 21, 2022
e61ac3d
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 28, 2022
bc9fb75
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 28, 2022
511fac2
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
luca3rd Nov 28, 2022
cc6f4e9
Remove unsued import
luca3rd Nov 29, 2022
09e06f4
Merge branch 'master' into ENG-497-bug-cant-run-app-on-a-cluster-if-i…
Borda Nov 30, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/lightning_app/CHANGELOG.md
Expand Up @@ -53,6 +53,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed bug with Multi Node Component and add some examples ([#15557](https://github.com/Lightning-AI/lightning/pull/15557))

- Fixed bug when launching apps on multiple clusters ([#15484](https://github.com/Lightning-AI/lightning/pull/15484))



## [1.8.0] - 2022-11-01
Expand Down
17 changes: 17 additions & 0 deletions src/lightning_app/runners/cloud.py
Expand Up @@ -7,6 +7,7 @@
import traceback
from dataclasses import dataclass
from pathlib import Path
from textwrap import dedent
from typing import Any, Callable, List, Optional, Union

import click
Expand All @@ -16,6 +17,7 @@
Body7,
Body8,
Body9,
Externalv1LightningappInstance,
Gridv1ImageSpec,
V1BuildSpec,
V1DependencyFileInfo,
Expand Down Expand Up @@ -285,6 +287,7 @@ def dispatch(
elif CLOUD_QUEUE_TYPE == "redis":
queue_server_type = V1QueueServerType.REDIS

existing_instance: Optional[Externalv1LightningappInstance] = None
if find_instances_resp.lightningapps:
existing_instance = find_instances_resp.lightningapps[0]

Expand Down Expand Up @@ -329,6 +332,20 @@ def dispatch(
default=True,
)
app_config.cluster_id = None
if existing_instance and existing_instance.spec.cluster_id != app_config.cluster_id:
luca3rd marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
dedent(
f"""\
Can not run app '{app_config.name}' on cluster {app_config.cluster_id} since it already exists on {existing_instance.spec.cluster_id}
(moving apps between clusters is not supported).
luca3rd marked this conversation as resolved.
Show resolved Hide resolved

You can either:
a. rename the app to run on {app_config.cluster_id} with the --name option
lightning run app script.py --name (new name) --cloud --cluster-id {app_config.cluster_id}
b. delete the app running on {existing_instance.spec.cluster_id} in the UI before running this command.
lantiga marked this conversation as resolved.
Show resolved Hide resolved
""" # noqa: E501
)
)

if app_config.cluster_id is not None:
self._ensure_cluster_project_binding(project.project_id, app_config.cluster_id)
Expand Down
98 changes: 62 additions & 36 deletions tests/tests_app/runners/test_cloud.py
@@ -1,7 +1,9 @@
import logging
import os
from contextlib import contextmanager
from copy import copy
from pathlib import Path
from textwrap import dedent
from unittest import mock
from unittest.mock import MagicMock

Expand Down Expand Up @@ -92,75 +94,99 @@ def get_cloud_runtime_request_body(**kwargs) -> "Body8":
return Body8(**default_request_body)


@pytest.fixture
def cloud_backend(monkeypatch):
cloud_backend = mock.MagicMock()
monkeypatch.setattr(cloud, "LocalSourceCodeDir", mock.MagicMock())
monkeypatch.setattr(cloud, "_prepare_lightning_wheels_and_requirements", mock.MagicMock())
monkeypatch.setattr(backends, "CloudBackend", mock.MagicMock(return_value=cloud_backend))
return cloud_backend


@pytest.fixture
def project_id():
return "test-project-id"


@contextmanager
def does_not_raise():
yield
luca3rd marked this conversation as resolved.
Show resolved Hide resolved


DEFAULT_CLUSTER = "litng-ai-03"


class TestAppCreationClient:
"""Testing the calls made using GridRestClient to create the app."""

# TODO: remove this test once there is support for multiple instances
@mock.patch("lightning_app.runners.backends.cloud.LightningClient", mock.MagicMock())
def test_new_instance_on_different_cluster(self, monkeypatch):
@pytest.mark.parametrize(
"old_cluster,new_cluster,expected_raise",
[
("test", "other", pytest.raises(ValueError)),
("test", "test", does_not_raise()),
(None, None, does_not_raise()),
(None, "litng-ai-03", does_not_raise()),
("litng-ai-03", None, does_not_raise()),
],
)
def test_new_instance_on_different_cluster(
self, cloud_backend, project_id, old_cluster, new_cluster, expected_raise
):
app_name = "test-app-name"
original_cluster = "cluster-001"
new_cluster = "cluster-002"

mock_client = mock.MagicMock()
mock_client.projects_service_list_memberships.return_value = V1ListMembershipsResponse(
memberships=[V1Membership(name="Default Project", project_id="default-project-id")]
memberships=[V1Membership(name="Default Project", project_id=project_id)]
)
mock_client.lightningapp_v2_service_create_lightningapp_release.return_value = V1LightningappRelease(
cluster_id=new_cluster
)

# backend converts "None" cluster to "litng-ai-03"
mock_client.cluster_service_list_clusters.return_value = V1ListClustersResponse(
[Externalv1Cluster(id=original_cluster), Externalv1Cluster(id=new_cluster)]
[
Externalv1Cluster(id=old_cluster or DEFAULT_CLUSTER),
luca3rd marked this conversation as resolved.
Show resolved Hide resolved
Externalv1Cluster(id=new_cluster or DEFAULT_CLUSTER),
]
)

cloud_backend = mock.MagicMock()
cloud_backend.client = mock_client
monkeypatch.setattr(cloud, "LocalSourceCodeDir", mock.MagicMock())
monkeypatch.setattr(cloud, "_prepare_lightning_wheels_and_requirements", mock.MagicMock())
monkeypatch.setattr(backends, "CloudBackend", mock.MagicMock(return_value=cloud_backend))

app = mock.MagicMock()
app.flows = []
app.frontend = {}

existing_instance = MagicMock()
existing_instance.status.phase = V1LightningappInstanceState.STOPPED
existing_instance.spec.cluster_id = original_cluster
existing_instance.spec.cluster_id = old_cluster or DEFAULT_CLUSTER
mock_client.lightningapp_instance_service_list_lightningapp_instances.return_value = (
V1ListLightningappInstancesResponse(lightningapps=[existing_instance])
)

cloud_runtime = cloud.CloudRuntime(app=app, entrypoint_file="entrypoint.py")
cloud_runtime._check_uploaded_folder = mock.MagicMock()

# without requirements file
# setting is_file to False so requirements.txt existence check will return False
monkeypatch.setattr(Path, "is_file", lambda *args, **kwargs: False)
monkeypatch.setattr(cloud, "Path", Path)

# This is the main assertion:
# we have an existing instance on `cluster-001`
# but we want to run this app on `cluster-002`
cloud_runtime.dispatch(name=app_name, cluster_id=new_cluster)

body = Body8(
cluster_id=new_cluster,
app_entrypoint_file=mock.ANY,
enable_app_server=True,
flow_servers=[],
image_spec=None,
works=[],
local_source=True,
dependency_cache_key=mock.ANY,
user_requested_flow_compute_config=mock.ANY,
)
cloud_runtime.backend.client.lightningapp_v2_service_create_lightningapp_release.assert_called_once_with(
project_id="default-project-id", app_id=mock.ANY, body=body
)
cloud_runtime.backend.client.projects_service_create_project_cluster_binding.assert_called_once_with(
project_id="default-project-id",
body=V1ProjectClusterBinding(cluster_id=new_cluster, project_id="default-project-id"),
)
with expected_raise as exception:
cloud_runtime.dispatch(name=app_name, cluster_id=new_cluster)

if exception is not None:
old_cluster = old_cluster or DEFAULT_CLUSTER
new_cluster = new_cluster or DEFAULT_CLUSTER
assert str(exception.value) == dedent(
f"""\
Can not run app '{app_name}' on cluster {new_cluster} since it already exists on {old_cluster}
(moving apps between clusters is not supported).

You can either:
a. rename the app to run on {new_cluster} with the --name option
lightning run app script.py --name (new name) --cloud --cluster-id {new_cluster}
b. delete the app running on {old_cluster} in the UI before running this command.
luca3rd marked this conversation as resolved.
Show resolved Hide resolved
"""
)

@pytest.mark.parametrize("flow_cloud_compute", [None, CloudCompute(name="t2.medium")])
@mock.patch("lightning_app.runners.backends.cloud.LightningClient", mock.MagicMock())
Expand Down