From fa0e1423ce5c769aa6c4051598e5dab71c35a1b7 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 10 Dec 2022 01:12:56 +0900 Subject: [PATCH 1/5] dont try to replicate new works in the existing machine --- src/lightning_app/components/auto_scaler.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/lightning_app/components/auto_scaler.py b/src/lightning_app/components/auto_scaler.py index fc6a1a873769b..66cbd7beb80bc 100644 --- a/src/lightning_app/components/auto_scaler.py +++ b/src/lightning_app/components/auto_scaler.py @@ -449,8 +449,15 @@ def workers(self) -> List[LightningWork]: def create_work(self) -> LightningWork: """Replicates a LightningWork instance with args and kwargs provided via ``__init__``.""" - # TODO: Remove `start_with_flow=False` for faster initialization on the cloud - self._work_kwargs.update(dict(start_with_flow=False)) + cloud_compute = self._work_kwargs.get("cloud_compute", None) + self._work_kwargs.update( + dict( + # TODO: Remove `start_with_flow=False` for faster initialization on the cloud + start_with_flow=False, + # don't try to create a work inside a running machine + cloud_compute=cloud_compute.clone() if cloud_compute else None, + ) + ) return self._work_cls(*self._work_args, **self._work_kwargs) def add_work(self, work) -> str: From a0128aeafb9ae5d9aad9a63068df3faaae8ce269 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 10 Dec 2022 01:23:12 +0900 Subject: [PATCH 2/5] update chglog --- src/lightning_app/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lightning_app/CHANGELOG.md b/src/lightning_app/CHANGELOG.md index 7439d6a4becba..5dc5ca769c0b3 100644 --- a/src/lightning_app/CHANGELOG.md +++ b/src/lightning_app/CHANGELOG.md @@ -43,6 +43,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed Registration for CloudComputes of Works in `L.app.structures` ([#15964](https://github.com/Lightning-AI/lightning/pull/15964)) +- Fixed `AutoScaler` raising an exception when non-default cloud compute is specified ([#15991](https://github.com/Lightning-AI/lightning/pull/15991)) + + ## [1.8.4] - 2022-12-08 ### Added From f84bfad6e4d025463e6fc7e7d5733f3e2eb8e993 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 10 Dec 2022 05:18:35 +0900 Subject: [PATCH 3/5] Update comment --- src/lightning_app/components/auto_scaler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lightning_app/components/auto_scaler.py b/src/lightning_app/components/auto_scaler.py index 66cbd7beb80bc..13948ba50af89 100644 --- a/src/lightning_app/components/auto_scaler.py +++ b/src/lightning_app/components/auto_scaler.py @@ -454,7 +454,7 @@ def create_work(self) -> LightningWork: dict( # TODO: Remove `start_with_flow=False` for faster initialization on the cloud start_with_flow=False, - # don't try to create a work inside a running machine + # don't try to create multiple works in a single machine cloud_compute=cloud_compute.clone() if cloud_compute else None, ) ) From fffbb1aa56f0a42586aec0f30dc967c6b433a1b4 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sat, 10 Dec 2022 05:22:33 +0900 Subject: [PATCH 4/5] Update src/lightning_app/components/auto_scaler.py From 29f5448791f9940314747cb859d53b326dbd73b8 Mon Sep 17 00:00:00 2001 From: Akihiro Nitta Date: Sun, 11 Dec 2022 09:27:04 +0900 Subject: [PATCH 5/5] add test --- tests/tests_app/components/test_auto_scaler.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/tests_app/components/test_auto_scaler.py b/tests/tests_app/components/test_auto_scaler.py index 436c3517d01ca..672b05bbc9a15 100644 --- a/tests/tests_app/components/test_auto_scaler.py +++ b/tests/tests_app/components/test_auto_scaler.py @@ -3,7 +3,7 @@ import pytest -from lightning_app import LightningWork +from lightning_app import CloudCompute, LightningWork from lightning_app.components import AutoScaler @@ -90,3 +90,11 @@ def test_scale(replicas, metrics, expected_replicas): ) assert auto_scaler.scale(replicas, metrics) == expected_replicas + + +def test_create_work_cloud_compute_cloned(): + """Test CloudCompute is cloned to avoid creating multiple works in a single machine.""" + cloud_compute = CloudCompute("gpu") + auto_scaler = AutoScaler(EmptyWork, cloud_compute=cloud_compute) + _ = auto_scaler.create_work() + assert auto_scaler._work_kwargs["cloud_compute"] is not cloud_compute