
release: 1.8.5.post #16086

Merged: 10 commits, Dec 16, 2022
3 changes: 2 additions & 1 deletion .github/workflows/ci-app-examples.yml
@@ -89,7 +89,8 @@ jobs:
- name: Install Lightning package
env:
PACKAGE_NAME: ${{ matrix.pkg-name }}
run: pip install -e .
# do not use -e because it will make both packages available since it adds `src` to `sys.path` automatically
run: pip install .

- name: Adjust tests
if: ${{ matrix.pkg-name == 'lightning' }}
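For context on the `-e` comment above (an illustrative sketch, not part of this diff): with a src-layout checkout, an editable install typically puts the repository's `src` directory on `sys.path`, which makes the sibling packages importable even though only one of them was requested. A quick way to observe this from a Python shell after `pip install -e .`:

    import importlib.util
    import sys

    # After an editable install in the repo root, `src` usually appears on sys.path,
    # so every package that lives under it resolves, not just the one being tested.
    print([p for p in sys.path if p.rstrip("/").endswith("src")])
    for name in ("lightning", "pytorch_lightning", "lightning_app"):
        print(name, "importable:", importlib.util.find_spec(name) is not None)

A plain `pip install .` installs only the package selected via `PACKAGE_NAME`, so the job exercises what is actually shipped.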
1 change: 1 addition & 0 deletions .github/workflows/release-pypi.yml
@@ -108,6 +108,7 @@ jobs:
branch = f"origin/builds/{os.getenv('TAG')}"
while True:
remote_refs = [b.name for b in repo.remote().refs]
print([n for n in remote_refs if "builds" in n])
if branch in remote_refs:
break
time.sleep(60)
@@ -10,7 +10,7 @@ def run(self):
trainer = L.Trainer(max_epochs=10, strategy="ddp")
trainer.fit(model)

# 8 GPU: (2 nodes of 4 x v100)
# 8 GPUs: (2 nodes of 4 x v100)
component = LightningTrainerMultiNode(
LightningTrainerDistributed,
num_nodes=4,
2 changes: 1 addition & 1 deletion examples/app_multi_node/train_lite.py
@@ -31,7 +31,7 @@ def run(self):
optimizer.step()


# Run over 2 nodes of 4 x V100
# 8 GPUs: (2 nodes of 4 x v100)
app = L.LightningApp(
LiteMultiNode(
LitePyTorchDistributed,
4 changes: 2 additions & 2 deletions examples/app_multi_node/train_lt.py
@@ -11,10 +11,10 @@ def run(self):
trainer.fit(model)


# 8 GPU: (2 nodes of 4 x v100)
# 8 GPUs: (2 nodes of 4 x v100)
component = LightningTrainerMultiNode(
LightningTrainerDistributed,
num_nodes=4,
num_nodes=2,
cloud_compute=L.CloudCompute("gpu-fast-multi"), # 4 x v100
)
app = L.LightningApp(component)
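With this change the comment and the arguments agree: the total GPU count is num_nodes times the GPUs provided by each machine. A trivial sanity check (the 4-GPU figure for "gpu-fast-multi" is taken from the comments in these examples, not verified independently):

    num_nodes = 2
    gpus_per_node = 4  # "gpu-fast-multi" is annotated as 4 x V100 in these examples
    assert num_nodes * gpus_per_node == 8  # matches the "# 8 GPUs" comment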
4 changes: 2 additions & 2 deletions examples/app_multi_node/train_lt_script.py
@@ -2,11 +2,11 @@
from lightning.app.components import LightningTrainerScript
from lightning.app.utilities.packaging.cloud_compute import CloudCompute

# Run over 2 nodes of 4 x V100
# 8 GPUs: (2 nodes of 4 x v100)
app = L.LightningApp(
LightningTrainerScript(
"pl_boring_script.py",
num_nodes=2,
cloud_compute=CloudCompute("gpu-fast-multi"),
cloud_compute=CloudCompute("gpu-fast-multi"), # 4 x v100
),
)
2 changes: 1 addition & 1 deletion examples/app_multi_node/train_pytorch.py
@@ -56,6 +56,6 @@ def run(self, main_address: str, main_port: int, num_nodes: int, node_rank: int)


# 8 GPUs: (2 nodes x 4 v 100)
compute = L.CloudCompute("gpu-fast-multi") # 4xV100
compute = L.CloudCompute("gpu-fast-multi") # 4 x v100
component = MultiNode(PyTorchDistributed, num_nodes=2, cloud_compute=compute)
app = L.LightningApp(component)
4 changes: 2 additions & 2 deletions examples/app_multi_node/train_pytorch_spawn.py
@@ -42,11 +42,11 @@ def run(
optimizer.step()


# Run over 2 nodes of 4 x V100
# 8 GPUs: (2 nodes x 4 v 100)
app = L.LightningApp(
PyTorchSpawnMultiNode(
PyTorchDistributed,
num_nodes=2,
cloud_compute=L.CloudCompute("gpu-fast-multi"), # 4 x V100
cloud_compute=L.CloudCompute("gpu-fast-multi"), # 4 x v100
)
)
10 changes: 5 additions & 5 deletions examples/pl_loops/kfold.py
@@ -152,12 +152,12 @@ def test_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> None
# self.reset(...) #
# self.on_run_start(...) #
# #
# while not self.done: #
# self.on_advance_start(...) #
# self.advance(...) #
# self.on_advance_end(...) #
# while not self.done: #
# self.on_advance_start(...) #
# self.advance(...) #
# self.on_advance_end(...) #
# #
# return self.on_run_end(...) #
# return self.on_run_end(...) #
#############################################################################################


2 changes: 1 addition & 1 deletion src/lightning/__version__.py
@@ -1 +1 @@
version = "1.8.5"
version = "1.8.5.post0"
3 changes: 3 additions & 0 deletions src/lightning_app/CHANGELOG.md
@@ -24,6 +24,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
- Fixed `AutoScaler` raising an exception when non-default cloud compute is specified ([#15991](https://github.com/Lightning-AI/lightning/pull/15991))
- Fixed and improved the login flow ([#16052](https://github.com/Lightning-AI/lightning/pull/16052))
- Fixed the debugger detection mechanism for lightning App in VSCode ([#16068](https://github.com/Lightning-AI/lightning/pull/16068))
- Fixed bug where components that are re-instantiated several times failed to initialize if they were modifying `self.lightningignore` ([#16080](https://github.com/Lightning-AI/lightning/pull/16080))
- Fixed a bug where apps that had previously been deleted could not be run again from the CLI ([#16082](https://github.com/Lightning-AI/lightning/pull/16082))
- Fixed install/upgrade - removing single quote ([#16079](https://github.com/Lightning-AI/lightning/pull/16079))


## [1.8.4] - 2022-12-08
2 changes: 1 addition & 1 deletion src/lightning_app/__version__.py
@@ -1 +1 @@
version = "1.8.5"
version = "1.8.5.post0"
10 changes: 2 additions & 8 deletions src/lightning_app/core/flow.py
@@ -10,13 +10,7 @@
from lightning_app.frontend import Frontend
from lightning_app.storage import Path
from lightning_app.storage.drive import _maybe_create_drive, Drive
from lightning_app.utilities.app_helpers import (
_is_json_serializable,
_lightning_dispatched,
_LightningAppRef,
_set_child_name,
is_overridden,
)
from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, _set_child_name, is_overridden
from lightning_app.utilities.component import _sanitize_state
from lightning_app.utilities.exceptions import ExitAppException
from lightning_app.utilities.introspection import _is_init_context, _is_run_context
@@ -325,7 +319,7 @@ def lightningignore(self) -> Tuple[str, ...]:

@lightningignore.setter
def lightningignore(self, lightningignore: Tuple[str, ...]) -> None:
if _lightning_dispatched():
if self._backend is not None:
raise RuntimeError(
f"Your app has been already dispatched, so modifying the `{self.name}.lightningignore` does not have an"
" effect"
9 changes: 2 additions & 7 deletions src/lightning_app/core/work.py
@@ -11,12 +11,7 @@
from lightning_app.storage import Path
from lightning_app.storage.drive import _maybe_create_drive, Drive
from lightning_app.storage.payload import Payload
from lightning_app.utilities.app_helpers import (
_is_json_serializable,
_lightning_dispatched,
_LightningAppRef,
is_overridden,
)
from lightning_app.utilities.app_helpers import _is_json_serializable, _LightningAppRef, is_overridden
from lightning_app.utilities.component import _is_flow_context, _sanitize_state
from lightning_app.utilities.enum import (
CacheCallsKeys,
@@ -267,7 +262,7 @@ def lightningignore(self) -> Tuple[str, ...]:

@lightningignore.setter
def lightningignore(self, lightningignore: Tuple[str, ...]) -> None:
if _lightning_dispatched():
if self._backend is not None:
raise RuntimeError(
f"Your app has been already dispatched, so modifying the `{self.name}.lightningignore` does not have an"
" effect"
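In both `flow.py` and `work.py` the setter now checks the component's own `_backend` instead of the global `_lightning_dispatched()` flag, so a component that is instantiated several times can still set its ignore patterns as long as it has not been attached to a backend. A minimal sketch of the intended usage, assuming the unified `lightning` package (the flow below is illustrative, not taken from this PR):

    import lightning as L

    class Flow(L.LightningFlow):
        def __init__(self):
            super().__init__()
            # Allowed: no backend is attached yet, so the guard does not trigger.
            self.lightningignore = ("*.ckpt", "data/")

        def run(self):
            pass

    # Re-instantiating the flow (e.g. when the app module is imported again) now works;
    # setting `lightningignore` after dispatch still raises a RuntimeError.
    app = L.LightningApp(Flow())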
38 changes: 22 additions & 16 deletions src/lightning_app/runners/cloud.py
@@ -320,52 +320,58 @@ def dispatch(
self._ensure_cluster_project_binding(project.project_id, cluster_id)

# Resolve the app name, instance, and cluster ID
existing_app = None
existing_instance = None
app_name = app_config.name

# List existing instances
# List existing apps
# TODO: Add pagination, otherwise this could break if users have a lot of apps.
find_instances_resp = self.backend.client.lightningapp_instance_service_list_lightningapp_instances(
all_apps = self.backend.client.lightningapp_v2_service_list_lightningapps_v2(
project_id=project.project_id
)
).lightningapps

# Search for instances with the given name (possibly with some random characters appended)
# Search for apps with the given name (possibly with some random characters appended)
pattern = re.escape(f"{app_name}-") + ".{4}"
instances = [
all_apps = [
lightningapp
for lightningapp in find_instances_resp.lightningapps
for lightningapp in all_apps
if lightningapp.name == app_name or (re.fullmatch(pattern, lightningapp.name) is not None)
]

# If instances exist and cluster is None, mimic cluster selection logic to choose a default
if cluster_id is None and len(instances) > 0:
# If apps exist and cluster is None, mimic cluster selection logic to choose a default
if cluster_id is None and len(all_apps) > 0:
# Determine the cluster ID
cluster_id = self._get_default_cluster(project.project_id)

# If an instance exists on the cluster with the same base name - restart it
for instance in instances:
if instance.spec.cluster_id == cluster_id:
existing_instance = instance
for app in all_apps:
instances = self.backend.client.lightningapp_instance_service_list_lightningapp_instances(
project_id=project.project_id,
app_id=app.id,
).lightningapps
if instances and instances[0].spec.cluster_id == cluster_id:
existing_app = app
existing_instance = instances[0]
break

# If instances exist but not on the cluster - choose a randomised name
if len(instances) > 0 and existing_instance is None:
# If apps exist but not on the cluster - choose a randomised name
if len(all_apps) > 0 and existing_app is None:
name_exists = True
while name_exists:
random_name = self._randomise_name(app_name)
name_exists = any([instance.name == random_name for instance in instances])
name_exists = any([app.name == random_name for app in all_apps])

app_name = random_name

# Create the app if it doesn't exist
if existing_instance is None:
if existing_app is None:
app_body = Body7(name=app_name, can_download_source_code=True)
lit_app = self.backend.client.lightningapp_v2_service_create_lightningapp_v2(
project_id=project.project_id, body=app_body
)
app_id = lit_app.id
else:
app_id = existing_instance.spec.app_id
app_id = existing_app.id

# check if user has sufficient credits to run an app
# if so set the desired state to running otherwise, create the app in stopped state,
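In outline, name resolution is now keyed on apps rather than on instances, which is what allows an app whose instances were deleted to be run again under the same name. A condensed sketch of the new lookup, using only the client calls visible in this hunk (hypothetical helper; error handling and pagination omitted):

    import re

    def _find_existing_app(client, project_id, app_name, cluster_id):
        """Hypothetical helper mirroring the dispatch logic above."""
        pattern = re.escape(f"{app_name}-") + ".{4}"
        apps = client.lightningapp_v2_service_list_lightningapps_v2(project_id=project_id).lightningapps
        apps = [a for a in apps if a.name == app_name or re.fullmatch(pattern, a.name)]
        for app in apps:
            instances = client.lightningapp_instance_service_list_lightningapp_instances(
                project_id=project_id, app_id=app.id
            ).lightningapps
            # Restart an existing instance only if it lives on the selected cluster.
            if instances and instances[0].spec.cluster_id == cluster_id:
                return app, instances[0]
        return None, None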
2 changes: 1 addition & 1 deletion src/lightning_app/utilities/cli_helpers.py
@@ -281,7 +281,7 @@ def _check_version_and_upgrade():
prompt = f"A newer version of {__package_name__} is available ({new_version}). Would you like to upgrade?"

if click.confirm(prompt, default=True):
command = f"pip install '{__package_name__}=={new_version}'"
command = f"pip install {__package_name__}=={new_version}"

logger.info(f"⚡ RUN: {command}")

2 changes: 1 addition & 1 deletion src/lightning_lite/__version__.py
@@ -1 +1 @@
version = "1.8.5"
version = "1.8.5.post0"
2 changes: 1 addition & 1 deletion src/pytorch_lightning/CHANGELOG.md
@@ -7,7 +7,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

## [1.8.5] - 2022-12-15

- minor cleaning
- Add function to remove checkpoint to allow override for extended classes ([#16067](https://github.com/Lightning-AI/lightning/pull/16067))


## [1.8.4] - 2022-12-08
2 changes: 1 addition & 1 deletion src/pytorch_lightning/__version__.py
@@ -1 +1 @@
version = "1.8.5"
version = "1.8.5.post0"
10 changes: 7 additions & 3 deletions src/pytorch_lightning/callbacks/model_checkpoint.py
@@ -640,7 +640,7 @@ def _save_last_checkpoint(self, trainer: "pl.Trainer", monitor_candidates: Dict[
previous, self.last_model_path = self.last_model_path, filepath
self._save_checkpoint(trainer, filepath)
if previous and previous != filepath:
trainer.strategy.remove_checkpoint(previous)
self._remove_checkpoint(trainer, previous)

def _save_monitor_checkpoint(self, trainer: "pl.Trainer", monitor_candidates: Dict[str, Tensor]) -> None:
assert self.monitor
@@ -659,7 +659,7 @@ def _save_none_monitor_checkpoint(self, trainer: "pl.Trainer", monitor_candidate
previous, self.best_model_path = self.best_model_path, filepath
self._save_checkpoint(trainer, filepath)
if self.save_top_k == 1 and previous and previous != filepath:
trainer.strategy.remove_checkpoint(previous)
self._remove_checkpoint(trainer, previous)

def _update_best_and_save(
self, current: Tensor, trainer: "pl.Trainer", monitor_candidates: Dict[str, Tensor]
@@ -701,7 +701,7 @@ def _update_best_and_save(
self._save_checkpoint(trainer, filepath)

if del_filepath is not None and filepath != del_filepath:
trainer.strategy.remove_checkpoint(del_filepath)
self._remove_checkpoint(trainer, del_filepath)

def to_yaml(self, filepath: Optional[_PATH] = None) -> None:
"""Saves the `best_k_models` dict containing the checkpoint paths with the corresponding scores to a YAML
@@ -718,3 +718,7 @@ def file_exists(self, filepath: _PATH, trainer: "pl.Trainer") -> bool:
state to diverge between ranks."""
exists = self._fs.exists(filepath)
return trainer.strategy.broadcast(exists)

def _remove_checkpoint(self, trainer: "pl.Trainer", filepath: str) -> None:
"""Calls the strategy to remove the checkpoint file."""
trainer.strategy.remove_checkpoint(filepath)
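The new `_remove_checkpoint` hook exists so that subclasses can intercept checkpoint deletion, which is the motivation given in the pytorch_lightning changelog entry above. A minimal sketch of such an override; the archiving behaviour is illustrative, not part of this PR:

    import shutil

    from pytorch_lightning.callbacks import ModelCheckpoint

    class ArchivingModelCheckpoint(ModelCheckpoint):
        def _remove_checkpoint(self, trainer, filepath: str) -> None:
            # Keep a copy before the strategy deletes the file.
            shutil.copyfile(filepath, filepath + ".bak")
            super()._remove_checkpoint(trainer, filepath)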
6 changes: 5 additions & 1 deletion tests/tests_app/cli/test_cloud_cli.py
@@ -11,6 +11,7 @@
from lightning_cloud.openapi import (
V1LightningappV2,
V1ListLightningappInstancesResponse,
V1ListLightningappsV2Response,
V1ListMembershipsResponse,
V1Membership,
)
@@ -36,6 +37,9 @@ class FakeResponse:


class FakeLightningClient:
def lightningapp_v2_service_list_lightningapps_v2(self, *args, **kwargs):
return V1ListLightningappsV2Response(lightningapps=[])

def lightningapp_instance_service_list_lightningapp_instances(self, *args, **kwargs):
return V1ListLightningappInstancesResponse(lightningapps=[])

@@ -182,7 +186,7 @@ def __init__(self, *args, message, **kwargs):
super().__init__()
self.message = message

def lightningapp_instance_service_list_lightningapp_instances(self, *args, **kwargs):
def lightningapp_v2_service_list_lightningapps_v2(self, *args, **kwargs):
raise ApiException(
http_resp=HttpHeaderDict(
data=self.message,
2 changes: 1 addition & 1 deletion tests/tests_app/components/multi_node/test_trainer.py
@@ -66,7 +66,7 @@ def test_trainer_run_executor_mps_forced_cpu(accelerator_given, accelerator_expe
({"strategy": "ddp_sharded_spawn"}, {"strategy": "ddp_sharded"}),
],
)
@pytest.mark.skipif(not module_available("pytorch"), reason="Lightning is not available")
@pytest.mark.skipif(not module_available("torch"), reason="PyTorch is not available")
def test_trainer_run_executor_arguments_choices(
args_given: dict,
args_expected: dict,
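The previous marker probed for a module named "pytorch", which is never importable (the distribution is PyTorch but the module is `torch`), so the test was always skipped regardless of whether PyTorch was installed. A quick illustration, assuming the `module_available` helper from `lightning_utilities`:

    from lightning_utilities.core.imports import module_available

    print(module_available("pytorch"))  # False: there is no top-level module called "pytorch"
    print(module_available("torch"))    # True whenever PyTorch is installed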