
Releasing 1.8.3 #15757

Merged: 29 commits, merged on Nov 23, 2022
37fd1ab
chlog update
tchaton Nov 21, 2022
6694c3d
Fix typo in script name (#15724)
yiftachbeer Nov 19, 2022
a151ff5
Torch inference mode for prediction (#15719)
Nov 19, 2022
8b91b49
[App] Update multi-node examples (#15700)
ethanwharris Nov 21, 2022
8e22225
feature(docs/app/lit_tabs): add works (#15731)
yurijmikhalevich Nov 21, 2022
6a4f221
[App] Fix VSCode IDE debugger (#15747)
tchaton Nov 21, 2022
669b28a
Update tensorboard requirement from <2.11.0,>=2.9.1 to >=2.9.1,<2.12.…
dependabot[bot] Nov 21, 2022
49f3da7
Update beautifulsoup4 requirement from <=4.8.2 to <4.11.2 in /require…
dependabot[bot] Nov 21, 2022
9351d41
[App] Fix multi-node pytorch example CI (#15753)
ethanwharris Nov 21, 2022
abf0a40
[App] Improve `LightningTrainerScript` start-up time (#15751)
ethanwharris Nov 21, 2022
69f4aea
Enable Probot CheckGroup v5 (#15670)
carmocca Nov 21, 2022
636a46c
[App] Enable properties for the Lightning flow (#15750)
tchaton Nov 21, 2022
0e6a3a3
test for Enable setting property (#15755)
Borda Nov 21, 2022
23ec3c4
Move s3fs to cloud extras (#15729)
lantiga Nov 21, 2022
7d8d21b
Revert new Hydra launch behavior (#15737)
awaelchli Nov 21, 2022
eeb7166
FCCV Docs (#15598)
justusschock Nov 21, 2022
eac6c3f
Switch from tensorboard to tensorboardx in logger (#15728)
lantiga Nov 21, 2022
2ac7060
resolve conflicts
awaelchli Nov 21, 2022
9f383cb
Fix azure path excludes (#15756)
carmocca Nov 22, 2022
50e2ec7
Disable XSRF protection in StreamlitFrontend to support upload in loc…
lantiga Nov 22, 2022
de7b37a
Enable Probot CheckGroup v5.1 (#15763)
carmocca Nov 22, 2022
a2c7cce
Bump pytest from 7.1.3 to 7.2.0 in /requirements (#15677)
dependabot[bot] Nov 22, 2022
00e9292
Fix the `examples/app_dag` App (#14359)
kaushikb11 Nov 22, 2022
afa51e3
mergify: drop ready for draft (#15766)
Borda Nov 22, 2022
555fe82
lightning delete cluster CLI command help text update (#15760)
rlizzo Nov 22, 2022
eeacf69
Deduplicate top level lighting CLI command groups (#15761)
rlizzo Nov 22, 2022
696fe7d
releasing 1.8.3
Borda Nov 22, 2022
e002452
CI: lite on GPU
Borda Nov 22, 2022
ed26322
Fix App Docs for lightning ssh-keys command (#15773)
rlizzo Nov 22, 2022
7 changes: 4 additions & 3 deletions .azure/app-cloud-e2e.yml
@@ -35,9 +35,10 @@ pr:
       - "tests/tests_app_examples/**"
       - "setup.py"
       - ".actions/**"
-      - "!requirements/app/docs.txt"
-      - "!*.md"
-      - "!**/*.md"
+    exclude:
+      - "requirements/app/docs.txt"
+      - "*.md"
+      - "**/*.md"

 # variables are automatically exported as environment variables so this will override pip's default cache dir
 variables:
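The same include-to-exclude migration repeats across the Azure jobs below. For reference, this is the shape Azure Pipelines expects for PR path filters (a sketch with illustrative paths; `pr.paths` takes sibling `include`/`exclude` lists rather than `!`-negated include globs):

```yaml
pr:
  paths:
    include:
      - "src/**"       # run the job when these paths change...
    exclude:
      - "**/*.md"      # ...but skip markdown-only edits
```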
7 changes: 4 additions & 3 deletions .azure/gpu-benchmark.yml
@@ -23,9 +23,10 @@ pr:
       - ".azure/gpu-benchmark.yml"
       - "tests/tests_pytorch/benchmarks/**"
       - "requirements/pytorch/**"
-      - "!requirements/pytorch/docs.txt"
-      - "!*.md"
-      - "!**/*.md"
+    exclude:
+      - "requirements/pytorch/docs.txt"
+      - "*.md"
+      - "**/*.md"

 schedules:
   - cron: "0 0 * * *" # At the end of every day
8 changes: 4 additions & 4 deletions .azure/gpu-tests-lite.yml
@@ -30,9 +30,10 @@ pr:
       - "tests/tests_lite/**"
       - "setup.cfg" # includes pytest config
       - ".actions/**"
-      - "!requirements/lite/docs.txt"
-      - "!*.md"
-      - "!**/*.md"
+    exclude:
+      - "requirements/lite/docs.txt"
+      - "*.md"
+      - "**/*.md"

 jobs:
   - job: testing
@@ -74,7 +75,6 @@ jobs:
   - bash: |
       PYTORCH_VERSION=$(python -c "import torch; print(torch.__version__.split('+')[0])")
       python ./requirements/pytorch/adjust-versions.py requirements/lite/base.txt ${PYTORCH_VERSION}
-      python ./requirements/pytorch/adjust-versions.py requirements/lite/examples.txt ${PYTORCH_VERSION}
     displayName: 'Adjust dependencies'

   - bash: |
7 changes: 4 additions & 3 deletions .azure/gpu-tests-pytorch.yml
@@ -37,9 +37,10 @@ pr:
       - "requirements/lite/**"
       - "src/lightning_lite/**"
       - ".actions/**"
-      - "!requirements/**/docs.txt"
-      - "!*.md"
-      - "!**/*.md"
+    exclude:
+      - "requirements/**/docs.txt"
+      - "*.md"
+      - "**/*.md"

 jobs:
   - job: testing
7 changes: 4 additions & 3 deletions .azure/hpu-tests.yml
@@ -26,9 +26,10 @@ pr:
       - "tests/tests_pytorch/**"
       - "setup.cfg" # includes pytest config
       - ".actions/**"
-      - "!requirements/**/docs.txt"
-      - "!*.md"
-      - "!**/*.md"
+    exclude:
+      - "requirements/**/docs.txt"
+      - "*.md"
+      - "**/*.md"

 jobs:
   - job: testing
7 changes: 4 additions & 3 deletions .azure/ipu-tests.yml
@@ -23,9 +23,10 @@ pr:
       - "tests/tests_pytorch/**"
       - "setup.cfg" # includes pytest config
       - ".actions/**"
-      - "!requirements/**/docs.txt"
-      - "!*.md"
-      - "!**/*.md"
+    exclude:
+      - "requirements/**/docs.txt"
+      - "*.md"
+      - "**/*.md"

 variables:
   - name: poplar_sdk
2 changes: 1 addition & 1 deletion .github/checkgroup.yml
@@ -244,7 +244,7 @@ subprojects:
       - ".github/workflows/ci-app-examples.yml"
       - "src/lightning_app/**"
       - "tests/tests_app_examples/**"
-      - "examples/app_*"
+      - "examples/app_*/**"
       - "requirements/app/**"
       - "setup.py"
       - ".actions/**"
1 change: 1 addition & 0 deletions .github/mergify.yml
@@ -50,6 +50,7 @@ pull_request_rules:
   - name: Not ready yet
     conditions:
       - or:
+          - draft # filter-out GH draft PRs
          - label="has conflicts"
          - "#approved-reviews-by=0" # number of review approvals
          - "#changes-requested-reviews-by>=1" # no requested changes
2 changes: 1 addition & 1 deletion .github/workflows/ci-app-examples.yml
@@ -11,7 +11,7 @@ on:
       - ".github/workflows/ci-app-examples.yml"
       - "src/lightning_app/**"
       - "tests/tests_app_examples/**"
-      - "examples/app_*"
+      - "examples/app_*/**"
       - "requirements/app/**"
       - "setup.py"
       - ".actions/**"
2 changes: 1 addition & 1 deletion .github/workflows/ci-app-tests.yml
@@ -11,7 +11,7 @@ on:
       - ".github/workflows/ci-app-tests.yml"
       - "src/lightning_app/**"
       - "tests/tests_app/**"
-      - "examples/app_*" # some tests_app tests call examples files
+      - "examples/app_*/**" # some tests_app tests call examples files
       - "requirements/app/**"
       - "setup.py"
       - ".actions/**"
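The `examples/app_*` to `examples/app_*/**` fix above hinges on glob semantics: without a trailing `/**`, the pattern matches the directory entry itself but not the files inside it, so PRs touching example files never triggered these filters. GitHub Actions uses its own glob matcher, but Python's `glob` illustrates the same principle (the layout below is hypothetical, mirroring the repo's examples folder):

```python
import glob
import os
import tempfile

# Hypothetical layout mirroring the repo's examples folder
root = tempfile.mkdtemp()
os.makedirs(os.path.join(root, "examples", "app_dag"))
open(os.path.join(root, "examples", "app_dag", "app.py"), "w").close()

# "examples/app_*" matches only the directory entry itself...
flat = glob.glob(os.path.join(root, "examples", "app_*"))
# ...while "examples/app_*/**" also reaches the files inside it.
deep = glob.glob(os.path.join(root, "examples", "app_*", "**"), recursive=True)

print(any(p.endswith("app.py") for p in flat))  # False
print(any(p.endswith("app.py") for p in deep))  # True
```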
6 changes: 3 additions & 3 deletions .github/workflows/probot-check-group.yml
@@ -14,12 +14,12 @@ jobs:
     if: github.event.pull_request.draft == false
     timeout-minutes: 61 # in case something is wrong with the internal timeout
     steps:
-      - uses: Lightning-AI/probot@v4
+      - uses: Lightning-AI/probot@v5.1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          job: check-group
          interval: 180 # seconds
          timeout: 60 # minutes
-          maintainers: '@Lightning-AI/lai-frameworks'
-          owner: '@carmocca'
+          maintainers: 'Lightning-AI/lai-frameworks'
+          owner: 'carmocca'
5 changes: 2 additions & 3 deletions docs/source-app/examples/dag/dag_from_scratch.rst
@@ -39,10 +39,9 @@ First, let's define the component we need:
    :lines: 55-79

 And its run method executes the steps described above.
-Additionally, ``work.stop`` is used to reduce cost when running in the cloud.

 .. literalinclude:: ../../../examples/app_dag/app.py
-   :lines: 81-108
+   :lines: 80-103

 ----

@@ -51,4 +50,4 @@ Step 2: Define the scheduling
 *****************************

 .. literalinclude:: ../../../examples/app_dag/app.py
-   :lines: 109-137
+   :lines: 106-135
@@ -5,8 +5,7 @@


 class LightningTrainerDistributed(L.LightningWork):
-    @staticmethod
-    def run():
+    def run(self):
         model = BoringModel()
         trainer = L.Trainer(max_epochs=10, strategy="ddp")
         trainer.fit(model)
@@ -22,8 +22,7 @@ def distributed_train(local_rank: int, main_address: str, main_port: int, num_no
     # 2. PREPARE DISTRIBUTED MODEL
     model = torch.nn.Linear(32, 2)
     device = torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else torch.device("cpu")
-    device_ids = device if torch.cuda.is_available() else None
-    model = DistributedDataParallel(model, device_ids=device_ids).to(device)
+    model = DistributedDataParallel(model, device_ids=[local_rank] if torch.cuda.is_available() else None).to(device)

     # 3. SETUP LOSS AND OPTIMIZER
     criterion = torch.nn.MSELoss()
3 changes: 2 additions & 1 deletion docs/source-app/levels/basic/hero_components.rst
@@ -1,7 +1,8 @@
.. lit_tabs::
:titles: Hello world; Hello GPU world; PyTorch & ⚡⚡⚡ Trainer (1+ cloud GPUs); Train PyTorch (cloud GPU); Train PyTorch (32 cloud GPUs); Deploy a model on cloud GPUs; Run a model script; XGBoost; Streamlit demo
:code_files: /levels/basic/hello_components/hello_world.py; /levels/basic/hello_components/hello_world_gpu.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/train_pytorch.py; /levels/basic/hello_components/pt_multinode.py; /levels/basic/hello_components/deploy_model.py; /levels/basic/hello_components/run_ptl_script.py; /levels/basic/hello_components/xgboost.py; /levels/basic/hello_components/streamlit_demo.py
-   :highlights: 7; 10, 11; 10-12, 17, 18; 4, 8, 12, 18-19, 26; 5, 10, 22, 28, 32, 42, 58-60; 3, 11-12, 25, 29; 7, 10; 15, 21; 9, 15, 24
+   :highlights: 7; 10, 11; 9-11, 16, 17; 4, 8, 12, 18-19, 26; 5, 10, 22, 27, 31, 41, 57-59; 3, 11-12, 25, 29; 7, 10; 15, 21; 9, 15, 24
+   :works: [{"name":"root.work","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"default","preemptible":false,"shmSize":0},"networkConfig":[{"name":"dzodf","port":61304}]}}];[{"name":"root.work","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"qnlgd","port":61516}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu","preemptible":false,"shmSize":0}}}];[{"name":"root.ws.0","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"ajfrc","port":61553}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}},{"name":"root.ws.1","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"ttyqc","port":61554}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}},{"name":"root.ws.2","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"svyej","port":61555}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}},{"name":"root.ws.3","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"parme","port":61556}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}}];[{"name":"root.work","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"cutdu","port":61584}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu","preemptible":false,"shmSize":0}}}];[{"name":"root.ws.0","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"whhby","port":61613}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}},{"name":"root.ws.1","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"yhjtf","port":61614}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}},{"name":"root.ws.2","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"rqwkt","port":61615}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}},{"name":"root.ws.3","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"pjdsj","port":61616}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}},{"name":"root.ws.4","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"efdor","port":61617}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}},{"name":"root.ws.5","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"pxmso","port":61618}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}},{"name":"root.ws.6","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"feevy","port":61619}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}},{"name":"root.ws.7","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"tbmse","port":61620}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu-fast-multi","preemptible":false,"shmSize":0}}}];[{"name":"root.work","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"umqqg","port":7777}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"gpu","preemptible":false,"shmSize":0}}}];[];[{"name":"root.work","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"tggba","port":61729}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"default","preemptible":false,"shmSize":0}}}];[{"name":"root.work","spec":{"buildSpec":{"commands":[],"pythonDependencies":{"packageManager":"PACKAGE_MANAGER_PIP","packages":""}},"drives":[],"networkConfig":[{"name":"hpyaz","port":61763}],"userRequestedComputeConfig":{"count":1,"diskSize":0,"name":"default","preemptible":false,"shmSize":0}}}]
:enable_run: true
:tab_rows: 3
:height: 620px
@@ -26,7 +26,7 @@ or cloud GPUs without code changes.
.. lit_tabs::
:descriptions: import Lightning; We're using a demo LightningModule; Move your training code here (usually your main.py); Pass your component to the multi-node executor (it works on CPU or single GPUs also); Select the number of machines (nodes). Here we choose 2.; Choose from over 15+ machine types. This one has 4 v100 GPUs.; Initialize the App object that executes the component logic.
:code_files: /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py; /levels/basic/hello_components/pl_multinode.py;
-   :highlights: 2; 4; 10-12; 15-18; 17; 18; 20
+   :highlights: 2; 4; 9-11; 14-17; 16; 17; 19
:enable_run: true
:tab_rows: 5
:height: 420px
2 changes: 1 addition & 1 deletion docs/source-app/workflows/ssh/index.rst
@@ -48,7 +48,7 @@ You can add SSH keys using Lightning.ai website (Lightning.ai > Profile > Keys)

 .. code:: bash

-    $ lightning add ssh-key --public-key ~/.ssh/id_ed25519.pub
+    $ lightning create ssh-key --public-key ~/.ssh/id_ed25519.pub

 You are now ready to access your Lightning Flow and Work containers.

2 changes: 1 addition & 1 deletion docs/source-pytorch/cli/lightning_cli_intermediate.rst
@@ -107,7 +107,7 @@ Which prints out:

 .. code:: bash

-    usage: a.py [-h] [-c CONFIG] [--print_config [={comments,skip_null,skip_default}+]]
+    usage: main.py [-h] [-c CONFIG] [--print_config [={comments,skip_null,skip_default}+]]
        {fit,validate,test,predict,tune} ...

 pytorch-lightning trainer command line tool
122 changes: 122 additions & 0 deletions docs/source-pytorch/data/custom_data_iterables.rst
@@ -0,0 +1,122 @@
.. _dataiters:

##################################
Injecting 3rd Party Data Iterables
##################################

When training a model on a specific task, data loading and preprocessing might become a bottleneck.
Lightning does not enforce a specific data loading approach nor does it try to control it.
The only assumption Lightning makes is that the data is returned as an iterable of batches.

For PyTorch-based programs, these iterables are typically instances of :class:`~torch.utils.data.DataLoader`.

However, Lightning also supports other data sources such as plain lists of batches, generators, or other custom iterables.

.. code-block:: python

# random list of batches
data = [(torch.rand(32, 3, 32, 32), torch.randint(0, 10, (32,))) for _ in range(100)]
model = LitClassifier()
trainer = Trainer()
trainer.fit(model, data)

Examples for custom iterables include `NVIDIA DALI <https://github.com/NVIDIA/DALI>`__ or `FFCV <https://github.com/libffcv/ffcv>`__ for computer vision.
Both libraries offer support for custom data loading and preprocessing (also hardware accelerated) and can be used with Lightning.


For example, taking the example from FFCV's readme, we can use it with Lightning simply by removing the hardcoded ``ToDevice(0)``,
since Lightning takes care of GPU placement. If you want to run data transformations on the GPU, change
``ToDevice(0)`` to ``ToDevice(self.trainer.local_rank)`` to correctly map to the desired GPU in your pipeline.

.. code-block:: python

from ffcv.loader import Loader, OrderOption
from ffcv.transforms import ToTensor, ToDevice, ToTorchImage, Cutout
from ffcv.fields.decoders import IntDecoder, RandomResizedCropRGBImageDecoder


class CustomClassifier(LitClassifier):
def train_dataloader(self):

# Random resized crop
decoder = RandomResizedCropRGBImageDecoder((224, 224))

# Data decoding and augmentation
image_pipeline = [decoder, Cutout(), ToTensor(), ToTorchImage()]
label_pipeline = [IntDecoder(), ToTensor()]

# Pipeline for each data field
pipelines = {"image": image_pipeline, "label": label_pipeline}

# Replaces the PyTorch data loader (`torch.utils.data.DataLoader`)
loader = Loader(
write_path, batch_size=bs, num_workers=num_workers, order=OrderOption.RANDOM, pipelines=pipelines
)

return loader

When moving data to a specific device, you can always refer to ``self.trainer.local_rank`` to get the accelerator
used by the current process.

By just changing ``device_id=0`` to ``device_id=self.trainer.local_rank`` we can also leverage DALI's GPU decoding:

.. code-block:: python

from nvidia.dali.pipeline import pipeline_def
import nvidia.dali.types as types
import nvidia.dali.fn as fn
from nvidia.dali.plugin.pytorch import DALIGenericIterator
import os


class CustomLitClassifier(LitClassifier):
def train_dataloader(self):

# To run with different data, see documentation of nvidia.dali.fn.readers.file
# points to https://github.com/NVIDIA/DALI_extra
data_root_dir = os.environ["DALI_EXTRA_PATH"]
images_dir = os.path.join(data_root_dir, "db", "single", "jpeg")

@pipeline_def(num_threads=4, device_id=self.trainer.local_rank)
def get_dali_pipeline():
images, labels = fn.readers.file(file_root=images_dir, random_shuffle=True, name="Reader")
# decode data on the GPU
images = fn.decoders.image_random_crop(images, device="mixed", output_type=types.RGB)
# the rest of processing happens on the GPU as well
images = fn.resize(images, resize_x=256, resize_y=256)
images = fn.crop_mirror_normalize(
images,
crop_h=224,
crop_w=224,
mean=[0.485 * 255, 0.456 * 255, 0.406 * 255],
std=[0.229 * 255, 0.224 * 255, 0.225 * 255],
mirror=fn.random.coin_flip(),
)
return images, labels

train_data = DALIGenericIterator(
[get_dali_pipeline(batch_size=16)],
["data", "label"],
reader_name="Reader",
)

return train_data


Limitations
------------
Lightning works with all kinds of custom data iterables as shown above. A few features, however, cannot
be supported this way. These restrictions stem from the fact that supporting them would require
Lightning to know a great deal about the internals of these iterables.

- In a distributed multi-GPU setting (ddp),
Lightning automatically replaces the DataLoader's sampler with its distributed counterpart.
This makes sure that each GPU sees a different part of the dataset.
Since sampling can be implemented in arbitrary ways with custom iterables,
there is no way for Lightning to know how to replace the sampler.

- When training fails for some reason, Lightning is able to extract all of the relevant state from the model,
optimizers, trainer, and dataloader to resume at the exact batch where it crashed.
This feature is called fault tolerance and is limited to PyTorch DataLoaders.
Lightning needs detailed knowledge of sampling, fast-forwarding, and random number handling to enable fault tolerance,
meaning that it cannot be supported for arbitrary iterables.
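To make the first limitation concrete, here is a minimal sketch of what the automatic sampler replacement amounts to for a plain ``DataLoader`` (the ``num_replicas``/``rank`` values are illustrative; in practice Lightning derives them from the cluster environment). A custom iterable has no ``sampler`` attribute to swap, which is why Lightning cannot perform the equivalent surgery there:

```python
import torch
from torch.utils.data import DataLoader, DistributedSampler, TensorDataset

dataset = TensorDataset(torch.rand(100, 3), torch.randint(0, 10, (100,)))

# What the user passes in: a regular, non-distributed DataLoader
loader = DataLoader(dataset, batch_size=8, shuffle=True)

# Roughly what Lightning does under ddp: rebuild the loader with a
# DistributedSampler so every process sees a disjoint shard of the data
sampler = DistributedSampler(dataset, num_replicas=2, rank=0, shuffle=True)
sharded_loader = DataLoader(dataset, batch_size=8, sampler=sampler)

print(len(sampler))  # 50 -- each of the two replicas sees half the dataset
```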
1 change: 1 addition & 0 deletions docs/source-pytorch/index.rst
@@ -207,6 +207,7 @@ Current Lightning Users
    Train on single or multiple TPUs <accelerators/tpu>
    Train on MPS <accelerators/mps>
    Use a pretrained model <advanced/pretrained>
+   Inject Custom Data Iterables <data/custom_data_iterables>
    model/own_your_loop

 .. toctree::