Docs 4/n (#15628)

* remove source-lit * docs * docs * docs * docs * ic * deploy * deploy * deploy * deploy * deploy * deploy * Apply suggestions from code review * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * make build run Co-authored-by: Jirka Borovec <6035284+Borda@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Rick Izzo <rick@grid.ai>
Lightning-AI · Nov 10, 2022 · d5c0eff · d5c0eff
1 parent 136a090
commit d5c0eff
Show file tree

Hide file tree

Showing 25 changed files with 224 additions and 128 deletions.
diff --git a/docs/source-app/api_reference/components.rst b/docs/source-app/api_reference/components.rst
@@ -20,6 +20,6 @@ ___________________
 
     ~python.popen.PopenPythonScript
     ~python.tracer.TracerPythonScript
-    ~training.LightningTrainingComponent
+    ~training.LightningTrainerScript
     ~serve.gradio.ServeGradio
     ~serve.serve.ModelInferenceAPI
diff --git a/docs/source-app/api_references.rst b/docs/source-app/api_references.rst
@@ -34,7 +34,7 @@ ___________________
 
     ~python.popen.PopenPythonScript
     ~python.tracer.TracerPythonScript
-    ~training.LightningTrainingComponent
+    ~training.LightningTrainerScript
     ~serve.gradio.ServeGradio
     ~serve.serve.ModelInferenceAPI
 

diff --git a/docs/source-app/index.rst b/docs/source-app/index.rst
@@ -6,7 +6,7 @@
 #######################
 Welcome to ⚡ Lightning
 #######################
-Build models and full stack AI apps ⚡ *Lightning fast*.
+Build models, ML components and full stack AI apps ⚡ *Lightning fast*.
 
 .. join_slack::
    :align: left
@@ -22,23 +22,22 @@ Build models and full stack AI apps ⚡ *Lightning fast*.
 
 .. app_card::
    :title: Develop and Train
-   :description: Train an LLM (64 GPUs)
+   :description: Train a model (32 GPUs)
    :width: 280
    :image: https://lightning-ai-docs.s3.amazonaws.com/develop_n_train_v1.jpg
-   :preview: https://lightning.ai
-   :deploy: https://lightning.ai
-   :target: https://apple.com
-   :tags: Model
+   :target: levels/basic/real_lightning_component_implementations.html#ex-pytorch-lightning-trainer
+   :preview: levels/basic/real_lightning_component_implementations.html#ex-pytorch-lightning-trainer
+   :tags: Training
 
 .. app_card::
    :title: Serve and deploy
-   :description: Production-ready stable diffusion server (<2s latency)
+   :description: Production diffusion server (<2s latency)
    :width: 280
    :app_id: HvUwbEG90E
    :image: https://lightning-ai-docs.s3.amazonaws.com/serve_n_deploy_v1.jpg
    :deploy: https://lightning.ai
    :target: https://01gbx4m78rbkpczdf5cpz2hpbh.litng-ai-03.litng.ai/root.api_component/
-   :tags: App
+   :tags: Serving
 
 .. app_card::
    :title: Scale and build a product
@@ -47,7 +46,7 @@ Build models and full stack AI apps ⚡ *Lightning fast*.
    :app_id: HvUwbEG90E
    :image: https://lightning-ai-docs.s3.amazonaws.com/scale_n_build_v1.jpg
    :target: https://lightning.ai/muse
-   :tags: App
+   :tags: AI App
 
 .. raw:: html
 
@@ -56,13 +55,14 @@ Build models and full stack AI apps ⚡ *Lightning fast*.
 
 ----
 
-****************************************
-Build self-contained, modular components
-****************************************
-Lightning is a hyper-minimalistic framework designed to maximize expressivity that
-enables you to build modular, self-contained components and plug them into your existing workflows.
-A Lightning component organizes arbitrary code so it can run on the cloud. A component can train a model, deploy, or even host a web UI.
-The component manages its own infrastructure, cloud costs, networking and more, so you can focus on application logic and not engineering.
+********************************
+Build self-contained components
+********************************
+Use Lightning, the hyper-minimalistic framework, to build machine learning components that can plug into existing ML workflows.
+A Lightning component organizes arbitrary code to run on the cloud, manage its own infrastructure, cloud costs, networking, and more.
+Focus on component logic and not engineering.
+
+Use components on their own, or compose them into full-stack AI apps with our next-generation Lightning orchestrator.
 
 .. raw:: html
 
@@ -78,6 +78,10 @@ The component manages its own infrastructure, cloud costs, networking and more,
 
 .. include:: ./levels/basic/hero_components.rst
 
+|
+
+Components run the same on the cloud and locally on your choice of hardware.
+
 .. lit_tabs::
    :code_files: landing_app_run.bash
    :highlights: 5

diff --git a/docs/source-app/landing_app_run.bash b/docs/source-app/landing_app_run.bash
@@ -1,5 +1,5 @@
 # install lightning
 pip install lightning
 
-# run the app
-lightning run app app.py --cloud
+# run the app on the --cloud (--setup installs deps automatically)
+lightning run app app.py --setup --cloud
diff --git a/docs/source-app/levels/basic/build_a_lightning_component.rst b/docs/source-app/levels/basic/build_a_lightning_component.rst
@@ -14,13 +14,12 @@ Level 1: Package code in a lightning component
 *********************************
 Why you need Lightning components
 *********************************
-A Lightning component organizes a piece of code into a self-contained, modular component that
-can be integrated into your existing workflows or assembled to form a Lightning app.
-A Lightning component manages its own infrastructure, auto-scaling, cost management, and more, so you
-can focus on the program logic and not the cloud engineering.
+A Lightning component is a self-contained, modular machine-learning component
+that you can plug into your existing ML workflows. A Lightning component organizes arbitrary code so it can run on the cloud, manages
+its own infrastructure, cloud costs, networking and more. Connect components using your current workflow management tools or
+our `next-generation reactive orchestrator <../intermediate/index.html>`_.
 
-Components run on the cloud or your laptop without code changes 🤯🤯. Connect components using your current workflow management tools or use
-Lightning apps to build powerful sequential AND reactive workflows.
+Components run on the cloud or your laptop without code changes 🤯🤯.
 
 .. raw:: html
 
@@ -109,7 +108,7 @@ First, install Lightning.
 **************************
 Build your first component
 **************************
-A Lightning component organizes Python code into a self-contained module so it can run on the cloud.
+A Lightning component organizes arbitrary code so it can run on the cloud, manages its own infrastructure, cloud costs, networking and more.
 
 **Run one of these components!**
 
@@ -119,12 +118,7 @@ A Lightning component organizes Python code into a self-contained module so it c
 
 Components run the same on the cloud and locally on your choice of hardware.
 
-.. lit_tabs::
-   :titles: Lightning Cloud (fully-managed); Your AWS account; Your own hardware
-   :code_files: ./hello_components/code_run_cloud.bash; ./hello_components/code_run_cloud_yours.bash; ./hello_components/code_run_local.bash
-   :tab_rows: 4
-   :highlights: ; 5; 0
-   :height: 195px
+.. include:: /levels/basic/hero_run_setup.rst
 
 ----
 

diff --git a/docs/source-app/levels/basic/hello_components/code_run_cloud_setup.bash b/docs/source-app/levels/basic/hello_components/code_run_cloud_setup.bash
@@ -0,0 +1 @@
+lightning run app app.py --setup --cloud
diff --git a/docs/source-app/levels/basic/hello_components/code_run_cloud_yours_setup.bash b/docs/source-app/levels/basic/hello_components/code_run_cloud_yours_setup.bash
@@ -0,0 +1,5 @@
+# first create a cluster (creation could take ~30 minutes)
+lightning create cluster pikachu --provider aws --role-arn arn:aws:iam::1234567890:role/lai-byoc
+
+# run on that cluster
+lightning run app app.py --setup --cloud pikachu
diff --git a/docs/source-app/levels/basic/hello_components/code_run_local_setup.bash b/docs/source-app/levels/basic/hello_components/code_run_local_setup.bash
@@ -0,0 +1 @@
+lightning run app app.py --setup
diff --git a/docs/source-app/levels/basic/hello_components/deploy_model.py b/docs/source-app/levels/basic/hello_components/deploy_model.py
@@ -1,15 +1,31 @@
-# A hello world component
-# app.py
+# !pip install torchvision
 import lightning as L
+from lightning.app.components.serve import PythonServer, Image, Number
+import base64, io, torchvision, torch
+from PIL import Image as PILImage
 
 
-class YourComponent(L.LightningWork):
-   def run(self):
-      print('RUN ANY PYTHON CODE HERE')
+class PyTorchServer(PythonServer):
+    def setup(self):
+        self._model = torchvision.models.resnet18(pretrained=True)
+        self._device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        self._model.to(self._device)
 
+    def predict(self, request):
+        image = base64.b64decode(request.image.encode("utf-8"))
+        image = PILImage.open(io.BytesIO(image))
+        transforms = torchvision.transforms.Compose([
+            torchvision.transforms.Resize(224),
+            torchvision.transforms.ToTensor(),
+            torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+        ])
+        image = transforms(image)
+        image = image.to(self._device)
+        prediction = self._model(image.unsqueeze(0))
+        return {"prediction": prediction.argmax().item()}
 
 
-# run on a cloud machine
-compute = L.CloudCompute("cpu")
-worker = YourComponent(cloud_compute=compute)
-app = L.LightningApp(worker)
+component = PyTorchServer(
+   input_type=Image, output_type=Number, cloud_compute=L.CloudCompute('gpu')
+)
+app = L.LightningApp(component)
diff --git a/docs/source-app/levels/basic/hello_components/pl_multinode.py b/docs/source-app/levels/basic/hello_components/pl_multinode.py
@@ -0,0 +1,20 @@
+# app.py
+import lightning as L
+from lightning.app.components import PyTorchLightningMultiNode
+from lightning.pytorch.demos.boring_classes import BoringModel
+
+
+class LightningTrainerDistributed(L.LightningWork):
+    @staticmethod
+    def run():
+        model = BoringModel()
+        trainer = L.Trainer(max_epochs=10, strategy="ddp")
+        trainer.fit(model)
+
+# 8 GPUs: (2 nodes of 4 x V100)
+component = PyTorchLightningMultiNode(
+    LightningTrainerDistributed,
+    num_nodes=2,
+    cloud_compute=L.CloudCompute("gpu-fast-multi"), # 4 x v100
+)
+app = L.LightningApp(component)
diff --git a/docs/source-app/levels/basic/hello_components/pt_multinode.py b/docs/source-app/levels/basic/hello_components/pt_multinode.py
@@ -1,30 +1,61 @@
-# !pip install torch
+# app.py
+# ! pip install torch
 import lightning as L
 from lightning.app.components import MultiNode
 import torch
+from torch.nn.parallel.distributed import DistributedDataParallel
 
-class MultiNodePytorchComponent(L.LightningWork):
-    def run(
-        self,
-        main_address: str,
-        main_port: int,
-        node_rank: int,
-        world_size: int,
-    ):
-        # this machine creates a group of processes and registers to the main node
-        print(f"Init process group: {main_address=}, {main_port=}, {world_size=}, {node_rank=}")
+
+def distributed_train(local_rank: int, main_address: str, main_port: int, num_nodes: int, node_rank: int, nprocs: int):
+    # 1. SET UP DISTRIBUTED ENVIRONMENT
+    global_rank = local_rank + node_rank * nprocs
+    world_size = num_nodes * nprocs
+
+    if torch.distributed.is_available() and not torch.distributed.is_initialized():
         torch.distributed.init_process_group(
-            backend="gloo",
-            init_method=f"tcp://{main_address}:{main_port}",
+            "nccl" if torch.cuda.is_available() else "gloo",
+            rank=global_rank,
             world_size=world_size,
-            rank=node_rank
+            init_method=f"tcp://{main_address}:{main_port}",
+        )
+
+    # 2. PREPARE DISTRIBUTED MODEL
+    model = torch.nn.Linear(32, 2)
+    device = torch.device(f"cuda:{local_rank}") if torch.cuda.is_available() else torch.device("cpu")
+    device_ids = device if torch.cuda.is_available() else None
+    model = DistributedDataParallel(model, device_ids=device_ids).to(device)
+
+    # 3. SETUP LOSS AND OPTIMIZER
+    criterion = torch.nn.MSELoss()
+    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
+
+    # 4. TRAIN THE MODEL FOR 50 STEPS
+    for step in range(50):
+        model.zero_grad()
+        x = torch.randn(64, 32).to(device)
+        output = model(x)
+        loss = criterion(output, torch.ones_like(output))
+        print(f"global_rank: {global_rank} step: {step} loss: {loss}")
+        loss.backward()
+        optimizer.step()
+
+    # 5. VERIFY ALL COPIES OF THE MODEL HAVE THE SAME WEIGHTS AT END OF TRAINING
+    weight = model.module.weight.clone()
+    torch.distributed.all_reduce(weight)
+    assert torch.equal(model.module.weight, weight / world_size)
+
+    print("Multi Node Distributed Training Done!")
+
+class PyTorchDistributed(L.LightningWork):
+    def run(self, main_address: str, main_port: int, num_nodes: int, node_rank: int):
+        nprocs = torch.cuda.device_count() if torch.cuda.is_available() else 1
+        torch.multiprocessing.spawn(
+            distributed_train,
+            args=(main_address, main_port, num_nodes, node_rank, nprocs),
+            nprocs=nprocs
         )
-        for step in range(10000):
-            gathered = [torch.zeros(1) for _ in range(world_size)]
-            torch.distributed.all_gather(gathered, torch.tensor([node_rank]).float())
-            print(f'step: {step}, tensor: {gathered}')
-
-# gpu-multi-fast has 4 GPUs x 8 nodes = 32 GPUs
-component = MultiNodePytorchComponent(cloud_compute=L.CloudCompute("gpu-multi-fast"))
-component = MultiNode(component, nodes=8)
+
+# 32 GPUs: (8 nodes x 4 V100)
+compute = L.CloudCompute("gpu-fast-multi")  # 4xV100
+component = MultiNode(PyTorchDistributed, num_nodes=8, cloud_compute=compute)
 app = L.LightningApp(component)
diff --git a/docs/source-app/levels/basic/hello_components/run_ptl_script.py b/docs/source-app/levels/basic/hello_components/run_ptl_script.py
@@ -0,0 +1,13 @@
+# app.py
+# !curl https://bit.ly/demoLightningScriptpy -o pl_boring_script.py
+import lightning as L
+from lightning.app.components.training import LightningTrainerScript
+
+# run script that trains PyTorch with the Lightning Trainer
+model_script = 'pl_boring_script.py'
+component = LightningTrainerScript(
+   model_script,
+   num_nodes=1,
+   cloud_compute=L.CloudCompute("gpu")
+)
+app = L.LightningApp(component)
diff --git a/docs/source-app/levels/basic/hello_components/run_script.py b/docs/source-app/levels/basic/hello_components/run_script.py
diff --git a/...vels/basic/hello_components/build_demo.py → .../basic/hello_components/streamlit_demo.py b/...vels/basic/hello_components/build_demo.py → .../basic/hello_components/streamlit_demo.py
@@ -1,7 +1,6 @@
 # app.py
 # !pip install streamlit omegaconf scipy
 # !pip install torch
-
 import lightning as L
 import torch
 from io import BytesIO
@@ -10,21 +9,11 @@
 import streamlit as st
 
 
-class LitStreamlit(L.app.components.ServeStreamlit):
+class StreamlitApp(L.app.components.ServeStreamlit):
     def build_model(self):
         sample_rate = 48000
-
-        model, _ = torch.hub.load(
-            repo_or_dir='snakers4/silero-models',
-            model='silero_tts',
-            speaker="v3_en",
-        )
-
-        return partial(
-            model.apply_tts,
-            sample_rate=sample_rate,
-            speaker="en_0",
-        ), sample_rate
+        model, _ = torch.hub.load('snakers4/silero-models', model='silero_tts',speaker="v3_en")
+        return partial(model.apply_tts, sample_rate=sample_rate, speaker="en_0"), sample_rate
 
     def render(self):
         st.title("Text To Speech")
@@ -38,4 +27,4 @@ def render(self):
             audio.seek(0)
             st.audio(audio)
 
-app = L.LightningApp(LitStreamlit())
+app = L.LightningApp(StreamlitApp())
diff --git a/docs/source-app/levels/basic/hello_components/train_pytorch.py b/docs/source-app/levels/basic/hello_components/train_pytorch.py
@@ -1,4 +1,5 @@
 # app.py
+# ! pip install torch
 import lightning as L
 import torch
 
@@ -23,4 +24,5 @@ def run(self):
          optimizer.step()
 
 compute = L.CloudCompute('gpu')
-app = L.LightningApp(PyTorchComponent(cloud_compute=compute))
+componet = PyTorchComponent(cloud_compute=compute)
+app = L.LightningApp(componet)
diff --git a/docs/source-app/levels/basic/hello_components/xgboost.py b/docs/source-app/levels/basic/hello_components/xgboost.py
@@ -1,6 +1,5 @@
 # app.py
 # !pip install sklearn xgboost
-
 import lightning as L
 from sklearn import datasets
 from sklearn.model_selection import train_test_split