09_sd-1.5_load_local_weights/README.md (new file, 64 additions)
# Load local checkpoint and model weights on Beam

This example demonstrates how to load a local checkpoint, LoRA, VAE, and textual inversion (TI) embedding for the base model Stable Diffusion v1.5.

Credits to [Talbo](https://x.com/TalboSocial)

## Overview

This app exposes an API that generates an image from a given prompt.

## Prerequisites

1. Make sure you have [Beam](https://beam.cloud) installed: `curl https://raw.githubusercontent.com/slai-labs/get-beam/main/get-beam.sh -sSfL | sh`
2. Clone this repo and `cd` into the directory

## Quickstart

0. Go to your Beam dashboard and upload the model weights to a Volume you create, as described [here](https://docs.beam.cloud/data/volumes#uploading-files-with-the-dashboard).
Then make sure you add the path to each model weight in the constants section of `app.py`:
```python
model_path = f"{volume_path}/Anything-V3.0-pruned-fp32.safetensors"
lora_path = f"{volume_path}/"
lora_name = "Crayon.safetensors"
ti_path = f"{volume_path}/1vanakn0ll.pt"
vae_path = f"{volume_path}/vae-ft-mse-840000-ema-pruned.safetensors"
```
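
For reference, the constants above assume the weight files sit at the top level of the volume (named `models` in `app.py`), roughly like this:

```
models/
├── Anything-V3.0-pruned-fp32.safetensors      # checkpoint (model_path)
├── Crayon.safetensors                         # LoRA (lora_path + lora_name)
├── 1vanakn0ll.pt                              # textual inversion (ti_path)
└── vae-ft-mse-840000-ema-pruned.safetensors   # VAE (vae_path)
```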

If you cannot load a weight from a single file, follow these steps:

a. Download the entire model folder from Hugging Face and upload it to your Beam volume.
b. Use the `from_pretrained` method and pass the path to the folder instead of the model ID:
```pipe.vae = AutoencoderTiny.from_pretrained("./models/vae/your-vae/", torch_dtype=precision_consistency).to("cuda")```
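
If you need to fetch such a folder before uploading it, one option is `snapshot_download` from `huggingface_hub` (a minimal sketch run on your local machine; `huggingface_hub` is not pinned in `requirements.txt`, and `madebyollin/taesd` is just the VAE repo referenced in `app.py`):

```python
# Sketch: download a full model folder locally, then upload it to the Beam volume.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="madebyollin/taesd",      # swap in whichever model you need
    local_dir="./models/vae/taesd",   # folder to upload to your Beam volume
)
print(f"Downloaded to: {local_dir}")
```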

1. Test the API locally: `beam serve app.py:generate_image`. You can make any desired changes to the code, and Beam will automatically
reload the remote server each time you update your application code.
Note: any updates to compute requirements, Python packages, or shell commands will require you to manually restart the dev session.
2. Deploy the API to Beam: `beam deploy app.py --name sd-load-local-weights`.
Once it's deployed, you can find the web URL in the dashboard.


## Calling the Inference API

Here's what a request will look like:

```sh
curl -X POST \
--compressed 'https://uc6mc.apps.beam.cloud' \
-H 'Accept: */*' \
-H 'Accept-Encoding: gzip, deflate' \
-H 'Authorization: Basic YOUR_AUTH_KEY' \
-H 'Connection: keep-alive' \
-H 'Content-Type: application/json' \
-d '{"prompt": "photo of a girl riding the subway"}'
```
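
The endpoint returns the generated image base64-encoded under the `data` key (see `app.py`). Here's a small client-side sketch for decoding the response (the URL and auth key are placeholders, as above):

```python
# Sketch: call the deployed endpoint and write the returned base64 image to disk.
import base64
import json
import urllib.request

req = urllib.request.Request(
    "https://uc6mc.apps.beam.cloud",  # replace with your deployment URL
    data=json.dumps({"prompt": "photo of a girl riding the subway"}).encode(),
    headers={
        "Content-Type": "application/json",
        "Authorization": "Basic YOUR_AUTH_KEY",  # replace with your auth key
    },
)
with urllib.request.urlopen(req) as resp:
    payload = json.loads(resp.read())

with open("output.png", "wb") as f:
    f.write(base64.b64decode(payload["data"]))
```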

The main changes for Beam v2:
- You no longer define an `App`
- Define an `Image` and a `Volume` and pass them to `@endpoint` as args
- In `Volume`, `path` is renamed to `mount_path`
- `@app.rest_api()` is now `@endpoint(image=image, cpu=4, memory="16Gi", gpu="T4", volumes=[volume])`
- The loader is now called `on_start`
- The loader's return value is retrieved via `context.on_start_value` (see the sketch after this list)
- Imports need to be inline with your remote functions
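
Putting those pieces together, here is a minimal sketch of the v2 pattern (illustrative only; `app.py` in this example loads its models inside the endpoint function rather than in `on_start`):

```python
# Minimal sketch of the Beam v2 endpoint pattern, based on the bullets above.
from beam import Image, Volume, endpoint

image = Image(python_version="python3.10", python_packages="requirements.txt")
volume = Volume(name="models", mount_path="./models")

def load_models():
    # One-time setup per container (e.g. building the diffusion pipeline);
    # whatever is returned here is handed to every request via the context.
    return "pipeline-placeholder"

@endpoint(image=image, cpu=4, memory="16Gi", gpu="T4", volumes=[volume], on_start=load_models)
def generate_image(context, **inputs):
    pipe = context.on_start_value  # value returned by load_models
    prompt = inputs.get("prompt", "a renaissance style photo of elon musk")
    return {"data": f"{pipe}: {prompt}"}
```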
09_sd-1.5_load_local_weights/app.py (new file, 168 additions)
"""
### Load local weights for base model Stable Diffusion 1.5 on Beam ###

** Test it locally **
beam serve app.py:generate_image

**Deploy it as an API**
beam deploy app.py --name sd-load-local-weights
"""

### CONFIGURATION ###
# The volume storing the models(shared)
volume_name = "models"
volume_path = "./models" # add your own path from your Beam Volume

# model checkpoint path(add your own path)
model_path = f"{volume_path}/Anything-V3.0-pruned-fp32.safetensors"
# lora path(add your own lora name that you have on your Beam Volume)
lora_path = f"{volume_path}/"
lora_name = "Crayon.safetensors" # this is actually the name of the file on Beam Volume
# textual inversion path(add your own path)
ti_path = f"{volume_path}/1vanakn0ll.pt"
# vae file path(add your own path to the file)
# vae_path = f"{volume_path}/vae-ft-mse-840000-ema-pruned.safetensors"
# vae folder path(add your own path to the folder)
vae_path = f"{volume_path}/vae/taesd" # local folder for "madebyollin/taesd" from HF

# model settings(params)
image_width = 512
image_height = 768
negative_prompt = "(ugly:1.3), (fused fingers), (too many fingers), (bad anatomy:1.2), (watermark:1.2), (words), letters, untracked eyes, asymmetric eyes"

# Sampler settings
from diffusers import DPMSolverMultistepScheduler
sampler = DPMSolverMultistepScheduler.from_config # = 'DPM++ 2M' - scheduler - set below after the pipe is created
sampler_type = "use_karras_sigmas" # = 'Karras' - schedule type - set below after the pipe is created
sampling_steps = 20

# Other settings
cfg_scale = 7.5 # CFG scale
model_seed = 1736616725
clip_skip = 2
# Precision consistency - ensures that torch_dtype=torch.float32 is consistently used when loading models and other components.
precision_consistency_val = "32" # or "16" - fixes the error: "Input type (c10::Half) and bias type (float) mismatch"
# and also "RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu"

### IMPORTS ###
from beam import Image, Volume, endpoint

# The environment your app runs on
image=Image(
python_version="python3.10",
python_packages="requirements.txt",
# Shell commands that run when the container first starts
# This installs the CUDA libraries to fix the "OSError: libcudart.so.11.0: cannot open shared object file: No such file or directory" error
commands=[
"apt-get update && apt-get install -y wget"
" && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.1-1_all.deb",
"dpkg -i cuda-keyring_1.1-1_all.deb",
"lsb_release -a && apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y ffmpeg libpq-dev cuda=11.8.0-1 libcudnn8=8.9.2.*-1+cuda11.8",
]
)
# The volume used to store the model weights
volume=Volume(name=volume_name, mount_path=volume_path)

# Deploy as a REST API
@endpoint(
image=image,
cpu=4,
memory="16Gi",
gpu="T4",
volumes=[volume]
)

# main function that runs the inference
def generate_image(**inputs):
# Inline imports - these packages are available in the remote container, not necessarily in your local environment
import os, base64, torch
from diffusers import StableDiffusionPipeline, AutoencoderKL

# read the prompt from the inputs passed to the API; fall back to a default if none is provided
try:
prompt = inputs["prompt"]
except KeyError:
prompt = "a renaissance style photo of elon musk"

# print(f"Image prompt: {prompt}")

# check the device to run the model on
# print(f"MPS available for model: {torch.backends.mps.is_available()}")
# print(f"Cuda available for model: {torch.cuda.is_available()}")
# print(f"CUDA version: {torch.version.cuda}")

# resolve the precision setting so the same torch_dtype (e.g. torch.float32) is used consistently
precision_consistency = getattr(torch, "float" + precision_consistency_val)

# Special torch method to improve performance
torch.backends.cuda.matmul.allow_tf32 = True

# Load the model checkpoint
# `StableDiffusionPipeline` is a class that represents a pipeline for stable diffusion, a
# technique used in image generation tasks. It is used to load a pre-trained model
# checkpoint, run inference on the model, and generate images based on a given prompt.
# The pipeline handles the processing of the input prompt, running the inference steps on the
# model, and producing the final image output.
pipe = StableDiffusionPipeline.from_single_file(
# Run inference on the specific model trained(checkpoint) from the volume
model_path,
torch_dtype=precision_consistency,
variant="fp32",
# The `cache_dir` arg is used to cache the model in between requests
cache_dir=volume_path,
safety_checker=None,
use_safetensors=True,
device_map="auto"
).to("cuda")
# Enable memory-efficient attention (xformers) to reduce VRAM usage during generation
pipe.enable_xformers_memory_efficient_attention()

# VAE - Load VAE from single file
# pipe.vae = AutoencoderKL.from_single_file(vae_path, torch_dtype=precision_consistency).to("cuda")
# VAE - Load VAE from folder
pipe.vae = AutoencoderKL.from_pretrained(vae_path, torch_dtype=precision_consistency).to("cuda")

# LORAs - Load LORAs with float32 precision
pipe.load_lora_weights(lora_path, weight_name=lora_name, torch_dtype=precision_consistency)

# Textual Inversion - Load Textual Inversion with float32 precision
pipe.load_textual_inversion(ti_path, torch_dtype=precision_consistency)

# PIPELINE SETTINGS
# set the scheduler and enable the schedule type (e.g. Karras sigmas)
# note: pipe.scheduler.config is a FrozenDict, so the flag is passed to
# from_config as a kwarg instead of being assigned on the config directly
pipe.scheduler = sampler(pipe.scheduler.config, **{sampler_type: True})
# print(f"schedule config: {pipe.scheduler.config}")
# print(f"sampler(scheduler) info: {pipe.scheduler}")
# print(f"sampler(scheduler) use_karras_sigmas: {pipe.scheduler.use_karras_sigmas}")

# Image generation
with torch.inference_mode():
with torch.autocast("cuda", dtype=precision_consistency):
image = pipe(prompt, width=image_width, height=image_height,
negative_prompt=negative_prompt,
num_inference_steps=sampling_steps,
guidance_scale=cfg_scale,
generator=torch.Generator(device="cuda").manual_seed(model_seed),
clip_skip=clip_skip,
).images[0]

print(f"Saved Image: {image}")

# Save the generated image to the volume
img_name = "output_img.png"
image.save(img_name)

# encode the saved image as base64
img_path = os.path.abspath(img_name)
img_base64 = base64.b64encode(open(img_path, "rb").read()).decode("utf-8")

# print it to the console
# print(f"Base64 Image: {img_base64}")

# return the base64 image
return {"data": img_base64}
09_sd-1.5_load_local_weights/requirements.txt (new file, 16 additions)
ninja
peft
torch>=2.0.1
torchvision>=0.15.2
transformers>=4.40.2
ftfy
tensorboard
Jinja2
pillow
datasets
xformers
bitsandbytes-cuda117
deepspeed
git+https://github.com/huggingface/diffusers.git
git+https://github.com/huggingface/accelerate.git