From dde69d6ca75eb7e4c5f01fd17e6da5b62f8a401f Mon Sep 17 00:00:00 2001
From: NERDDISCO <492378+TimPietrusky@users.noreply.github.com>
Date: Fri, 26 Jul 2024 17:36:05 +0200
Subject: [PATCH] feat: support sd3 (#46)

BREAKING CHANGE: we have 3 different images now instead of just one: base, sdxl and sd3

* ci: use branch name for creating dev releases

* ci: replace "/" with "-" to have a valid tag name

* ci: correctly handle the tag name

* ci: build an image that contains sd3 using docker bake

* ci: use "set" instead of "args"

* ci: use "env" instead of "set"

* ci: use variables instead of args

* ci: set variables directly for the targets

* ci: write the secrets into the GITHUB_ENV

* ci: handle env variables correctly

* ci: use env variables from GitHub Variables

* ci: added back to env

* ci: print out env

* ci: adding the vars directly into the workflow

* ci: example workflow for sd3

* ci: renamed DOCKERHUB_REPO to DOCKERHUB_REPOSITORY

* ci: removed quotes for DOCKERHUB_REPOSITORY

* ci: only use DOCKERHUB_REPO in bake

* ci: added vars into sd3 target

* ci: added direct target

* ci: back to basics

* ci: multi-stage build to not expose the HUGGINGFACE_ACCESS_TOKEN

* ci: write everything into GITHUB_ENV again

* ci: use correct name for final stage

* ci: use correct runner

* fix: make sure to use the latest versions of all packages

* ci: simplified variables for all targets

* docs: added 3 images, updated build your own image

* docs: updated TOC

* ci: updated name

* ci: use docker bake to publish 3 images instead of just 1
---
 .github/workflows/dev.yml                  | 22 +++--
 .github/workflows/release.yml              | 22 +++--
 Dockerfile                                 | 39 ++++++---
 README.md                                  | 59 +++++++++-----
 docker-bake.hcl                            | 49 +++++++++++
 test_resources/workflows/workflow_sd3.json | 94 ++++++++++++++++++++++
 6 files changed, 240 insertions(+), 45 deletions(-)
 create mode 100644 docker-bake.hcl
 create mode 100644 test_resources/workflows/workflow_sd3.json

diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml
index 065cf3e..3731707 100644
--- a/.github/workflows/dev.yml
+++ b/.github/workflows/dev.yml
@@ -3,8 +3,8 @@ name: Development
 on:
   workflow_dispatch:
   push:
-    branches:
-      - "dev"
+    branches-ignore:
+      - main
 
 jobs:
   dev:
@@ -40,9 +40,19 @@ jobs:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
 
-      - name: Build and push
-        uses: docker/build-push-action@v5
+      - name: Set environment variables
+        run: |
+          echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO }}" >> $GITHUB_ENV
+          echo "DOCKERHUB_IMG=${{ vars.DOCKERHUB_IMG }}" >> $GITHUB_ENV
+          echo "HUGGINGFACE_ACCESS_TOKEN=${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}" >> $GITHUB_ENV
+          echo "RELEASE_VERSION=${GITHUB_REF##refs/heads/}" | sed 's/\//-/g' >> $GITHUB_ENV
+
+      - name: Build and push the images to Docker Hub
+        uses: docker/bake-action@v2
         with:
           push: true
-          tags: |
-            ${{ secrets.DOCKERHUB_REPO }}/${{ secrets.DOCKERHUB_IMG }}:dev
+          set: |
+            *.args.DOCKERHUB_REPO=${{ env.DOCKERHUB_REPO }}
+            *.args.DOCKERHUB_IMG=${{ env.DOCKERHUB_IMG }}
+            *.args.RELEASE_VERSION=${{ env.RELEASE_VERSION }}
+            sd3.args.HUGGINGFACE_ACCESS_TOKEN=${{ env.HUGGINGFACE_ACCESS_TOKEN }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 4ca0245..320164b 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -60,14 +60,26 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.BLIBLA_SEMANTIC_RELEASE }}
 
-      - name: Build image and push it to Docker Hub
-        if: steps.semanticrelease.outputs.new-release-published == 'true'
-        uses: docker/build-push-action@v5
+      # Only build and publish when semantic-release actually created a new
+      # release; without one there is no release version to tag the images with
+      - name: Set environment variables
+        if: steps.semanticrelease.outputs.new-release-published == 'true'
+        run: |
+          echo "DOCKERHUB_REPO=${{ vars.DOCKERHUB_REPO }}" >> $GITHUB_ENV
+          echo "DOCKERHUB_IMG=${{ vars.DOCKERHUB_IMG }}" >> $GITHUB_ENV
+          echo "HUGGINGFACE_ACCESS_TOKEN=${{ secrets.HUGGINGFACE_ACCESS_TOKEN }}" >> $GITHUB_ENV
+          echo "RELEASE_VERSION=${{ steps.semanticrelease.outputs.release-version }}" >> $GITHUB_ENV
+
+      - name: Build and push the images to Docker Hub
+        if: steps.semanticrelease.outputs.new-release-published == 'true'
+        uses: docker/bake-action@v2
         with:
           push: true
-          tags: |
-            ${{ secrets.DOCKERHUB_REPO }}/${{ secrets.DOCKERHUB_IMG }}:${{ steps.semanticrelease.outputs.release-version }}
-            ${{ secrets.DOCKERHUB_REPO }}/${{ secrets.DOCKERHUB_IMG }}:latest
+          set: |
+            *.args.DOCKERHUB_REPO=${{ env.DOCKERHUB_REPO }}
+            *.args.DOCKERHUB_IMG=${{ env.DOCKERHUB_IMG }}
+            *.args.RELEASE_VERSION=${{ env.RELEASE_VERSION }}
+            sd3.args.HUGGINGFACE_ACCESS_TOKEN=${{ env.HUGGINGFACE_ACCESS_TOKEN }}
 
       - name: Update description on Docker Hub
         if: steps.semanticrelease.outputs.new-release-published == 'true'
@@ -75,4 +87,4 @@ jobs:
         with:
           username: ${{ secrets.DOCKERHUB_USERNAME }}
           password: ${{ secrets.DOCKERHUB_TOKEN }}
-          repository: ${{ secrets.DOCKERHUB_REPO }}/${{ secrets.DOCKERHUB_IMG }}
+          repository: ${{ env.DOCKERHUB_REPO }}/${{ env.DOCKERHUB_IMG }}
diff --git a/Dockerfile b/Dockerfile
index a809a94..25696b3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-# Use Nvidia CUDA base image
+# Stage 1: Base image with common dependencies
 FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 as base
 
 # Prevents prompts from packages asking for user input during installation
@@ -24,16 +24,9 @@ RUN git clone https://github.com/comfyanonymous/ComfyUI.git /comfyui
 # Change working directory to ComfyUI
 WORKDIR /comfyui
 
-ARG SKIP_DEFAULT_MODELS
-# Download checkpoints/vae/LoRA to include in image.
-RUN if [ -z "$SKIP_DEFAULT_MODELS" ]; then wget -O models/checkpoints/sd_xl_base_1.0.safetensors https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors; fi
-RUN if [ -z "$SKIP_DEFAULT_MODELS" ]; then wget -O models/vae/sdxl_vae.safetensors https://huggingface.co/stabilityai/sdxl-vae/resolve/main/sdxl_vae.safetensors; fi
-RUN if [ -z "$SKIP_DEFAULT_MODELS" ]; then wget -O models/vae/sdxl-vae-fp16-fix.safetensors https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/resolve/main/sdxl_vae.safetensors; fi
-
 # Install ComfyUI dependencies
-RUN pip3 install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 \
-    && pip3 install --no-cache-dir xformers==0.0.21 \
-    && pip3 install -r requirements.txt
+RUN pip3 install --upgrade --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121 \
+    && pip3 install --upgrade -r requirements.txt
 
 # Install runpod
 RUN pip3 install runpod requests
@@ -48,5 +41,33 @@ WORKDIR /
 ADD src/start.sh src/rp_handler.py test_input.json ./
 RUN chmod +x /start.sh
 
+# Start the container; declared in the base stage so that an image built with
+# `--target base` (the `-base` image) also launches the worker
+CMD /start.sh
+
+# Stage 2: Download models
+FROM base as downloader
+
+ARG HUGGINGFACE_ACCESS_TOKEN
+ARG MODEL_TYPE
+
+# Change working directory to ComfyUI
+WORKDIR /comfyui
+
+# Download checkpoints/vae/LoRA to include in image based on model type
+RUN if [ "$MODEL_TYPE" = "sdxl" ]; then \
+      wget -O models/checkpoints/sd_xl_base_1.0.safetensors https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0/resolve/main/sd_xl_base_1.0.safetensors && \
+      wget -O models/vae/sdxl_vae.safetensors https://huggingface.co/stabilityai/sdxl-vae/resolve/main/sdxl_vae.safetensors && \
+      wget -O models/vae/sdxl-vae-fp16-fix.safetensors https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/resolve/main/sdxl_vae.safetensors; \
+    elif [ "$MODEL_TYPE" = "sd3" ]; then \
+      wget --header="Authorization: Bearer ${HUGGINGFACE_ACCESS_TOKEN}" -O models/checkpoints/sd3_medium_incl_clips_t5xxlfp8.safetensors https://huggingface.co/stabilityai/stable-diffusion-3-medium/resolve/main/sd3_medium_incl_clips_t5xxlfp8.safetensors; \
+    fi
+
+# Stage 3: Final image
+FROM base as final
+
+# Copy models from stage 2 to the final image
+COPY --from=downloader /comfyui/models /comfyui/models
+
 # Start the container
 CMD /start.sh
diff --git a/README.md b/README.md
index c898a65..9c262a2 100644
--- a/README.md
+++ b/README.md
@@ -49,7 +49,10 @@ Read our article here: https://blib.la/blog/comfyui-on-runpod
 
 ## Quickstart
 
-- 🐳 Use the latest release of the image for your worker: [timpietruskyblibla/runpod-worker-comfy:2.1.3](https://hub.docker.com/r/timpietruskyblibla/runpod-worker-comfy)
+- 🐳 Choose one of the three available images for your serverless endpoint:
+  - `timpietruskyblibla/runpod-worker-comfy:3.0.0-base`: doesn't contain any checkpoints, just a clean ComfyUI image
+  - `timpietruskyblibla/runpod-worker-comfy:3.0.0-sdxl`: contains the checkpoints and VAE for Stable Diffusion XL
+  - `timpietruskyblibla/runpod-worker-comfy:3.0.0-sd3`: contains the medium checkpoint for Stable Diffusion 3
 - ⚙️ [Set the environment variables](#config)
 - ℹ️ [Use the Docker image on RunPod](#use-the-docker-image-on-runpod)
 
@@ -60,11 +63,14 @@ Read our article here: https://blib.la/blog/comfyui-on-runpod
 - The generated image is either:
   - Returned as base64-encoded string (default)
   - Uploaded to AWS S3 ([if AWS S3 is configured](#upload-image-to-aws-s3))
-- Build-in checkpoint:
-  - [sd_xl_base_1.0.safetensors](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
-- Build-in VAE:
-  - [sdxl_vae.safetensors](https://huggingface.co/stabilityai/sdxl-vae/)
-  - [sdxl-vae-fp16-fix](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/)
+- There are three different Docker images to choose from:
+  - `<version>-base`: doesn't contain any checkpoints, just a clean ComfyUI image
+  - `<version>-sdxl`: contains the checkpoints and VAE for Stable Diffusion XL
+    - Checkpoint: [sd_xl_base_1.0.safetensors](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
+    - VAEs:
+      - [sdxl_vae.safetensors](https://huggingface.co/stabilityai/sdxl-vae/)
+      - [sdxl-vae-fp16-fix](https://huggingface.co/madebyollin/sdxl-vae-fp16-fix/)
+  - `<version>-sd3`: contains the checkpoint [sd3_medium_incl_clips_t5xxlfp8.safetensors](https://huggingface.co/stabilityai/stable-diffusion-3-medium) for Stable Diffusion 3
 - [Bring your own models](#bring-your-own-models)
 - Based on [Ubuntu + NVIDIA CUDA](https://hub.docker.com/r/nvidia/cuda)
 
@@ -98,10 +104,10 @@ This is only needed if you want to upload the generated picture to AWS S3. If yo
 - In the dialog, configure:
   - Template Name: `runpod-worker-comfy` (it can be anything you want)
   - Template Type: serverless (change template type to "serverless")
-  - Container Image: `<dockerhub_username>/<repository_name>:tag`, in this case: `timpietruskyblibla/runpod-worker-comfy:2.1.3` (or `dev` if you want to have the development release)
+  - Container Image: `<dockerhub_username>/<repository_name>:tag`, in this case: `timpietruskyblibla/runpod-worker-comfy:3.0.0-sd3` (or `-base` for a clean image or `-sdxl` for Stable Diffusion XL)
   - Container Registry Credentials: You can leave everything as it is, as this repo is public
   - Container Disk: `20 GB`
-  - Enviroment Variables: [Configure S3](#upload-image-to-aws-s3)
+  - (optional) Environment Variables: [Configure S3](#upload-image-to-aws-s3)
     - Note: You can also not configure it, the images will then stay in the worker. In order to have them stored permanently, [we have to add the network volume](https://github.com/blib-la/runpod-worker-comfy/issues/1)
 - Click on `Save Template`
 - Navigate to [`Serverless > Endpoints`](https://www.runpod.io/console/serverless/user/endpoints) and click on `New Endpoint`
@@ -112,7 +118,7 @@ This is only needed if you want to upload the generated picture to AWS S3. If yo
   - Max Workers: `3` (whatever makes sense for you)
   - Idle Timeout: `5` (you can leave the default)
   - Flash Boot: `enabled` (doesn't cost more, but provides faster boot of our worker, which is good)
-  - Advanced: If you are using a Network Volume, select it under `Select Network Volume`. Otherwise leave the defaults.
+  - (optional) Advanced: If you are using a Network Volume, select it under `Select Network Volume`. Otherwise leave the defaults.
   - Select a GPU that has some availability
   - GPUs/Worker: `1`
 - Click `deploy`
@@ -283,15 +289,21 @@ If you prefer to include your models directly in the Docker image, follow these
      ```
 
 3. **Build Your Docker Image**:
-   - Build the image locally:
+   - Build the **base** image locally:
      ```bash
-     docker build -t <your_dockerhub_username>/runpod-worker-comfy:dev --platform linux/amd64 .
+     docker build -t <your_dockerhub_username>/runpod-worker-comfy:dev-base --target base --platform linux/amd64 .
      ```
-   - Optionally, skip downloading the default models to reduce the image size:
+   - Build the **sdxl** image locally:
      ```bash
-     docker build --build-arg SKIP_DEFAULT_MODELS=1 -t <your_dockerhub_username>/runpod-worker-comfy:dev --platform linux/amd64 .
+     docker build --build-arg MODEL_TYPE=sdxl -t <your_dockerhub_username>/runpod-worker-comfy:dev-sdxl --platform linux/amd64 .
      ```
-   - Ensure to specify `--platform linux/amd64` to avoid errors on RunPod, see [issue #13](https://github.com/blib-la/runpod-worker-comfy/issues/13).
+   - Build the **sd3** image locally:
+     ```bash
+     docker build --build-arg MODEL_TYPE=sd3 --build-arg HUGGINGFACE_ACCESS_TOKEN=<your-huggingface-token> -t <your_dockerhub_username>/runpod-worker-comfy:dev-sd3 --platform linux/amd64 .
+     ```
+
+> [!NOTE]  
+> Ensure to specify `--platform linux/amd64` to avoid errors on RunPod, see [issue #13](https://github.com/blib-la/runpod-worker-comfy/issues/13).
 
 ## Local testing
 
@@ -385,14 +397,19 @@ The repo contains two workflows that publish the image to Docker hub using GitHu
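-- [dev.yml](.github/workflows/dev.yml): Creates the image and pushes it to Docker hub with the `dev` tag on every push to the `main` branch
-- [release.yml](.github/workflows/release.yml): Creates the image and pushes it to Docker hub with the `latest` and the release tag. It will only be triggered when you create a release on GitHub
+- [dev.yml](.github/workflows/dev.yml): Creates the images and pushes them to Docker Hub on every push to a branch other than `main`, tagged with the branch name (`/` replaced by `-`) plus the image type, e.g. `<branch>-base`, `<branch>-sdxl` and `<branch>-sd3`
+- [release.yml](.github/workflows/release.yml): Creates the images and pushes them to Docker Hub tagged with the release version plus the image type, e.g. `<version>-base`, `<version>-sdxl` and `<version>-sd3`. It will only be triggered when you create a release on GitHub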
 
-If you want to use this, you should add these secrets to your repository:
+If you want to use this, you should add these **secrets** to your repository:
+
+| Configuration Variable | Description                               | Example Value   |
+| ---------------------- | ----------------------------------------- | --------------- |
+| `DOCKERHUB_USERNAME`   | Your Docker Hub username.                 | `your-username` |
+| `DOCKERHUB_TOKEN`      | Your Docker Hub token for authentication. | `your-token`    |
+
+And also make sure to add these **variables** to your repository:
 
-| Configuration Variable | Description                                                  | Example Value         |
-| ---------------------- | ------------------------------------------------------------ | --------------------- |
-| `DOCKERHUB_USERNAME`   | Your Docker Hub username.                                    | `your-username`       |
-| `DOCKERHUB_TOKEN`      | Your Docker Hub token for authentication.                    | `your-token`          |
-| `DOCKERHUB_REPO`       | The repository on Docker Hub where the image will be pushed. | `timpietruskyblibla`  |
-| `DOCKERHUB_IMG`        | The name of the image to be pushed to Docker Hub.            | `runpod-worker-comfy` |
+| Variable Name    | Description                                                  | Example Value         |
+| ---------------- | ------------------------------------------------------------ | --------------------- |
+| `DOCKERHUB_REPO` | The repository on Docker Hub where the image will be pushed. | `timpietruskyblibla`  |
+| `DOCKERHUB_IMG`  | The name of the image to be pushed to Docker Hub.            | `runpod-worker-comfy` |
 
 ## Acknowledgments
 
diff --git a/docker-bake.hcl b/docker-bake.hcl
new file mode 100644
index 0000000..99d06fd
--- /dev/null
+++ b/docker-bake.hcl
@@ -0,0 +1,49 @@
+variable "DOCKERHUB_REPO" {
+  default = ""
+}
+
+variable "DOCKERHUB_IMG" {
+  default = ""
+}
+
+variable "RELEASE_VERSION" {
+  default = ""
+}
+
+variable "HUGGINGFACE_ACCESS_TOKEN" {
+  default = ""
+}
+
+group "default" {
+  targets = ["base", "sdxl", "sd3"]
+}
+
+target "base" {
+  context = "."
+  dockerfile = "Dockerfile"
+  target = "base"
+  tags = ["${DOCKERHUB_REPO}/${DOCKERHUB_IMG}:${RELEASE_VERSION}-base"]
+}
+
+target "sdxl" {
+  context = "."
+  dockerfile = "Dockerfile"
+  target = "final"
+  args = {
+    MODEL_TYPE = "sdxl"
+  }
+  tags = ["${DOCKERHUB_REPO}/${DOCKERHUB_IMG}:${RELEASE_VERSION}-sdxl"]
+  inherits = ["base"]
+}
+
+target "sd3" {
+  context = "."
+  dockerfile = "Dockerfile"
+  target = "final"
+  args = {
+    MODEL_TYPE = "sd3"
+    HUGGINGFACE_ACCESS_TOKEN = "${HUGGINGFACE_ACCESS_TOKEN}"
+  }
+  tags = ["${DOCKERHUB_REPO}/${DOCKERHUB_IMG}:${RELEASE_VERSION}-sd3"]
+  inherits = ["base"]
+}
diff --git a/test_resources/workflows/workflow_sd3.json b/test_resources/workflows/workflow_sd3.json
new file mode 100644
index 0000000..3118f74
--- /dev/null
+++ b/test_resources/workflows/workflow_sd3.json
@@ -0,0 +1,94 @@
+{
+  "input": {
+    "workflow": {
+      "6": {
+        "inputs": {
+          "text": "comic illustration of a white unicorn with a golden horn and pink mane and tail standing amidst a colorful and magical fantasy landscape. The background is filled with pastel-colored mountains and fluffy clouds and colorful balloons and stars. There are vibrant rainbows arching across the sky. The ground is adorned with oversized, candy-like plants, trees shaped like lollipops, and swirling ice cream cones. The scene is bathed in soft, dreamy light, giving it an enchanting and otherworldly feel. 4k, high resolution",
+          "clip": ["252", 1]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+          "title": "CLIP Text Encode (Prompt)"
+        }
+      },
+      "13": {
+        "inputs": {
+          "shift": 3,
+          "model": ["252", 0]
+        },
+        "class_type": "ModelSamplingSD3",
+        "_meta": {
+          "title": "ModelSamplingSD3"
+        }
+      },
+      "71": {
+        "inputs": {
+          "text": "worst quality, lowres, blurry, deformed, overexposure, bright, hands, oversaturated, burned, oversharpened, artifacts, hand, human, handwriting, nsfw, breast, breasts",
+          "clip": ["252", 1]
+        },
+        "class_type": "CLIPTextEncode",
+        "_meta": {
+          "title": "CLIP Text Encode (Negative Prompt)"
+        }
+      },
+      "135": {
+        "inputs": {
+          "width": 1152,
+          "height": 768,
+          "batch_size": 1
+        },
+        "class_type": "EmptySD3LatentImage",
+        "_meta": {
+          "title": "EmptySD3LatentImage"
+        }
+      },
+      "231": {
+        "inputs": {
+          "samples": ["271", 0],
+          "vae": ["252", 2]
+        },
+        "class_type": "VAEDecode",
+        "_meta": {
+          "title": "VAE Decode"
+        }
+      },
+      "252": {
+        "inputs": {
+          "ckpt_name": "sd3_medium_incl_clips_t5xxlfp8.safetensors"
+        },
+        "class_type": "CheckpointLoaderSimple",
+        "_meta": {
+          "title": "Load Checkpoint"
+        }
+      },
+      "271": {
+        "inputs": {
+          "seed": 291740611171897,
+          "steps": 28,
+          "cfg": 4.5,
+          "sampler_name": "dpmpp_2m",
+          "scheduler": "sgm_uniform",
+          "denoise": 1,
+          "model": ["13", 0],
+          "positive": ["6", 0],
+          "negative": ["71", 0],
+          "latent_image": ["135", 0]
+        },
+        "class_type": "KSampler",
+        "_meta": {
+          "title": "KSampler"
+        }
+      },
+      "273": {
+        "inputs": {
+          "filename_prefix": "sd3/sd3",
+          "images": ["231", 0]
+        },
+        "class_type": "SaveImage",
+        "_meta": {
+          "title": "Save Image"
+        }
+      }
+    }
+  }
+}