LoRA fuse sample for Text2Image pipeline. (#1170)
This sample demonstrates how to maximize the performance of LoRA adapters with the Text2Image pipeline.
ilya-lavrenov authored Nov 12, 2024
2 parents 8341634 + 88916d3 commit 366662b
Showing 9 changed files with 180 additions and 11 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/lcm_dreamshaper_cpp.yml
@@ -51,7 +51,7 @@ jobs:
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }}
-cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion py_openvino_genai --parallel
+cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel
- name: Create virtual environment
run: python3 -m venv openvino_lcm_cpp
@@ -106,15 +106,15 @@ jobs:
with:
python-version: ${{ env.PYTHON_VERSION }}
cache: 'pip'

- name: Create virtual environment
run: python -m venv openvino_lcm_cpp

- name: Build app
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }}
-cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion py_openvino_genai --parallel
+cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel
- name: Install python dependencies
run: |
@@ -140,7 +140,7 @@ jobs:
python .\samples\python\text2image\main.py .\models\lcm_dreamshaper_v7\FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
env:
PYTHONPATH: ${{ env.build_dir }}

Overall_Status:
name: ci/gha_overall_status_lcm
needs: [lcm_dreamshaper_v7_cpp-linux, lcm_dreamshaper_v7_cpp-windows]
32 changes: 30 additions & 2 deletions .github/workflows/stable_diffusion_1_5_cpp.yml
@@ -51,7 +51,7 @@ jobs:
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }}
-cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion py_openvino_genai --parallel
+cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel
- name: Create virtual environment
run: python3 -m venv openvino_sd_cpp
@@ -78,6 +78,11 @@ jobs:
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
${{ env.build_dir }}/samples/cpp/text2image/lora_stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
- name: Run LoRA fuse app
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
${{ env.build_dir }}/samples/cpp/text2image/lora_fuse_stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
- name: Run Python main app
run: |
source openvino_sd_cpp/bin/activate
@@ -94,6 +99,14 @@ jobs:
env:
PYTHONPATH: ${{ env.build_dir }}

- name: Run Python LoRA fuse app
run: |
source openvino_sd_cpp/bin/activate
source ./ov/setupvars.sh
python ./samples/python/text2image/lora_fuse.py ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
env:
PYTHONPATH: ${{ env.build_dir }}

stable_diffusion_1_5_cpp-windows:
runs-on: windows-2019
defaults:
@@ -125,7 +138,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }}
-cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion py_openvino_genai --parallel
+cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel
- name: Create virtual environment
run: python -m venv openvino_sd_cpp
@@ -156,6 +169,13 @@ jobs:
env:
PATH: ${{ env.build_dir }}\openvino_genai

- name: Run LoRA fuse app
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
"${{ env.build_dir }}/samples/cpp/text2image/Release/lora_fuse_stable_diffusion.exe ./models/dreamlike-art-dreamlike-anime-1.0/FP16 'curly-haired unicorn in the forest, anime, line' ./models/soulcard.safetensors 0.7"
env:
PATH: ${{ env.build_dir }}\openvino_genai

- name: Run Python main app
run: |
. "./openvino_sd_cpp/Scripts/Activate.ps1"
@@ -173,6 +193,14 @@ jobs:
python .\samples\python\text2image\lora.py .\models\dreamlike-art-dreamlike-anime-1.0\FP16 "curly-haired unicorn in the forest, anime, line" .\models\soulcard.safetensors 0.7
env:
PYTHONPATH: ${{ env.build_dir }}
- name: Run Python LoRA fuse app
run: |
. "./openvino_sd_cpp/Scripts/Activate.ps1"
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
$env:Path += "${{ env.build_dir }}\openvino_genai"
python .\samples\python\text2image\lora_fuse.py .\models\dreamlike-art-dreamlike-anime-1.0\FP16 "curly-haired unicorn in the forest, anime, line" .\models\soulcard.safetensors 0.7
env:
PYTHONPATH: ${{ env.build_dir }}

Overall_Status:
name: ci/gha_overall_status_stable_diffusion
19 changes: 19 additions & 0 deletions samples/cpp/text2image/CMakeLists.txt
@@ -46,6 +46,25 @@ install(TARGETS lora_stable_diffusion
COMPONENT samples_bin
EXCLUDE_FROM_ALL)

# create LoRA fuse sample executable

add_executable(lora_fuse_stable_diffusion
${CMAKE_CURRENT_SOURCE_DIR}/lora_fuse.cpp
${CMAKE_CURRENT_SOURCE_DIR}/imwrite.cpp)

target_include_directories(lora_fuse_stable_diffusion PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(lora_fuse_stable_diffusion PRIVATE openvino::genai)

set_target_properties(lora_fuse_stable_diffusion PROPERTIES
COMPILE_PDB_NAME lora_fuse_stable_diffusion
# Ensure out of box LC_RPATH on macOS with SIP
INSTALL_RPATH_USE_LINK_PATH ON)

install(TARGETS lora_fuse_stable_diffusion
RUNTIME DESTINATION samples_bin/
COMPONENT samples_bin
EXCLUDE_FROM_ALL)

# create heterogeneous_stable_diffusion sample executable

add_executable(heterogeneous_stable_diffusion
6 changes: 5 additions & 1 deletion samples/cpp/text2image/README.md
@@ -2,9 +2,10 @@

Examples in this folder showcase inference of text-to-image models like Stable Diffusion 1.5, 2.1, and LCM. The applications are deliberately light on configuration options to encourage the reader to explore and modify the source code, for example, to change the inference device to GPU. The sample features `ov::genai::Text2ImagePipeline` and uses a text prompt as its input source.

-There are three sample files:
+There are several sample files:
- [`main.cpp`](./main.cpp) demonstrates basic usage of the text to image pipeline
- [`lora.cpp`](./lora.cpp) shows how to apply LoRA adapters to the pipeline
- [`lora_fuse.cpp`](./lora_fuse.cpp) shows how to maximize performance of LoRA adapters by fusing them into base model weights
- [`heterogeneous_stable_diffusion.cpp`](./heterogeneous_stable_diffusion.cpp) shows how to assemble a heterogeneous text-to-image pipeline from individual subcomponents (scheduler, text encoder, UNet, VAE decoder)

Users can change the sample code and play with the following generation parameters:
@@ -62,6 +63,9 @@
With adapter | Without adapter
:---:|:---:
![](./lora.bmp) | ![](./baseline.bmp)

## Fuse LoRA adapters into model weights

To maximize inference performance using a LoRA adapter, refer to `lora_fuse.cpp`, which demonstrates fusing the adapter into the model weights. This approach achieves the same performance as the base model without a LoRA adapter but removes the flexibility to switch adapters between generate calls. This mode is ideal when performing multiple generations with the same LoRA adapters and blending alpha parameters, and when model recompilation on adapter changes is feasible. The example outputs the resulting image as `lora.bmp`.
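
As a usage sketch, the fused sample takes the same command line as `lora_stable_diffusion`: a model directory, a prompt, and adapter/alpha pairs. The paths below mirror the CI workflow in this commit and are illustrative; adjust them to your setup:

```sh
./lora_fuse_stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 \
    "curly-haired unicorn in the forest, anime, line" \
    ./models/soulcard.safetensors 0.7
```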

## Note

2 changes: 1 addition & 1 deletion samples/cpp/text2image/lora.cpp
@@ -9,7 +9,7 @@ int32_t main(int32_t argc, char* argv[]) try {
OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " <MODEL_DIR> '<PROMPT>' [<LORA_SAFETENSORS> <ALPHA> ...]");

const std::string models_path = argv[1], prompt = argv[2];
-const std::string device = "CPU"; // GPU, NPU can be used as well
+const std::string device = "CPU"; // GPU can be used as well

ov::genai::AdapterConfig adapter_config;
// Multiple LoRA adapters applied simultaneously are supported; parse them all, with the corresponding alphas, from the command-line parameters:
48 changes: 48 additions & 0 deletions samples/cpp/text2image/lora_fuse.cpp
@@ -0,0 +1,48 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "openvino/genai/image_generation/text2image_pipeline.hpp"

#include "imwrite.hpp"

int32_t main(int32_t argc, char* argv[]) try {
    OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " <MODEL_DIR> '<PROMPT>' [<LORA_SAFETENSORS> <ALPHA> ...]");

    const std::string models_path = argv[1], prompt = argv[2];
    const std::string device = "CPU"; // GPU can be used as well

    // MODE_FUSE instructs the pipeline to fuse adapter tensors into the original model weights loaded in memory,
    // giving the same inference performance as the original model. After fusing you cannot
    // change adapters dynamically without re-initializing the pipeline from scratch.
    ov::genai::AdapterConfig adapter_config(ov::genai::AdapterConfig::MODE_FUSE);

    // Multiple LoRA adapters applied simultaneously are supported; parse them all, with the corresponding alphas, from the command-line parameters:
    for (size_t i = 0; i < (argc - 3) / 2; ++i) {
        ov::genai::Adapter adapter(argv[3 + 2 * i]);
        float alpha = std::atof(argv[3 + 2 * i + 1]);
        adapter_config.add(adapter, alpha);
    }

    // LoRA adapters passed to the constructor will be activated by default in subsequent generate calls
    ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config));

    std::cout << "Generating image with LoRA adapters fused into original weights, resulting image will be in lora.bmp\n";
    ov::Tensor image = pipe.generate(prompt,
        ov::genai::generator(std::make_shared<ov::genai::CppStdGenerator>(42)),
        ov::genai::width(512),
        ov::genai::height(896),
        ov::genai::num_inference_steps(20));
    imwrite("lora.bmp", image, true);

    return EXIT_SUCCESS;
} catch (const std::exception& error) {
    try {
        std::cerr << error.what() << '\n';
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
} catch (...) {
    try {
        std::cerr << "Non-exception object thrown\n";
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
}
8 changes: 7 additions & 1 deletion samples/python/text2image/README.md
@@ -2,9 +2,10 @@

Examples in this folder showcase inference of text-to-image models like Stable Diffusion 1.5, 2.1, and LCM. The applications are deliberately light on configuration options to encourage the reader to explore and modify the source code, for example, to change the inference device to GPU. The sample features `openvino_genai.Text2ImagePipeline` and uses a text prompt as its input source.

-There are two sample files:
+There are several sample files:
- [`main.py`](./main.py) demonstrates basic usage of the text to image pipeline
- [`lora.py`](./lora.py) shows how to apply LoRA adapters to the pipeline
- [`lora_fuse.py`](./lora_fuse.py) shows how to maximize performance of LoRA adapters by fusing them into base model weights

Users can change the sample code and play with the following generation parameters:

@@ -60,3 +61,8 @@ Check the difference:
With adapter | Without adapter
:---:|:---:
![](./lora.bmp) | ![](./baseline.bmp)


## Fuse LoRA adapters into model weights

To maximize inference performance using a LoRA adapter, refer to `lora_fuse.py`, which demonstrates fusing the adapter into the model weights. This approach achieves the same performance as the base model without a LoRA adapter but removes the flexibility to switch adapters between generate calls. This mode is ideal when performing multiple generations with the same LoRA adapters and blending alpha parameters, and when model recompilation on adapter changes is feasible. The example outputs the resulting image as `lora.bmp`.
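
As a usage sketch mirroring the CI workflow in this commit (paths are illustrative; adjust them to your setup):

```sh
python ./samples/python/text2image/lora_fuse.py ./models/dreamlike-art-dreamlike-anime-1.0/FP16 \
    "curly-haired unicorn in the forest, anime, line" \
    ./models/soulcard.safetensors 0.7
```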
2 changes: 1 addition & 1 deletion samples/python/text2image/lora.py
@@ -35,7 +35,7 @@ def main():

prompt = args.prompt

device = "CPU" # GPU, NPU can be used as well
device = "CPU" # GPU can be used as well
adapter_config = openvino_genai.AdapterConfig()

# Multiple LoRA adapters applied simultaneously are supported; parse them all, with the corresponding alphas, from the command-line parameters:
64 changes: 64 additions & 0 deletions samples/python/text2image/lora_fuse.py
@@ -0,0 +1,64 @@
#!/usr/bin/env python3
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import argparse

import openvino as ov
import openvino_genai
import numpy as np


class Generator(openvino_genai.Generator):
    # Custom random-number generator producing normally distributed values,
    # the Python counterpart of CppStdGenerator in the C++ sample
    def __init__(self, seed, mu=0.0, sigma=1.0):
        openvino_genai.Generator.__init__(self)
        np.random.seed(seed)
        self.mu = mu
        self.sigma = sigma

    def next(self):
        return np.random.normal(self.mu, self.sigma)


def image_write(path: str, image_tensor: ov.Tensor):
    from PIL import Image
    image = Image.fromarray(image_tensor.data[0])
    image.save(path)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('models_path')
    parser.add_argument('prompt')
    args, adapters = parser.parse_known_args()

    prompt = args.prompt

    device = "CPU"  # GPU can be used as well

    # MODE_FUSE instructs the pipeline to fuse adapter tensors into the original model weights loaded in memory,
    # giving the same inference performance as the original model. After fusing you cannot
    # change adapters dynamically without re-initializing the pipeline from scratch.
    adapter_config = openvino_genai.AdapterConfig(openvino_genai.AdapterConfig.Mode.MODE_FUSE)

    # Multiple LoRA adapters applied simultaneously are supported; parse them all, with the corresponding alphas, from the command-line parameters:
    for i in range(len(adapters) // 2):
        adapter = openvino_genai.Adapter(adapters[2 * i])
        alpha = float(adapters[2 * i + 1])
        adapter_config.add(adapter, alpha)

    # LoRA adapters passed to the constructor will be activated by default in subsequent generate calls
    pipe = openvino_genai.Text2ImagePipeline(args.models_path, device, adapters=adapter_config)
    print("Generating image with LoRA adapters fused into original weights, resulting image will be in lora.bmp")
    image = pipe.generate(prompt,
                          generator=Generator(42),
                          width=512,
                          height=896,
                          num_inference_steps=20)

    image_write("lora.bmp", image)


if __name__ == '__main__':
    main()
