LoRA fuse sample for Text2Image pipeline. (#1170)
This sample demonstrates how to maximize the performance of LoRA adapters with the Text2Image pipeline.
ilya-lavrenov authored Nov 12, 2024
2 parents 8341634 + 88916d3 commit 366662b
Showing 9 changed files with 180 additions and 11 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/lcm_dreamshaper_cpp.yml
@@ -51,7 +51,7 @@ jobs:
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }}
-cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion py_openvino_genai --parallel
+cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel
- name: Create virtual environment
run: python3 -m venv openvino_lcm_cpp
@@ -106,15 +106,15 @@ jobs:
with:
python-version: ${{ env.PYTHON_VERSION }}
cache: 'pip'

- name: Create virtual environment
run: python -m venv openvino_lcm_cpp

- name: Build app
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }}
-cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion py_openvino_genai --parallel
+cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel
- name: Install python dependencies
run: |
@@ -140,7 +140,7 @@ jobs:
python .\samples\python\text2image\main.py .\models\lcm_dreamshaper_v7\FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
env:
PYTHONPATH: ${{ env.build_dir }}

Overall_Status:
name: ci/gha_overall_status_lcm
needs: [lcm_dreamshaper_v7_cpp-linux, lcm_dreamshaper_v7_cpp-windows]
32 changes: 30 additions & 2 deletions .github/workflows/stable_diffusion_1_5_cpp.yml
@@ -51,7 +51,7 @@ jobs:
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }}
-cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion py_openvino_genai --parallel
+cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel
- name: Create virtual environment
run: python3 -m venv openvino_sd_cpp
@@ -78,6 +78,11 @@ jobs:
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
${{ env.build_dir }}/samples/cpp/text2image/lora_stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
- name: Run LoRA fuse app
run: |
source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
${{ env.build_dir }}/samples/cpp/text2image/lora_fuse_stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
- name: Run Python main app
run: |
source openvino_sd_cpp/bin/activate
@@ -94,6 +99,14 @@ jobs:
env:
PYTHONPATH: ${{ env.build_dir }}

- name: Run Python LoRA fuse app
run: |
source openvino_sd_cpp/bin/activate
source ./ov/setupvars.sh
python ./samples/python/text2image/lora_fuse.py ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
env:
PYTHONPATH: ${{ env.build_dir }}

stable_diffusion_1_5_cpp-windows:
runs-on: windows-2019
defaults:
@@ -125,7 +138,7 @@ jobs:
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }}
-cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion py_openvino_genai --parallel
+cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel
- name: Create virtual environment
run: python -m venv openvino_sd_cpp
@@ -156,6 +169,13 @@ jobs:
env:
PATH: ${{ env.build_dir }}\openvino_genai

- name: Run LoRA fuse app
run: |
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
"${{ env.build_dir }}/samples/cpp/text2image/Release/lora_fuse_stable_diffusion.exe ./models/dreamlike-art-dreamlike-anime-1.0/FP16 'curly-haired unicorn in the forest, anime, line' ./models/soulcard.safetensors 0.7"
env:
PATH: ${{ env.build_dir }}\openvino_genai

- name: Run Python main app
run: |
. "./openvino_sd_cpp/Scripts/Activate.ps1"
@@ -173,6 +193,14 @@ jobs:
python .\samples\python\text2image\lora.py .\models\dreamlike-art-dreamlike-anime-1.0\FP16 "curly-haired unicorn in the forest, anime, line" .\models\soulcard.safetensors 0.7
env:
PYTHONPATH: ${{ env.build_dir }}
- name: Run Python LoRA fuse app
run: |
. "./openvino_sd_cpp/Scripts/Activate.ps1"
. "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
$env:Path += "${{ env.build_dir }}\openvino_genai"
python .\samples\python\text2image\lora_fuse.py .\models\dreamlike-art-dreamlike-anime-1.0\FP16 "curly-haired unicorn in the forest, anime, line" .\models\soulcard.safetensors 0.7
env:
PYTHONPATH: ${{ env.build_dir }}

Overall_Status:
name: ci/gha_overall_status_stable_diffusion
19 changes: 19 additions & 0 deletions samples/cpp/text2image/CMakeLists.txt
@@ -46,6 +46,25 @@ install(TARGETS lora_stable_diffusion
COMPONENT samples_bin
EXCLUDE_FROM_ALL)

# create LoRA fuse sample executable

add_executable(lora_fuse_stable_diffusion
${CMAKE_CURRENT_SOURCE_DIR}/lora_fuse.cpp
${CMAKE_CURRENT_SOURCE_DIR}/imwrite.cpp)

target_include_directories(lora_fuse_stable_diffusion PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(lora_fuse_stable_diffusion PRIVATE openvino::genai)

set_target_properties(lora_fuse_stable_diffusion PROPERTIES
COMPILE_PDB_NAME lora_fuse_stable_diffusion
# Ensure out of box LC_RPATH on macOS with SIP
INSTALL_RPATH_USE_LINK_PATH ON)

install(TARGETS lora_fuse_stable_diffusion
RUNTIME DESTINATION samples_bin/
COMPONENT samples_bin
EXCLUDE_FROM_ALL)

# create heterogeneous_stable_diffusion sample executable

add_executable(heterogeneous_stable_diffusion
6 changes: 5 additions & 1 deletion samples/cpp/text2image/README.md
@@ -2,9 +2,10 @@

Examples in this folder showcase inference of text-to-image models like Stable Diffusion 1.5, 2.1, and LCM. The applications are deliberately light on configuration options to encourage the reader to explore and modify the source code, for example, to change the inference device to GPU. The sample features `ov::genai::Text2ImagePipeline` and uses a text prompt as its input source.

-There are three sample files:
+There are several sample files:
- [`main.cpp`](./main.cpp) demonstrates basic usage of the text to image pipeline
- [`lora.cpp`](./lora.cpp) shows how to apply LoRA adapters to the pipeline
- [`lora_fuse.cpp`](./lora_fuse.cpp) shows how to maximize performance of LoRA adapters by fusing them into base model weights
- [`heterogeneous_stable_diffusion.cpp`](./heterogeneous_stable_diffusion.cpp) shows how to assemble a heterogeneous text-to-image pipeline from individual subcomponents (scheduler, text encoder, UNet, VAE decoder)

Users can change the sample code and play with the following generation parameters:
@@ -62,6 +63,9 @@
With adapter | Without adapter
:---:|:---:
![](./lora.bmp) | ![](./baseline.bmp)

## Fuse LoRA adapters into model weights

To maximize inference performance using a LoRA adapter, refer to `lora_fuse.cpp`, which demonstrates fusing the adapter into the model weights. This approach achieves the same performance as the base model without a LoRA adapter but removes the flexibility to switch adapters between generate calls. This mode is ideal when performing multiple generations with the same LoRA adapters and blending alpha parameters, and when model recompilation on adapter changes is feasible. The example outputs the resulting image as `lora.bmp`.
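
As a usage sketch, the fused sample takes the same command line as `lora_stable_diffusion`: a model directory, a prompt, and adapter/alpha pairs. The paths below mirror the CI workflow in this commit and are illustrative; adjust them to your setup:

```sh
./lora_fuse_stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 \
    "curly-haired unicorn in the forest, anime, line" \
    ./models/soulcard.safetensors 0.7
```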

## Note

2 changes: 1 addition & 1 deletion samples/cpp/text2image/lora.cpp
@@ -9,7 +9,7 @@ int32_t main(int32_t argc, char* argv[]) try {
OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " <MODEL_DIR> '<PROMPT>' [<LORA_SAFETENSORS> <ALPHA> ...]");

const std::string models_path = argv[1], prompt = argv[2];
-const std::string device = "CPU"; // GPU, NPU can be used as well
+const std::string device = "CPU"; // GPU can be used as well

ov::genai::AdapterConfig adapter_config;
// Multiple LoRA adapters applied simultaneously are supported; parse them all, with the corresponding alphas, from the command-line parameters:
48 changes: 48 additions & 0 deletions samples/cpp/text2image/lora_fuse.cpp
@@ -0,0 +1,48 @@
// Copyright (C) 2023-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include "openvino/genai/image_generation/text2image_pipeline.hpp"

#include "imwrite.hpp"

int32_t main(int32_t argc, char* argv[]) try {
    OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " <MODEL_DIR> '<PROMPT>' [<LORA_SAFETENSORS> <ALPHA> ...]");

    const std::string models_path = argv[1], prompt = argv[2];
    const std::string device = "CPU"; // GPU can be used as well

    // MODE_FUSE instructs the pipeline to fuse adapter tensors into the original model weights loaded in memory,
    // giving the same inference performance as the original model. After fusing you cannot
    // change adapters dynamically without re-initializing the pipeline from scratch.
    ov::genai::AdapterConfig adapter_config(ov::genai::AdapterConfig::MODE_FUSE);

    // Multiple LoRA adapters applied simultaneously are supported; parse them all, with the corresponding alphas, from the command-line parameters:
    for (size_t i = 0; i < (argc - 3) / 2; ++i) {
        ov::genai::Adapter adapter(argv[3 + 2 * i]);
        float alpha = std::atof(argv[3 + 2 * i + 1]);
        adapter_config.add(adapter, alpha);
    }

    // LoRA adapters passed to the constructor will be activated by default in subsequent generate calls
    ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config));

    std::cout << "Generating image with LoRA adapters fused into original weights, resulting image will be in lora.bmp\n";
    ov::Tensor image = pipe.generate(prompt,
        ov::genai::generator(std::make_shared<ov::genai::CppStdGenerator>(42)),
        ov::genai::width(512),
        ov::genai::height(896),
        ov::genai::num_inference_steps(20));
    imwrite("lora.bmp", image, true);

    return EXIT_SUCCESS;
} catch (const std::exception& error) {
    try {
        std::cerr << error.what() << '\n';
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
} catch (...) {
    try {
        std::cerr << "Non-exception object thrown\n";
    } catch (const std::ios_base::failure&) {}
    return EXIT_FAILURE;
}
8 changes: 7 additions & 1 deletion samples/python/text2image/README.md
@@ -2,9 +2,10 @@

Examples in this folder showcase inference of text-to-image models like Stable Diffusion 1.5, 2.1, and LCM. The applications are deliberately light on configuration options to encourage the reader to explore and modify the source code, for example, to change the inference device to GPU. The sample features `openvino_genai.Text2ImagePipeline` and uses a text prompt as its input source.

-There are two sample files:
+There are several sample files:
- [`main.py`](./main.py) demonstrates basic usage of the text to image pipeline
- [`lora.py`](./lora.py) shows how to apply LoRA adapters to the pipeline
- [`lora_fuse.py`](./lora_fuse.py) shows how to maximize performance of LoRA adapters by fusing them into base model weights

Users can change the sample code and play with the following generation parameters:

@@ -60,3 +61,8 @@ Check the difference:
With adapter | Without adapter
:---:|:---:
![](./lora.bmp) | ![](./baseline.bmp)


## Fuse LoRA adapters into model weights

To maximize inference performance using a LoRA adapter, refer to `lora_fuse.py`, which demonstrates fusing the adapter into the model weights. This approach achieves the same performance as the base model without a LoRA adapter but removes the flexibility to switch adapters between generate calls. This mode is ideal when performing multiple generations with the same LoRA adapters and blending alpha parameters, and when model recompilation on adapter changes is feasible. The example outputs the resulting image as `lora.bmp`.
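
As a usage sketch mirroring the CI workflow in this commit (paths are illustrative; adjust them to your setup):

```sh
python ./samples/python/text2image/lora_fuse.py ./models/dreamlike-art-dreamlike-anime-1.0/FP16 \
    "curly-haired unicorn in the forest, anime, line" \
    ./models/soulcard.safetensors 0.7
```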
2 changes: 1 addition & 1 deletion samples/python/text2image/lora.py
@@ -35,7 +35,7 @@ def main():

prompt = args.prompt

device = "CPU" # GPU, NPU can be used as well
device = "CPU" # GPU can be used as well
adapter_config = openvino_genai.AdapterConfig()

# Multiple LoRA adapters applied simultaneously are supported; parse them all, with the corresponding alphas, from the command-line parameters:
64 changes: 64 additions & 0 deletions samples/python/text2image/lora_fuse.py
@@ -0,0 +1,64 @@
#!/usr/bin/env python3
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import argparse

import openvino as ov
import openvino_genai
import numpy as np


class Generator(openvino_genai.Generator):
    # Custom random-number generator producing normally distributed values,
    # the Python counterpart of CppStdGenerator in the C++ sample
    def __init__(self, seed, mu=0.0, sigma=1.0):
        openvino_genai.Generator.__init__(self)
        np.random.seed(seed)
        self.mu = mu
        self.sigma = sigma

    def next(self):
        return np.random.normal(self.mu, self.sigma)


def image_write(path: str, image_tensor: ov.Tensor):
    from PIL import Image
    image = Image.fromarray(image_tensor.data[0])
    image.save(path)


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('models_path')
    parser.add_argument('prompt')
    args, adapters = parser.parse_known_args()

    prompt = args.prompt

    device = "CPU"  # GPU can be used as well

    # MODE_FUSE instructs the pipeline to fuse adapter tensors into the original model weights loaded in memory,
    # giving the same inference performance as the original model. After fusing you cannot
    # change adapters dynamically without re-initializing the pipeline from scratch.
    adapter_config = openvino_genai.AdapterConfig(openvino_genai.AdapterConfig.Mode.MODE_FUSE)

    # Multiple LoRA adapters applied simultaneously are supported; parse them all, with the corresponding alphas, from the command-line parameters:
    for i in range(len(adapters) // 2):
        adapter = openvino_genai.Adapter(adapters[2 * i])
        alpha = float(adapters[2 * i + 1])
        adapter_config.add(adapter, alpha)

    # LoRA adapters passed to the constructor will be activated by default in subsequent generate calls
    pipe = openvino_genai.Text2ImagePipeline(args.models_path, device, adapters=adapter_config)
    print("Generating image with LoRA adapters fused into original weights, resulting image will be in lora.bmp")
    image = pipe.generate(prompt,
                          generator=Generator(42),
                          width=512,
                          height=896,
                          num_inference_steps=20)

    image_write("lora.bmp", image)


if __name__ == '__main__':
    main()
