diff --git a/.github/workflows/lcm_dreamshaper_cpp.yml b/.github/workflows/lcm_dreamshaper_cpp.yml
index 4c9972a491..9da36e49c1 100644
--- a/.github/workflows/lcm_dreamshaper_cpp.yml
+++ b/.github/workflows/lcm_dreamshaper_cpp.yml
@@ -51,7 +51,7 @@ jobs:
         run: |
           source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
           cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }}
-          cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion py_openvino_genai --parallel
+          cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel
 
       - name: Create virtual environment
         run: python3 -m venv openvino_lcm_cpp
@@ -106,15 +106,15 @@ jobs:
         with:
           python-version: ${{ env.PYTHON_VERSION }}
           cache: 'pip'
-      
+
       - name: Create virtual environment
         run: python -m venv openvino_lcm_cpp
-      
+
       - name: Build app
         run: |
           . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1"
           cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }}
-          cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion py_openvino_genai --parallel
+          cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion heterogeneous_stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel
 
       - name: Install python dependencies
         run: |
@@ -140,7 +140,7 @@ jobs:
           python .\samples\python\text2image\main.py .\models\lcm_dreamshaper_v7\FP16 "cyberpunk cityscape like Tokyo New York with tall buildings at dusk golden hour cinematic lighting"
         env:
           PYTHONPATH: ${{ env.build_dir }}
-      
+
   Overall_Status:
     name: ci/gha_overall_status_lcm
     needs: [lcm_dreamshaper_v7_cpp-linux, lcm_dreamshaper_v7_cpp-windows]
diff --git a/.github/workflows/stable_diffusion_1_5_cpp.yml b/.github/workflows/stable_diffusion_1_5_cpp.yml
index 119b279a2a..0dc55b7c02 100644
--- a/.github/workflows/stable_diffusion_1_5_cpp.yml
+++ b/.github/workflows/stable_diffusion_1_5_cpp.yml
@@ -51,7 +51,7 @@ jobs:
         run: |
           source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
           cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }}
-          cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion py_openvino_genai --parallel
+          cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel
 
       - name: Create virtual environment
         run: python3 -m venv openvino_sd_cpp
@@ -78,6 +78,11 @@ jobs:
           source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
           ${{ env.build_dir }}/samples/cpp/text2image/lora_stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
 
+      - name: Run LoRA fuse app
+        run: |
+          source ${{ env.OV_INSTALL_DIR }}/setupvars.sh
+          ${{ env.build_dir }}/samples/cpp/text2image/lora_fuse_stable_diffusion ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
+
       - name: Run Python main app
         run: |
           source openvino_sd_cpp/bin/activate
@@ -94,6 +99,14 @@ jobs:
         env:
           PYTHONPATH: ${{ env.build_dir }}
 
+      - name: Run Python LoRA fuse app
+        run: |
+          source openvino_sd_cpp/bin/activate
+          source ./ov/setupvars.sh
+          python ./samples/python/text2image/lora_fuse.py ./models/dreamlike-art-dreamlike-anime-1.0/FP16 "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
line" ./models/soulcard.safetensors 0.7 + env: + PYTHONPATH: ${{ env.build_dir }} + stable_diffusion_1_5_cpp-windows: runs-on: windows-2019 defaults: @@ -125,7 +138,7 @@ jobs: run: | . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1" cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ${{ env.build_dir }} - cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion py_openvino_genai --parallel + cmake --build ${{ env.build_dir }} --config Release --target stable_diffusion lora_stable_diffusion lora_fuse_stable_diffusion py_openvino_genai --parallel - name: Create virtual environment run: python -m venv openvino_sd_cpp @@ -156,6 +169,13 @@ jobs: env: PATH: ${{ env.build_dir }}\openvino_genai + - name: Run LoRA fuse app + run: | + . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1" + "${{ env.build_dir }}/samples/cpp/text2image/Release/lora_fuse_stable_diffusion.exe ./models/dreamlike-art-dreamlike-anime-1.0/FP16 'curly-haired unicorn in the forest, anime, line' ./models/soulcard.safetensors 0.7" + env: + PATH: ${{ env.build_dir }}\openvino_genai + - name: Run Python main app run: | . "./openvino_sd_cpp/Scripts/Activate.ps1" @@ -173,6 +193,14 @@ jobs: python .\samples\python\text2image\lora.py .\models\dreamlike-art-dreamlike-anime-1.0\FP16 "curly-haired unicorn in the forest, anime, line" .\models\soulcard.safetensors 0.7 env: PYTHONPATH: ${{ env.build_dir }} + - name: Run Python LoRA app + run: | + . "./openvino_sd_cpp/Scripts/Activate.ps1" + . "${{ env.OV_INSTALL_DIR }}/setupvars.ps1" + $env:Path += "${{ env.build_dir }}\openvino_genai" + python .\samples\python\text2image\lora_fuse.py .\models\dreamlike-art-dreamlike-anime-1.0\FP16 "curly-haired unicorn in the forest, anime, line" .\models\soulcard.safetensors 0.7 + env: + PYTHONPATH: ${{ env.build_dir }} Overall_Status: name: ci/gha_overall_status_stable_diffusion diff --git a/samples/cpp/text2image/CMakeLists.txt b/samples/cpp/text2image/CMakeLists.txt index 42b994dd71..08a3e11693 100644 --- a/samples/cpp/text2image/CMakeLists.txt +++ b/samples/cpp/text2image/CMakeLists.txt @@ -46,6 +46,25 @@ install(TARGETS lora_stable_diffusion COMPONENT samples_bin EXCLUDE_FROM_ALL) +# create LoRA fuse sample executable + +add_executable(lora_fuse_stable_diffusion + ${CMAKE_CURRENT_SOURCE_DIR}/lora_fuse.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/imwrite.cpp) + +target_include_directories(lora_fuse_stable_diffusion PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) +target_link_libraries(lora_fuse_stable_diffusion PRIVATE openvino::genai) + +set_target_properties(lora_fuse_stable_diffusion PROPERTIES + COMPILE_PDB_NAME lora_fuse_stable_diffusion + # Ensure out of box LC_RPATH on macOS with SIP + INSTALL_RPATH_USE_LINK_PATH ON) + +install(TARGETS lora_fuse_stable_diffusion + RUNTIME DESTINATION samples_bin/ + COMPONENT samples_bin + EXCLUDE_FROM_ALL) + # create heterogeneous_stable_diffusion sample executable add_executable(heterogeneous_stable_diffusion diff --git a/samples/cpp/text2image/README.md b/samples/cpp/text2image/README.md index 590efb4f6d..a58b1d1070 100644 --- a/samples/cpp/text2image/README.md +++ b/samples/cpp/text2image/README.md @@ -2,9 +2,10 @@ Examples in this folder showcase inference of text to image models like Stable Diffusion 1.5, 2.1, LCM. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `ov::genai::Text2ImagePipeline` and uses a text prompt as input source. 
-There are three sample files:
+There are several sample files:
  - [`main.cpp`](./main.cpp) demonstrates basic usage of the text to image pipeline
  - [`lora.cpp`](./lora.cpp) shows how to apply LoRA adapters to the pipeline
+ - [`lora_fuse.cpp`](./lora_fuse.cpp) shows how to maximize the performance of LoRA adapters by fusing them into the base model weights
  - [`heterogeneous_stable_diffusion.cpp`](./heterogeneous_stable_diffusion.cpp) shows how to assemble a heterogeneous txt2image pipeline from individual subcomponents (scheduler, text encoder, unet, vae decoder)
 
 Users can change the sample code and play with the following generation parameters:
@@ -62,6 +63,9 @@
 With adapter | Without adapter
 :---:|:---:
 ![](./lora.bmp) | ![](./baseline.bmp)
 
+## Fuse LoRA adapters into model weights
+
+To maximize inference performance when using LoRA adapters, refer to `lora_fuse.cpp`, which demonstrates fusing the adapters into the model weights. This approach achieves the same performance as the base model without adapters, at the cost of the flexibility to switch adapters between generate calls. The mode is ideal when many generations are performed with the same set of LoRA adapters and blending alphas, and when the cost of recompiling the model on every adapter change is acceptable. The example writes the resulting image to `lora.bmp`.
 
 ## Note
diff --git a/samples/cpp/text2image/lora.cpp b/samples/cpp/text2image/lora.cpp
index 3fe4b74ff6..c6bc90fd25 100644
--- a/samples/cpp/text2image/lora.cpp
+++ b/samples/cpp/text2image/lora.cpp
@@ -9,7 +9,7 @@ int32_t main(int32_t argc, char* argv[]) try {
     OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " <MODEL_DIR> '<PROMPT>' [<LORA_SAFETENSORS> <ALPHA> ...]");
 
     const std::string models_path = argv[1], prompt = argv[2];
-    const std::string device = "CPU";  // GPU, NPU can be used as well
+    const std::string device = "CPU";  // GPU can be used as well
 
     ov::genai::AdapterConfig adapter_config;
     // Multiple LoRA adapters applied simultaneously are supported, parse them all and corresponding alphas from cmd parameters:
diff --git a/samples/cpp/text2image/lora_fuse.cpp b/samples/cpp/text2image/lora_fuse.cpp
new file mode 100644
index 0000000000..9b5332e891
--- /dev/null
+++ b/samples/cpp/text2image/lora_fuse.cpp
@@ -0,0 +1,50 @@
+// Copyright (C) 2023-2024 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include "openvino/genai/image_generation/text2image_pipeline.hpp"
+
+#include "imwrite.hpp"
+
+int32_t main(int32_t argc, char* argv[]) try {
+    OPENVINO_ASSERT(argc >= 3 && (argc - 3) % 2 == 0, "Usage: ", argv[0], " <MODEL_DIR> '<PROMPT>' [<LORA_SAFETENSORS> <ALPHA> ...]");
+
+    const std::string models_path = argv[1], prompt = argv[2];
+    const std::string device = "CPU";  // GPU can be used as well
+
+    // MODE_FUSE instructs the pipeline to fuse the adapter tensors into the original model weights
+    // loaded into memory, giving the same inference performance as the original model. Once fused,
+    // adapters can no longer be changed dynamically without re-initializing the pipeline from scratch.
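+    // Fusing happens once, when the pipeline below is constructed; every subsequent generate()
+    // call then runs at base-model speed, with the adapter effect already baked into the weights.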
+    ov::genai::AdapterConfig adapter_config(ov::genai::AdapterConfig::MODE_FUSE);
+
+    // Multiple LoRA adapters applied simultaneously are supported, parse them all and corresponding alphas from cmd parameters:
+    for(size_t i = 0; i < (argc - 3)/2; ++i) {
+        ov::genai::Adapter adapter(argv[3 + 2*i]);
+        float alpha = std::atof(argv[3 + 2*i + 1]);
+        adapter_config.add(adapter, alpha);
+    }
+
+    // LoRA adapters passed to the constructor will be active by default in subsequent generate() calls
+    ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config));
+
+    std::cout << "Generating image with LoRA adapters fused into original weights, resulting image will be in lora.bmp\n";
+    ov::Tensor image = pipe.generate(prompt,
+        ov::genai::generator(std::make_shared<ov::genai::CppStdGenerator>(42)),
+        ov::genai::width(512),
+        ov::genai::height(896),
+        ov::genai::num_inference_steps(20));
+    imwrite("lora.bmp", image, true);
+
+    return EXIT_SUCCESS;
+} catch (const std::exception& error) {
+    try {
+        std::cerr << error.what() << '\n';
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
+} catch (...) {
+    try {
+        std::cerr << "Non-exception object thrown\n";
+    } catch (const std::ios_base::failure&) {}
+    return EXIT_FAILURE;
+}
diff --git a/samples/python/text2image/README.md b/samples/python/text2image/README.md
index 1a59107e85..d8dc23d0fa 100644
--- a/samples/python/text2image/README.md
+++ b/samples/python/text2image/README.md
@@ -2,9 +2,10 @@
 Examples in this folder showcase inference of text to image models like Stable Diffusion 1.5, 2.1, LCM. The application doesn't have many configuration options to encourage the reader to explore and modify the source code. For example, change the device for inference to GPU. The sample features `openvino_genai.Text2ImagePipeline` and uses a text prompt as input source.
 
-There are two sample files:
+There are several sample files:
  - [`main.py`](./main.py) demonstrates basic usage of the text to image pipeline
  - [`lora.py`](./lora.py) shows how to apply LoRA adapters to the pipeline
+ - [`lora_fuse.py`](./lora_fuse.py) shows how to maximize the performance of LoRA adapters by fusing them into the base model weights
 
 Users can change the sample code and play with the following generation parameters:
@@ -60,3 +61,8 @@ Check the difference:
 With adapter | Without adapter
 :---:|:---:
 ![](./lora.bmp) | ![](./baseline.bmp)
+
+
+## Fuse LoRA adapters into model weights
+
+To maximize inference performance when using LoRA adapters, refer to `lora_fuse.py`, which demonstrates fusing the adapters into the model weights. This approach achieves the same performance as the base model without adapters, at the cost of the flexibility to switch adapters between generate calls. The mode is ideal when many generations are performed with the same set of LoRA adapters and blending alphas, and when the cost of recompiling the model on every adapter change is acceptable. The example writes the resulting image to `lora.bmp`.
\ No newline at end of file
diff --git a/samples/python/text2image/lora.py b/samples/python/text2image/lora.py
index 95e31ca0ea..a8109de7e8 100644
--- a/samples/python/text2image/lora.py
+++ b/samples/python/text2image/lora.py
@@ -35,7 +35,7 @@ def main():
 
     prompt = args.prompt
 
-    device = "CPU"  # GPU, NPU can be used as well
+    device = "CPU"  # GPU can be used as well
 
     adapter_config = openvino_genai.AdapterConfig()
     # Multiple LoRA adapters applied simultaneously are supported, parse them all and corresponding alphas from cmd parameters:
diff --git a/samples/python/text2image/lora_fuse.py b/samples/python/text2image/lora_fuse.py
new file mode 100644
index 0000000000..75416365ea
--- /dev/null
+++ b/samples/python/text2image/lora_fuse.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import argparse
+
+import openvino as ov
+import openvino_genai
+import numpy as np
+import sys
+
+
+class Generator(openvino_genai.Generator):
+    def __init__(self, seed, mu=0.0, sigma=1.0):
+        openvino_genai.Generator.__init__(self)
+        np.random.seed(seed)
+        self.mu = mu
+        self.sigma = sigma
+
+    def next(self):
+        return np.random.normal(self.mu, self.sigma)
+
+
+def image_write(path: str, image_tensor: ov.Tensor):
+    from PIL import Image
+    image = Image.fromarray(image_tensor.data[0])
+    image.save(path)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument('models_path')
+    parser.add_argument('prompt')
+    args, adapters = parser.parse_known_args()
+
+    prompt = args.prompt
+
+    device = "CPU"  # GPU can be used as well
+
+    # MODE_FUSE instructs the pipeline to fuse the adapter tensors into the original model weights
+    # loaded into memory, giving the same inference performance as the original model. Once fused,
+    # adapters can no longer be changed dynamically without re-initializing the pipeline from scratch.
+    adapter_config = openvino_genai.AdapterConfig(openvino_genai.AdapterConfig.Mode.MODE_FUSE)
+
+    # Multiple LoRA adapters applied simultaneously are supported, parse them all and corresponding alphas from cmd parameters:
+    for i in range(len(adapters) // 2):
+        adapter = openvino_genai.Adapter(adapters[2 * i])
+        alpha = float(adapters[2 * i + 1])
+        adapter_config.add(adapter, alpha)
+
+    # LoRA adapters passed to the constructor will be active by default in subsequent generate() calls
+    pipe = openvino_genai.Text2ImagePipeline(args.models_path, device, adapters=adapter_config)
+    print("Generating image with LoRA adapters fused into original weights, resulting image will be in lora.bmp")
+    image = pipe.generate(prompt,
+                          generator=Generator(42),
+                          width=512,
+                          height=896,
+                          num_inference_steps=20)
+
+    image_write("lora.bmp", image)
+
+
+if '__main__' == __name__:
+    main()
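
For quick reference, the CI workflows above invoke the new samples roughly as follows. This is a sketch assembled from the workflow steps: it assumes the models and the `soulcard.safetensors` adapter were downloaded and converted as in those workflows, that OpenVINO's `setupvars.sh` has been sourced, and `<build_dir>` is a placeholder for the CMake build directory.

```sh
# C++ sample: fuse the adapter (alpha 0.7) into the SD 1.5 weights, then generate lora.bmp
<build_dir>/samples/cpp/text2image/lora_fuse_stable_diffusion \
    ./models/dreamlike-art-dreamlike-anime-1.0/FP16 \
    "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7

# Python sample: same arguments, same output file
python ./samples/python/text2image/lora_fuse.py \
    ./models/dreamlike-art-dreamlike-anime-1.0/FP16 \
    "curly-haired unicorn in the forest, anime, line" ./models/soulcard.safetensors 0.7
```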