Skip to content

Commit

Permalink
[Image Generation] Image2Image for FLUX (#1621)
Browse files Browse the repository at this point in the history
  • Loading branch information
likholat and ilya-lavrenov authored Jan 24, 2025
1 parent cc3b65a commit 6bdc704
Show file tree
Hide file tree
Showing 12 changed files with 190 additions and 70 deletions.
2 changes: 1 addition & 1 deletion SUPPORTED_MODELS.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ The pipeline can work with other similar topologies produced by `optimum-intel`
<tr>
<td><code>Flux</code></td>
<td>Supported</td>
<td>Not supported</td>
<td>Supported</td>
<td>Not supported</td>
<td>
<ul>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ class OPENVINO_GENAI_EXPORTS Image2ImagePipeline {
const UNet2DConditionModel& unet,
const AutoencoderKL& vae);

// Creates an image-to-image Flux pipeline from building blocks.
// The scheduler is installed into the created pipeline and must not be null.
// NOTE(review): t5_encoder_model is taken by value while the other models are
// taken by const reference - confirm this is intentional before changing it,
// since the out-of-line definition must keep the same signature.
static Image2ImagePipeline flux(
const std::shared_ptr<Scheduler>& scheduler,
const CLIPTextModel& clip_text_model,
const T5EncoderModel t5_encoder_model,
const FluxTransformer2DModel& transformer,
const AutoencoderKL& vae);

ImageGenerationConfig get_generation_config() const;
void set_generation_config(const ImageGenerationConfig& generation_config);

Expand Down
150 changes: 85 additions & 65 deletions src/cpp/src/image_generation/flux_pipeline.hpp

Large diffs are not rendered by default.

21 changes: 21 additions & 0 deletions src/cpp/src/image_generation/image2image_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

#include "image_generation/stable_diffusion_pipeline.hpp"
#include "image_generation/stable_diffusion_xl_pipeline.hpp"
#include "image_generation/flux_pipeline.hpp"

#include "utils.hpp"

Expand All @@ -22,6 +23,8 @@ Image2ImagePipeline::Image2ImagePipeline(const std::filesystem::path& root_dir)
m_impl = std::make_shared<StableDiffusionPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir);
} else if (class_name == "StableDiffusionXLPipeline") {
m_impl = std::make_shared<StableDiffusionXLPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir);
} else if (class_name == "FluxPipeline") {
m_impl = std::make_shared<FluxPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir);
} else {
OPENVINO_THROW("Unsupported image to image generation pipeline '", class_name, "'");
}
Expand All @@ -34,6 +37,8 @@ Image2ImagePipeline::Image2ImagePipeline(const std::filesystem::path& root_dir,
m_impl = std::make_shared<StableDiffusionPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir, device, properties);
} else if (class_name == "StableDiffusionXLPipeline") {
m_impl = std::make_shared<StableDiffusionXLPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir, device, properties);
} else if (class_name == "FluxPipeline") {
m_impl = std::make_shared<FluxPipeline>(PipelineType::IMAGE_2_IMAGE, root_dir, device, properties);
} else {
OPENVINO_THROW("Unsupported image to image generation pipeline '", class_name, "'");
}
Expand All @@ -44,6 +49,8 @@ Image2ImagePipeline::Image2ImagePipeline(const InpaintingPipeline& pipe) {
m_impl = std::make_shared<StableDiffusionXLPipeline>(PipelineType::IMAGE_2_IMAGE, *stable_diffusion_xl);
} else if (auto stable_diffusion = std::dynamic_pointer_cast<StableDiffusionPipeline>(pipe.m_impl); stable_diffusion != nullptr) {
m_impl = std::make_shared<StableDiffusionPipeline>(PipelineType::IMAGE_2_IMAGE, *stable_diffusion);
} else if (auto flux = std::dynamic_pointer_cast<FluxPipeline>(pipe.m_impl); flux != nullptr) {
m_impl = std::make_shared<FluxPipeline>(PipelineType::IMAGE_2_IMAGE, *flux);
} else {
OPENVINO_ASSERT("Cannot convert specified InpaintingPipeline to Image2ImagePipeline");
}
Expand Down Expand Up @@ -94,6 +101,20 @@ Image2ImagePipeline Image2ImagePipeline::stable_diffusion_xl(
return Image2ImagePipeline(impl);
}

// Assembles an image-to-image FLUX pipeline from already constructed components.
//
// The scheduler is validated with OPENVINO_ASSERT instead of a plain `assert`:
// `assert` is compiled out when NDEBUG is defined, which would let a null
// scheduler reach set_scheduler() in release builds. The check also runs before
// the pipeline object is built, so invalid input is rejected without doing the
// construction work first.
//
// Throws an ov::Exception (via OPENVINO_ASSERT) when `scheduler` is null.
Image2ImagePipeline Image2ImagePipeline::flux(
    const std::shared_ptr<Scheduler>& scheduler,
    const CLIPTextModel& clip_text_model,
    const T5EncoderModel t5_encoder_model,
    const FluxTransformer2DModel& transformer,
    const AutoencoderKL& vae) {
    OPENVINO_ASSERT(scheduler != nullptr, "Scheduler must not be null when creating FLUX Image2ImagePipeline");

    auto impl = std::make_shared<FluxPipeline>(PipelineType::IMAGE_2_IMAGE, clip_text_model, t5_encoder_model, transformer, vae);
    impl->set_scheduler(scheduler);

    return Image2ImagePipeline(impl);
}

ImageGenerationConfig Image2ImagePipeline::get_generation_config() const {
return m_impl->get_generation_config();
}
Expand Down
2 changes: 2 additions & 0 deletions src/cpp/src/image_generation/image_processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ IImageProcessor::IImageProcessor(const std::string& device) :
}

ov::Tensor IImageProcessor::execute(ov::Tensor image) {
OPENVINO_ASSERT(m_request, "ImageProcessor model must be compiled first. Cannot infer non-compiled model");
m_request.set_input_tensor(image);
m_request.infer();
return m_request.get_output_tensor();
Expand Down Expand Up @@ -124,6 +125,7 @@ ImageResizer::ImageResizer(const std::string& device, ov::element::Type type, ov
}

ov::Tensor ImageResizer::execute(ov::Tensor image, int64_t dst_height, int64_t dst_width) {
OPENVINO_ASSERT(m_request, "ImageResizer model must be compiled first. Cannot infer non-compiled model");
ov::Tensor target_spatial_tensor(ov::element::i64, ov::Shape{2});
target_spatial_tensor.data<int64_t>()[0] = dst_height;
target_spatial_tensor.data<int64_t>()[1] = dst_width;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ std::map<std::string, ov::Tensor> EulerAncestralDiscreteScheduler::step(ov::Tens
return {{"latent", prev_sample}, {"denoised", pred_original_sample}};
}

size_t EulerAncestralDiscreteScheduler::_index_for_timestep(int64_t timestep) const{
size_t EulerAncestralDiscreteScheduler::_index_for_timestep(int64_t timestep) const {
for (size_t i = 0; i < m_schedule_timesteps.size(); ++i) {
if (timestep == m_schedule_timesteps[i]) {
return i;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,43 @@ void FlowMatchEulerDiscreteScheduler::add_noise(ov::Tensor init_latent, ov::Tens
OPENVINO_THROW("Not implemented");
}

// Returns the position of `timestep` inside m_schedule_timesteps.
// When m_schedule_timesteps is still empty it is first filled with a copy of
// m_timesteps. The match is an exact floating-point comparison; if no entry
// matches, the function throws via OPENVINO_THROW.
size_t FlowMatchEulerDiscreteScheduler::_index_for_timestep(float timestep) {
    if (m_schedule_timesteps.empty())
        m_schedule_timesteps = m_timesteps;

    size_t position = 0;
    for (const float candidate : m_schedule_timesteps) {
        if (candidate == timestep)
            return position;
        ++position;
    }

    OPENVINO_THROW("Failed to find index for timestep ", timestep);
}

// Blends `noise` into `sample` in place, element by element:
//     sample[i] = sigma * noise[i] + (1 - sigma) * sample[i]
//
// `sigma` is read from m_sigmas at an index chosen as follows:
//   * m_begin_index is unset  -> index looked up from `timestep`
//   * m_step_index is set     -> m_step_index
//   * otherwise               -> m_begin_index
//
// Both tensors are accessed through data<float>() (assumes f32 contents -
// TODO confirm against callers). The function throws via OPENVINO_ASSERT when
// the selected sigma index is out of range or when `noise` does not have the
// same number of elements as `sample`; previously both cases led to
// out-of-bounds reads.
void FlowMatchEulerDiscreteScheduler::scale_noise(ov::Tensor sample, float timestep, ov::Tensor noise) {
    // NOTE(review): the condition requires `timestep` to equal the -1 sentinel,
    // while the message reads as if -1 were the error case. Kept unchanged
    // because the caller is not visible here - confirm the intended polarity.
    OPENVINO_ASSERT(timestep == -1, "Timestep is not computed yet");

    // m_begin_index / m_step_index are size_t, so the "-1" sentinel is the
    // converted value; spell the conversion out instead of relying on an
    // implicit signed/unsigned comparison.
    constexpr size_t unset_index = static_cast<size_t>(-1);

    size_t index_for_timestep;
    if (m_begin_index == unset_index) {
        index_for_timestep = _index_for_timestep(timestep);
    } else if (m_step_index != unset_index) {
        index_for_timestep = m_step_index;
    } else {
        index_for_timestep = m_begin_index;
    }

    OPENVINO_ASSERT(index_for_timestep < m_sigmas.size(),
                    "Sigma index ", index_for_timestep, " is out of range, number of sigmas is ", m_sigmas.size());
    const float sigma = m_sigmas[index_for_timestep];

    const size_t element_count = sample.get_size();
    OPENVINO_ASSERT(noise.get_size() == element_count,
                    "Noise tensor size ", noise.get_size(), " does not match sample tensor size ", element_count);

    float* sample_data = sample.data<float>();
    const float* noise_data = noise.data<float>();

    for (size_t i = 0; i < element_count; ++i) {
        sample_data[i] = sigma * noise_data[i] + (1.0f - sigma) * sample_data[i];
    }
}

void FlowMatchEulerDiscreteScheduler::set_timesteps_with_sigma(std::vector<float> sigma, float mu) {
m_timesteps.clear();
m_sigmas.clear();
Expand Down Expand Up @@ -184,5 +221,13 @@ float FlowMatchEulerDiscreteScheduler::calculate_shift(size_t image_seq_len) {
return mu;
}

void FlowMatchEulerDiscreteScheduler::set_begin_index(size_t begin_index) {
m_begin_index = begin_index;
}

size_t FlowMatchEulerDiscreteScheduler::get_begin_index() {
return m_begin_index;
}

} // namespace genai
} // namespace ov
Original file line number Diff line number Diff line change
Expand Up @@ -42,20 +42,27 @@ class FlowMatchEulerDiscreteScheduler : public IScheduler {

void add_noise(ov::Tensor init_latent, ov::Tensor noise, int64_t latent_timestep) const override;

void scale_noise(ov::Tensor sample, float timestep, ov::Tensor noise) override;

float calculate_shift(size_t image_seq_len) override;

void set_begin_index(size_t begin_index) override;

size_t get_begin_index() override;

private:
Config m_config;

std::vector<float> m_sigmas;
std::vector<float> m_timesteps;
std::vector<float> m_timesteps, m_schedule_timesteps;

float m_sigma_min, m_sigma_max;
size_t m_step_index, m_begin_index;
size_t m_num_inference_steps;

void init_step_index();
double sigma_to_t(double simga);
size_t _index_for_timestep(float timestep);
};

} // namespace genai
Expand Down
10 changes: 10 additions & 0 deletions src/cpp/src/image_generation/schedulers/ischeduler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ class IScheduler : public Scheduler {
// Returns the scheduler timesteps as floats.
// Default implementation: always throws via OPENVINO_THROW; a derived
// scheduler has to override this method to provide float timesteps.
virtual std::vector<float> get_float_timesteps() const {
OPENVINO_THROW("Scheduler doesn't support float timesteps");
}

// Hook for schedulers that can mix `noise` into `sample` for a given timestep.
// Default implementation: always throws via OPENVINO_THROW; a derived
// scheduler has to override this method to support the operation.
virtual void scale_noise(ov::Tensor sample, float timestep, ov::Tensor noise) {
OPENVINO_THROW("Scheduler doesn't support `scale_noise` method");
}

// Sets the begin index of the schedule.
// Default implementation is a no-op: the value is ignored unless a derived
// scheduler overrides this method.
virtual void set_begin_index(size_t begin_index) {}

// Returns the begin index of the schedule.
// Default implementation: always throws via OPENVINO_THROW; a derived
// scheduler has to override this method to expose a begin index.
virtual size_t get_begin_index() {
OPENVINO_THROW("Scheduler doesn't support `get_begin_index` method");
}
};

} // namespace genai
Expand Down
3 changes: 3 additions & 0 deletions src/python/openvino_genai/py_openvino_genai.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -774,6 +774,9 @@ class Image2ImagePipeline:
This class is used for generation with image-to-image models.
"""
@staticmethod
def flux(scheduler: Scheduler, clip_text_model: CLIPTextModel, t5_encoder_model: T5EncoderModel, transformer: FluxTransformer2DModel, vae: AutoencoderKL) -> Image2ImagePipeline:
    """
    Creates a Flux image-to-image pipeline from building blocks: a scheduler,
    a CLIP text model, a T5 encoder model, a Flux transformer and a VAE.
    """
    ...
@staticmethod
def latent_consistency_model(scheduler: Scheduler, clip_text_model: CLIPTextModel, unet: UNet2DConditionModel, vae: AutoencoderKL) -> Image2ImagePipeline:
    """
    Creates an Image2ImagePipeline from the given scheduler, CLIP text model,
    UNet and VAE (presumably the latent consistency model variant, per the
    method name - confirm against the C++ binding).
    """
    ...
@staticmethod
Expand Down
1 change: 1 addition & 0 deletions src/python/py_image_generation_pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ void init_image_generation_pipelines(py::module_& m) {
.def_static("stable_diffusion", &ov::genai::Image2ImagePipeline::stable_diffusion, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae"))
.def_static("latent_consistency_model", &ov::genai::Image2ImagePipeline::latent_consistency_model, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("unet"), py::arg("vae"))
.def_static("stable_diffusion_xl", &ov::genai::Image2ImagePipeline::stable_diffusion_xl, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("clip_text_model_with_projection"), py::arg("unet"), py::arg("vae"))
.def_static("flux", &ov::genai::Image2ImagePipeline::flux, py::arg("scheduler"), py::arg("clip_text_model"), py::arg("t5_encoder_model"), py::arg("transformer"), py::arg("vae"))
.def(
"compile",
[](ov::genai::Image2ImagePipeline& pipe,
Expand Down
7 changes: 5 additions & 2 deletions tools/who_what_benchmark/tests/test_cli_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,11 @@ def test_image_model_types(model_id, model_type, backend):
])),
)
def test_image_model_genai(model_id, model_type):
if ("flux" in model_id or "stable-diffusion-3" in model_id) and model_type != "text-to-image":
pytest.skip(reason="FLUX or SD3 are supported as text to image only")
if ("stable-diffusion-3" in model_id) and model_type != "text-to-image":
pytest.skip(reason="SD3 is supported as text to image only")

if ("flux" in model_id) and model_type == "image-inpainting":
pytest.skip(reason="FLUX is not yet supported as image inpainting")

with tempfile.TemporaryDirectory() as temp_dir:
GT_FILE = os.path.join(temp_dir, "gt.csv")
Expand Down

0 comments on commit 6bdc704

Please sign in to comment.