diff --git a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp index d48661d899..ca5b4120c3 100644 --- a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp +++ b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp @@ -127,7 +127,7 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL { return compile(device, ov::AnyMap{std::forward(properties)...}); } - ov::Tensor decode(ov::Tensor latent); + ov::Tensor decode(ov::Tensor latent, float& infer_duration); ov::Tensor encode(ov::Tensor image, std::shared_ptr generator); diff --git a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp index a3b9ebbd88..db7ae41004 100644 --- a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp @@ -9,6 +9,7 @@ #include "openvino/genai/visibility.hpp" #include "openvino/genai/tokenizer.hpp" #include "openvino/genai/lora_adapter.hpp" +#include "openvino/genai/perf_metrics.hpp" #include "openvino/core/any.hpp" #include "openvino/runtime/tensor.hpp" @@ -84,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModel { void set_adapters(const std::optional& adapters); - ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance); + ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, float& infer_duration); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp b/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp index 563fb8711d..c12ed96469 100644 --- a/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp +++ 
b/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp @@ -9,6 +9,7 @@ #include "openvino/genai/visibility.hpp" #include "openvino/genai/tokenizer.hpp" #include "openvino/genai/lora_adapter.hpp" +#include "openvino/genai/perf_metrics.hpp" #include "openvino/core/any.hpp" #include "openvino/runtime/tensor.hpp" @@ -84,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModelWithProjection { void set_adapters(const std::optional& adapters); - ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance); + ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, float& infer_duration); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp b/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp index 95f846668b..a5a8de5ee9 100644 --- a/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp @@ -12,6 +12,7 @@ #include "openvino/runtime/tensor.hpp" #include "openvino/genai/visibility.hpp" +#include "openvino/genai/perf_metrics.hpp" namespace ov { namespace genai { @@ -75,7 +76,7 @@ class OPENVINO_GENAI_EXPORTS FluxTransformer2DModel { void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states); - ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep); + ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, float& infer_duration); private: Config m_config; diff --git a/src/cpp/include/openvino/genai/image_generation/generation_config.hpp b/src/cpp/include/openvino/genai/image_generation/generation_config.hpp index bd7073520a..bc56c3a5f8 100644 --- a/src/cpp/include/openvino/genai/image_generation/generation_config.hpp +++ 
b/src/cpp/include/openvino/genai/image_generation/generation_config.hpp @@ -12,6 +12,7 @@ #include "openvino/genai/lora_adapter.hpp" #include "openvino/genai/visibility.hpp" +#include "openvino/genai/perf_metrics.hpp" namespace ov { namespace genai { diff --git a/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp b/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp index c6c1f59c88..995cb9be49 100644 --- a/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp +++ b/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp @@ -87,8 +87,11 @@ class OPENVINO_GENAI_EXPORTS Image2ImagePipeline { ov::Tensor decode(const ov::Tensor latent); + ImageGenerationPerfMetrics get_perfomance_metrics(); + private: std::shared_ptr m_impl; + ImageGenerationPerfMetrics m_perf_metrics; explicit Image2ImagePipeline(const std::shared_ptr& impl); diff --git a/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp b/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp new file mode 100644 index 0000000000..9df6dc3f62 --- /dev/null +++ b/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include +#include + +#include "openvino/genai/visibility.hpp" +#include "openvino/genai/perf_metrics.hpp" + +namespace ov::genai { + +struct OPENVINO_GENAI_EXPORTS RawImageGenerationPerfMetrics { + std::vector unet_inference_durations; // unet inference durations for each step + std::vector transformer_inference_durations; // transformer inference durations for each step + std::vector iteration_durations; // durations of each step +}; + +struct OPENVINO_GENAI_EXPORTS ImageGenerationPerfMetrics { + float load_time; // model load time (includes reshape & read_model time), ms + float generate_duration; // duration of method generate(...), ms + + MeanStdPair iteration_duration; // Mean-Std time 
of one generation iteration, ms + std::map encoder_inference_duration; // inference durations for each encoder, ms + MeanStdPair unet_inference_duration; // inference duration for unet model, should be filled with zeros if we don't have unet, ms + MeanStdPair transformer_inference_duration; // inference duration for transformer model, should be filled with zeros if we don't have transformer, ms + float vae_encoder_inference_duration; // inference duration of vae_encoder model, should be filled with zeros if we don't use it, ms + float vae_decoder_inference_duration; // inference duration of vae_decoder model, ms + + bool m_evaluated = false; + + RawImageGenerationPerfMetrics raw_metrics; + + void clean_up(); + void evaluate_statistics(); + + MeanStdPair get_unet_inference_duration(); + MeanStdPair get_transformer_inference_duration(); + MeanStdPair get_iteration_duration(); + float get_inference_total_duration(); + float get_load_time(); + float get_generate_duration(); + +}; +} \ No newline at end of file diff --git a/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp b/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp index 03dd9468f7..9fe1847a22 100644 --- a/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp +++ b/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp @@ -13,6 +13,7 @@ #include "openvino/genai/image_generation/scheduler.hpp" #include "openvino/genai/image_generation/generation_config.hpp" +#include "openvino/genai/image_generation/image_generation_perf_metrics.hpp" #include "openvino/genai/image_generation/clip_text_model.hpp" #include "openvino/genai/image_generation/clip_text_model_with_projection.hpp" @@ -110,8 +111,11 @@ class OPENVINO_GENAI_EXPORTS InpaintingPipeline { ov::Tensor decode(const ov::Tensor latent); + ImageGenerationPerfMetrics get_perfomance_metrics(); + private: std::shared_ptr m_impl; + ImageGenerationPerfMetrics m_perf_metrics; explicit 
InpaintingPipeline(const std::shared_ptr& impl); diff --git a/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp b/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp index 7f96af49c2..540c038a7e 100644 --- a/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp @@ -13,6 +13,7 @@ #include "openvino/runtime/tensor.hpp" #include "openvino/genai/visibility.hpp" +#include "openvino/genai/perf_metrics.hpp" namespace ov { namespace genai { @@ -77,7 +78,7 @@ class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel { void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states); - ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep); + ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, float& infer_duration); private: Config m_config; diff --git a/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp b/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp index 11797226eb..421d7aca83 100644 --- a/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp @@ -9,6 +9,7 @@ #include "openvino/genai/visibility.hpp" #include "openvino/genai/tokenizer.hpp" #include "openvino/genai/lora_adapter.hpp" +#include "openvino/genai/perf_metrics.hpp" #include "openvino/core/any.hpp" #include "openvino/runtime/tensor.hpp" @@ -68,7 +69,8 @@ class OPENVINO_GENAI_EXPORTS T5EncoderModel { ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, - int max_sequence_length); + int max_sequence_length, + float& infer_duration); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp 
b/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp index 3dc1fc0803..d31f5e487c 100644 --- a/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp +++ b/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp @@ -221,6 +221,8 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline { */ ov::Tensor decode(const ov::Tensor latent); + ImageGenerationPerfMetrics get_perfomance_metrics(); + private: std::shared_ptr m_impl; diff --git a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp index 4acfd2ce9b..09822bd5d7 100644 --- a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp @@ -16,6 +16,7 @@ #include "openvino/genai/visibility.hpp" #include "openvino/genai/lora_adapter.hpp" +#include "openvino/genai/perf_metrics.hpp" namespace ov { namespace genai { @@ -89,7 +90,7 @@ class OPENVINO_GENAI_EXPORTS UNet2DConditionModel { void set_adapters(const std::optional& adapters); - ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep); + ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, float& infer_duration); bool do_classifier_free_guidance(float guidance_scale) const { return guidance_scale > 1.0f && m_config.time_cond_proj_dim < 0; diff --git a/src/cpp/src/image_generation/diffusion_pipeline.hpp b/src/cpp/src/image_generation/diffusion_pipeline.hpp index 86d8ba9009..db0057bfb3 100644 --- a/src/cpp/src/image_generation/diffusion_pipeline.hpp +++ b/src/cpp/src/image_generation/diffusion_pipeline.hpp @@ -88,7 +88,9 @@ class DiffusionPipeline { virtual ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) = 0; - virtual ov::Tensor decode(const ov::Tensor latent) = 0; + virtual ov::Tensor decode(const ov::Tensor latent, float& 
infer_duration) = 0; + + virtual ImageGenerationPerfMetrics get_perfomance_metrics() = 0; virtual ~DiffusionPipeline() = default; diff --git a/src/cpp/src/image_generation/flux_pipeline.hpp b/src/cpp/src/image_generation/flux_pipeline.hpp index e74cd441ce..c2ec4d2cb2 100644 --- a/src/cpp/src/image_generation/flux_pipeline.hpp +++ b/src/cpp/src/image_generation/flux_pipeline.hpp @@ -254,12 +254,15 @@ class FluxPipeline : public DiffusionPipeline { } void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override { + float infer_duration; // encode_prompt std::string prompt_2_str = generation_config.prompt_2 != std::nullopt ? *generation_config.prompt_2 : positive_prompt; - m_clip_text_encoder->infer(positive_prompt, {}, false); + m_clip_text_encoder->infer(positive_prompt, {}, false, infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration / 1000.0f; ov::Tensor pooled_prompt_embeds = m_clip_text_encoder->get_output_tensor(1); - ov::Tensor prompt_embeds = m_t5_text_encoder->infer(prompt_2_str, "", false, generation_config.max_sequence_length); + ov::Tensor prompt_embeds = m_t5_text_encoder->infer(prompt_2_str, "", false, generation_config.max_sequence_length, infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration / 1000.0f; pooled_prompt_embeds = numpy_utils::repeat(pooled_prompt_embeds, generation_config.num_images_per_prompt); prompt_embeds = numpy_utils::repeat(prompt_embeds, generation_config.num_images_per_prompt); @@ -319,6 +322,9 @@ class FluxPipeline : public DiffusionPipeline { ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) override { + const auto gen_start = std::chrono::steady_clock::now(); + float infer_duration; + m_perf_metrics.clean_up(); m_custom_generation_config = m_generation_config; m_custom_generation_config.update_generation_config(properties); @@ -358,28 +364,44 @@ class 
FluxPipeline : public DiffusionPipeline { float* timestep_data = timestep.data(); for (size_t inference_step = 0; inference_step < timesteps.size(); ++inference_step) { + auto step_start = std::chrono::steady_clock::now(); timestep_data[0] = timesteps[inference_step] / 1000; - ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep); + ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep, infer_duration); + m_perf_metrics.raw_metrics.transformer_inference_durations.emplace_back(MicroSeconds(infer_duration)); auto scheduler_step_result = m_scheduler->step(noise_pred_tensor, latents, inference_step, m_custom_generation_config.generator); latents = scheduler_step_result["latent"]; + auto step_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - step_start); // duration of this iteration only (microseconds), measured from step_start rather than from the start of generate() + m_perf_metrics.raw_metrics.iteration_durations.emplace_back(MicroSeconds(step_ms)); if (callback && callback(inference_step, timesteps.size(), latents)) { - return ov::Tensor(ov::element::u8, {}); + auto image = ov::Tensor(ov::element::u8, {}); + m_perf_metrics.generate_duration = + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start) + .count(); + return image; } } latents = unpack_latents(latents, m_custom_generation_config.height, m_custom_generation_config.width, vae_scale_factor); - return m_vae->decode(latents); + auto image = m_vae->decode(latents, infer_duration); + m_perf_metrics.vae_decoder_inference_duration = infer_duration / 1000.0f; + m_perf_metrics.generate_duration = + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start).count(); + return image; } - ov::Tensor decode(const ov::Tensor latent) override { + ov::Tensor decode(const ov::Tensor latent, float& infer_duration) override { ov::Tensor unpacked_latent = unpack_latents(latent, m_custom_generation_config.height, m_custom_generation_config.width, m_vae->get_vae_scale_factor()); - return m_vae->decode(unpacked_latent); + return 
m_vae->decode(unpacked_latent, infer_duration); + } + + ImageGenerationPerfMetrics get_perfomance_metrics() override { + return m_perf_metrics; } private: @@ -477,6 +499,7 @@ class FluxPipeline : public DiffusionPipeline { std::shared_ptr m_t5_text_encoder = nullptr; std::shared_ptr m_vae = nullptr; ImageGenerationConfig m_custom_generation_config; + ImageGenerationPerfMetrics m_perf_metrics; }; } // namespace genai diff --git a/src/cpp/src/image_generation/image2image_pipeline.cpp b/src/cpp/src/image_generation/image2image_pipeline.cpp index 38ff5a0a4c..5435892cd6 100644 --- a/src/cpp/src/image_generation/image2image_pipeline.cpp +++ b/src/cpp/src/image_generation/image2image_pipeline.cpp @@ -120,7 +120,12 @@ ov::Tensor Image2ImagePipeline::generate(const std::string& positive_prompt, ov: } ov::Tensor Image2ImagePipeline::decode(const ov::Tensor latent) { - return m_impl->decode(latent); + float infer_duration; + return m_impl->decode(latent, infer_duration); +} + +ImageGenerationPerfMetrics Image2ImagePipeline::get_perfomance_metrics() { + return m_impl->get_perfomance_metrics(); } } // namespace genai diff --git a/src/cpp/src/image_generation/image_generation_perf_metrics.cpp b/src/cpp/src/image_generation/image_generation_perf_metrics.cpp new file mode 100644 index 0000000000..6e9739140d --- /dev/null +++ b/src/cpp/src/image_generation/image_generation_perf_metrics.cpp @@ -0,0 +1,109 @@ +#include +#include + +#include "openvino/genai/image_generation/image_generation_perf_metrics.hpp" + +namespace ov { +namespace genai { +ov::genai::MeanStdPair calculation(const std::vector& durations) { + if (durations.size() == 0) { + return {-1, -1}; + } + // Accepts time durations in microseconds and returns standard deviation and mean in milliseconds. 
+ float mean = std::accumulate(durations.begin(), + durations.end(), + 0.0f, + [](const float& acc, const ov::genai::MicroSeconds& duration) -> float { + return acc + duration.count() / 1000.0f; + }); + mean /= durations.size(); + + float sum_square_durations = + std::accumulate(durations.begin(), + durations.end(), + 0.0f, + [](const float& acc, const ov::genai::MicroSeconds& duration) -> float { + auto d = duration.count() / 1000.0f; + return acc + d * d; + }); + float std = std::sqrt(std::fmax(sum_square_durations / durations.size() - mean * mean, 0.0f)); // clamp: float rounding can make E[x^2] - mean^2 slightly negative, and sqrt of a negative value is NaN + return {mean, std}; +} + +void ImageGenerationPerfMetrics::clean_up() { + m_evaluated = false; + load_time = 0.f; + generate_duration = 0.f; + vae_encoder_inference_duration = 0.f; + vae_decoder_inference_duration = 0.f; + encoder_inference_duration.clear(); + raw_metrics.unet_inference_durations.clear(); + raw_metrics.transformer_inference_durations.clear(); + raw_metrics.iteration_durations.clear(); +} + +void ImageGenerationPerfMetrics::evaluate_statistics() { + if (m_evaluated) { + return; + } + + // calculation() converts microseconds to milliseconds. 
+ unet_inference_duration = calculation(raw_metrics.unet_inference_durations); + transformer_inference_duration = calculation(raw_metrics.transformer_inference_durations); + iteration_duration = calculation(raw_metrics.iteration_durations); + + m_evaluated = true; +} + +MeanStdPair ImageGenerationPerfMetrics::get_unet_inference_duration() { + evaluate_statistics(); + return unet_inference_duration; +} + +MeanStdPair ImageGenerationPerfMetrics::get_transformer_inference_duration() { + evaluate_statistics(); + return transformer_inference_duration; +} +MeanStdPair ImageGenerationPerfMetrics::get_iteration_duration() { + evaluate_statistics(); + return iteration_duration; +} + +float ImageGenerationPerfMetrics::get_inference_total_duration() { + float total_duration = 0; + if (!raw_metrics.unet_inference_durations.empty()) { + float total = std::accumulate(raw_metrics.unet_inference_durations.begin(), + raw_metrics.unet_inference_durations.end(), + 0.0f, + [](const float& acc, const ov::genai::MicroSeconds& duration) -> float { + return acc + duration.count() / 1000.0f; // raw durations are microseconds; accumulate in milliseconds + }); + total_duration += total; + } else if (!raw_metrics.transformer_inference_durations.empty()) { + float total = std::accumulate(raw_metrics.transformer_inference_durations.begin(), + raw_metrics.transformer_inference_durations.end(), + 0.0f, + [](const float& acc, const ov::genai::MicroSeconds& duration) -> float { + return acc + duration.count() / 1000.0f; // raw durations are microseconds; accumulate in milliseconds + }); + total_duration += total; + } + + total_duration += vae_decoder_inference_duration; + + for (auto encoder = encoder_inference_duration.begin(); encoder != encoder_inference_duration.end(); encoder++) { + total_duration += encoder->second; + } + // Every term above is already in milliseconds (VAE decoder and encoder durations are stored in ms), so no further scaling + return total_duration; +} + +float ImageGenerationPerfMetrics::get_load_time() { + return load_time; +} + +float ImageGenerationPerfMetrics::get_generate_duration() { + return generate_duration; +} +} // namespace genai +} // namespace ov \ No newline at end of file diff --git 
a/src/cpp/src/image_generation/inpainting_pipeline.cpp b/src/cpp/src/image_generation/inpainting_pipeline.cpp index a510be0a57..5a85d36996 100644 --- a/src/cpp/src/image_generation/inpainting_pipeline.cpp +++ b/src/cpp/src/image_generation/inpainting_pipeline.cpp @@ -126,7 +126,12 @@ ov::Tensor InpaintingPipeline::generate(const std::string& positive_prompt, ov:: } ov::Tensor InpaintingPipeline::decode(const ov::Tensor latent) { - return m_impl->decode(latent); + float infer_duration; + return m_impl->decode(latent, infer_duration); +} + +ImageGenerationPerfMetrics InpaintingPipeline::get_perfomance_metrics() { + return m_impl->get_perfomance_metrics(); } } // namespace genai diff --git a/src/cpp/src/image_generation/models/autoencoder_kl.cpp b/src/cpp/src/image_generation/models/autoencoder_kl.cpp index ab8b87a13e..a5608db80f 100644 --- a/src/cpp/src/image_generation/models/autoencoder_kl.cpp +++ b/src/cpp/src/image_generation/models/autoencoder_kl.cpp @@ -223,11 +223,14 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMa return *this; } -ov::Tensor AutoencoderKL::decode(ov::Tensor latent) { +ov::Tensor AutoencoderKL::decode(ov::Tensor latent, float& infer_duration) { OPENVINO_ASSERT(m_decoder_request, "VAE decoder model must be compiled first. 
Cannot infer non-compiled model"); m_decoder_request.set_input_tensor(latent); + const auto infer_start = std::chrono::steady_clock::now(); m_decoder_request.infer(); + infer_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); + return m_decoder_request.get_output_tensor(); } diff --git a/src/cpp/src/image_generation/models/clip_text_model.cpp b/src/cpp/src/image_generation/models/clip_text_model.cpp index c49bd5f000..ece88572f9 100644 --- a/src/cpp/src/image_generation/models/clip_text_model.cpp +++ b/src/cpp/src/image_generation/models/clip_text_model.cpp @@ -109,7 +109,10 @@ void CLIPTextModel::set_adapters(const std::optional& adapters) { } } -ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance) { +ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, + const std::string& neg_prompt, + bool do_classifier_free_guidance, + float& infer_duration) { OPENVINO_ASSERT(m_request, "CLIP text encoder model must be compiled first. 
Cannot infer non-compiled model"); const int32_t pad_token_id = m_clip_tokenizer.get_pad_token_id(); @@ -146,8 +149,9 @@ ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string {current_batch_idx + 1, m_config.max_position_embeddings})); // text embeddings + const auto infer_start = std::chrono::steady_clock::now(); m_request.infer(); - + infer_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); return m_request.get_output_tensor(0); } diff --git a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp index eb9289ab3e..e695c763cb 100644 --- a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp +++ b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp @@ -100,7 +100,10 @@ void CLIPTextModelWithProjection::set_adapters(const std::optional& adap } } -ov::Tensor UNet2DConditionModel::infer(ov::Tensor sample, ov::Tensor timestep) { +ov::Tensor UNet2DConditionModel::infer(ov::Tensor sample, ov::Tensor timestep, float& infer_duration) { OPENVINO_ASSERT(m_impl, "UNet model must be compiled first. 
Cannot infer non-compiled model"); - return m_impl->infer(sample, timestep); + return m_impl->infer(sample, timestep, infer_duration); } } // namespace genai diff --git a/src/cpp/src/image_generation/models/unet_inference.hpp b/src/cpp/src/image_generation/models/unet_inference.hpp index ae928aac30..5438e1bf36 100644 --- a/src/cpp/src/image_generation/models/unet_inference.hpp +++ b/src/cpp/src/image_generation/models/unet_inference.hpp @@ -14,7 +14,7 @@ class UNet2DConditionModel::UNetInference { virtual void compile(std::shared_ptr model, const std::string& device, const ov::AnyMap& properties) = 0; virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) = 0; virtual void set_adapters(AdapterController& adapter_controller, const AdapterConfig& adapters) = 0; - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep) = 0; + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, float& infer_duration) = 0; // utility function to resize model given optional dimensions. static void reshape(std::shared_ptr model, diff --git a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp index dd265e3eca..2dc1b9ef0b 100644 --- a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp +++ b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp @@ -28,13 +28,15 @@ class UNet2DConditionModel::UNetInferenceDynamic : public UNet2DConditionModel:: adapter_controller.apply(m_request, adapters); } - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep) override { + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, float& infer_duration) override + { OPENVINO_ASSERT(m_request, "UNet model must be compiled first. 
Cannot infer non-compiled model"); m_request.set_tensor("sample", sample); m_request.set_tensor("timestep", timestep); - + const auto infer_start = std::chrono::steady_clock::now(); m_request.infer(); + infer_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); return m_request.get_output_tensor(); } diff --git a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp index f63a8ea237..d7c9d2d77d 100644 --- a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp +++ b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp @@ -88,7 +88,7 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel } } - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep) override { + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, float& infer_duration) override { OPENVINO_ASSERT(m_native_batch_size && m_native_batch_size == m_requests.size(), "UNet model must be compiled first"); @@ -105,6 +105,8 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel auto bs1_sample_shape = sample.get_shape(); bs1_sample_shape[0] = 1; + const auto infer_start = std::chrono::steady_clock::now(); + for (int i = 0; i < m_native_batch_size; i++) { m_requests[i].set_tensor("timestep", timestep); @@ -132,6 +134,7 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel // wait for infer to complete. 
m_requests[i].wait(); } + infer_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); return out_sample; } diff --git a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp index e3e720109d..4e43a720ad 100644 --- a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp @@ -264,6 +264,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { // Input tensors for transformer model ov::Tensor prompt_embeds_inp, pooled_prompt_embeds_inp; + float infer_duration; // 1. Encode positive prompt: std::string prompt_2_str = generation_config.prompt_2 != std::nullopt ? *generation_config.prompt_2 : positive_prompt; @@ -274,14 +275,24 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { std::string negative_prompt_3_str = generation_config.negative_prompt_3 != std::nullopt ? *generation_config.negative_prompt_3 : negative_prompt_1_str; // text_encoder_1_output - stores positive and negative pooled_prompt_embeds - ov::Tensor text_encoder_1_output = m_clip_text_encoder_1->infer(positive_prompt, negative_prompt_1_str, do_classifier_free_guidance(generation_config.guidance_scale)); + ov::Tensor text_encoder_1_output = + m_clip_text_encoder_1->infer(positive_prompt, + negative_prompt_1_str, + do_classifier_free_guidance(generation_config.guidance_scale), + infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration / 1000.0f; // text_encoder_1_hidden_state - stores positive and negative prompt_embeds size_t idx_hidden_state_1 = m_clip_text_encoder_1->get_config().num_hidden_layers + 1; ov::Tensor text_encoder_1_hidden_state = m_clip_text_encoder_1->get_output_tensor(idx_hidden_state_1); // text_encoder_2_output - stores positive and negative pooled_prompt_2_embeds - ov::Tensor text_encoder_2_output = m_clip_text_encoder_2->infer(prompt_2_str, 
negative_prompt_2_str, do_classifier_free_guidance(generation_config.guidance_scale)); + ov::Tensor text_encoder_2_output = + m_clip_text_encoder_2->infer(prompt_2_str, + negative_prompt_2_str, + do_classifier_free_guidance(generation_config.guidance_scale), + infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration / 1000.0f; // text_encoder_2_hidden_state - stores positive and negative prompt_2_embeds size_t idx_hidden_state_2 = m_clip_text_encoder_2->get_config().num_hidden_layers + 1; @@ -292,7 +303,9 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { text_encoder_3_output = m_t5_text_encoder->infer(prompt_3_str, negative_prompt_3_str, do_classifier_free_guidance(generation_config.guidance_scale), - generation_config.max_sequence_length); + generation_config.max_sequence_length, + infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder_3"] = infer_duration / 1000.0f; } else { ov::Shape t5_prompt_embed_shape = {generation_config.num_images_per_prompt, m_clip_text_encoder_1->get_config().max_position_embeddings, @@ -431,6 +444,9 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) override { + const auto gen_start = std::chrono::steady_clock::now(); + float infer_duration; + m_perf_metrics.clean_up(); ImageGenerationConfig generation_config = m_generation_config; generation_config.update_generation_config(properties); @@ -471,6 +487,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { ov::Tensor noisy_residual_tensor(ov::element::f32, {}); for (size_t inference_step = 0; inference_step < timesteps.size(); ++inference_step) { + auto step_start = std::chrono::steady_clock::now(); // concat the same latent twice along a batch dimension in case of CFG if (batch_size_multiplier > 1) { numpy_utils::batch_copy(latent, latent_cfg, 0, 0, generation_config.num_images_per_prompt); @@ -479,9 +496,9 @@ class 
StableDiffusion3Pipeline : public DiffusionPipeline { // just assign to save memory copy latent_cfg = latent; } - ov::Tensor timestep(ov::element::f32, {1}, ×teps[inference_step]); - ov::Tensor noise_pred_tensor = m_transformer->infer(latent_cfg, timestep); + ov::Tensor noise_pred_tensor = m_transformer->infer(latent_cfg, timestep, infer_duration); + m_perf_metrics.raw_metrics.transformer_inference_durations.emplace_back(MicroSeconds(infer_duration)); ov::Shape noise_pred_shape = noise_pred_tensor.get_shape(); noise_pred_shape[0] /= batch_size_multiplier; @@ -505,16 +522,31 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { auto scheduler_step_result = m_scheduler->step(noisy_residual_tensor, latent, inference_step, generation_config.generator); latent = scheduler_step_result["latent"]; + auto step_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - step_start); + m_perf_metrics.raw_metrics.iteration_durations.emplace_back(MicroSeconds(step_ms)); + if (callback && callback(inference_step, timesteps.size(), latent)) { - return ov::Tensor(ov::element::u8, {}); + auto image = ov::Tensor(ov::element::u8, {}); + m_perf_metrics.generate_duration = + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start) + .count(); + return image; } } - return decode(latent); + auto image = decode(latent, infer_duration); + m_perf_metrics.vae_decoder_inference_duration = infer_duration / 1000.0f; + m_perf_metrics.generate_duration = + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start).count(); + return image; + } + + ov::Tensor decode(const ov::Tensor latent, float& infer_duration) override { + return m_vae->decode(latent, infer_duration); } - ov::Tensor decode(const ov::Tensor latent) override { - return m_vae->decode(latent); + ImageGenerationPerfMetrics get_perfomance_metrics() override { + return m_perf_metrics; } private: @@ -621,6 +653,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { 
std::shared_ptr m_t5_text_encoder = nullptr; std::shared_ptr m_transformer = nullptr; std::shared_ptr m_vae = nullptr; + ImageGenerationPerfMetrics m_perf_metrics; }; } // namespace genai diff --git a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp index 3801c855fd..1298196e28 100644 --- a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp @@ -179,10 +179,12 @@ class StableDiffusionPipeline : public DiffusionPipeline { void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override { const auto& unet_config = m_unet->get_config(); const size_t batch_size_multiplier = m_unet->do_classifier_free_guidance(generation_config.guidance_scale) ? 2 : 1; // Unet accepts 2x batch in case of CFG + float infer_duration; std::string negative_prompt = generation_config.negative_prompt != std::nullopt ? *generation_config.negative_prompt : std::string{}; ov::Tensor encoder_hidden_states = m_clip_text_encoder->infer(positive_prompt, negative_prompt, - batch_size_multiplier > 1); + batch_size_multiplier > 1, infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration / 1000.0f; // replicate encoder hidden state to UNet model if (generation_config.num_images_per_prompt == 1) { @@ -307,7 +309,10 @@ class StableDiffusionPipeline : public DiffusionPipeline { ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) override { + const auto gen_start = std::chrono::steady_clock::now(); + float infer_duration; using namespace numpy_utils; + m_perf_metrics.clean_up(); ImageGenerationConfig generation_config = m_generation_config; generation_config.update_generation_config(properties); @@ -357,6 +362,7 @@ class StableDiffusionPipeline : public DiffusionPipeline { ov::Tensor latent_cfg(ov::element::f32, latent_shape_cfg), denoised, 
noisy_residual_tensor(ov::element::f32, {}), latent_model_input; for (size_t inference_step = 0; inference_step < timesteps.size(); inference_step++) { + auto step_start = std::chrono::steady_clock::now(); numpy_utils::batch_copy(latent, latent_cfg, 0, 0, generation_config.num_images_per_prompt); // concat the same latent twice along a batch dimension in case of CFG if (batch_size_multiplier > 1) { @@ -367,7 +373,8 @@ class StableDiffusionPipeline : public DiffusionPipeline { ov::Tensor latent_model_input = is_inpainting_model() ? numpy_utils::concat(numpy_utils::concat(latent_cfg, mask, 1), masked_image_latent, 1) : latent_cfg; ov::Tensor timestep(ov::element::i64, {1}, &timesteps[inference_step]); - ov::Tensor noise_pred_tensor = m_unet->infer(latent_model_input, timestep); + ov::Tensor noise_pred_tensor = m_unet->infer(latent_model_input, timestep, infer_duration); + m_perf_metrics.raw_metrics.unet_inference_durations.emplace_back(MicroSeconds(infer_duration)); ov::Shape noise_pred_shape = noise_pred_tensor.get_shape(); noise_pred_shape[0] /= batch_size_multiplier; @@ -400,16 +407,30 @@ class StableDiffusionPipeline : public DiffusionPipeline { const auto it = scheduler_step_result.find("denoised"); denoised = it != scheduler_step_result.end() ? 
it->second : latent; + auto step_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - step_start); + m_perf_metrics.raw_metrics.iteration_durations.emplace_back(MicroSeconds(step_ms)); + if (callback && callback(inference_step, timesteps.size(), denoised)) { - return ov::Tensor(ov::element::u8, {}); + auto image = ov::Tensor(ov::element::u8, {}); + m_perf_metrics.generate_duration = + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start) + .count(); + return image; } } + auto image = decode(denoised, infer_duration); + m_perf_metrics.vae_decoder_inference_duration = infer_duration / 1000.0f; + m_perf_metrics.generate_duration = + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start).count(); + return image; + } - return decode(denoised); + ov::Tensor decode(const ov::Tensor latent, float& infer_duration) override { + return m_vae->decode(latent, infer_duration); } - ov::Tensor decode(const ov::Tensor latent) override { - return m_vae->decode(latent); + ImageGenerationPerfMetrics get_perfomance_metrics() override { + return m_perf_metrics; } protected: @@ -505,6 +526,7 @@ class StableDiffusionPipeline : public DiffusionPipeline { std::shared_ptr m_vae = nullptr; std::shared_ptr m_image_processor = nullptr, m_mask_processor_rgb = nullptr, m_mask_processor_gray = nullptr; std::shared_ptr m_image_resizer = nullptr, m_mask_resizer = nullptr; + ImageGenerationPerfMetrics m_perf_metrics; }; } // namespace genai diff --git a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp index c3ebcdf1f4..b806315cad 100644 --- a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp @@ -147,6 +147,7 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) 
override { const auto& unet_config = m_unet->get_config(); const size_t batch_size_multiplier = m_unet->do_classifier_free_guidance(generation_config.guidance_scale) ? 2 : 1; // Unet accepts 2x batch in case of CFG + float infer_duration; std::vector time_ids = {static_cast(generation_config.width), static_cast(generation_config.height), @@ -177,8 +178,13 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { ov::Tensor encoder_hidden_states(ov::element::f32, {}), add_text_embeds(ov::element::f32, {}); if (compute_negative_prompt) { - add_text_embeds = m_clip_text_encoder_with_projection->infer(positive_prompt, negative_prompt_1_str, batch_size_multiplier > 1); - m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, batch_size_multiplier > 1); + add_text_embeds = m_clip_text_encoder_with_projection->infer(positive_prompt, + negative_prompt_1_str, + batch_size_multiplier > 1, + infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration / 1000.0f; + m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, batch_size_multiplier > 1, infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration / 1000.0f; // prompt_embeds = prompt_embeds.hidden_states[-2] ov::Tensor encoder_hidden_states_1 = m_clip_text_encoder->get_output_tensor(idx_hidden_state_1); @@ -186,8 +192,11 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { encoder_hidden_states = numpy_utils::concat(encoder_hidden_states_1, encoder_hidden_states_2, -1); } else { - ov::Tensor add_text_embeds_positive = m_clip_text_encoder_with_projection->infer(positive_prompt, negative_prompt_1_str, false); - m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, false); + ov::Tensor add_text_embeds_positive = + m_clip_text_encoder_with_projection->infer(positive_prompt, negative_prompt_1_str, false, infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration / 
1000.0f; + m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, false, infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration / 1000.0f; ov::Tensor encoder_hidden_states_1_positive = m_clip_text_encoder->get_output_tensor(idx_hidden_state_1); ov::Tensor encoder_hidden_states_2_positive = m_clip_text_encoder_with_projection->get_output_tensor(idx_hidden_state_2); diff --git a/src/cpp/src/image_generation/text2image_pipeline.cpp b/src/cpp/src/image_generation/text2image_pipeline.cpp index 56b02a2e10..c7ee90f804 100644 --- a/src/cpp/src/image_generation/text2image_pipeline.cpp +++ b/src/cpp/src/image_generation/text2image_pipeline.cpp @@ -190,7 +190,12 @@ ov::Tensor Text2ImagePipeline::generate(const std::string& positive_prompt, cons } ov::Tensor Text2ImagePipeline::decode(const ov::Tensor latent) { - return m_impl->decode(latent); + float infer_duration; + return m_impl->decode(latent, infer_duration); +} + +ImageGenerationPerfMetrics Text2ImagePipeline::get_perfomance_metrics() { + return m_impl->get_perfomance_metrics(); } } // namespace genai diff --git a/src/python/compare_pyi.cmake b/src/python/compare_pyi.cmake index 62234d60d4..d9bb01efdc 100644 --- a/src/python/compare_pyi.cmake +++ b/src/python/compare_pyi.cmake @@ -21,7 +21,14 @@ foreach(pyi_file IN LISTS pyi_files) ERROR_VARIABLE error_message RESULT_VARIABLE exit_code OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "compare output_message is ${output_message}") + message(STATUS "compare error_message is ${error_message}") if(NOT exit_code EQUAL 0) + message(STATUS "commited_pyi_file is ${commited_pyi_file}, pyi_file is ${pyi_file}") + file(READ "${pyi_file}" file_content) + message(STATUS "Content of ${pyi_file} is: \n${file_content}") + file(READ "${commited_pyi_file}" file_content_commit) + message(STATUS "Content of ${commited_pyi_file} is: \n${file_content_commit}") message(FATAL_ERROR "File ${commited_pyi_file} is outdated and need to be 
regenerated with pybind11-stubgen") endif() endif() diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index 0ad7ba3f12..7620fd22d4 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -76,6 +76,8 @@ Generator, CppStdGenerator, TorchGenerator, + ImageGenerationPerfMetrics, + RawImageGenerationPerfMetrics, ) # Continuous batching diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 0a401ae958..5e6f6da0ed 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -21,9 +21,11 @@ from openvino_genai.py_openvino_genai import GenerationResult from openvino_genai.py_openvino_genai import Generator from openvino_genai.py_openvino_genai import Image2ImagePipeline from openvino_genai.py_openvino_genai import ImageGenerationConfig +from openvino_genai.py_openvino_genai import ImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import InpaintingPipeline from openvino_genai.py_openvino_genai import LLMPipeline from openvino_genai.py_openvino_genai import PerfMetrics +from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import RawPerfMetrics from openvino_genai.py_openvino_genai import SD3Transformer2DModel from openvino_genai.py_openvino_genai import Scheduler @@ -45,5 +47,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 5adde32db4..fe28a91306 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ 
b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import openvino._pyopenvino import os import typing -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 
'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -202,7 +202,7 @@ class AutoencoderKL: device (str): Device to run the model on (e.g., CPU, GPU). kwargs: Device properties. """ - def decode(self, latent: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor: + def decode(self, latent: openvino._pyopenvino.Tensor, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def encode(self, image: openvino._pyopenvino.Tensor, generator: Generator) -> openvino._pyopenvino.Tensor: ... @@ -255,7 +255,7 @@ class CLIPTextModel: ... def get_output_tensor(self, idx: int) -> openvino._pyopenvino.Tensor: ... - def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool) -> openvino._pyopenvino.Tensor: + def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int) -> CLIPTextModel: ... @@ -304,7 +304,7 @@ class CLIPTextModelWithProjection: ... def get_output_tensor(self, idx: int) -> openvino._pyopenvino.Tensor: ... - def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool) -> openvino._pyopenvino.Tensor: + def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int) -> CLIPTextModelWithProjection: ... @@ -513,7 +513,7 @@ class FluxTransformer2DModel: """ def get_config(self) -> FluxTransformer2DModel.Config: ... 
- def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor: + def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> FluxTransformer2DModel: ... @@ -829,6 +829,8 @@ class Image2ImagePipeline: """ def get_generation_config(self) -> ImageGenerationConfig: ... + def get_perfomance_metrics(self) -> ImageGenerationPerfMetrics: + ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... def set_generation_config(self, config: ImageGenerationConfig) -> None: @@ -860,6 +862,64 @@ class ImageGenerationConfig: ... def validate(self) -> None: ... +class ImageGenerationPerfMetrics: + """ + + Holds performance metrics for each generate call. + + PerfMetrics holds fields with mean and standard deviations for the following metrics: + - Generate iteration duration, ms + - Inference duration for unet model, ms + - Inference duration for transformer model, ms + + Additional fields include: + - Load time, ms + - Generate total duration, ms + - inference durations for each encoder, ms + - inference duration of vae_encoder model, ms + - inference duration of vae_decoder model, ms + + Preferable way to access values is via get functions. Getters calculate mean and std values from raw_metrics and return pairs. + If mean and std were already calculated, getters return cached values. + + :param get_load_time: Returns the load time in milliseconds. + :type get_load_time: float + + :param get_generate_duration: Returns the generate duration in milliseconds. + :type get_generate_duration: float + + :param get_inference_total_duration: Returns the total inference durations (including encoder, unet/transformer and decoder inference) in milliseconds. 
+ :type get_inference_total_duration: float + + :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in milliseconds. + :type get_iteration_duration: MeanStdPair + + :param unet_inference_duration: Returns the mean and standard deviation of one unet inference in milliseconds. + :type unet_inference_duration: MeanStdPair + + :param get_transformer_inference_duration: Returns the mean and standard deviation of one transformer inference in milliseconds. + :type get_transformer_inference_duration: MeanStdPair + + :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics. + :type raw_metrics: RawImageGenerationPerfMetrics + """ + def __init__(self) -> None: + ... + def get_generate_duration(self) -> float: + ... + def get_inference_total_duration(self) -> float: + ... + def get_iteration_duration(self) -> MeanStdPair: + ... + def get_load_time(self) -> float: + ... + def get_transformer_inference_duration(self) -> MeanStdPair: + ... + def get_unet_inference_duration(self) -> MeanStdPair: + ... + @property + def raw_metrics(self) -> RawImageGenerationPerfMetrics: + ... class InpaintingPipeline: """ This class is used for generation with inpainting models. @@ -930,6 +990,8 @@ class InpaintingPipeline: """ def get_generation_config(self) -> ImageGenerationConfig: ... + def get_perfomance_metrics(self) -> ImageGenerationPerfMetrics: + ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... def set_generation_config(self, config: ImageGenerationConfig) -> None: @@ -1229,6 +1291,31 @@ class PipelineMetrics: @property def scheduled_requests(self) -> int: ... +class RawImageGenerationPerfMetrics: + """ + + Structure with raw performance metrics for each generation before any statistics are calculated. + + :param unet_inference_durations: Durations for each unet inference in microseconds. 
+ :type unet_inference_durations: List[float] + + :param transformer_inference_durations: Durations for each transformer inference in microseconds. + :type transformer_inference_durations: List[float] + + :param iteration_durations: Durations for each step iteration in microseconds. + :type iteration_durations: List[float] + """ + def __init__(self) -> None: + ... + @property + def iteration_durations(self) -> list[float]: + ... + @property + def transformer_inference_durations(self) -> list[float]: + ... + @property + def unet_inference_durations(self) -> list[float]: + ... class RawPerfMetrics: """ @@ -1333,7 +1420,7 @@ class SD3Transformer2DModel: """ def get_config(self) -> SD3Transformer2DModel.Config: ... - def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor: + def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> SD3Transformer2DModel: ... @@ -1530,7 +1617,7 @@ class T5EncoderModel: """ def get_output_tensor(self, idx: int) -> openvino._pyopenvino.Tensor: ... - def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool, max_sequence_length: int) -> openvino._pyopenvino.Tensor: + def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool, max_sequence_length: int, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int, max_sequence_length: int) -> T5EncoderModel: ... @@ -1618,6 +1705,8 @@ class Text2ImagePipeline: """ def get_generation_config(self) -> ImageGenerationConfig: ... + def get_perfomance_metrics(self) -> ImageGenerationPerfMetrics: + ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... 
def set_generation_config(self, config: ImageGenerationConfig) -> None: @@ -1737,7 +1826,7 @@ class UNet2DConditionModel: ... def get_config(self) -> UNet2DConditionModel.Config: ... - def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor: + def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> UNet2DConditionModel: ... diff --git a/src/python/py_image_generation_models.cpp b/src/python/py_image_generation_models.cpp index 75be28233f..79009cd11d 100644 --- a/src/python/py_image_generation_models.cpp +++ b/src/python/py_image_generation_models.cpp @@ -70,7 +70,7 @@ void init_clip_text_model(py::module_& m) { clip_text_model.def("get_config", &ov::genai::CLIPTextModel::get_config) .def("reshape", &ov::genai::CLIPTextModel::reshape, py::arg("batch_size")) .def("set_adapters", &ov::genai::CLIPTextModel::set_adapters, py::arg("adapters")) - .def("infer", &ov::genai::CLIPTextModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance")) + .def("infer", &ov::genai::CLIPTextModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("infer_duration")) .def("get_output_tensor", &ov::genai::CLIPTextModel::get_output_tensor, py::arg("idx")) .def( "compile", @@ -133,7 +133,7 @@ void init_clip_text_model_with_projection(py::module_& m) { .def_readwrite("num_hidden_layers", &ov::genai::CLIPTextModelWithProjection::Config::num_hidden_layers); clip_text_model_with_projection.def("reshape", &ov::genai::CLIPTextModelWithProjection::reshape, py::arg("batch_size")) - .def("infer", &ov::genai::CLIPTextModelWithProjection::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance")) + .def("infer", 
&ov::genai::CLIPTextModelWithProjection::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("infer_duration")) .def("get_config", &ov::genai::CLIPTextModelWithProjection::get_config) .def("get_output_tensor", &ov::genai::CLIPTextModelWithProjection::get_output_tensor, py::arg("idx")) .def("set_adapters", &ov::genai::CLIPTextModelWithProjection::set_adapters, py::arg("adapters")) @@ -189,7 +189,7 @@ void init_t5_encoder_model(py::module_& m) { model (T5EncoderModel): T5EncoderModel model )") .def("reshape", &ov::genai::T5EncoderModel::reshape, py::arg("batch_size"), py::arg("max_sequence_length")) - .def("infer", &ov::genai::T5EncoderModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("max_sequence_length")) + .def("infer", &ov::genai::T5EncoderModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("max_sequence_length"), py::arg("infer_duration")) .def("get_output_tensor", &ov::genai::T5EncoderModel::get_output_tensor, py::arg("idx")) // .def("set_adapters", &ov::genai::T5EncoderModel::set_adapters, py::arg("adapters")) .def( @@ -254,7 +254,7 @@ void init_unet2d_condition_model(py::module_& m) { unet2d_condition_model.def("get_config", &ov::genai::UNet2DConditionModel::get_config) .def("reshape", &ov::genai::UNet2DConditionModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length")) .def("set_adapters", &ov::genai::UNet2DConditionModel::set_adapters, py::arg("adapters")) - .def("infer", &ov::genai::UNet2DConditionModel::infer, py::arg("sample"), py::arg("timestep")) + .def("infer", &ov::genai::UNet2DConditionModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("infer_duration")) .def("set_hidden_states", &ov::genai::UNet2DConditionModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states")) 
.def("do_classifier_free_guidance", &ov::genai::UNet2DConditionModel::do_classifier_free_guidance, py::arg("guidance_scale")) .def( @@ -320,7 +320,7 @@ void init_sd3_transformer_2d_model(py::module_& m) { sd3_transformer_2d_model.def("get_config", &ov::genai::SD3Transformer2DModel::get_config) .def("reshape", &ov::genai::SD3Transformer2DModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length")) // .def("set_adapters", &ov::genai::SD3Transformer2DModel::set_adapters, py::arg("adapters")) - .def("infer", &ov::genai::SD3Transformer2DModel::infer, py::arg("sample"), py::arg("timestep")) + .def("infer", &ov::genai::SD3Transformer2DModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("infer_duration")) .def("set_hidden_states", &ov::genai::SD3Transformer2DModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states")) .def( "compile", @@ -383,7 +383,7 @@ void init_flux_transformer_2d_model(py::module_& m) { flux_transformer_2d_model.def("get_config", &ov::genai::FluxTransformer2DModel::get_config) .def("reshape", &ov::genai::FluxTransformer2DModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length")) // .def("set_adapters", &ov::genai::FluxTransformer2DModel::set_adapters, py::arg("adapters")) - .def("infer", &ov::genai::FluxTransformer2DModel::infer, py::arg("sample"), py::arg("timestep")) + .def("infer", &ov::genai::FluxTransformer2DModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("infer_duration")) .def("set_hidden_states", &ov::genai::FluxTransformer2DModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states")) .def( "compile", @@ -492,7 +492,7 @@ void init_autoencoder_kl(py::module_& m) { device (str): Device to run the model on (e.g., CPU, GPU). kwargs: Device properties. 
)") - .def("decode", &ov::genai::AutoencoderKL::decode, py::arg("latent")) + .def("decode", &ov::genai::AutoencoderKL::decode, py::arg("latent"), py::arg("infer_duration")) .def("encode", &ov::genai::AutoencoderKL::encode, py::arg("image"), py::arg("generator")) .def("get_config", &ov::genai::AutoencoderKL::get_config) .def("get_vae_scale_factor", &ov::genai::AutoencoderKL::get_vae_scale_factor); diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp index c246557a97..3499712c3e 100644 --- a/src/python/py_image_generation_pipelines.cpp +++ b/src/python/py_image_generation_pipelines.cpp @@ -13,6 +13,7 @@ #include "openvino/genai/image_generation/text2image_pipeline.hpp" #include "openvino/genai/image_generation/image2image_pipeline.hpp" #include "openvino/genai/image_generation/inpainting_pipeline.hpp" +#include "openvino/genai/image_generation/image_generation_perf_metrics.hpp" #include "tokenizers_path.hpp" #include "py_utils.hpp" @@ -21,6 +22,8 @@ namespace py = pybind11; namespace pyutils = ov::genai::pybind::utils; using namespace pybind11::literals; +using ov::genai::ImageGenerationPerfMetrics; +using ov::genai::RawImageGenerationPerfMetrics; namespace { @@ -54,6 +57,59 @@ auto text2image_generate_docstring = R"( :rtype: ov.Tensor )"; +auto raw_image_generation_perf_metrics_docstring = R"( + Structure with raw performance metrics for each generation before any statistics are calculated. + + :param unet_inference_durations: Durations for each unet inference in microseconds. + :type unet_inference_durations: List[float] + + :param transformer_inference_durations: Durations for each transformer inference in microseconds. + :type transformer_inference_durations: List[float] + + :param iteration_durations: Durations for each step iteration in microseconds. + :type iteration_durations: List[float] +)"; + +auto image_generation_perf_metrics_docstring = R"( + Holds performance metrics for each generate call. 
+ + PerfMetrics holds fields with mean and standard deviations for the following metrics: + - Generate iteration duration, ms + - Inference duration for unet model, ms + - Inference duration for transformer model, ms + + Additional fields include: + - Load time, ms + - Generate total duration, ms + - inference durations for each encoder, ms + - inference duration of vae_encoder model, ms + - inference duration of vae_decoder model, ms + + Preferable way to access values is via get functions. Getters calculate mean and std values from raw_metrics and return pairs. + If mean and std were already calculated, getters return cached values. + + :param get_load_time: Returns the load time in milliseconds. + :type get_load_time: float + + :param get_generate_duration: Returns the generate duration in milliseconds. + :type get_generate_duration: float + + :param get_inference_total_duration: Returns the total inference durations (including encoder, unet/transformer and decoder inference) in milliseconds. + :type get_inference_total_duration: float + + :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in milliseconds. + :type get_iteration_duration: MeanStdPair + + :param unet_inference_duration: Returns the mean and standard deviation of one unet inference in milliseconds. + :type unet_inference_duration: MeanStdPair + + :param get_transformer_inference_duration: Returns the mean and standard deviation of one transformer inference in milliseconds. + :type get_transformer_inference_duration: MeanStdPair + + :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics. 
+ :type raw_metrics: RawImageGenerationPerfMetrics +)"; + // Trampoline class to support inheritance from Generator in Python class PyGenerator : public ov::genai::Generator { public: @@ -229,6 +285,28 @@ void init_image_generation_pipelines(py::module_& m) { config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); }); + py::class_<RawImageGenerationPerfMetrics>(m, "RawImageGenerationPerfMetrics", raw_image_generation_perf_metrics_docstring) + .def(py::init<>()) + .def_property_readonly("unet_inference_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::unet_inference_durations); + }) + .def_property_readonly("transformer_inference_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::transformer_inference_durations); + }) + .def_property_readonly("iteration_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::iteration_durations); + }); + + py::class_<ImageGenerationPerfMetrics>(m, "ImageGenerationPerfMetrics", image_generation_perf_metrics_docstring) + .def(py::init<>()) + .def("get_load_time", &ImageGenerationPerfMetrics::get_load_time) + .def("get_generate_duration", &ImageGenerationPerfMetrics::get_generate_duration) + .def("get_unet_inference_duration", &ImageGenerationPerfMetrics::get_unet_inference_duration) + .def("get_transformer_inference_duration", &ImageGenerationPerfMetrics::get_transformer_inference_duration) + .def("get_iteration_duration", &ImageGenerationPerfMetrics::get_iteration_duration) + .def("get_inference_total_duration", &ImageGenerationPerfMetrics::get_inference_total_duration) + .def_readonly("raw_metrics", &ImageGenerationPerfMetrics::raw_metrics); + auto text2image_pipeline = py::class_<ov::genai::Text2ImagePipeline>(m, "Text2ImagePipeline", "This class is used for generation with text-to-image models.") .def(py::init([](const std::filesystem::path& models_path) { ScopedVar 
env_manager(pyutils::ov_tokenizers_module_path()); @@ -294,7 +372,8 @@ void init_image_generation_pipelines(py::module_& m) { }, py::arg("prompt"), "Input string", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::Text2ImagePipeline::get_perfomance_metrics); auto image2image_pipeline = py::class_<ov::genai::Image2ImagePipeline>(m, "Image2ImagePipeline", "This class is used for generation with image-to-image models.") @@ -357,7 +436,8 @@ void init_image_generation_pipelines(py::module_& m) { py::arg("prompt"), "Input string", py::arg("image"), "Initial image", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::Image2ImagePipeline::get_perfomance_metrics); auto inpainting_pipeline = py::class_<ov::genai::InpaintingPipeline>(m, "InpaintingPipeline", "This class is used for generation with inpainting models.") @@ -422,7 +502,8 @@ void init_image_generation_pipelines(py::module_& m) { py::arg("image"), "Initial image", py::arg("mask_image"), "Mask image", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::InpaintingPipeline::get_perfomance_metrics); // define constructors to create one pipeline from another // NOTE: needs to be defined once all pipelines are created