From 41137defb29e16bd6e60d3a8d0ee02bf4e88441b Mon Sep 17 00:00:00 2001 From: xufang Date: Wed, 18 Dec 2024 09:49:00 +0800 Subject: [PATCH 01/19] add profile for image generation --- .../heterogeneous_stable_diffusion.cpp | 7 +++- samples/cpp/image_generation/image2image.cpp | 6 ++- samples/cpp/image_generation/inpainting.cpp | 7 +++- .../cpp/image_generation/lora_text2image.cpp | 12 ++++-- samples/cpp/image_generation/text2image.cpp | 7 +++- .../genai/image_generation/autoencoder_kl.hpp | 2 +- .../image_generation/clip_text_model.hpp | 3 +- .../clip_text_model_with_projection.hpp | 3 +- .../flux_transformer_2d_model.hpp | 3 +- .../image_generation/generation_config.hpp | 7 ++++ .../image_generation/image2image_pipeline.hpp | 4 +- .../image_generation/inpainting_pipeline.hpp | 4 +- .../sd3_transformer_2d_model.hpp | 3 +- .../image_generation/t5_encoder_model.hpp | 4 +- .../image_generation/text2image_pipeline.hpp | 4 +- .../unet2d_condition_model.hpp | 3 +- .../image_generation/diffusion_pipeline.hpp | 6 +-- .../src/image_generation/flux_pipeline.hpp | 31 +++++++++----- .../image_generation/image2image_pipeline.cpp | 5 ++- .../image_generation/inpainting_pipeline.cpp | 5 ++- .../models/autoencoder_kl.cpp | 6 ++- .../models/clip_text_model.cpp | 6 ++- .../clip_text_model_with_projection.cpp | 5 ++- .../models/flux_transformer_2d_model.cpp | 5 ++- .../models/sd3_transformer_2d_model.cpp | 5 ++- .../models/t5_encoder_model.cpp | 5 ++- .../models/unet2d_condition_model.cpp | 4 +- .../models/unet_inference.hpp | 2 +- .../models/unet_inference_dynamic.hpp | 6 ++- .../models/unet_inference_static_bs1.hpp | 6 ++- .../stable_diffusion_3_pipeline.hpp | 42 ++++++++++++++----- .../stable_diffusion_pipeline.hpp | 30 ++++++++----- .../stable_diffusion_xl_pipeline.hpp | 14 ++++--- .../image_generation/text2image_pipeline.cpp | 5 ++- src/python/py_image_generation_models.cpp | 14 +++---- src/python/py_image_generation_pipelines.cpp | 6 +-- 36 files changed, 194 insertions(+), 93 deletions(-) diff --git a/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp b/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp index 8203c37345..d089804ba7 100644 --- a/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp +++ b/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp @@ -98,13 +98,16 @@ int32_t main(int32_t argc, char* argv[]) try { for (int imagei = 0; imagei < number_of_images_to_generate; imagei++) { std::cout << "Generating image " << imagei << std::endl; - ov::Tensor image = pipe.generate(prompt, + auto image_results = pipe.generate(prompt, ov::genai::width(width), ov::genai::height(height), ov::genai::guidance_scale(guidance_scale), ov::genai::num_inference_steps(number_of_inference_steps_per_image)); - imwrite("image_" + std::to_string(imagei) + ".bmp", image, true); + imwrite("image_" + std::to_string(imagei) + ".bmp", image_results.image, true); + + std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl; + std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl; } return EXIT_SUCCESS; diff --git a/samples/cpp/image_generation/image2image.cpp b/samples/cpp/image_generation/image2image.cpp index c071b88362..8aa31acebc 100644 --- a/samples/cpp/image_generation/image2image.cpp +++ b/samples/cpp/image_generation/image2image.cpp @@ -15,12 +15,14 @@ int32_t main(int32_t argc, char* argv[]) try { ov::Tensor image = 
utils::load_image(image_path); ov::genai::Image2ImagePipeline pipe(models_path, device); - ov::Tensor generated_image = pipe.generate(prompt, image, + auto image_results = pipe.generate(prompt, image, // controls how initial image is noised after being converted to latent space. `1` means initial image is fully noised ov::genai::strength(0.8f)); // writes `num_images_per_prompt` images by pattern name - imwrite("image_%d.bmp", generated_image, true); + imwrite("image_%d.bmp", image_results.image, true); + std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl; + std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl; return EXIT_SUCCESS; } catch (const std::exception& error) { diff --git a/samples/cpp/image_generation/inpainting.cpp b/samples/cpp/image_generation/inpainting.cpp index 4c7a758450..f79c04b8a1 100644 --- a/samples/cpp/image_generation/inpainting.cpp +++ b/samples/cpp/image_generation/inpainting.cpp @@ -16,10 +16,13 @@ int32_t main(int32_t argc, char* argv[]) try { ov::Tensor mask_image = utils::load_image(mask_image_path); ov::genai::InpaintingPipeline pipe(models_path, device); - ov::Tensor generated_image = pipe.generate(prompt, image, mask_image); + auto image_results = pipe.generate(prompt, image, mask_image); // writes `num_images_per_prompt` images by pattern name - imwrite("image_%d.bmp", generated_image, true); + imwrite("image_%d.bmp", image_results.image, true); + + std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl; + std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl; return EXIT_SUCCESS; } catch (const std::exception& error) { diff --git a/samples/cpp/image_generation/lora_text2image.cpp b/samples/cpp/image_generation/lora_text2image.cpp index 3fe4b74ff6..6ea0214c08 100644 --- a/samples/cpp/image_generation/lora_text2image.cpp +++ b/samples/cpp/image_generation/lora_text2image.cpp @@ -23,21 +23,25 @@ int32_t main(int32_t argc, char* argv[]) try { ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config)); std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n"; - ov::Tensor image = pipe.generate(prompt, + auto image_results = pipe.generate(prompt, ov::genai::generator(std::make_shared(42)), ov::genai::width(512), ov::genai::height(896), ov::genai::num_inference_steps(20)); - imwrite("lora.bmp", image, true); + imwrite("lora.bmp", image_results.image, true); + std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl; + std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl; std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; - image = pipe.generate(prompt, + image_results = pipe.generate(prompt, ov::genai::adapters(), // passing adapters in generate overrides adapters set in the constructor; adapters() means no adapters ov::genai::generator(std::make_shared(42)), ov::genai::width(512), ov::genai::height(896), ov::genai::num_inference_steps(20)); - imwrite("baseline.bmp", image, true); + imwrite("baseline.bmp", image_results.image, true); + std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << 
std::endl; + std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl; return EXIT_SUCCESS; } catch (const std::exception& error) { diff --git a/samples/cpp/image_generation/text2image.cpp b/samples/cpp/image_generation/text2image.cpp index 6a97b3a074..84ddfa9c26 100644 --- a/samples/cpp/image_generation/text2image.cpp +++ b/samples/cpp/image_generation/text2image.cpp @@ -12,14 +12,17 @@ int32_t main(int32_t argc, char* argv[]) try { const std::string device = "CPU"; // GPU can be used as well ov::genai::Text2ImagePipeline pipe(models_path, device); - ov::Tensor image = pipe.generate(prompt, + auto image_results = pipe.generate(prompt, ov::genai::width(512), ov::genai::height(512), ov::genai::num_inference_steps(20), ov::genai::num_images_per_prompt(1)); // writes `num_images_per_prompt` images by pattern name - imwrite("image_%d.bmp", image, true); + imwrite("image_%d.bmp", image_results.image, true); + + std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl; + std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl; return EXIT_SUCCESS; } catch (const std::exception& error) { diff --git a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp index d48661d899..04a3c17762 100644 --- a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp +++ b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp @@ -127,7 +127,7 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL { return compile(device, ov::AnyMap{std::forward(properties)...}); } - ov::Tensor decode(ov::Tensor latent); + ov::Tensor decode(ov::Tensor latent, RawPerfMetrics &raw_metrics); ov::Tensor encode(ov::Tensor image, std::shared_ptr generator); diff --git a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp index a3b9ebbd88..9e14bbcaaa 100644 --- a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp @@ -9,6 +9,7 @@ #include "openvino/genai/visibility.hpp" #include "openvino/genai/tokenizer.hpp" #include "openvino/genai/lora_adapter.hpp" +#include "openvino/genai/perf_metrics.hpp" #include "openvino/core/any.hpp" #include "openvino/runtime/tensor.hpp" @@ -84,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModel { void set_adapters(const std::optional& adapters); - ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance); + ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, RawPerfMetrics& raw_metrics); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp b/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp index 563fb8711d..a7a77a49dc 100644 --- a/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp +++ b/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp @@ -9,6 +9,7 @@ #include "openvino/genai/visibility.hpp" #include "openvino/genai/tokenizer.hpp" #include "openvino/genai/lora_adapter.hpp" +#include "openvino/genai/perf_metrics.hpp" #include "openvino/core/any.hpp" 
#include "openvino/runtime/tensor.hpp" @@ -84,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModelWithProjection { void set_adapters(const std::optional& adapters); - ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance); + ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, RawPerfMetrics& raw_metrics); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp b/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp index 95f846668b..7339244420 100644 --- a/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp @@ -12,6 +12,7 @@ #include "openvino/runtime/tensor.hpp" #include "openvino/genai/visibility.hpp" +#include "openvino/genai/perf_metrics.hpp" namespace ov { namespace genai { @@ -75,7 +76,7 @@ class OPENVINO_GENAI_EXPORTS FluxTransformer2DModel { void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states); - ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep); + ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, RawPerfMetrics& raw_metrics); private: Config m_config; diff --git a/src/cpp/include/openvino/genai/image_generation/generation_config.hpp b/src/cpp/include/openvino/genai/image_generation/generation_config.hpp index 50e576466d..ee75ed0944 100644 --- a/src/cpp/include/openvino/genai/image_generation/generation_config.hpp +++ b/src/cpp/include/openvino/genai/image_generation/generation_config.hpp @@ -12,6 +12,7 @@ #include "openvino/genai/lora_adapter.hpp" #include "openvino/genai/visibility.hpp" +#include "openvino/genai/perf_metrics.hpp" namespace ov { namespace genai { @@ -229,5 +230,11 @@ static constexpr ov::Property> OPENVINO_GENAI_EXPORTS std::pair generation_config(const ImageGenerationConfig& generation_config); +class ImageResults { +public: + ov::Tensor image; + PerfMetrics perf_metrics; +}; + } // namespace genai } // namespace ov diff --git a/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp b/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp index ea02969c5e..aa4beef79e 100644 --- a/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp +++ b/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp @@ -68,10 +68,10 @@ class OPENVINO_GENAI_EXPORTS Image2ImagePipeline { } // Returns a tensor with the following dimensions [num_images_per_prompt, height, width, 3] - ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties = {}); + ImageResults generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties = {}); template - ov::util::EnableIfAllStringAny generate( + ov::util::EnableIfAllStringAny generate( const std::string& positive_prompt, ov::Tensor initial_image, Properties&&... 
properties) { diff --git a/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp b/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp index 6eead673e4..2a5ce3387b 100644 --- a/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp +++ b/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp @@ -90,10 +90,10 @@ class OPENVINO_GENAI_EXPORTS InpaintingPipeline { } // Returns a tensor with the following dimensions [num_images_per_prompt, height, width, 3] - ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties = {}); + ImageResults generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties = {}); template - ov::util::EnableIfAllStringAny generate( + ov::util::EnableIfAllStringAny generate( const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask, diff --git a/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp b/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp index 7f96af49c2..5348877e0c 100644 --- a/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp @@ -13,6 +13,7 @@ #include "openvino/runtime/tensor.hpp" #include "openvino/genai/visibility.hpp" +#include "openvino/genai/perf_metrics.hpp" namespace ov { namespace genai { @@ -77,7 +78,7 @@ class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel { void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states); - ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep); + ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, RawPerfMetrics& raw_metrics); private: Config m_config; diff --git a/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp b/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp index 11797226eb..edc185cd0e 100644 --- a/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp @@ -9,6 +9,7 @@ #include "openvino/genai/visibility.hpp" #include "openvino/genai/tokenizer.hpp" #include "openvino/genai/lora_adapter.hpp" +#include "openvino/genai/perf_metrics.hpp" #include "openvino/core/any.hpp" #include "openvino/runtime/tensor.hpp" @@ -68,7 +69,8 @@ class OPENVINO_GENAI_EXPORTS T5EncoderModel { ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, - int max_sequence_length); + int max_sequence_length, + RawPerfMetrics& raw_metrics); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp b/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp index 34b9d6e341..18292223cf 100644 --- a/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp +++ b/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp @@ -205,10 +205,10 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline { * @param properties Image generation parameters specified as properties. Values in 'properties' override default value for generation parameters. 
     * @returns A tensor which has dimensions [num_images_per_prompt, height, width, 3]
      */
-    ov::Tensor generate(const std::string& positive_prompt, const ov::AnyMap& properties = {});
+    ImageResults generate(const std::string& positive_prompt, const ov::AnyMap& properties = {});
 
     template <typename... Properties>
-    ov::util::EnableIfAllStringAny<ov::Tensor, Properties...> generate(
+    ov::util::EnableIfAllStringAny<ImageResults, Properties...> generate(
             const std::string& positive_prompt,
             Properties&&... properties) {
         return generate(positive_prompt, ov::AnyMap{std::forward<Properties>(properties)...});
diff --git a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp
index 4acfd2ce9b..6ae6e93613 100644
--- a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp
@@ -16,6 +16,7 @@
 
 #include "openvino/genai/visibility.hpp"
 #include "openvino/genai/lora_adapter.hpp"
+#include "openvino/genai/perf_metrics.hpp"
 
 namespace ov {
 namespace genai {
@@ -89,7 +90,7 @@ class OPENVINO_GENAI_EXPORTS UNet2DConditionModel {
 
     void set_adapters(const std::optional<AdapterConfig>& adapters);
 
-    ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep);
+    ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics);
 
     bool do_classifier_free_guidance(float guidance_scale) const {
         return guidance_scale > 1.0f && m_config.time_cond_proj_dim < 0;
diff --git a/src/cpp/src/image_generation/diffusion_pipeline.hpp b/src/cpp/src/image_generation/diffusion_pipeline.hpp
index 86d8ba9009..ddf7f0ff60 100644
--- a/src/cpp/src/image_generation/diffusion_pipeline.hpp
+++ b/src/cpp/src/image_generation/diffusion_pipeline.hpp
@@ -82,13 +82,13 @@ class DiffusionPipeline {
 
     virtual std::tuple<ov::Tensor, ov::Tensor, ov::Tensor, ov::Tensor> prepare_latents(ov::Tensor initial_image, const ImageGenerationConfig& generation_config) const = 0;
 
-    virtual void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) = 0;
+    virtual void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) = 0;
 
     virtual void set_lora_adapters(std::optional<AdapterConfig> adapters) = 0;
 
-    virtual ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) = 0;
+    virtual ImageResults generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) = 0;
 
-    virtual ov::Tensor decode(const ov::Tensor latent) = 0;
+    virtual ov::Tensor decode(const ov::Tensor latent, RawPerfMetrics& raw_metrics) = 0;
 
     virtual ~DiffusionPipeline() = default;
 
diff --git a/src/cpp/src/image_generation/flux_pipeline.hpp b/src/cpp/src/image_generation/flux_pipeline.hpp
index 716ba6b61b..700a5fb040 100644
--- a/src/cpp/src/image_generation/flux_pipeline.hpp
+++ b/src/cpp/src/image_generation/flux_pipeline.hpp
@@ -254,13 +254,13 @@ class FluxPipeline : public DiffusionPipeline {
         m_transformer->compile(device, properties);
     }
 
-    void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override {
+    void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) override {
         // encode_prompt
         std::string prompt_2_str = generation_config.prompt_2 != std::nullopt ?
*generation_config.prompt_2 : positive_prompt; - m_clip_text_encoder->infer(positive_prompt, {}, false); + m_clip_text_encoder->infer(positive_prompt, {}, false, raw_metrics); ov::Tensor pooled_prompt_embeds = m_clip_text_encoder->get_output_tensor(1); - ov::Tensor prompt_embeds = m_t5_text_encoder->infer(prompt_2_str, "", false, generation_config.max_sequence_length); + ov::Tensor prompt_embeds = m_t5_text_encoder->infer(prompt_2_str, "", false, generation_config.max_sequence_length, raw_metrics); pooled_prompt_embeds = numpy_utils::repeat(pooled_prompt_embeds, generation_config.num_images_per_prompt); prompt_embeds = numpy_utils::repeat(prompt_embeds, generation_config.num_images_per_prompt); @@ -316,10 +316,15 @@ class FluxPipeline : public DiffusionPipeline { OPENVINO_THROW("LORA adapters are not implemented for FLUX pipeline yet"); } - ov::Tensor generate(const std::string& positive_prompt, + ImageResults generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) override { + ImageResults image_results; + RawPerfMetrics &raw_metrics = image_results.perf_metrics.raw_metrics; + raw_metrics.generate_durations.clear(); + raw_metrics.m_inference_durations.clear(); + const auto gen_start = std::chrono::steady_clock::now(); m_custom_generation_config = m_generation_config; m_custom_generation_config.update_generation_config(properties); @@ -340,7 +345,7 @@ class FluxPipeline : public DiffusionPipeline { check_inputs(m_custom_generation_config, initial_image); - compute_hidden_states(positive_prompt, m_custom_generation_config); + compute_hidden_states(positive_prompt, m_custom_generation_config, raw_metrics); ov::Tensor latents, processed_image, image_latent, noise; std::tie(latents, processed_image, image_latent, noise) = prepare_latents(initial_image, m_custom_generation_config); @@ -361,26 +366,32 @@ class FluxPipeline : public DiffusionPipeline { for (size_t inference_step = 0; inference_step < timesteps.size(); ++inference_step) { timestep_data[0] = timesteps[inference_step] / 1000; - ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep); + ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep, raw_metrics); auto scheduler_step_result = m_scheduler->step(noise_pred_tensor, latents, inference_step, m_custom_generation_config.generator); latents = scheduler_step_result["latent"]; if (callback && callback(inference_step, timesteps.size(), latents)) { - return ov::Tensor(ov::element::u8, {}); + image_results.image = ov::Tensor(ov::element::u8, {}); + const auto gen_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start); + raw_metrics.generate_durations.emplace_back(gen_ms); + return image_results; } } latents = unpack_latents(latents, m_custom_generation_config.height, m_custom_generation_config.width, vae_scale_factor); - return m_vae->decode(latents); + image_results.image = m_vae->decode(latents, raw_metrics); + const auto gen_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start); + raw_metrics.generate_durations.emplace_back(gen_ms); + return image_results; } - ov::Tensor decode(const ov::Tensor latent) override { + ov::Tensor decode(const ov::Tensor latent, RawPerfMetrics& raw_metrics) override { ov::Tensor unpacked_latent = unpack_latents(latent, m_custom_generation_config.height, m_custom_generation_config.width, m_vae->get_vae_scale_factor()); - return m_vae->decode(unpacked_latent); + return 
m_vae->decode(unpacked_latent, raw_metrics); } private: diff --git a/src/cpp/src/image_generation/image2image_pipeline.cpp b/src/cpp/src/image_generation/image2image_pipeline.cpp index 38ff5a0a4c..a545d11c89 100644 --- a/src/cpp/src/image_generation/image2image_pipeline.cpp +++ b/src/cpp/src/image_generation/image2image_pipeline.cpp @@ -114,13 +114,14 @@ void Image2ImagePipeline::compile(const std::string& device, const ov::AnyMap& p m_impl->compile(device, properties); } -ov::Tensor Image2ImagePipeline::generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties) { +ImageResults Image2ImagePipeline::generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties) { OPENVINO_ASSERT(initial_image, "Initial image cannot be empty when passed to Image2ImagePipeline::generate"); return m_impl->generate(positive_prompt, initial_image, {}, properties); } ov::Tensor Image2ImagePipeline::decode(const ov::Tensor latent) { - return m_impl->decode(latent); + ov::genai::RawPerfMetrics raw_metrics; + return m_impl->decode(latent, raw_metrics); } } // namespace genai diff --git a/src/cpp/src/image_generation/inpainting_pipeline.cpp b/src/cpp/src/image_generation/inpainting_pipeline.cpp index a510be0a57..4e1ac1efb7 100644 --- a/src/cpp/src/image_generation/inpainting_pipeline.cpp +++ b/src/cpp/src/image_generation/inpainting_pipeline.cpp @@ -119,14 +119,15 @@ void InpaintingPipeline::compile(const std::string& device, const ov::AnyMap& pr m_impl->compile(device, properties); } -ov::Tensor InpaintingPipeline::generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask, const ov::AnyMap& properties) { +ImageResults InpaintingPipeline::generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask, const ov::AnyMap& properties) { OPENVINO_ASSERT(initial_image, "Initial image cannot be empty when passed to InpaintingPipeline::generate"); OPENVINO_ASSERT(mask, "Mask image cannot be empty when passed to InpaintingPipeline::generate"); return m_impl->generate(positive_prompt, initial_image, mask, properties); } ov::Tensor InpaintingPipeline::decode(const ov::Tensor latent) { - return m_impl->decode(latent); + ov::genai::RawPerfMetrics raw_metrics; + return m_impl->decode(latent, raw_metrics); } } // namespace genai diff --git a/src/cpp/src/image_generation/models/autoencoder_kl.cpp b/src/cpp/src/image_generation/models/autoencoder_kl.cpp index e0d6a44189..a1b615f57d 100644 --- a/src/cpp/src/image_generation/models/autoencoder_kl.cpp +++ b/src/cpp/src/image_generation/models/autoencoder_kl.cpp @@ -225,11 +225,15 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMa return *this; } -ov::Tensor AutoencoderKL::decode(ov::Tensor latent) { +ov::Tensor AutoencoderKL::decode(ov::Tensor latent, RawPerfMetrics &raw_metrics) { OPENVINO_ASSERT(m_decoder_request, "VAE decoder model must be compiled first. 
Cannot infer non-compiled model"); m_decoder_request.set_input_tensor(latent); + const auto infer_start = std::chrono::steady_clock::now(); m_decoder_request.infer(); + const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); + raw_metrics.m_inference_durations[0] += MicroSeconds(infer_ms); + return m_decoder_request.get_output_tensor(); } diff --git a/src/cpp/src/image_generation/models/clip_text_model.cpp b/src/cpp/src/image_generation/models/clip_text_model.cpp index d2dab30bcf..839ba78840 100644 --- a/src/cpp/src/image_generation/models/clip_text_model.cpp +++ b/src/cpp/src/image_generation/models/clip_text_model.cpp @@ -110,7 +110,7 @@ void CLIPTextModel::set_adapters(const std::optional& adapters) { } } -ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance) { +ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, RawPerfMetrics& raw_metrics) { OPENVINO_ASSERT(m_request, "CLIP text encoder model must be compiled first. Cannot infer non-compiled model"); const int32_t pad_token_id = m_clip_tokenizer.get_pad_token_id(); @@ -141,8 +141,10 @@ ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string // text embeddings m_request.set_tensor("input_ids", input_ids); + const auto infer_start = std::chrono::steady_clock::now(); m_request.infer(); - + const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); + raw_metrics.m_inference_durations[0] += MicroSeconds(infer_ms); return m_request.get_output_tensor(0); } diff --git a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp index 13c7f5a442..ad5bd6db36 100644 --- a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp +++ b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp @@ -101,7 +101,7 @@ void CLIPTextModelWithProjection::set_adapters(const std::optional& adap } } -ov::Tensor UNet2DConditionModel::infer(ov::Tensor sample, ov::Tensor timestep) { +ov::Tensor UNet2DConditionModel::infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics) { OPENVINO_ASSERT(m_impl, "UNet model must be compiled first. Cannot infer non-compiled model"); - return m_impl->infer(sample, timestep); + return m_impl->infer(sample, timestep, raw_metrics); } } // namespace genai diff --git a/src/cpp/src/image_generation/models/unet_inference.hpp b/src/cpp/src/image_generation/models/unet_inference.hpp index ae928aac30..a210282d0f 100644 --- a/src/cpp/src/image_generation/models/unet_inference.hpp +++ b/src/cpp/src/image_generation/models/unet_inference.hpp @@ -14,7 +14,7 @@ class UNet2DConditionModel::UNetInference { virtual void compile(std::shared_ptr model, const std::string& device, const ov::AnyMap& properties) = 0; virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) = 0; virtual void set_adapters(AdapterController& adapter_controller, const AdapterConfig& adapters) = 0; - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep) = 0; + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics) = 0; // utility function to resize model given optional dimensions. 
static void reshape(std::shared_ptr model, diff --git a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp index c8658a1c1a..9d055de978 100644 --- a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp +++ b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp @@ -35,14 +35,16 @@ class UNet2DConditionModel::UNetInferenceDynamic : public UNet2DConditionModel:: adapter_controller.apply(m_request, adapters); } - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep) override + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics) override { OPENVINO_ASSERT(m_request, "UNet model must be compiled first. Cannot infer non-compiled model"); m_request.set_tensor("sample", sample); m_request.set_tensor("timestep", timestep); - + const auto infer_start = std::chrono::steady_clock::now(); m_request.infer(); + const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); + raw_metrics.m_inference_durations[0] += MicroSeconds(infer_ms); return m_request.get_output_tensor(); } diff --git a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp index fcde31e9ee..f575435742 100644 --- a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp +++ b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp @@ -88,7 +88,7 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel } } - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep) override { + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics) override { OPENVINO_ASSERT(m_native_batch_size && m_native_batch_size == m_requests.size(), "UNet model must be compiled first"); @@ -105,6 +105,8 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel auto bs1_sample_shape = sample.get_shape(); bs1_sample_shape[0] = 1; + const auto infer_start = std::chrono::steady_clock::now(); + for (int i = 0; i < m_native_batch_size; i++) { m_requests[i].set_tensor("timestep", timestep); @@ -132,6 +134,8 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel // wait for infer to complete. m_requests[i].wait(); } + const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); + raw_metrics.m_inference_durations[0] += MicroSeconds(infer_ms); return out_sample; } diff --git a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp index 18a3e0346f..3326471feb 100644 --- a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp @@ -259,7 +259,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { m_vae->compile(device, properties); } - void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override { + void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) override { const auto& transformer_config = m_transformer->get_config(); const size_t batch_size_multiplier = do_classifier_free_guidance(generation_config.guidance_scale) ? 
2 : 1; // Transformer accepts 2x batch in case of CFG @@ -275,14 +275,22 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { std::string negative_prompt_3_str = generation_config.negative_prompt_3 != std::nullopt ? *generation_config.negative_prompt_3 : negative_prompt_1_str; // text_encoder_1_output - stores positive and negative pooled_prompt_embeds - ov::Tensor text_encoder_1_output = m_clip_text_encoder_1->infer(positive_prompt, negative_prompt_1_str, do_classifier_free_guidance(generation_config.guidance_scale)); + ov::Tensor text_encoder_1_output = + m_clip_text_encoder_1->infer(positive_prompt, + negative_prompt_1_str, + do_classifier_free_guidance(generation_config.guidance_scale), + raw_metrics); // text_encoder_1_hidden_state - stores positive and negative prompt_embeds size_t idx_hidden_state_1 = m_clip_text_encoder_1->get_config().num_hidden_layers + 1; ov::Tensor text_encoder_1_hidden_state = m_clip_text_encoder_1->get_output_tensor(idx_hidden_state_1); // text_encoder_2_output - stores positive and negative pooled_prompt_2_embeds - ov::Tensor text_encoder_2_output = m_clip_text_encoder_2->infer(prompt_2_str, negative_prompt_2_str, do_classifier_free_guidance(generation_config.guidance_scale)); + ov::Tensor text_encoder_2_output = + m_clip_text_encoder_2->infer(prompt_2_str, + negative_prompt_2_str, + do_classifier_free_guidance(generation_config.guidance_scale), + raw_metrics); // text_encoder_2_hidden_state - stores positive and negative prompt_2_embeds size_t idx_hidden_state_2 = m_clip_text_encoder_2->get_config().num_hidden_layers + 1; @@ -293,7 +301,8 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { text_encoder_3_output = m_t5_text_encoder->infer(prompt_3_str, negative_prompt_3_str, do_classifier_free_guidance(generation_config.guidance_scale), - generation_config.max_sequence_length); + generation_config.max_sequence_length, + raw_metrics); } else { ov::Shape t5_prompt_embed_shape = {generation_config.num_images_per_prompt, m_clip_text_encoder_1->get_config().max_position_embeddings, @@ -428,10 +437,15 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { OPENVINO_THROW("LORA adapters are not implemented for Stable Diffusion 3 yet"); } - ov::Tensor generate(const std::string& positive_prompt, + ImageResults generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) override { + ImageResults image_results; + RawPerfMetrics& raw_metrics = image_results.perf_metrics.raw_metrics; + raw_metrics.generate_durations.clear(); + raw_metrics.m_inference_durations.clear(); + const auto gen_start = std::chrono::steady_clock::now(); ImageGenerationConfig generation_config = m_generation_config; generation_config.update_generation_config(properties); @@ -463,7 +477,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { std::vector timesteps = m_scheduler->get_float_timesteps(); // 4 compute text encoders and set hidden states - compute_hidden_states(positive_prompt, generation_config); + compute_hidden_states(positive_prompt, generation_config, raw_metrics); // 5. 
Prepare latent variables
         ov::Tensor latent, processed_image, image_latent, noise;
@@ -487,7 +501,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
             }
 
             ov::Tensor timestep(ov::element::f32, {1}, &timesteps[inference_step]);
-            ov::Tensor noise_pred_tensor = m_transformer->infer(latent_cfg, timestep);
+            ov::Tensor noise_pred_tensor = m_transformer->infer(latent_cfg, timestep, raw_metrics);
 
             ov::Shape noise_pred_shape = noise_pred_tensor.get_shape();
             noise_pred_shape[0] /= batch_size_multiplier;
@@ -512,15 +526,21 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
             latent = scheduler_step_result["latent"];
 
             if (callback && callback(inference_step, timesteps.size(), latent)) {
-                return ov::Tensor(ov::element::u8, {});
+                image_results.image = ov::Tensor(ov::element::u8, {});
+                const auto gen_end = std::chrono::steady_clock::now();
+                raw_metrics.generate_durations.emplace_back(PerfMetrics::get_microsec(gen_end - gen_start));
+                return image_results;
             }
         }
 
-        return decode(latent);
+        image_results.image = decode(latent, raw_metrics);
+        const auto gen_end = std::chrono::steady_clock::now();
+        raw_metrics.generate_durations.emplace_back(PerfMetrics::get_microsec(gen_end - gen_start));
+        return image_results;
     }
 
-    ov::Tensor decode(const ov::Tensor latent) override {
-        return m_vae->decode(latent);
+    ov::Tensor decode(const ov::Tensor latent, RawPerfMetrics& raw_metrics) override {
+        return m_vae->decode(latent, raw_metrics);
     }
 
 private:
diff --git a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
index 4afbd3ac78..4e842d1915 100644
--- a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
+++ b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
@@ -176,13 +176,13 @@ class StableDiffusionPipeline : public DiffusionPipeline {
         m_vae->compile(device, properties);
     }
 
-    void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override {
+    void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) override {
         const auto& unet_config = m_unet->get_config();
         const size_t batch_size_multiplier = m_unet->do_classifier_free_guidance(generation_config.guidance_scale) ? 2 : 1;  // Unet accepts 2x batch in case of CFG
 
         std::string negative_prompt = generation_config.negative_prompt != std::nullopt ? *generation_config.negative_prompt : std::string{};
         ov::Tensor encoder_hidden_states = m_clip_text_encoder->infer(positive_prompt, negative_prompt,
-                                                                      batch_size_multiplier > 1);
+                                                                      batch_size_multiplier > 1, raw_metrics);
 
         // replicate encoder hidden state to UNet model
         if (generation_config.num_images_per_prompt == 1) {
@@ -302,10 +302,15 @@ class StableDiffusionPipeline : public DiffusionPipeline {
         m_unet->set_adapters(adapters);
     }
 
-    ov::Tensor generate(const std::string& positive_prompt,
+    ImageResults generate(const std::string& positive_prompt,
                         ov::Tensor initial_image,
                         ov::Tensor mask_image,
                         const ov::AnyMap& properties) override {
+        ImageResults image_results;
+        RawPerfMetrics& raw_metrics = image_results.perf_metrics.raw_metrics;
+        raw_metrics.generate_durations.clear();
+        raw_metrics.m_inference_durations.clear();
+        const auto gen_start = std::chrono::steady_clock::now();
         using namespace numpy_utils;
         ImageGenerationConfig generation_config = m_generation_config;
         generation_config.update_generation_config(properties);
@@ -342,7 +347,7 @@ class StableDiffusionPipeline : public DiffusionPipeline {
         std::vector<int64_t> timesteps = m_scheduler->get_timesteps();
 
         // compute text encoders and set hidden states
-        compute_hidden_states(positive_prompt, generation_config);
+        compute_hidden_states(positive_prompt, generation_config, raw_metrics);
 
         // preparate initial / image latents
         ov::Tensor latent, processed_image, image_latent, noise;
@@ -371,7 +376,7 @@ class StableDiffusionPipeline : public DiffusionPipeline {
             ov::Tensor latent_model_input = is_inpainting_model() ? numpy_utils::concat(numpy_utils::concat(latent_cfg, mask, 1), masked_image_latent, 1) : latent_cfg;
 
             ov::Tensor timestep(ov::element::i64, {1}, &timesteps[inference_step]);
-            ov::Tensor noise_pred_tensor = m_unet->infer(latent_model_input, timestep);
+            ov::Tensor noise_pred_tensor = m_unet->infer(latent_model_input, timestep, raw_metrics);
 
             ov::Shape noise_pred_shape = noise_pred_tensor.get_shape();
             noise_pred_shape[0] /= batch_size_multiplier;
@@ -405,15 +410,20 @@ class StableDiffusionPipeline : public DiffusionPipeline {
 
             denoised = it != scheduler_step_result.end() ?
it->second : latent; if (callback && callback(inference_step, timesteps.size(), denoised)) { - return ov::Tensor(ov::element::u8, {}); + image_results.image = ov::Tensor(ov::element::u8, {}); + const auto gen_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start); + raw_metrics.generate_durations.emplace_back(gen_ms); + return image_results; } } - - return decode(denoised); + image_results.image = decode(denoised, raw_metrics); + const auto gen_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start); + raw_metrics.generate_durations.emplace_back(gen_ms); + return image_results; } - ov::Tensor decode(const ov::Tensor latent) override { - return m_vae->decode(latent); + ov::Tensor decode(const ov::Tensor latent, RawPerfMetrics& raw_metrics) override { + return m_vae->decode(latent, raw_metrics); } protected: diff --git a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp index 15f15219c2..8fe68f0ed7 100644 --- a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp @@ -144,7 +144,7 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { m_vae->compile(device, properties); } - void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override { + void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) override { const auto& unet_config = m_unet->get_config(); const size_t batch_size_multiplier = m_unet->do_classifier_free_guidance(generation_config.guidance_scale) ? 2 : 1; // Unet accepts 2x batch in case of CFG @@ -177,8 +177,11 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { ov::Tensor encoder_hidden_states(ov::element::f32, {}), add_text_embeds(ov::element::f32, {}); if (compute_negative_prompt) { - add_text_embeds = m_clip_text_encoder_with_projection->infer(positive_prompt, negative_prompt_1_str, batch_size_multiplier > 1); - m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, batch_size_multiplier > 1); + add_text_embeds = m_clip_text_encoder_with_projection->infer(positive_prompt, + negative_prompt_1_str, + batch_size_multiplier > 1, + raw_metrics); + m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, batch_size_multiplier > 1, raw_metrics); // prompt_embeds = prompt_embeds.hidden_states[-2] ov::Tensor encoder_hidden_states_1 = m_clip_text_encoder->get_output_tensor(idx_hidden_state_1); @@ -186,8 +189,9 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { encoder_hidden_states = numpy_utils::concat(encoder_hidden_states_1, encoder_hidden_states_2, -1); } else { - ov::Tensor add_text_embeds_positive = m_clip_text_encoder_with_projection->infer(positive_prompt, negative_prompt_1_str, false); - m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, false); + ov::Tensor add_text_embeds_positive = + m_clip_text_encoder_with_projection->infer(positive_prompt, negative_prompt_1_str, false, raw_metrics); + m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, false, raw_metrics); ov::Tensor encoder_hidden_states_1_positive = m_clip_text_encoder->get_output_tensor(idx_hidden_state_1); ov::Tensor encoder_hidden_states_2_positive = m_clip_text_encoder_with_projection->get_output_tensor(idx_hidden_state_2); diff --git 
a/src/cpp/src/image_generation/text2image_pipeline.cpp b/src/cpp/src/image_generation/text2image_pipeline.cpp index 56b02a2e10..564f5a7fc1 100644 --- a/src/cpp/src/image_generation/text2image_pipeline.cpp +++ b/src/cpp/src/image_generation/text2image_pipeline.cpp @@ -185,12 +185,13 @@ void Text2ImagePipeline::compile(const std::string& device, const ov::AnyMap& pr m_impl->compile(device, properties); } -ov::Tensor Text2ImagePipeline::generate(const std::string& positive_prompt, const ov::AnyMap& properties) { +ImageResults Text2ImagePipeline::generate(const std::string& positive_prompt, const ov::AnyMap& properties) { return m_impl->generate(positive_prompt, {}, {}, properties); } ov::Tensor Text2ImagePipeline::decode(const ov::Tensor latent) { - return m_impl->decode(latent); + RawPerfMetrics raw_metrics; + return m_impl->decode(latent, raw_metrics); } } // namespace genai diff --git a/src/python/py_image_generation_models.cpp b/src/python/py_image_generation_models.cpp index 75be28233f..b7e71092cd 100644 --- a/src/python/py_image_generation_models.cpp +++ b/src/python/py_image_generation_models.cpp @@ -70,7 +70,7 @@ void init_clip_text_model(py::module_& m) { clip_text_model.def("get_config", &ov::genai::CLIPTextModel::get_config) .def("reshape", &ov::genai::CLIPTextModel::reshape, py::arg("batch_size")) .def("set_adapters", &ov::genai::CLIPTextModel::set_adapters, py::arg("adapters")) - .def("infer", &ov::genai::CLIPTextModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance")) + .def("infer", &ov::genai::CLIPTextModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("raw_metrics")) .def("get_output_tensor", &ov::genai::CLIPTextModel::get_output_tensor, py::arg("idx")) .def( "compile", @@ -133,7 +133,7 @@ void init_clip_text_model_with_projection(py::module_& m) { .def_readwrite("num_hidden_layers", &ov::genai::CLIPTextModelWithProjection::Config::num_hidden_layers); clip_text_model_with_projection.def("reshape", &ov::genai::CLIPTextModelWithProjection::reshape, py::arg("batch_size")) - .def("infer", &ov::genai::CLIPTextModelWithProjection::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance")) + .def("infer", &ov::genai::CLIPTextModelWithProjection::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("raw_metrics")) .def("get_config", &ov::genai::CLIPTextModelWithProjection::get_config) .def("get_output_tensor", &ov::genai::CLIPTextModelWithProjection::get_output_tensor, py::arg("idx")) .def("set_adapters", &ov::genai::CLIPTextModelWithProjection::set_adapters, py::arg("adapters")) @@ -189,7 +189,7 @@ void init_t5_encoder_model(py::module_& m) { model (T5EncoderModel): T5EncoderModel model )") .def("reshape", &ov::genai::T5EncoderModel::reshape, py::arg("batch_size"), py::arg("max_sequence_length")) - .def("infer", &ov::genai::T5EncoderModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("max_sequence_length")) + .def("infer", &ov::genai::T5EncoderModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("max_sequence_length"), py::arg("raw_metrics")) .def("get_output_tensor", &ov::genai::T5EncoderModel::get_output_tensor, py::arg("idx")) // .def("set_adapters", &ov::genai::T5EncoderModel::set_adapters, py::arg("adapters")) .def( @@ -254,7 +254,7 @@ void init_unet2d_condition_model(py::module_& m) { 
unet2d_condition_model.def("get_config", &ov::genai::UNet2DConditionModel::get_config) .def("reshape", &ov::genai::UNet2DConditionModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length")) .def("set_adapters", &ov::genai::UNet2DConditionModel::set_adapters, py::arg("adapters")) - .def("infer", &ov::genai::UNet2DConditionModel::infer, py::arg("sample"), py::arg("timestep")) + .def("infer", &ov::genai::UNet2DConditionModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("raw_metrics")) .def("set_hidden_states", &ov::genai::UNet2DConditionModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states")) .def("do_classifier_free_guidance", &ov::genai::UNet2DConditionModel::do_classifier_free_guidance, py::arg("guidance_scale")) .def( @@ -320,7 +320,7 @@ void init_sd3_transformer_2d_model(py::module_& m) { sd3_transformer_2d_model.def("get_config", &ov::genai::SD3Transformer2DModel::get_config) .def("reshape", &ov::genai::SD3Transformer2DModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length")) // .def("set_adapters", &ov::genai::SD3Transformer2DModel::set_adapters, py::arg("adapters")) - .def("infer", &ov::genai::SD3Transformer2DModel::infer, py::arg("sample"), py::arg("timestep")) + .def("infer", &ov::genai::SD3Transformer2DModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("raw_metrics")) .def("set_hidden_states", &ov::genai::SD3Transformer2DModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states")) .def( "compile", @@ -383,7 +383,7 @@ void init_flux_transformer_2d_model(py::module_& m) { flux_transformer_2d_model.def("get_config", &ov::genai::FluxTransformer2DModel::get_config) .def("reshape", &ov::genai::FluxTransformer2DModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length")) // .def("set_adapters", &ov::genai::FluxTransformer2DModel::set_adapters, py::arg("adapters")) - .def("infer", &ov::genai::FluxTransformer2DModel::infer, py::arg("sample"), py::arg("timestep")) + .def("infer", &ov::genai::FluxTransformer2DModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("raw_metrics")) .def("set_hidden_states", &ov::genai::FluxTransformer2DModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states")) .def( "compile", @@ -492,7 +492,7 @@ void init_autoencoder_kl(py::module_& m) { device (str): Device to run the model on (e.g., CPU, GPU). kwargs: Device properties. 
)") - .def("decode", &ov::genai::AutoencoderKL::decode, py::arg("latent")) + .def("decode", &ov::genai::AutoencoderKL::decode, py::arg("latent"), py::arg("raw_metrics")) .def("encode", &ov::genai::AutoencoderKL::encode, py::arg("image"), py::arg("generator")) .def("get_config", &ov::genai::AutoencoderKL::get_config) .def("get_vae_scale_factor", &ov::genai::AutoencoderKL::get_vae_scale_factor); diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp index 55be1708c1..888d8fa13f 100644 --- a/src/python/py_image_generation_pipelines.cpp +++ b/src/python/py_image_generation_pipelines.cpp @@ -196,7 +196,7 @@ void init_image_generation_pipelines(py::module_& m) { [](ov::genai::Text2ImagePipeline& pipe, const std::string& prompt, const py::kwargs& kwargs - ) -> py::typing::Union { + ) -> py::typing::Union { ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs); return py::cast(pipe.generate(prompt, params)); }, @@ -258,7 +258,7 @@ void init_image_generation_pipelines(py::module_& m) { const std::string& prompt, const ov::Tensor& image, const py::kwargs& kwargs - ) -> py::typing::Union { + ) -> py::typing::Union { ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs); return py::cast(pipe.generate(prompt, image, params)); }, @@ -322,7 +322,7 @@ void init_image_generation_pipelines(py::module_& m) { const ov::Tensor& image, const ov::Tensor& mask_image, const py::kwargs& kwargs - ) -> py::typing::Union { + ) -> py::typing::Union { ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs); return py::cast(pipe.generate(prompt, image, mask_image, params)); }, From 778510ceec55252c17312fc5dd9fc719e678a4f6 Mon Sep 17 00:00:00 2001 From: xufang Date: Wed, 18 Dec 2024 10:20:57 +0800 Subject: [PATCH 02/19] modify comments --- .../openvino/genai/image_generation/text2image_pipeline.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp b/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp index 1be9d0e6dc..c07d214ec7 100644 --- a/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp +++ b/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp @@ -203,7 +203,7 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline { * Generates image(s) based on prompt and other image generation parameters * @param positive_prompt Prompt to generate image(s) from * @param properties Image generation parameters specified as properties. Values in 'properties' override default value for generation parameters. 
-     * @returns A tensor which has dimensions [num_images_per_prompt, height, width, 3]
+     * @returns ImageResults which includes a tensor with dimensions [num_images_per_prompt, height, width, 3]
      */
     ImageResults generate(const std::string& positive_prompt, const ov::AnyMap& properties = {});
 

From ce3d3af7d2ee4f733ab272ed2263f05bcd989aae Mon Sep 17 00:00:00 2001
From: xufang
Date: Wed, 18 Dec 2024 19:48:55 +0800
Subject: [PATCH 03/19] reset inference duration

---
 src/cpp/src/image_generation/flux_pipeline.hpp                | 2 +-
 src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp | 2 +-
 src/cpp/src/image_generation/stable_diffusion_pipeline.hpp   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/cpp/src/image_generation/flux_pipeline.hpp b/src/cpp/src/image_generation/flux_pipeline.hpp
index 021f07c2a7..7239ea705f 100644
--- a/src/cpp/src/image_generation/flux_pipeline.hpp
+++ b/src/cpp/src/image_generation/flux_pipeline.hpp
@@ -322,7 +322,7 @@ class FluxPipeline : public DiffusionPipeline {
         ImageResults image_results;
         RawPerfMetrics &raw_metrics = image_results.perf_metrics.raw_metrics;
         raw_metrics.generate_durations.clear();
-        raw_metrics.m_inference_durations.clear();
+        raw_metrics.m_inference_durations = {{ MicroSeconds(0.0f) }};
         const auto gen_start = std::chrono::steady_clock::now();
         m_custom_generation_config = m_generation_config;
         m_custom_generation_config.update_generation_config(properties);
diff --git a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp
index fe4ece846e..013a5f7478 100644
--- a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp
+++ b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp
@@ -443,7 +443,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
         ImageResults image_results;
         RawPerfMetrics& raw_metrics = image_results.perf_metrics.raw_metrics;
         raw_metrics.generate_durations.clear();
-        raw_metrics.m_inference_durations.clear();
+        raw_metrics.m_inference_durations = {{ MicroSeconds(0.0f) }};
         const auto gen_start = std::chrono::steady_clock::now();
         ImageGenerationConfig generation_config = m_generation_config;
         generation_config.update_generation_config(properties);
diff --git a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
index 5c1ffb908b..067dc12a61 100644
--- a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
+++ b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
@@ -310,7 +310,7 @@ class StableDiffusionPipeline : public DiffusionPipeline {
         ImageResults image_results;
         RawPerfMetrics& raw_metrics = image_results.perf_metrics.raw_metrics;
         raw_metrics.generate_durations.clear();
-        raw_metrics.m_inference_durations.clear();
+        raw_metrics.m_inference_durations = {{ MicroSeconds(0.0f) }};
         const auto gen_start = std::chrono::steady_clock::now();
         using namespace numpy_utils;
         ImageGenerationConfig generation_config = m_generation_config;
         generation_config.update_generation_config(properties);

From fb7ea85e9df6c39c8c3d0e3c7eff417cb6ce5042 Mon Sep 17 00:00:00 2001
From: xufang
Date: Mon, 30 Dec 2024 17:59:53 +0800
Subject: [PATCH 04/19] add get_performance_metrics() method

---
 .../heterogeneous_stable_diffusion.cpp        |  11 +-
 samples/cpp/image_generation/image2image.cpp  |  10 +-
 samples/cpp/image_generation/inpainting.cpp   |  10 +-
 .../cpp/image_generation/lora_text2image.cpp  |  20 ++--
 samples/cpp/image_generation/text2image.cpp   |  10 +-
 .../genai/image_generation/autoencoder_kl.hpp |   2 +-
 .../image_generation/clip_text_model.hpp      |   2 +-
 .../clip_text_model_with_projection.hpp       |   2 +-
 .../flux_transformer_2d_model.hpp             |   2 +-
 .../image_generation/generation_config.hpp    |   6 --
 .../image_generation/image2image_pipeline.hpp |   9 +-
 .../image_generation_perf_metrics.hpp         |  44 ++++++++
 .../image_generation/inpainting_pipeline.hpp  |  10 +-
 .../sd3_transformer_2d_model.hpp              |   2 +-
 .../image_generation/t5_encoder_model.hpp     |   2 +-
 .../image_generation/text2image_pipeline.hpp  |   8 +-
 .../unet2d_condition_model.hpp                |   2 +-
 .../image_generation/diffusion_pipeline.hpp   |   8 +-
 .../src/image_generation/flux_pipeline.hpp    |  51 +++++----
 .../image_generation/image2image_pipeline.cpp |  10 +-
 .../image_generation_perf_metrics.cpp         | 101 ++++++++++++++++++
 .../image_generation/inpainting_pipeline.cpp  |  10 +-
 .../models/autoencoder_kl.cpp                 |   4 +-
 .../models/clip_text_model.cpp                |   7 +-
 .../clip_text_model_with_projection.cpp       |   7 +-
 .../models/flux_transformer_2d_model.cpp      |   6 +-
 .../models/sd3_transformer_2d_model.cpp       |   4 +-
 .../models/t5_encoder_model.cpp               |   8 +-
 .../models/unet2d_condition_model.cpp         |   4 +-
 .../models/unet_inference.hpp                 |   2 +-
 .../models/unet_inference_dynamic.hpp         |   4 +-
 .../models/unet_inference_static_bs1.hpp      |   4 +-
 .../stable_diffusion_3_pipeline.hpp           |  54 ++++++----
 .../stable_diffusion_pipeline.hpp             |  49 +++++----
 .../stable_diffusion_xl_pipeline.hpp          |  15 ++-
 .../image_generation/text2image_pipeline.cpp  |  10 +-
 src/python/py_image_generation_models.cpp     |  14 +--
 src/python/py_image_generation_pipelines.cpp  |   6 +-
 38 files changed, 376 insertions(+), 154 deletions(-)
 create mode 100644 src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp
 create mode 100644 src/cpp/src/image_generation/image_generation_perf_metrics.cpp

diff --git a/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp b/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp
index d089804ba7..9a29235fa0 100644
--- a/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp
+++ b/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp
@@ -98,16 +98,17 @@ int32_t main(int32_t argc, char* argv[]) try {
     for (int imagei = 0; imagei < number_of_images_to_generate; imagei++) {
         std::cout << "Generating image " << imagei << std::endl;
 
-        auto image_results = pipe.generate(prompt,
+        ov::Tensor image = pipe.generate(prompt,
                                          ov::genai::width(width),
                                          ov::genai::height(height),
                                          ov::genai::guidance_scale(guidance_scale),
                                          ov::genai::num_inference_steps(number_of_inference_steps_per_image));
 
-        imwrite("image_" + std::to_string(imagei) + ".bmp", image_results.image, true);
-
-        std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl;
-        std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl;
+        imwrite("image_" + std::to_string(imagei) + ".bmp", image, true);
+        auto perf_metrics = pipe.get_performance_metrics();
+        std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl;
+        std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl;
+        std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl;
     }
 
     return EXIT_SUCCESS;
diff --git a/samples/cpp/image_generation/image2image.cpp b/samples/cpp/image_generation/image2image.cpp
index 8aa31acebc..bab0954361 100644
--- a/samples/cpp/image_generation/image2image.cpp
+++ b/samples/cpp/image_generation/image2image.cpp
@@ -15,14 +15,16 @@ int32_t main(int32_t argc, char* argv[]) try {
     ov::Tensor image = utils::load_image(image_path);
 
     ov::genai::Image2ImagePipeline pipe(models_path, device);
-    auto image_results = pipe.generate(prompt, image,
+    ov::Tensor generated_image = pipe.generate(prompt, image,
         // controls how initial image is noised after being converted to latent space. `1` means initial image is fully noised
         ov::genai::strength(0.8f));
 
     // writes `num_images_per_prompt` images by pattern name
-    imwrite("image_%d.bmp", image_results.image, true);
-    std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl;
-    std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl;
+    imwrite("image_%d.bmp", generated_image, true);
+    auto perf_metrics = pipe.get_performance_metrics();
+    std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl;
+    std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl;
+    std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl;
 
     return EXIT_SUCCESS;
 } catch (const std::exception& error) {
diff --git a/samples/cpp/image_generation/inpainting.cpp b/samples/cpp/image_generation/inpainting.cpp
index f79c04b8a1..42d4ccc0d4 100644
--- a/samples/cpp/image_generation/inpainting.cpp
+++ b/samples/cpp/image_generation/inpainting.cpp
@@ -16,13 +16,15 @@ int32_t main(int32_t argc, char* argv[]) try {
     ov::Tensor mask_image = utils::load_image(mask_image_path);
 
     ov::genai::InpaintingPipeline pipe(models_path, device);
-    auto image_results = pipe.generate(prompt, image, mask_image);
+    ov::Tensor generated_image = pipe.generate(prompt, image, mask_image);
 
     // writes `num_images_per_prompt` images by pattern name
-    imwrite("image_%d.bmp", image_results.image, true);
+    imwrite("image_%d.bmp", generated_image, true);
 
-    std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl;
-    std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl;
+    auto perf_metrics = pipe.get_performance_metrics();
+    std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl;
+    std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl;
+    std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl;
 
     return EXIT_SUCCESS;
 } catch (const std::exception& error) {
diff --git a/samples/cpp/image_generation/lora_text2image.cpp b/samples/cpp/image_generation/lora_text2image.cpp
index 1a6e422c00..d2f60af613 100644
--- a/samples/cpp/image_generation/lora_text2image.cpp
+++ b/samples/cpp/image_generation/lora_text2image.cpp
@@ -23,25 +23,29 @@ int32_t main(int32_t argc, char* argv[]) try {
     ov::genai::Text2ImagePipeline pipe(models_path, device, ov::genai::adapters(adapter_config));
 
     std::cout << "Generating image with LoRA adapters applied, resulting image will be in lora.bmp\n";
-    auto image_results = pipe.generate(prompt,
+    ov::Tensor image = pipe.generate(prompt,
                                      ov::genai::width(512),
                                      ov::genai::height(896),
                                      ov::genai::num_inference_steps(20),
                                      ov::genai::rng_seed(42));
-    imwrite("lora.bmp", image_results.image, true);
-    std::cout << "pipeline generate duration ms:" <<
image_results.perf_metrics.get_generate_duration().mean << std::endl; - std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl; + imwrite("lora.bmp", image, true); + auto perf_metrics = pipe.get_perfomance_metrics(); + std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl; + std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl; + std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl; std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; - image_results = pipe.generate(prompt, + image = pipe.generate(prompt, ov::genai::adapters(), // passing adapters in generate overrides adapters set in the constructor; adapters() means no adapters ov::genai::width(512), ov::genai::height(896), ov::genai::num_inference_steps(20), ov::genai::rng_seed(42)); - imwrite("baseline.bmp", image_results.image, true); - std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl; - std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl; + imwrite("baseline.bmp", image, true); + perf_metrics = pipe.get_perfomance_metrics(); + std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl; + std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl; + std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl; return EXIT_SUCCESS; } catch (const std::exception& error) { diff --git a/samples/cpp/image_generation/text2image.cpp b/samples/cpp/image_generation/text2image.cpp index 84ddfa9c26..d6870ea7fe 100644 --- a/samples/cpp/image_generation/text2image.cpp +++ b/samples/cpp/image_generation/text2image.cpp @@ -12,17 +12,19 @@ int32_t main(int32_t argc, char* argv[]) try { const std::string device = "CPU"; // GPU can be used as well ov::genai::Text2ImagePipeline pipe(models_path, device); - auto image_results = pipe.generate(prompt, + ov::Tensor image = pipe.generate(prompt, ov::genai::width(512), ov::genai::height(512), ov::genai::num_inference_steps(20), ov::genai::num_images_per_prompt(1)); // writes `num_images_per_prompt` images by pattern name - imwrite("image_%d.bmp", image_results.image, true); + imwrite("image_%d.bmp", image, true); - std::cout << "pipeline generate duration ms:" << image_results.perf_metrics.get_generate_duration().mean << std::endl; - std::cout << "pipeline inference duration ms:" << image_results.perf_metrics.get_inference_duration().mean << std::endl; + auto perf_metrics = pipe.get_perfomance_metrics(); + std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl; + std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl; + std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl; return EXIT_SUCCESS; } catch (const std::exception& error) { diff --git a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp index 04a3c17762..211d6cca0a 100644 --- a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp +++ 
b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp @@ -127,7 +127,7 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL { return compile(device, ov::AnyMap{std::forward(properties)...}); } - ov::Tensor decode(ov::Tensor latent, RawPerfMetrics &raw_metrics); + ov::Tensor decode(ov::Tensor latent, MicroSeconds& infer_duration); ov::Tensor encode(ov::Tensor image, std::shared_ptr generator); diff --git a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp index 9e14bbcaaa..835470b2b8 100644 --- a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp @@ -85,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModel { void set_adapters(const std::optional& adapters); - ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, RawPerfMetrics& raw_metrics); + ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, MicroSeconds& infer_duration); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp b/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp index a7a77a49dc..d109823751 100644 --- a/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp +++ b/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp @@ -85,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModelWithProjection { void set_adapters(const std::optional& adapters); - ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, RawPerfMetrics& raw_metrics); + ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, MicroSeconds& infer_duration); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp b/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp index 7339244420..27053b876c 100644 --- a/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp @@ -76,7 +76,7 @@ class OPENVINO_GENAI_EXPORTS FluxTransformer2DModel { void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states); - ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, RawPerfMetrics& raw_metrics); + ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, MicroSeconds& infer_duration); private: Config m_config; diff --git a/src/cpp/include/openvino/genai/image_generation/generation_config.hpp b/src/cpp/include/openvino/genai/image_generation/generation_config.hpp index f7d7daad42..bc56c3a5f8 100644 --- a/src/cpp/include/openvino/genai/image_generation/generation_config.hpp +++ b/src/cpp/include/openvino/genai/image_generation/generation_config.hpp @@ -252,11 +252,5 @@ static constexpr ov::Property> OPENVINO_GENAI_EXPORTS std::pair generation_config(const ImageGenerationConfig& generation_config); -class ImageResults { -public: - ov::Tensor image; - PerfMetrics perf_metrics; -}; - } // namespace genai } // namespace ov diff --git a/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp 
b/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp
index 8b0c94cc8a..995cb9be49 100644
--- a/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/image2image_pipeline.hpp
@@ -72,13 +72,13 @@ class OPENVINO_GENAI_EXPORTS Image2ImagePipeline {
      * @param positive_prompt Prompt to generate image(s) from
      * @param initial_image RGB/BGR image of [1, height, width, 3] shape used to initialize latent image
      * @param properties Image generation parameters specified as properties. Values in 'properties' override default value for generation parameters.
-     * @returns ImageResults includes a tensor which has dimensions [num_images_per_prompt, height, width, 3]
+     * @returns A tensor which has dimensions [num_images_per_prompt, height, width, 3]
      * @note Output image size is the same as initial image size, but rounded down to be divisible by VAE scale factor (usually, 8)
      */
-    ImageResults generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties = {});
+    ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties = {});

     template <typename... Properties>
-    ov::util::EnableIfAllStringAny<ImageResults, Properties...> generate(
+    ov::util::EnableIfAllStringAny<ov::Tensor, Properties...> generate(
         const std::string& positive_prompt,
         ov::Tensor initial_image,
         Properties&&... properties) {
@@ -87,8 +87,11 @@ class OPENVINO_GENAI_EXPORTS Image2ImagePipeline {

     ov::Tensor decode(const ov::Tensor latent);

+    ImageGenerationPerfMetrics get_perfomance_metrics();
+
 private:
     std::shared_ptr<DiffusionPipeline> m_impl;
+    ImageGenerationPerfMetrics m_perf_metrics;

     explicit Image2ImagePipeline(const std::shared_ptr<DiffusionPipeline>& impl);

diff --git a/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp b/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp
new file mode 100644
index 0000000000..3d2ec5db69
--- /dev/null
+++ b/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp
@@ -0,0 +1,44 @@
+#pragma once
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "openvino/genai/visibility.hpp"
+#include "openvino/genai/perf_metrics.hpp"
+
+namespace ov::genai {
+
+struct OPENVINO_GENAI_EXPORTS RawImageGenerationPerfMetrics {
+    std::vector<MicroSeconds> unet_inference_durations; // unet durations for each step
+    std::vector<MicroSeconds> transformer_inference_durations; // transformer durations for each step
+    std::vector<MicroSeconds> iteration_durations; // durations of each step
+};
+
+struct OPENVINO_GENAI_EXPORTS ImageGenerationPerfMetrics {
+    float load_time; // model load time (includes reshape & read_model time)
+    float generate_duration; // duration of method generate(...)
+
+    MeanStdPair iteration_duration; // Mean-Std time of one generation iteration
+    std::map<std::string, float> encoder_inference_duration; // inference durations for each encoder
+    MeanStdPair unet_inference_duration; // inference duration for unet model, should be filled with zeros if we don't have unet
+    MeanStdPair transformer_inference_duration; // inference duration for transformer model, should be filled with zeros if we don't have transformer
+    float vae_encoder_inference_duration; // inference duration of vae_encoder model, should be filled with zeros if we don't use it
+    float vae_decoder_inference_duration; // inference duration of vae_decoder model
+
+    bool m_evaluated = false;
+
+    RawImageGenerationPerfMetrics raw_metrics;
+
+    void clean_up();
+    void evaluate_statistics();
+
+    MeanStdPair get_unet_inference_duration();
+
+    MeanStdPair get_transformer_inference_duration();
+    MeanStdPair get_iteration_duration();
+
+    float get_inference_total_duration();
+
+};
+}
\ No newline at end of file
diff --git a/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp b/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp
index c65d222a4d..9fe1847a22 100644
--- a/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp
+++ b/src/cpp/include/openvino/genai/image_generation/inpainting_pipeline.hpp
@@ -13,6 +13,7 @@
 #include "openvino/genai/image_generation/scheduler.hpp"
 #include "openvino/genai/image_generation/generation_config.hpp"
+#include "openvino/genai/image_generation/image_generation_perf_metrics.hpp"

 #include "openvino/genai/image_generation/clip_text_model.hpp"
 #include "openvino/genai/image_generation/clip_text_model_with_projection.hpp"
@@ -95,12 +96,12 @@ class OPENVINO_GENAI_EXPORTS InpaintingPipeline {
      * @param positive_prompt Prompt to generate image(s) from
      * @param initial_image RGB/BGR image of [1, height, width, 3] shape used to initialize latent image
      * @param mask_image RGB/BGR or GRAY/BINARY image of [1, height, width, 3 or 1] shape used as a mask
      * @param properties Image generation parameters specified as properties. Values in 'properties' override default value for generation parameters.
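// ---------------------------------------------------------------------------
// A minimal consumer sketch for the ImageGenerationPerfMetrics struct added
// above. Only the header introduced by this patch is assumed; print_metrics
// is an illustrative helper, not part of the patch. The getters invoke
// evaluate_statistics() lazily, so they can be called in any order.
#include <iostream>
#include "openvino/genai/image_generation/image_generation_perf_metrics.hpp"

void print_metrics(ov::genai::ImageGenerationPerfMetrics metrics) {
    ov::genai::MeanStdPair unet = metrics.get_unet_inference_duration();
    ov::genai::MeanStdPair iteration = metrics.get_iteration_duration();
    std::cout << "unet mean/std ms: " << unet.mean << " / " << unet.std << "\n";
    std::cout << "iteration mean/std ms: " << iteration.mean << " / " << iteration.std << "\n";
    // encoders + unet (or transformer) + VAE decode, reported in milliseconds
    std::cout << "total inference ms: " << metrics.get_inference_total_duration() << "\n";
    // per-encoder entries are stored in microseconds (MicroSeconds::count())
    for (const auto& entry : metrics.encoder_inference_duration)
        std::cout << entry.first << " ms: " << entry.second / 1000.0f << "\n";
}
// ---------------------------------------------------------------------------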
- * @returns ImageResults includes a tensor which has dimensions [num_images_per_prompt, height, width, 3] + * @returns A tensor which has dimensions [num_images_per_prompt, height, width, 3] */ - ImageResults generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties = {}); + ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties = {}); template - ov::util::EnableIfAllStringAny generate( + ov::util::EnableIfAllStringAny generate( const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask, @@ -110,8 +111,11 @@ class OPENVINO_GENAI_EXPORTS InpaintingPipeline { ov::Tensor decode(const ov::Tensor latent); + ImageGenerationPerfMetrics get_perfomance_metrics(); + private: std::shared_ptr m_impl; + ImageGenerationPerfMetrics m_perf_metrics; explicit InpaintingPipeline(const std::shared_ptr& impl); diff --git a/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp b/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp index 5348877e0c..133b65a67e 100644 --- a/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp @@ -78,7 +78,7 @@ class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel { void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states); - ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, RawPerfMetrics& raw_metrics); + ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, MicroSeconds& infer_duration); private: Config m_config; diff --git a/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp b/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp index edc185cd0e..9e3c185737 100644 --- a/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp @@ -70,7 +70,7 @@ class OPENVINO_GENAI_EXPORTS T5EncoderModel { const std::string& neg_prompt, bool do_classifier_free_guidance, int max_sequence_length, - RawPerfMetrics& raw_metrics); + MicroSeconds& infer_duration); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp b/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp index c07d214ec7..d31f5e487c 100644 --- a/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp +++ b/src/cpp/include/openvino/genai/image_generation/text2image_pipeline.hpp @@ -203,12 +203,12 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline { * Generates image(s) based on prompt and other image generation parameters * @param positive_prompt Prompt to generate image(s) from * @param properties Image generation parameters specified as properties. Values in 'properties' override default value for generation parameters. 
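// ---------------------------------------------------------------------------
// Every infer() signature change in this patch follows one timing pattern:
// wrap the request's infer() call in steady_clock reads and report the wall
// time through a MicroSeconds out-parameter. A minimal sketch of that
// pattern; timed_infer and the templated Request are illustrative, not part
// of the patch, and MicroSeconds here stands in for ov::genai::MicroSeconds.
#include <chrono>

using MicroSeconds = std::chrono::duration<float, std::micro>;

template <typename Request>
void timed_infer(Request& request, MicroSeconds& infer_duration) {
    const auto infer_start = std::chrono::steady_clock::now();
    request.infer();
    // overwrite rather than accumulate: each caller stores the sample itself
    // (e.g. in raw_metrics.unet_inference_durations), unlike the earlier
    // design that summed everything into m_inference_durations[0]
    infer_duration = std::chrono::duration_cast<MicroSeconds>(
        std::chrono::steady_clock::now() - infer_start);
}
// ---------------------------------------------------------------------------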
- * @returns ImageResults includes a tensor which has dimensions [num_images_per_prompt, height, width, 3] + * @returns A tensor which has dimensions [num_images_per_prompt, height, width, 3] */ - ImageResults generate(const std::string& positive_prompt, const ov::AnyMap& properties = {}); + ov::Tensor generate(const std::string& positive_prompt, const ov::AnyMap& properties = {}); template - ov::util::EnableIfAllStringAny generate( + ov::util::EnableIfAllStringAny generate( const std::string& positive_prompt, Properties&&... properties) { return generate(positive_prompt, ov::AnyMap{std::forward(properties)...}); @@ -221,6 +221,8 @@ class OPENVINO_GENAI_EXPORTS Text2ImagePipeline { */ ov::Tensor decode(const ov::Tensor latent); + ImageGenerationPerfMetrics get_perfomance_metrics(); + private: std::shared_ptr m_impl; diff --git a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp index 6ae6e93613..25eef5cd80 100644 --- a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp @@ -90,7 +90,7 @@ class OPENVINO_GENAI_EXPORTS UNet2DConditionModel { void set_adapters(const std::optional& adapters); - ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics); + ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, MicroSeconds& infer_duration); bool do_classifier_free_guidance(float guidance_scale) const { return guidance_scale > 1.0f && m_config.time_cond_proj_dim < 0; diff --git a/src/cpp/src/image_generation/diffusion_pipeline.hpp b/src/cpp/src/image_generation/diffusion_pipeline.hpp index ddf7f0ff60..886be27da1 100644 --- a/src/cpp/src/image_generation/diffusion_pipeline.hpp +++ b/src/cpp/src/image_generation/diffusion_pipeline.hpp @@ -82,13 +82,15 @@ class DiffusionPipeline { virtual std::tuple prepare_latents(ov::Tensor initial_image, const ImageGenerationConfig& generation_config) const = 0; - virtual void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) = 0; + virtual void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) = 0; virtual void set_lora_adapters(std::optional adapters) = 0; - virtual ImageResults generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) = 0; + virtual ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) = 0; - virtual ov::Tensor decode(const ov::Tensor latent, RawPerfMetrics& raw_metircs) = 0; + virtual ov::Tensor decode(const ov::Tensor latent, MicroSeconds& infer_duration) = 0; + + virtual ImageGenerationPerfMetrics get_perfomance_metrics() = 0; virtual ~DiffusionPipeline() = default; diff --git a/src/cpp/src/image_generation/flux_pipeline.hpp b/src/cpp/src/image_generation/flux_pipeline.hpp index 7239ea705f..240dcfd3ae 100644 --- a/src/cpp/src/image_generation/flux_pipeline.hpp +++ b/src/cpp/src/image_generation/flux_pipeline.hpp @@ -253,13 +253,16 @@ class FluxPipeline : public DiffusionPipeline { m_transformer->compile(device, properties); } - void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) override { + void compute_hidden_states(const std::string& 
positive_prompt, const ImageGenerationConfig& generation_config) override {
+        MicroSeconds infer_duration;
         // encode_prompt
         std::string prompt_2_str = generation_config.prompt_2 != std::nullopt ? *generation_config.prompt_2 : positive_prompt;

-        m_clip_text_encoder->infer(positive_prompt, {}, false, raw_metrics);
+        m_clip_text_encoder->infer(positive_prompt, {}, false, infer_duration);
+        m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration.count();
         ov::Tensor pooled_prompt_embeds = m_clip_text_encoder->get_output_tensor(1);
-        ov::Tensor prompt_embeds = m_t5_text_encoder->infer(prompt_2_str, "", false, generation_config.max_sequence_length, raw_metrics);
+        ov::Tensor prompt_embeds = m_t5_text_encoder->infer(prompt_2_str, "", false, generation_config.max_sequence_length, infer_duration);
+        m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration.count();

         pooled_prompt_embeds = numpy_utils::repeat(pooled_prompt_embeds, generation_config.num_images_per_prompt);
         prompt_embeds = numpy_utils::repeat(prompt_embeds, generation_config.num_images_per_prompt);
@@ -315,15 +318,13 @@ class FluxPipeline : public DiffusionPipeline {
         OPENVINO_THROW("LORA adapters are not implemented for FLUX pipeline yet");
     }

-    ImageResults generate(const std::string& positive_prompt,
+    ov::Tensor generate(const std::string& positive_prompt,
                         ov::Tensor initial_image,
                         ov::Tensor mask_image,
                         const ov::AnyMap& properties) override {
-        ImageResults image_results;
-        RawPerfMetrics &raw_metrics = image_results.perf_metrics.raw_metrics;
-        raw_metrics.generate_durations.clear();
-        raw_metrics.m_inference_durations = {{ MicroSeconds(0.0f) }};
         const auto gen_start = std::chrono::steady_clock::now();
+        MicroSeconds infer_duration;
+        m_perf_metrics.clean_up();
         m_custom_generation_config = m_generation_config;
         m_custom_generation_config.update_generation_config(properties);
@@ -344,7 +345,7 @@ class FluxPipeline : public DiffusionPipeline {

         check_inputs(m_custom_generation_config, initial_image);

-        compute_hidden_states(positive_prompt, m_custom_generation_config, raw_metrics);
+        compute_hidden_states(positive_prompt, m_custom_generation_config);

         ov::Tensor latents, processed_image, image_latent, noise;
         std::tie(latents, processed_image, image_latent, noise) = prepare_latents(initial_image, m_custom_generation_config);
@@ -363,34 +364,43 @@ class FluxPipeline : public DiffusionPipeline {
         float* timestep_data = timestep.data<float>();

         for (size_t inference_step = 0; inference_step < timesteps.size(); ++inference_step) {
+            auto step_start = std::chrono::steady_clock::now();
             timestep_data[0] = timesteps[inference_step] / 1000;

-            ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep, raw_metrics);
+            ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep, infer_duration);
+            m_perf_metrics.raw_metrics.transformer_inference_durations.emplace_back(infer_duration);

             auto scheduler_step_result = m_scheduler->step(noise_pred_tensor, latents, inference_step, m_custom_generation_config.generator);
             latents = scheduler_step_result["latent"];

+            auto step_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - step_start);
+            m_perf_metrics.raw_metrics.iteration_durations.emplace_back(MicroSeconds(step_ms));
             if (callback && callback(inference_step, timesteps.size(), latents)) {
-                image_results.image = ov::Tensor(ov::element::u8, {});
-                const auto gen_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
-                raw_metrics.generate_durations.emplace_back(gen_ms);
-                return image_results;
+                auto image = ov::Tensor(ov::element::u8, {});
+                m_perf_metrics.generate_duration =
+                    ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
+                return image;
             }
         }

         latents = unpack_latents(latents, m_custom_generation_config.height, m_custom_generation_config.width, vae_scale_factor);

-        image_results.image = m_vae->decode(latents, raw_metrics);
-        const auto gen_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
-        raw_metrics.generate_durations.emplace_back(gen_ms);
-        return image_results;
+        auto image = m_vae->decode(latents, infer_duration);
+        m_perf_metrics.vae_decoder_inference_duration = infer_duration.count();
+        m_perf_metrics.generate_duration =
+            ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
+        return image;
     }

-    ov::Tensor decode(const ov::Tensor latent, RawPerfMetrics& raw_metrics) override {
+    ov::Tensor decode(const ov::Tensor latent, MicroSeconds& infer_duration) override {
         ov::Tensor unpacked_latent = unpack_latents(latent, m_custom_generation_config.height, m_custom_generation_config.width, m_vae->get_vae_scale_factor());
-        return m_vae->decode(unpacked_latent, raw_metrics);
+        return m_vae->decode(unpacked_latent, infer_duration);
+    }
+
+    ImageGenerationPerfMetrics get_perfomance_metrics() override {
+        return m_perf_metrics;
     }

 private:
@@ -488,6 +498,7 @@ class FluxPipeline : public DiffusionPipeline {
     std::shared_ptr<T5EncoderModel> m_t5_text_encoder = nullptr;
     std::shared_ptr<AutoencoderKL> m_vae = nullptr;
     ImageGenerationConfig m_custom_generation_config;
+    ImageGenerationPerfMetrics m_perf_metrics;
 };

 } // namespace genai
diff --git a/src/cpp/src/image_generation/image2image_pipeline.cpp b/src/cpp/src/image_generation/image2image_pipeline.cpp
index a545d11c89..18911e1bc8 100644
--- a/src/cpp/src/image_generation/image2image_pipeline.cpp
+++ b/src/cpp/src/image_generation/image2image_pipeline.cpp
@@ -114,14 +114,18 @@ void Image2ImagePipeline::compile(const std::string& device, const ov::AnyMap& p
     m_impl->compile(device, properties);
 }

-ImageResults Image2ImagePipeline::generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties) {
+ov::Tensor Image2ImagePipeline::generate(const std::string& positive_prompt, ov::Tensor initial_image, const ov::AnyMap& properties) {
     OPENVINO_ASSERT(initial_image, "Initial image cannot be empty when passed to Image2ImagePipeline::generate");
     return m_impl->generate(positive_prompt, initial_image, {}, properties);
 }

 ov::Tensor Image2ImagePipeline::decode(const ov::Tensor latent) {
-    ov::genai::RawPerfMetrics raw_metrics;
-    return m_impl->decode(latent, raw_metrics);
+    MicroSeconds infer_duration;
+    return m_impl->decode(latent, infer_duration);
+}
+
+ImageGenerationPerfMetrics Image2ImagePipeline::get_perfomance_metrics() {
+    return m_impl->get_perfomance_metrics();
 }

 } // namespace genai
diff --git a/src/cpp/src/image_generation/image_generation_perf_metrics.cpp b/src/cpp/src/image_generation/image_generation_perf_metrics.cpp
new file mode 100644
index 0000000000..9f91082ba5
--- /dev/null
+++ b/src/cpp/src/image_generation/image_generation_perf_metrics.cpp
@@ -0,0 +1,101 @@
+#include <cmath>
+#include <numeric>
+
+#include "openvino/genai/image_generation/image_generation_perf_metrics.hpp"
+
+namespace ov {
+namespace genai {
+// Accepts time durations in microseconds and returns standard deviation and mean in milliseconds.
+ov::genai::MeanStdPair calculation(const std::vector<ov::genai::MicroSeconds>& durations) {
+    if (durations.size() == 0) {
+        return {-1, -1};
+    }
+    float mean = std::accumulate(durations.begin(),
+                                 durations.end(),
+                                 0.0f,
+                                 [](const float& acc, const ov::genai::MicroSeconds& duration) -> float {
+                                     return acc + duration.count() / 1000.0f;
+                                 });
+    mean /= durations.size();
+
+    float sum_square_durations =
+        std::accumulate(durations.begin(),
+                        durations.end(),
+                        0.0f,
+                        [](const float& acc, const ov::genai::MicroSeconds& duration) -> float {
+                            auto d = duration.count() / 1000.0f;
+                            return acc + d * d;
+                        });
+    float std = std::sqrt(sum_square_durations / durations.size() - mean * mean);
+    return {mean, std};
+}
+
+void ImageGenerationPerfMetrics::clean_up() {
+    m_evaluated = false;
+    load_time = 0.f;
+    generate_duration = 0.f;
+    vae_encoder_inference_duration = 0.f;
+    vae_decoder_inference_duration = 0.f;
+    encoder_inference_duration.clear();
+    raw_metrics.unet_inference_durations.clear();
+    raw_metrics.transformer_inference_durations.clear();
+    raw_metrics.iteration_durations.clear();
+}
+
+void ImageGenerationPerfMetrics::evaluate_statistics() {
+    if (m_evaluated) {
+        return;
+    }
+
+    // calculation() converts microseconds to milliseconds.
+    unet_inference_duration = calculation(raw_metrics.unet_inference_durations);
+    transformer_inference_duration = calculation(raw_metrics.transformer_inference_durations);
+    iteration_duration = calculation(raw_metrics.iteration_durations);
+
+    m_evaluated = true;
+}
+
+MeanStdPair ImageGenerationPerfMetrics::get_unet_inference_duration() {
+    evaluate_statistics();
+    return unet_inference_duration;
+}
+
+MeanStdPair ImageGenerationPerfMetrics::get_transformer_inference_duration() {
+    evaluate_statistics();
+    return transformer_inference_duration;
+}
+
+MeanStdPair ImageGenerationPerfMetrics::get_iteration_duration() {
+    evaluate_statistics();
+    return iteration_duration;
+}
+
+float ImageGenerationPerfMetrics::get_inference_total_duration() {
+    float total_duration = 0;
+    if (!raw_metrics.unet_inference_durations.empty()) {
+        float total = std::accumulate(raw_metrics.unet_inference_durations.begin(),
+                                      raw_metrics.unet_inference_durations.end(),
+                                      0.0f,
+                                      [](const float& acc, const ov::genai::MicroSeconds& duration) -> float {
+                                          return acc + duration.count();
+                                      });
+        total_duration += total;
+    } else if (!raw_metrics.transformer_inference_durations.empty()) {
+        float total = std::accumulate(raw_metrics.transformer_inference_durations.begin(),
+                                      raw_metrics.transformer_inference_durations.end(),
+                                      0.0f,
+                                      [](const float& acc, const ov::genai::MicroSeconds& duration) -> float {
+                                          return acc + duration.count();
+                                      });
+        total_duration += total;
+    }
+
+    total_duration += vae_decoder_inference_duration;
+
+    for (auto encoder = encoder_inference_duration.begin(); encoder != encoder_inference_duration.end(); encoder++) {
+        total_duration += encoder->second;
+    }
+
+    return total_duration / 1000.0f;
+}
+} // namespace genai
+} // namespace ov
\ No newline at end of file
diff --git a/src/cpp/src/image_generation/inpainting_pipeline.cpp b/src/cpp/src/image_generation/inpainting_pipeline.cpp
index 4e1ac1efb7..bee94813f9 100644
--- a/src/cpp/src/image_generation/inpainting_pipeline.cpp
+++ b/src/cpp/src/image_generation/inpainting_pipeline.cpp
@@ -119,15 +119,19 @@ void InpaintingPipeline::compile(const std::string& device, const ov::AnyMap& pr
     m_impl->compile(device, properties);
 }

-ImageResults InpaintingPipeline::generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask, const ov::AnyMap& properties) {
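// ---------------------------------------------------------------------------
// calculation() above computes population statistics in a single pass using
// the identity Var(x) = E[x^2] - E[x]^2, after converting each sample from
// microseconds to milliseconds. A self-contained check of that identity;
// the sample values are invented for illustration:
#include <cassert>
#include <cmath>
#include <vector>

int main() {
    std::vector<float> ms = {10.0f, 12.0f, 14.0f};  // durations already in ms
    float mean = 0.0f, mean_sq = 0.0f;
    for (float d : ms) {
        mean += d;
        mean_sq += d * d;
    }
    mean /= ms.size();
    mean_sq /= ms.size();
    const float std_dev = std::sqrt(mean_sq - mean * mean);  // population std
    assert(std::fabs(mean - 12.0f) < 1e-4f);                 // (10 + 12 + 14) / 3
    assert(std::fabs(std_dev - std::sqrt(8.0f / 3.0f)) < 1e-4f);
    return 0;
}
// ---------------------------------------------------------------------------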
+ov::Tensor InpaintingPipeline::generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask, const ov::AnyMap& properties) { OPENVINO_ASSERT(initial_image, "Initial image cannot be empty when passed to InpaintingPipeline::generate"); OPENVINO_ASSERT(mask, "Mask image cannot be empty when passed to InpaintingPipeline::generate"); return m_impl->generate(positive_prompt, initial_image, mask, properties); } ov::Tensor InpaintingPipeline::decode(const ov::Tensor latent) { - ov::genai::RawPerfMetrics raw_metrics; - return m_impl->decode(latent, raw_metrics); + MicroSeconds infer_duration; + return m_impl->decode(latent, infer_duration); +} + +ImageGenerationPerfMetrics InpaintingPipeline::get_perfomance_metrics() { + return m_impl->get_perfomance_metrics(); } } // namespace genai diff --git a/src/cpp/src/image_generation/models/autoencoder_kl.cpp b/src/cpp/src/image_generation/models/autoencoder_kl.cpp index f9cc014529..5e2f10f710 100644 --- a/src/cpp/src/image_generation/models/autoencoder_kl.cpp +++ b/src/cpp/src/image_generation/models/autoencoder_kl.cpp @@ -227,14 +227,14 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMa return *this; } -ov::Tensor AutoencoderKL::decode(ov::Tensor latent, RawPerfMetrics &raw_metrics) { +ov::Tensor AutoencoderKL::decode(ov::Tensor latent, MicroSeconds& infer_duration) { OPENVINO_ASSERT(m_decoder_request, "VAE decoder model must be compiled first. Cannot infer non-compiled model"); m_decoder_request.set_input_tensor(latent); const auto infer_start = std::chrono::steady_clock::now(); m_decoder_request.infer(); const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); - raw_metrics.m_inference_durations[0] += MicroSeconds(infer_ms); + infer_duration = MicroSeconds(infer_ms); return m_decoder_request.get_output_tensor(); } diff --git a/src/cpp/src/image_generation/models/clip_text_model.cpp b/src/cpp/src/image_generation/models/clip_text_model.cpp index 80243838e7..ab7c86a449 100644 --- a/src/cpp/src/image_generation/models/clip_text_model.cpp +++ b/src/cpp/src/image_generation/models/clip_text_model.cpp @@ -111,7 +111,10 @@ void CLIPTextModel::set_adapters(const std::optional& adapters) { } } -ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, RawPerfMetrics& raw_metrics) { +ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, + const std::string& neg_prompt, + bool do_classifier_free_guidance, + MicroSeconds& infer_duration) { OPENVINO_ASSERT(m_request, "CLIP text encoder model must be compiled first. 
Cannot infer non-compiled model"); const int32_t pad_token_id = m_clip_tokenizer.get_pad_token_id(); @@ -151,7 +154,7 @@ ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string const auto infer_start = std::chrono::steady_clock::now(); m_request.infer(); const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); - raw_metrics.m_inference_durations[0] += MicroSeconds(infer_ms); + infer_duration = MicroSeconds(infer_ms); return m_request.get_output_tensor(0); } diff --git a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp index d0e5a7dbbb..ee0366d06f 100644 --- a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp +++ b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp @@ -102,7 +102,10 @@ void CLIPTextModelWithProjection::set_adapters(const std::optional& adap } } -ov::Tensor UNet2DConditionModel::infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics) { +ov::Tensor UNet2DConditionModel::infer(ov::Tensor sample, ov::Tensor timestep, MicroSeconds& infer_duration) { OPENVINO_ASSERT(m_impl, "UNet model must be compiled first. Cannot infer non-compiled model"); - return m_impl->infer(sample, timestep, raw_metrics); + return m_impl->infer(sample, timestep, infer_duration); } } // namespace genai diff --git a/src/cpp/src/image_generation/models/unet_inference.hpp b/src/cpp/src/image_generation/models/unet_inference.hpp index a210282d0f..c2a841b9ce 100644 --- a/src/cpp/src/image_generation/models/unet_inference.hpp +++ b/src/cpp/src/image_generation/models/unet_inference.hpp @@ -14,7 +14,7 @@ class UNet2DConditionModel::UNetInference { virtual void compile(std::shared_ptr model, const std::string& device, const ov::AnyMap& properties) = 0; virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) = 0; virtual void set_adapters(AdapterController& adapter_controller, const AdapterConfig& adapters) = 0; - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics) = 0; + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, MicroSeconds& infer_duration) = 0; // utility function to resize model given optional dimensions. static void reshape(std::shared_ptr model, diff --git a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp index f6fc36748a..b228421df2 100644 --- a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp +++ b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp @@ -31,7 +31,7 @@ class UNet2DConditionModel::UNetInferenceDynamic : public UNet2DConditionModel:: adapter_controller.apply(m_request, adapters); } - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics) override + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, MicroSeconds& infer_duration) override { OPENVINO_ASSERT(m_request, "UNet model must be compiled first. 
Cannot infer non-compiled model"); @@ -40,7 +40,7 @@ class UNet2DConditionModel::UNetInferenceDynamic : public UNet2DConditionModel:: const auto infer_start = std::chrono::steady_clock::now(); m_request.infer(); const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); - raw_metrics.m_inference_durations[0] += MicroSeconds(infer_ms); + infer_duration = MicroSeconds(infer_ms); return m_request.get_output_tensor(); } diff --git a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp index 3e0f509a38..07b16134db 100644 --- a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp +++ b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp @@ -88,7 +88,7 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel } } - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, RawPerfMetrics& raw_metrics) override { + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, MicroSeconds& infer_duration) override { OPENVINO_ASSERT(m_native_batch_size && m_native_batch_size == m_requests.size(), "UNet model must be compiled first"); @@ -135,7 +135,7 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel m_requests[i].wait(); } const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); - raw_metrics.m_inference_durations[0] += MicroSeconds(infer_ms); + infer_duration = MicroSeconds(infer_ms); return out_sample; } diff --git a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp index 013a5f7478..de74bb4a98 100644 --- a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp @@ -258,12 +258,13 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { m_vae->compile(device, properties); } - void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) override { + void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override { const auto& transformer_config = m_transformer->get_config(); const size_t batch_size_multiplier = do_classifier_free_guidance(generation_config.guidance_scale) ? 2 : 1; // Transformer accepts 2x batch in case of CFG // Input tensors for transformer model ov::Tensor prompt_embeds_inp, pooled_prompt_embeds_inp; + MicroSeconds infer_duration; // 1. Encode positive prompt: std::string prompt_2_str = generation_config.prompt_2 != std::nullopt ? 
*generation_config.prompt_2 : positive_prompt;
@@ -278,7 +279,8 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
             m_clip_text_encoder_1->infer(positive_prompt,
                                          negative_prompt_1_str,
                                          do_classifier_free_guidance(generation_config.guidance_scale),
-                                         raw_metrics);
+                                         infer_duration);
+            m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration.count();

             // text_encoder_1_hidden_state - stores positive and negative prompt_embeds
             size_t idx_hidden_state_1 = m_clip_text_encoder_1->get_config().num_hidden_layers + 1;
@@ -289,7 +291,8 @@
             m_clip_text_encoder_2->infer(prompt_2_str,
                                          negative_prompt_2_str,
                                          do_classifier_free_guidance(generation_config.guidance_scale),
-                                         raw_metrics);
+                                         infer_duration);
+            m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration.count();

             // text_encoder_2_hidden_state - stores positive and negative prompt_2_embeds
             size_t idx_hidden_state_2 = m_clip_text_encoder_2->get_config().num_hidden_layers + 1;
@@ -301,7 +304,8 @@
                                        negative_prompt_3_str,
                                        do_classifier_free_guidance(generation_config.guidance_scale),
                                        generation_config.max_sequence_length,
-                                       raw_metrics);
+                                       infer_duration);
+            m_perf_metrics.encoder_inference_duration["text_encoder_3"] = infer_duration.count();
         } else {
             ov::Shape t5_prompt_embed_shape = {generation_config.num_images_per_prompt,
                                                m_clip_text_encoder_1->get_config().max_position_embeddings,
@@ -436,15 +440,13 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
         OPENVINO_THROW("LORA adapters are not implemented for Stable Diffusion 3 yet");
     }

-    ImageResults generate(const std::string& positive_prompt,
+    ov::Tensor generate(const std::string& positive_prompt,
                         ov::Tensor initial_image,
                         ov::Tensor mask_image,
                         const ov::AnyMap& properties) override {
-        ImageResults image_results;
-        RawPerfMetrics& raw_metrics = image_results.perf_metrics.raw_metrics;
-        raw_metrics.generate_durations.clear();
-        raw_metrics.m_inference_durations = {{ MicroSeconds(0.0f) }};
         const auto gen_start = std::chrono::steady_clock::now();
+        MicroSeconds infer_duration;
+        m_perf_metrics.clean_up();
         ImageGenerationConfig generation_config = m_generation_config;
         generation_config.update_generation_config(properties);
@@ -471,7 +473,7 @@
         std::vector<float> timesteps = m_scheduler->get_float_timesteps();

         // 4 compute text encoders and set hidden states
-        compute_hidden_states(positive_prompt, generation_config, raw_metrics);
+        compute_hidden_states(positive_prompt, generation_config);

        // 5. Prepare latent variables
         ov::Tensor latent, processed_image, image_latent, noise;
@@ -485,6 +487,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
         ov::Tensor noisy_residual_tensor(ov::element::f32, {});

         for (size_t inference_step = 0; inference_step < timesteps.size(); ++inference_step) {
+            auto step_start = std::chrono::steady_clock::now();
             // concat the same latent twice along a batch dimension in case of CFG
             if (batch_size_multiplier > 1) {
                 numpy_utils::batch_copy(latent, latent_cfg, 0, 0, generation_config.num_images_per_prompt);
@@ -493,9 +496,9 @@
                 // just assign to save memory copy
                 latent_cfg = latent;
             }
-            ov::Tensor timestep(ov::element::f32, {1}, &timesteps[inference_step]);
-            ov::Tensor noise_pred_tensor = m_transformer->infer(latent_cfg, timestep, raw_metrics);
+            ov::Tensor noise_pred_tensor = m_transformer->infer(latent_cfg, timestep, infer_duration);
+            m_perf_metrics.raw_metrics.transformer_inference_durations.emplace_back(infer_duration);

             ov::Shape noise_pred_shape = noise_pred_tensor.get_shape();
             noise_pred_shape[0] /= batch_size_multiplier;
@@ -519,22 +522,28 @@
             auto scheduler_step_result = m_scheduler->step(noisy_residual_tensor, latent, inference_step, generation_config.generator);
             latent = scheduler_step_result["latent"];

+            auto step_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - step_start);
+            m_perf_metrics.raw_metrics.iteration_durations.emplace_back(MicroSeconds(step_ms));
+
             if (callback && callback(inference_step, timesteps.size(), latent)) {
-                image_results.image = ov::Tensor(ov::element::u8, {});
-                const auto gen_end = std::chrono::steady_clock::now();
-                raw_metrics.generate_durations.emplace_back(PerfMetrics::get_microsec(gen_start - gen_end));
-                return image_results;
+                auto image = ov::Tensor(ov::element::u8, {});
+                m_perf_metrics.generate_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
+                return image;
             }
         }

-        image_results.image = decode(latent, raw_metrics);
-        const auto gen_end = std::chrono::steady_clock::now();
-        raw_metrics.generate_durations.emplace_back(PerfMetrics::get_microsec(gen_start - gen_end));
-        return image_results;
+        auto image = decode(latent, infer_duration);
+        m_perf_metrics.vae_decoder_inference_duration = infer_duration.count();
+        m_perf_metrics.generate_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
+        return image;
+    }
+
+    ov::Tensor decode(const ov::Tensor latent, MicroSeconds& infer_duration) override {
+        return m_vae->decode(latent, infer_duration);
     }

-    ov::Tensor decode(const ov::Tensor latent, RawPerfMetrics& raw_metrics) override {
-        return m_vae->decode(latent, raw_metrics);
+    ImageGenerationPerfMetrics get_perfomance_metrics() override {
+        return m_perf_metrics;
     }

 private:
@@ -641,6 +650,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline {
     std::shared_ptr<T5EncoderModel> m_t5_text_encoder = nullptr;
     std::shared_ptr<SD3Transformer2DModel> m_transformer = nullptr;
     std::shared_ptr<AutoencoderKL> m_vae = nullptr;
+    ImageGenerationPerfMetrics m_perf_metrics;
 };

 } // namespace genai
diff --git a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
index 067dc12a61..6520714486 100644
--- a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
+++ b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp
@@ -176,13 +176,15 @@ class StableDiffusionPipeline :
public DiffusionPipeline { m_vae->compile(device, properties); } - void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) override { + void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override { const auto& unet_config = m_unet->get_config(); const size_t batch_size_multiplier = m_unet->do_classifier_free_guidance(generation_config.guidance_scale) ? 2 : 1; // Unet accepts 2x batch in case of CFG + MicroSeconds infer_duration; std::string negative_prompt = generation_config.negative_prompt != std::nullopt ? *generation_config.negative_prompt : std::string{}; ov::Tensor encoder_hidden_states = m_clip_text_encoder->infer(positive_prompt, negative_prompt, - batch_size_multiplier > 1, raw_metrics); + batch_size_multiplier > 1, infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration.count(); // replicate encoder hidden state to UNet model if (generation_config.num_images_per_prompt == 1) { @@ -303,16 +305,14 @@ class StableDiffusionPipeline : public DiffusionPipeline { m_unet->set_adapters(adapters); } - ImageResults generate(const std::string& positive_prompt, + ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) override { - ImageResults image_results; - RawPerfMetrics& raw_metrics = image_results.perf_metrics.raw_metrics; - raw_metrics.generate_durations.clear(); - raw_metrics.m_inference_durations = {{ MicroSeconds(0.0f) }}; const auto gen_start = std::chrono::steady_clock::now(); + MicroSeconds infer_duration; using namespace numpy_utils; + m_perf_metrics.clean_up(); ImageGenerationConfig generation_config = m_generation_config; generation_config.update_generation_config(properties); @@ -343,7 +343,7 @@ class StableDiffusionPipeline : public DiffusionPipeline { std::vector timesteps = m_scheduler->get_timesteps(); // compute text encoders and set hidden states - compute_hidden_states(positive_prompt, generation_config, raw_metrics); + compute_hidden_states(positive_prompt, generation_config); // preparate initial / image latents ov::Tensor latent, processed_image, image_latent, noise; @@ -362,6 +362,7 @@ class StableDiffusionPipeline : public DiffusionPipeline { ov::Tensor latent_cfg(ov::element::f32, latent_shape_cfg), denoised, noisy_residual_tensor(ov::element::f32, {}), latent_model_input; for (size_t inference_step = 0; inference_step < timesteps.size(); inference_step++) { + auto step_start = std::chrono::steady_clock::now(); numpy_utils::batch_copy(latent, latent_cfg, 0, 0, generation_config.num_images_per_prompt); // concat the same latent twice along a batch dimension in case of CFG if (batch_size_multiplier > 1) { @@ -372,7 +373,8 @@ class StableDiffusionPipeline : public DiffusionPipeline { ov::Tensor latent_model_input = is_inpainting_model() ? 
numpy_utils::concat(numpy_utils::concat(latent_cfg, mask, 1), masked_image_latent, 1) : latent_cfg;
             ov::Tensor timestep(ov::element::i64, {1}, &timesteps[inference_step]);
-            ov::Tensor noise_pred_tensor = m_unet->infer(latent_model_input, timestep, raw_metrics);
+            ov::Tensor noise_pred_tensor = m_unet->infer(latent_model_input, timestep, infer_duration);
+            m_perf_metrics.raw_metrics.unet_inference_durations.emplace_back(infer_duration);

             ov::Shape noise_pred_shape = noise_pred_tensor.get_shape();
             noise_pred_shape[0] /= batch_size_multiplier;
@@ -405,21 +407,29 @@ class StableDiffusionPipeline : public DiffusionPipeline {
             const auto it = scheduler_step_result.find("denoised");
             denoised = it != scheduler_step_result.end() ? it->second : latent;

+            auto step_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - step_start);
+            m_perf_metrics.raw_metrics.iteration_durations.emplace_back(MicroSeconds(step_ms));
+
             if (callback && callback(inference_step, timesteps.size(), denoised)) {
-                image_results.image = ov::Tensor(ov::element::u8, {});
-                const auto gen_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
-                raw_metrics.generate_durations.emplace_back(gen_ms);
-                return image_results;
+                auto image = ov::Tensor(ov::element::u8, {});
+                m_perf_metrics.generate_duration =
+                    ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
+                return image;
             }
         }

-        image_results.image = decode(denoised, raw_metrics);
-        const auto gen_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
-        raw_metrics.generate_durations.emplace_back(gen_ms);
-        return image_results;
+        auto image = decode(denoised, infer_duration);
+        m_perf_metrics.vae_decoder_inference_duration = infer_duration.count();
+        m_perf_metrics.generate_duration =
+            ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start);
+        return image;
+    }
+
+    ov::Tensor decode(const ov::Tensor latent, MicroSeconds& infer_duration) override {
+        return m_vae->decode(latent, infer_duration);
     }

-    ov::Tensor decode(const ov::Tensor latent, RawPerfMetrics& raw_metrics) override {
-        return m_vae->decode(latent, raw_metrics);
+    ImageGenerationPerfMetrics get_perfomance_metrics() override {
+        return m_perf_metrics;
     }

 protected:
@@ -515,6 +525,7 @@ class StableDiffusionPipeline : public DiffusionPipeline {
     std::shared_ptr<AutoencoderKL> m_vae = nullptr;
     std::shared_ptr m_image_processor = nullptr, m_mask_processor_rgb = nullptr, m_mask_processor_gray = nullptr;
     std::shared_ptr m_image_resizer = nullptr, m_mask_resizer = nullptr;
+    ImageGenerationPerfMetrics m_perf_metrics;
 };

 } // namespace genai
diff --git a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp
index 2672acbcb1..1a2c229432 100644
--- a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp
+++ b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp
@@ -144,9 +144,10 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline {
         m_vae->compile(device, properties);
     }

-    void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config, RawPerfMetrics& raw_metrics) override {
+    void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override {
         const auto& unet_config = m_unet->get_config();
         const size_t batch_size_multiplier = m_unet->do_classifier_free_guidance(generation_config.guidance_scale) ?
2 : 1; // Unet accepts 2x batch in case of CFG + MicroSeconds infer_duration; std::vector time_ids = {static_cast(generation_config.width), static_cast(generation_config.height), @@ -180,8 +181,10 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { add_text_embeds = m_clip_text_encoder_with_projection->infer(positive_prompt, negative_prompt_1_str, batch_size_multiplier > 1, - raw_metrics); - m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, batch_size_multiplier > 1, raw_metrics); + infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration.count(); + m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, batch_size_multiplier > 1, infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration.count(); // prompt_embeds = prompt_embeds.hidden_states[-2] ov::Tensor encoder_hidden_states_1 = m_clip_text_encoder->get_output_tensor(idx_hidden_state_1); @@ -190,8 +193,10 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { encoder_hidden_states = numpy_utils::concat(encoder_hidden_states_1, encoder_hidden_states_2, -1); } else { ov::Tensor add_text_embeds_positive = - m_clip_text_encoder_with_projection->infer(positive_prompt, negative_prompt_1_str, false, raw_metrics); - m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, false, raw_metrics); + m_clip_text_encoder_with_projection->infer(positive_prompt, negative_prompt_1_str, false, infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration.count(); + m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, false, infer_duration); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration.count(); ov::Tensor encoder_hidden_states_1_positive = m_clip_text_encoder->get_output_tensor(idx_hidden_state_1); ov::Tensor encoder_hidden_states_2_positive = m_clip_text_encoder_with_projection->get_output_tensor(idx_hidden_state_2); diff --git a/src/cpp/src/image_generation/text2image_pipeline.cpp b/src/cpp/src/image_generation/text2image_pipeline.cpp index 564f5a7fc1..f03336f6c8 100644 --- a/src/cpp/src/image_generation/text2image_pipeline.cpp +++ b/src/cpp/src/image_generation/text2image_pipeline.cpp @@ -185,13 +185,17 @@ void Text2ImagePipeline::compile(const std::string& device, const ov::AnyMap& pr m_impl->compile(device, properties); } -ImageResults Text2ImagePipeline::generate(const std::string& positive_prompt, const ov::AnyMap& properties) { +ov::Tensor Text2ImagePipeline::generate(const std::string& positive_prompt, const ov::AnyMap& properties) { return m_impl->generate(positive_prompt, {}, {}, properties); } ov::Tensor Text2ImagePipeline::decode(const ov::Tensor latent) { - RawPerfMetrics raw_metrics; - return m_impl->decode(latent, raw_metrics); + MicroSeconds infer_duration; + return m_impl->decode(latent, infer_duration); +} + +ImageGenerationPerfMetrics Text2ImagePipeline::get_perfomance_metrics() { + return m_impl->get_perfomance_metrics(); } } // namespace genai diff --git a/src/python/py_image_generation_models.cpp b/src/python/py_image_generation_models.cpp index b7e71092cd..79009cd11d 100644 --- a/src/python/py_image_generation_models.cpp +++ b/src/python/py_image_generation_models.cpp @@ -70,7 +70,7 @@ void init_clip_text_model(py::module_& m) { clip_text_model.def("get_config", &ov::genai::CLIPTextModel::get_config) .def("reshape", &ov::genai::CLIPTextModel::reshape, py::arg("batch_size")) .def("set_adapters", 
&ov::genai::CLIPTextModel::set_adapters, py::arg("adapters")) - .def("infer", &ov::genai::CLIPTextModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("raw_metrics")) + .def("infer", &ov::genai::CLIPTextModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("infer_duration")) .def("get_output_tensor", &ov::genai::CLIPTextModel::get_output_tensor, py::arg("idx")) .def( "compile", @@ -133,7 +133,7 @@ void init_clip_text_model_with_projection(py::module_& m) { .def_readwrite("num_hidden_layers", &ov::genai::CLIPTextModelWithProjection::Config::num_hidden_layers); clip_text_model_with_projection.def("reshape", &ov::genai::CLIPTextModelWithProjection::reshape, py::arg("batch_size")) - .def("infer", &ov::genai::CLIPTextModelWithProjection::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("raw_metrics")) + .def("infer", &ov::genai::CLIPTextModelWithProjection::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("infer_duration")) .def("get_config", &ov::genai::CLIPTextModelWithProjection::get_config) .def("get_output_tensor", &ov::genai::CLIPTextModelWithProjection::get_output_tensor, py::arg("idx")) .def("set_adapters", &ov::genai::CLIPTextModelWithProjection::set_adapters, py::arg("adapters")) @@ -189,7 +189,7 @@ void init_t5_encoder_model(py::module_& m) { model (T5EncoderModel): T5EncoderModel model )") .def("reshape", &ov::genai::T5EncoderModel::reshape, py::arg("batch_size"), py::arg("max_sequence_length")) - .def("infer", &ov::genai::T5EncoderModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("max_sequence_length"), py::arg("raw_metrics")) + .def("infer", &ov::genai::T5EncoderModel::infer, py::arg("pos_prompt"), py::arg("neg_prompt"), py::arg("do_classifier_free_guidance"), py::arg("max_sequence_length"), py::arg("infer_duration")) .def("get_output_tensor", &ov::genai::T5EncoderModel::get_output_tensor, py::arg("idx")) // .def("set_adapters", &ov::genai::T5EncoderModel::set_adapters, py::arg("adapters")) .def( @@ -254,7 +254,7 @@ void init_unet2d_condition_model(py::module_& m) { unet2d_condition_model.def("get_config", &ov::genai::UNet2DConditionModel::get_config) .def("reshape", &ov::genai::UNet2DConditionModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length")) .def("set_adapters", &ov::genai::UNet2DConditionModel::set_adapters, py::arg("adapters")) - .def("infer", &ov::genai::UNet2DConditionModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("raw_metrics")) + .def("infer", &ov::genai::UNet2DConditionModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("infer_duration")) .def("set_hidden_states", &ov::genai::UNet2DConditionModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states")) .def("do_classifier_free_guidance", &ov::genai::UNet2DConditionModel::do_classifier_free_guidance, py::arg("guidance_scale")) .def( @@ -320,7 +320,7 @@ void init_sd3_transformer_2d_model(py::module_& m) { sd3_transformer_2d_model.def("get_config", &ov::genai::SD3Transformer2DModel::get_config) .def("reshape", &ov::genai::SD3Transformer2DModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length")) // .def("set_adapters", &ov::genai::SD3Transformer2DModel::set_adapters, py::arg("adapters")) - 
.def("infer", &ov::genai::SD3Transformer2DModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("raw_metrics")) + .def("infer", &ov::genai::SD3Transformer2DModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("infer_duration")) .def("set_hidden_states", &ov::genai::SD3Transformer2DModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states")) .def( "compile", @@ -383,7 +383,7 @@ void init_flux_transformer_2d_model(py::module_& m) { flux_transformer_2d_model.def("get_config", &ov::genai::FluxTransformer2DModel::get_config) .def("reshape", &ov::genai::FluxTransformer2DModel::reshape, py::arg("batch_size"), py::arg("height"), py::arg("width"), py::arg("tokenizer_model_max_length")) // .def("set_adapters", &ov::genai::FluxTransformer2DModel::set_adapters, py::arg("adapters")) - .def("infer", &ov::genai::FluxTransformer2DModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("raw_metrics")) + .def("infer", &ov::genai::FluxTransformer2DModel::infer, py::arg("sample"), py::arg("timestep"), py::arg("infer_duration")) .def("set_hidden_states", &ov::genai::FluxTransformer2DModel::set_hidden_states, py::arg("tensor_name"), py::arg("encoder_hidden_states")) .def( "compile", @@ -492,7 +492,7 @@ void init_autoencoder_kl(py::module_& m) { device (str): Device to run the model on (e.g., CPU, GPU). kwargs: Device properties. )") - .def("decode", &ov::genai::AutoencoderKL::decode, py::arg("latent"), py::arg("raw_metrics")) + .def("decode", &ov::genai::AutoencoderKL::decode, py::arg("latent"), py::arg("infer_duration")) .def("encode", &ov::genai::AutoencoderKL::encode, py::arg("image"), py::arg("generator")) .def("get_config", &ov::genai::AutoencoderKL::get_config) .def("get_vae_scale_factor", &ov::genai::AutoencoderKL::get_vae_scale_factor); diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp index 978567cf76..c246557a97 100644 --- a/src/python/py_image_generation_pipelines.cpp +++ b/src/python/py_image_generation_pipelines.cpp @@ -288,7 +288,7 @@ void init_image_generation_pipelines(py::module_& m) { [](ov::genai::Text2ImagePipeline& pipe, const std::string& prompt, const py::kwargs& kwargs - ) -> py::typing::Union { + ) -> py::typing::Union { ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs); return py::cast(pipe.generate(prompt, params)); }, @@ -350,7 +350,7 @@ void init_image_generation_pipelines(py::module_& m) { const std::string& prompt, const ov::Tensor& image, const py::kwargs& kwargs - ) -> py::typing::Union { + ) -> py::typing::Union { ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs); return py::cast(pipe.generate(prompt, image, params)); }, @@ -414,7 +414,7 @@ void init_image_generation_pipelines(py::module_& m) { const ov::Tensor& image, const ov::Tensor& mask_image, const py::kwargs& kwargs - ) -> py::typing::Union { + ) -> py::typing::Union { ov::AnyMap params = pyutils::kwargs_to_any_map(kwargs); return py::cast(pipe.generate(prompt, image, mask_image, params)); }, From ac55e25a48bd94088677eb96271ab04ae6e0384c Mon Sep 17 00:00:00 2001 From: xufang Date: Fri, 3 Jan 2025 14:19:39 +0800 Subject: [PATCH 05/19] fix build error --- .../genai/image_generation/image_generation_perf_metrics.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp b/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp index 3d2ec5db69..bf1a170fdc 100644 --- 
a/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp +++ b/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "openvino/genai/visibility.hpp" #include "openvino/genai/perf_metrics.hpp" From 2b360aa11bff8f2f9b5029dadade76ea1a0ae2d0 Mon Sep 17 00:00:00 2001 From: xufang Date: Fri, 3 Jan 2025 17:28:30 +0800 Subject: [PATCH 06/19] fix build error --- src/python/openvino_genai/py_openvino_genai.pyi | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 5d82fa89a3..e734f76460 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -202,7 +202,7 @@ class AutoencoderKL: device (str): Device to run the model on (e.g., CPU, GPU). kwargs: Device properties. """ - def decode(self, latent: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor: + def decode(self, latent: openvino._pyopenvino.Tensor, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def encode(self, image: openvino._pyopenvino.Tensor, generator: Generator) -> openvino._pyopenvino.Tensor: ... @@ -255,7 +255,7 @@ class CLIPTextModel: ... def get_output_tensor(self, idx: int) -> openvino._pyopenvino.Tensor: ... - def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool) -> openvino._pyopenvino.Tensor: + def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int) -> CLIPTextModel: ... @@ -304,7 +304,7 @@ class CLIPTextModelWithProjection: ... def get_output_tensor(self, idx: int) -> openvino._pyopenvino.Tensor: ... - def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool) -> openvino._pyopenvino.Tensor: + def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int) -> CLIPTextModelWithProjection: ... @@ -513,7 +513,7 @@ class FluxTransformer2DModel: """ def get_config(self) -> FluxTransformer2DModel.Config: ... - def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor: + def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> FluxTransformer2DModel: ... @@ -1332,7 +1332,7 @@ class SD3Transformer2DModel: """ def get_config(self) -> SD3Transformer2DModel.Config: ... - def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor: + def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> SD3Transformer2DModel: ... @@ -1529,7 +1529,7 @@ class T5EncoderModel: """ def get_output_tensor(self, idx: int) -> openvino._pyopenvino.Tensor: ... 
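As a rough orientation for these stub changes, a hedged sketch of the updated calling convention from Python; `clip` (a compiled CLIPTextModel), `vae` (a compiled AutoencoderKL) and `latent` (an ov.Tensor) are assumed to already exist, and the trailing float simply fills the slot that mirrors the C++ `infer_duration` out-parameter:

# assumed objects: clip (CLIPTextModel), vae (AutoencoderKL), latent (ov.Tensor)
embeds = clip.infer("a photo of a cat", "", True, 0.0)  # pos_prompt, neg_prompt, do_classifier_free_guidance, infer_duration
image = vae.decode(latent, 0.0)  # the duration slot is filled with a dummy value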
- def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool, max_sequence_length: int) -> openvino._pyopenvino.Tensor: + def infer(self, pos_prompt: str, neg_prompt: str, do_classifier_free_guidance: bool, max_sequence_length: int, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int, max_sequence_length: int) -> T5EncoderModel: ... @@ -1736,7 +1736,7 @@ class UNet2DConditionModel: ... def get_config(self) -> UNet2DConditionModel.Config: ... - def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor) -> openvino._pyopenvino.Tensor: + def infer(self, sample: openvino._pyopenvino.Tensor, timestep: openvino._pyopenvino.Tensor, infer_duration: float) -> openvino._pyopenvino.Tensor: ... def reshape(self, batch_size: int, height: int, width: int, tokenizer_model_max_length: int) -> UNet2DConditionModel: ... From 1ada73e62f7ccabbc69d995ea9487f3680460ffd Mon Sep 17 00:00:00 2001 From: xufang Date: Wed, 8 Jan 2025 16:17:22 +0800 Subject: [PATCH 07/19] add python api --- .../genai/image_generation/autoencoder_kl.hpp | 2 +- .../image_generation/clip_text_model.hpp | 2 +- .../clip_text_model_with_projection.hpp | 2 +- .../flux_transformer_2d_model.hpp | 2 +- .../image_generation_perf_metrics.hpp | 24 ++--- .../sd3_transformer_2d_model.hpp | 2 +- .../image_generation/t5_encoder_model.hpp | 2 +- .../unet2d_condition_model.hpp | 2 +- .../image_generation/diffusion_pipeline.hpp | 2 +- .../src/image_generation/flux_pipeline.hpp | 19 ++-- .../image_generation/image2image_pipeline.cpp | 2 +- .../image_generation_perf_metrics.cpp | 10 ++- .../image_generation/inpainting_pipeline.cpp | 2 +- .../models/autoencoder_kl.cpp | 5 +- .../models/clip_text_model.cpp | 5 +- .../clip_text_model_with_projection.cpp | 5 +- .../models/flux_transformer_2d_model.cpp | 5 +- .../models/sd3_transformer_2d_model.cpp | 5 +- .../models/t5_encoder_model.cpp | 5 +- .../models/unet2d_condition_model.cpp | 2 +- .../models/unet_inference.hpp | 2 +- .../models/unet_inference_dynamic.hpp | 5 +- .../models/unet_inference_static_bs1.hpp | 5 +- .../stable_diffusion_3_pipeline.hpp | 23 ++--- .../stable_diffusion_pipeline.hpp | 17 ++-- .../stable_diffusion_xl_pipeline.hpp | 10 +-- .../image_generation/text2image_pipeline.cpp | 2 +- src/python/openvino_genai/__init__.py | 2 + src/python/openvino_genai/__init__.pyi | 4 +- .../openvino_genai/py_openvino_genai.pyi | 90 ++++++++++++++++++- src/python/py_image_generation_pipelines.cpp | 87 +++++++++++++++++- 31 files changed, 265 insertions(+), 87 deletions(-) diff --git a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp index 211d6cca0a..ca5b4120c3 100644 --- a/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp +++ b/src/cpp/include/openvino/genai/image_generation/autoencoder_kl.hpp @@ -127,7 +127,7 @@ class OPENVINO_GENAI_EXPORTS AutoencoderKL { return compile(device, ov::AnyMap{std::forward(properties)...}); } - ov::Tensor decode(ov::Tensor latent, MicroSeconds& infer_duration); + ov::Tensor decode(ov::Tensor latent, float& infer_duration); ov::Tensor encode(ov::Tensor image, std::shared_ptr generator); diff --git a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp index 835470b2b8..db7ae41004 100644 --- a/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp +++ 
b/src/cpp/include/openvino/genai/image_generation/clip_text_model.hpp @@ -85,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModel { void set_adapters(const std::optional& adapters); - ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, MicroSeconds& infer_duration); + ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, float& infer_duration); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp b/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp index d109823751..c12ed96469 100644 --- a/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp +++ b/src/cpp/include/openvino/genai/image_generation/clip_text_model_with_projection.hpp @@ -85,7 +85,7 @@ class OPENVINO_GENAI_EXPORTS CLIPTextModelWithProjection { void set_adapters(const std::optional& adapters); - ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, MicroSeconds& infer_duration); + ov::Tensor infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, float& infer_duration); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp b/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp index 27053b876c..a5a8de5ee9 100644 --- a/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/flux_transformer_2d_model.hpp @@ -76,7 +76,7 @@ class OPENVINO_GENAI_EXPORTS FluxTransformer2DModel { void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states); - ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, MicroSeconds& infer_duration); + ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, float& infer_duration); private: Config m_config; diff --git a/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp b/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp index bf1a170fdc..9df6dc3f62 100644 --- a/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp +++ b/src/cpp/include/openvino/genai/image_generation/image_generation_perf_metrics.hpp @@ -11,21 +11,21 @@ namespace ov::genai { struct OPENVINO_GENAI_EXPORTS RawImageGenerationPerfMetrics { - std::vector unet_inference_durations; // unet durations for each step - std::vector transformer_inference_durations; // transformer durations for each step + std::vector unet_inference_durations; // unet inference durations for each step + std::vector transformer_inference_durations; // transformer inference durations for each step std::vector iteration_durations; // durations of each step }; struct OPENVINO_GENAI_EXPORTS ImageGenerationPerfMetrics { - float load_time; // model load time (includes reshape & read_model time) - float generate_duration; // duration of method generate(...) 
+ float load_time; // model load time (includes reshape & read_model time), ms + float generate_duration; // duration of method generate(...), ms - MeanStdPair iteration_duration; // Mean-Std time of one generation iteration - std::map encoder_inference_duration; // inference durations for each encoder - MeanStdPair unet_inference_duration; // inference duration for unet model, should be filled with zeros if we don't have unet - MeanStdPair transformer_inference_duration; // inference duration for transformer model, should be filled with zeros if we don't have transformer - float vae_encoder_inference_duration; // inference duration of vae_encoder model, should be filled with zeros if we don't use it - float vae_decoder_inference_duration; // inference duration of vae_decoder model + MeanStdPair iteration_duration; // Mean-Std time of one generation iteration, ms + std::map encoder_inference_duration; // inference durations for each encoder, ms + MeanStdPair unet_inference_duration; // inference duration for unet model, should be filled with zeros if we don't have unet, ms + MeanStdPair transformer_inference_duration; // inference duration for transformer model, should be filled with zeros if we don't have transformer, ms + float vae_encoder_inference_duration; // inference duration of vae_encoder model, should be filled with zeros if we don't use it, ms + float vae_decoder_inference_duration; // inference duration of vae_decoder model, ms bool m_evaluated = false; @@ -35,11 +35,11 @@ struct OPENVINO_GENAI_EXPORTS ImageGenerationPerfMetrics { void evaluate_statistics(); MeanStdPair get_unet_inference_duration(); - MeanStdPair get_transformer_inference_duration(); MeanStdPair get_iteration_duration(); - float get_inference_total_duration(); + float get_load_time(); + float get_generate_duration(); }; } \ No newline at end of file diff --git a/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp b/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp index 133b65a67e..540c038a7e 100644 --- a/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/sd3_transformer_2d_model.hpp @@ -78,7 +78,7 @@ class OPENVINO_GENAI_EXPORTS SD3Transformer2DModel { void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states); - ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, MicroSeconds& infer_duration); + ov::Tensor infer(const ov::Tensor latent, const ov::Tensor timestep, float& infer_duration); private: Config m_config; diff --git a/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp b/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp index 9e3c185737..421d7aca83 100644 --- a/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp +++ b/src/cpp/include/openvino/genai/image_generation/t5_encoder_model.hpp @@ -70,7 +70,7 @@ class OPENVINO_GENAI_EXPORTS T5EncoderModel { const std::string& neg_prompt, bool do_classifier_free_guidance, int max_sequence_length, - MicroSeconds& infer_duration); + float& infer_duration); ov::Tensor get_output_tensor(const size_t idx); diff --git a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp index 25eef5cd80..09822bd5d7 100644 --- a/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp +++ 
b/src/cpp/include/openvino/genai/image_generation/unet2d_condition_model.hpp @@ -90,7 +90,7 @@ class OPENVINO_GENAI_EXPORTS UNet2DConditionModel { void set_adapters(const std::optional& adapters); - ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, MicroSeconds& infer_duration); + ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, float& infer_duration); bool do_classifier_free_guidance(float guidance_scale) const { return guidance_scale > 1.0f && m_config.time_cond_proj_dim < 0; diff --git a/src/cpp/src/image_generation/diffusion_pipeline.hpp b/src/cpp/src/image_generation/diffusion_pipeline.hpp index 886be27da1..db0057bfb3 100644 --- a/src/cpp/src/image_generation/diffusion_pipeline.hpp +++ b/src/cpp/src/image_generation/diffusion_pipeline.hpp @@ -88,7 +88,7 @@ class DiffusionPipeline { virtual ov::Tensor generate(const std::string& positive_prompt, ov::Tensor initial_image, ov::Tensor mask_image, const ov::AnyMap& properties) = 0; - virtual ov::Tensor decode(const ov::Tensor latent, MicroSeconds& infer_duration) = 0; + virtual ov::Tensor decode(const ov::Tensor latent, float& infer_duration) = 0; virtual ImageGenerationPerfMetrics get_perfomance_metrics() = 0; diff --git a/src/cpp/src/image_generation/flux_pipeline.hpp b/src/cpp/src/image_generation/flux_pipeline.hpp index 240dcfd3ae..c2ec4d2cb2 100644 --- a/src/cpp/src/image_generation/flux_pipeline.hpp +++ b/src/cpp/src/image_generation/flux_pipeline.hpp @@ -254,15 +254,15 @@ class FluxPipeline : public DiffusionPipeline { } void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override { - MicroSeconds infer_duration; + float infer_duration; // encode_prompt std::string prompt_2_str = generation_config.prompt_2 != std::nullopt ? 
*generation_config.prompt_2 : positive_prompt; m_clip_text_encoder->infer(positive_prompt, {}, false, infer_duration); - m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration.count(); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration / 1000.0f; ov::Tensor pooled_prompt_embeds = m_clip_text_encoder->get_output_tensor(1); ov::Tensor prompt_embeds = m_t5_text_encoder->infer(prompt_2_str, "", false, generation_config.max_sequence_length, infer_duration); - m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration.count(); + m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration / 1000.0f; pooled_prompt_embeds = numpy_utils::repeat(pooled_prompt_embeds, generation_config.num_images_per_prompt); prompt_embeds = numpy_utils::repeat(prompt_embeds, generation_config.num_images_per_prompt); @@ -323,7 +323,7 @@ class FluxPipeline : public DiffusionPipeline { ov::Tensor mask_image, const ov::AnyMap& properties) override { const auto gen_start = std::chrono::steady_clock::now(); - MicroSeconds infer_duration; + float infer_duration; m_perf_metrics.clean_up(); m_custom_generation_config = m_generation_config; m_custom_generation_config.update_generation_config(properties); @@ -368,7 +368,7 @@ class FluxPipeline : public DiffusionPipeline { timestep_data[0] = timesteps[inference_step] / 1000; ov::Tensor noise_pred_tensor = m_transformer->infer(latents, timestep, infer_duration); - m_perf_metrics.raw_metrics.transformer_inference_durations.emplace_back(infer_duration); + m_perf_metrics.raw_metrics.transformer_inference_durations.emplace_back(MicroSeconds(infer_duration)); auto scheduler_step_result = m_scheduler->step(noise_pred_tensor, latents, inference_step, m_custom_generation_config.generator); latents = scheduler_step_result["latent"]; @@ -378,20 +378,21 @@ class FluxPipeline : public DiffusionPipeline { if (callback && callback(inference_step, timesteps.size(), latents)) { auto image = ov::Tensor(ov::element::u8, {}); m_perf_metrics.generate_duration = - ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start); + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start) + .count(); return image; } } latents = unpack_latents(latents, m_custom_generation_config.height, m_custom_generation_config.width, vae_scale_factor); auto image = m_vae->decode(latents, infer_duration); - m_perf_metrics.vae_decoder_inference_duration = infer_duration.count(); + m_perf_metrics.vae_decoder_inference_duration = infer_duration / 1000.0f; m_perf_metrics.generate_duration = - ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start); + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start).count(); return image; } - ov::Tensor decode(const ov::Tensor latent, MicroSeconds& infer_duration) override { + ov::Tensor decode(const ov::Tensor latent, float& infer_duration) override { ov::Tensor unpacked_latent = unpack_latents(latent, m_custom_generation_config.height, m_custom_generation_config.width, diff --git a/src/cpp/src/image_generation/image2image_pipeline.cpp b/src/cpp/src/image_generation/image2image_pipeline.cpp index 18911e1bc8..5435892cd6 100644 --- a/src/cpp/src/image_generation/image2image_pipeline.cpp +++ b/src/cpp/src/image_generation/image2image_pipeline.cpp @@ -120,7 +120,7 @@ ov::Tensor Image2ImagePipeline::generate(const std::string& positive_prompt, ov: } ov::Tensor Image2ImagePipeline::decode(const ov::Tensor latent) { - MicroSeconds 
infer_duration; + float infer_duration; return m_impl->decode(latent, infer_duration); } diff --git a/src/cpp/src/image_generation/image_generation_perf_metrics.cpp b/src/cpp/src/image_generation/image_generation_perf_metrics.cpp index 9f91082ba5..6e9739140d 100644 --- a/src/cpp/src/image_generation/image_generation_perf_metrics.cpp +++ b/src/cpp/src/image_generation/image_generation_perf_metrics.cpp @@ -94,8 +94,16 @@ float ImageGenerationPerfMetrics::get_inference_total_duration() { for (auto encoder = encoder_inference_duration.begin(); encoder != encoder_inference_duration.end(); encoder++) { total_duration += encoder->second; } - + // Return milliseconds return total_duration / 1000.0f; } + +float ImageGenerationPerfMetrics::get_load_time() { + return load_time; +} + +float ImageGenerationPerfMetrics::get_generate_duration() { + return generate_duration; +} } // namespace genai } // namespace ov \ No newline at end of file diff --git a/src/cpp/src/image_generation/inpainting_pipeline.cpp b/src/cpp/src/image_generation/inpainting_pipeline.cpp index bee94813f9..5a85d36996 100644 --- a/src/cpp/src/image_generation/inpainting_pipeline.cpp +++ b/src/cpp/src/image_generation/inpainting_pipeline.cpp @@ -126,7 +126,7 @@ ov::Tensor InpaintingPipeline::generate(const std::string& positive_prompt, ov:: } ov::Tensor InpaintingPipeline::decode(const ov::Tensor latent) { - MicroSeconds infer_duration; + float infer_duration; return m_impl->decode(latent, infer_duration); } diff --git a/src/cpp/src/image_generation/models/autoencoder_kl.cpp b/src/cpp/src/image_generation/models/autoencoder_kl.cpp index 5e2f10f710..4ffab62c53 100644 --- a/src/cpp/src/image_generation/models/autoencoder_kl.cpp +++ b/src/cpp/src/image_generation/models/autoencoder_kl.cpp @@ -227,14 +227,13 @@ AutoencoderKL& AutoencoderKL::compile(const std::string& device, const ov::AnyMa return *this; } -ov::Tensor AutoencoderKL::decode(ov::Tensor latent, MicroSeconds& infer_duration) { +ov::Tensor AutoencoderKL::decode(ov::Tensor latent, float& infer_duration) { OPENVINO_ASSERT(m_decoder_request, "VAE decoder model must be compiled first. Cannot infer non-compiled model"); m_decoder_request.set_input_tensor(latent); const auto infer_start = std::chrono::steady_clock::now(); m_decoder_request.infer(); - const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); - infer_duration = MicroSeconds(infer_ms); + infer_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); return m_decoder_request.get_output_tensor(); } diff --git a/src/cpp/src/image_generation/models/clip_text_model.cpp b/src/cpp/src/image_generation/models/clip_text_model.cpp index ab7c86a449..a119483417 100644 --- a/src/cpp/src/image_generation/models/clip_text_model.cpp +++ b/src/cpp/src/image_generation/models/clip_text_model.cpp @@ -114,7 +114,7 @@ void CLIPTextModel::set_adapters(const std::optional& adapters) { ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, const std::string& neg_prompt, bool do_classifier_free_guidance, - MicroSeconds& infer_duration) { + float& infer_duration) { OPENVINO_ASSERT(m_request, "CLIP text encoder model must be compiled first. 
Cannot infer non-compiled model"); const int32_t pad_token_id = m_clip_tokenizer.get_pad_token_id(); @@ -153,8 +153,7 @@ ov::Tensor CLIPTextModel::infer(const std::string& pos_prompt, // text embeddings const auto infer_start = std::chrono::steady_clock::now(); m_request.infer(); - const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); - infer_duration = MicroSeconds(infer_ms); + infer_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); return m_request.get_output_tensor(0); } diff --git a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp index ee0366d06f..685c1f6c0e 100644 --- a/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp +++ b/src/cpp/src/image_generation/models/clip_text_model_with_projection.cpp @@ -105,7 +105,7 @@ void CLIPTextModelWithProjection::set_adapters(const std::optional& adap } } -ov::Tensor UNet2DConditionModel::infer(ov::Tensor sample, ov::Tensor timestep, MicroSeconds& infer_duration) { +ov::Tensor UNet2DConditionModel::infer(ov::Tensor sample, ov::Tensor timestep, float& infer_duration) { OPENVINO_ASSERT(m_impl, "UNet model must be compiled first. Cannot infer non-compiled model"); return m_impl->infer(sample, timestep, infer_duration); } diff --git a/src/cpp/src/image_generation/models/unet_inference.hpp b/src/cpp/src/image_generation/models/unet_inference.hpp index c2a841b9ce..5438e1bf36 100644 --- a/src/cpp/src/image_generation/models/unet_inference.hpp +++ b/src/cpp/src/image_generation/models/unet_inference.hpp @@ -14,7 +14,7 @@ class UNet2DConditionModel::UNetInference { virtual void compile(std::shared_ptr model, const std::string& device, const ov::AnyMap& properties) = 0; virtual void set_hidden_states(const std::string& tensor_name, ov::Tensor encoder_hidden_states) = 0; virtual void set_adapters(AdapterController& adapter_controller, const AdapterConfig& adapters) = 0; - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, MicroSeconds& infer_duration) = 0; + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, float& infer_duration) = 0; // utility function to resize model given optional dimensions. static void reshape(std::shared_ptr model, diff --git a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp index b228421df2..7db7ca9451 100644 --- a/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp +++ b/src/cpp/src/image_generation/models/unet_inference_dynamic.hpp @@ -31,7 +31,7 @@ class UNet2DConditionModel::UNetInferenceDynamic : public UNet2DConditionModel:: adapter_controller.apply(m_request, adapters); } - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, MicroSeconds& infer_duration) override + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, float& infer_duration) override { OPENVINO_ASSERT(m_request, "UNet model must be compiled first. 
Cannot infer non-compiled model"); @@ -39,8 +39,7 @@ class UNet2DConditionModel::UNetInferenceDynamic : public UNet2DConditionModel:: m_request.set_tensor("timestep", timestep); const auto infer_start = std::chrono::steady_clock::now(); m_request.infer(); - const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); - infer_duration = MicroSeconds(infer_ms); + infer_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); return m_request.get_output_tensor(); } diff --git a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp index 07b16134db..d7c9d2d77d 100644 --- a/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp +++ b/src/cpp/src/image_generation/models/unet_inference_static_bs1.hpp @@ -88,7 +88,7 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel } } - virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, MicroSeconds& infer_duration) override { + virtual ov::Tensor infer(ov::Tensor sample, ov::Tensor timestep, float& infer_duration) override { OPENVINO_ASSERT(m_native_batch_size && m_native_batch_size == m_requests.size(), "UNet model must be compiled first"); @@ -134,8 +134,7 @@ class UNet2DConditionModel::UNetInferenceStaticBS1 : public UNet2DConditionModel // wait for infer to complete. m_requests[i].wait(); } - const auto infer_ms = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); - infer_duration = MicroSeconds(infer_ms); + infer_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - infer_start); return out_sample; } diff --git a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp index de74bb4a98..4e43a720ad 100644 --- a/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_3_pipeline.hpp @@ -264,7 +264,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { // Input tensors for transformer model ov::Tensor prompt_embeds_inp, pooled_prompt_embeds_inp; - MicroSeconds infer_duration; + float infer_duration; // 1. Encode positive prompt: std::string prompt_2_str = generation_config.prompt_2 != std::nullopt ? 
*generation_config.prompt_2 : positive_prompt; @@ -280,7 +280,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { negative_prompt_1_str, do_classifier_free_guidance(generation_config.guidance_scale), infer_duration); - m_perf_metrics.encoder_inference_duration["text_encode"] = infer_duration.count(); + m_perf_metrics.encoder_inference_duration["text_encode"] = infer_duration / 1000.0f; // text_encoder_1_hidden_state - stores positive and negative prompt_embeds size_t idx_hidden_state_1 = m_clip_text_encoder_1->get_config().num_hidden_layers + 1; @@ -292,7 +292,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { negative_prompt_2_str, do_classifier_free_guidance(generation_config.guidance_scale), infer_duration); - m_perf_metrics.encoder_inference_duration["text_encode_2"] = infer_duration.count(); + m_perf_metrics.encoder_inference_duration["text_encode_2"] = infer_duration / 1000.0f; // text_encoder_2_hidden_state - stores positive and negative prompt_2_embeds size_t idx_hidden_state_2 = m_clip_text_encoder_2->get_config().num_hidden_layers + 1; @@ -305,7 +305,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { do_classifier_free_guidance(generation_config.guidance_scale), generation_config.max_sequence_length, infer_duration); - m_perf_metrics.encoder_inference_duration["text_encode_3"] = infer_duration.count(); + m_perf_metrics.encoder_inference_duration["text_encode_3"] = infer_duration / 1000.0f; } else { ov::Shape t5_prompt_embed_shape = {generation_config.num_images_per_prompt, m_clip_text_encoder_1->get_config().max_position_embeddings, @@ -445,7 +445,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { ov::Tensor mask_image, const ov::AnyMap& properties) override { const auto gen_start = std::chrono::steady_clock::now(); - MicroSeconds infer_duration; + float infer_duration; m_perf_metrics.clean_up(); ImageGenerationConfig generation_config = m_generation_config; generation_config.update_generation_config(properties); @@ -498,7 +498,7 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { } ov::Tensor timestep(ov::element::f32, {1}, &timesteps[inference_step]); ov::Tensor noise_pred_tensor = m_transformer->infer(latent_cfg, timestep, infer_duration); - m_perf_metrics.raw_metrics.transformer_inference_durations.emplace_back(infer_duration); + m_perf_metrics.raw_metrics.transformer_inference_durations.emplace_back(MicroSeconds(infer_duration)); ov::Shape noise_pred_shape = noise_pred_tensor.get_shape(); noise_pred_shape[0] /= batch_size_multiplier; @@ -527,18 +527,21 @@ class StableDiffusion3Pipeline : public DiffusionPipeline { if (callback && callback(inference_step, timesteps.size(), latent)) { auto image = ov::Tensor(ov::element::u8, {}); - m_perf_metrics.generate_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start); + m_perf_metrics.generate_duration = + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start) + .count(); return image; } } auto image = decode(latent, infer_duration); - m_perf_metrics.vae_decoder_inference_duration = infer_duration.count(); - m_perf_metrics.generate_duration = ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start); + m_perf_metrics.vae_decoder_inference_duration = infer_duration / 1000.0f; + m_perf_metrics.generate_duration = + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start).count(); return image; } - ov::Tensor decode(const ov::Tensor latent, MicroSeconds& infer_duration) override {
+ ov::Tensor decode(const ov::Tensor latent, float& infer_duration) override { return m_vae->decode(latent, infer_duration); } diff --git a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp index 6520714486..1298196e28 100644 --- a/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_pipeline.hpp @@ -179,12 +179,12 @@ class StableDiffusionPipeline : public DiffusionPipeline { void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override { const auto& unet_config = m_unet->get_config(); const size_t batch_size_multiplier = m_unet->do_classifier_free_guidance(generation_config.guidance_scale) ? 2 : 1; // Unet accepts 2x batch in case of CFG - MicroSeconds infer_duration; + float infer_duration; std::string negative_prompt = generation_config.negative_prompt != std::nullopt ? *generation_config.negative_prompt : std::string{}; ov::Tensor encoder_hidden_states = m_clip_text_encoder->infer(positive_prompt, negative_prompt, batch_size_multiplier > 1, infer_duration); - m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration.count(); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration / 1000.0f; // replicate encoder hidden state to UNet model if (generation_config.num_images_per_prompt == 1) { @@ -310,7 +310,7 @@ class StableDiffusionPipeline : public DiffusionPipeline { ov::Tensor mask_image, const ov::AnyMap& properties) override { const auto gen_start = std::chrono::steady_clock::now(); - MicroSeconds infer_duration; + float infer_duration; using namespace numpy_utils; m_perf_metrics.clean_up(); ImageGenerationConfig generation_config = m_generation_config; @@ -374,7 +374,7 @@ class StableDiffusionPipeline : public DiffusionPipeline { ov::Tensor latent_model_input = is_inpainting_model() ? 
numpy_utils::concat(numpy_utils::concat(latent_cfg, mask, 1), masked_image_latent, 1) : latent_cfg; ov::Tensor timestep(ov::element::i64, {1}, &timesteps[inference_step]); ov::Tensor noise_pred_tensor = m_unet->infer(latent_model_input, timestep, infer_duration); - m_perf_metrics.raw_metrics.unet_inference_durations.emplace_back(infer_duration); + m_perf_metrics.raw_metrics.unet_inference_durations.emplace_back(MicroSeconds(infer_duration)); ov::Shape noise_pred_shape = noise_pred_tensor.get_shape(); noise_pred_shape[0] /= batch_size_multiplier; @@ -413,18 +413,19 @@ class StableDiffusionPipeline : public DiffusionPipeline { if (callback && callback(inference_step, timesteps.size(), denoised)) { auto image = ov::Tensor(ov::element::u8, {}); m_perf_metrics.generate_duration = - ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start); + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start) + .count(); return image; } } auto image = decode(denoised, infer_duration); - m_perf_metrics.vae_decoder_inference_duration = infer_duration.count(); + m_perf_metrics.vae_decoder_inference_duration = infer_duration / 1000.0f; m_perf_metrics.generate_duration = - ov::genai::PerfMetrics::get_microsec(std::chrono::steady_clock::now() - gen_start); + std::chrono::duration_cast(std::chrono::steady_clock::now() - gen_start).count(); return image; } - ov::Tensor decode(const ov::Tensor latent, MicroSeconds& infer_duration) override { + ov::Tensor decode(const ov::Tensor latent, float& infer_duration) override { return m_vae->decode(latent, infer_duration); } diff --git a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp index 1a2c229432..b806315cad 100644 --- a/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp +++ b/src/cpp/src/image_generation/stable_diffusion_xl_pipeline.hpp @@ -147,7 +147,7 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { void compute_hidden_states(const std::string& positive_prompt, const ImageGenerationConfig& generation_config) override { const auto& unet_config = m_unet->get_config(); const size_t batch_size_multiplier = m_unet->do_classifier_free_guidance(generation_config.guidance_scale) ?
2 : 1; // Unet accepts 2x batch in case of CFG - MicroSeconds infer_duration; + float infer_duration; std::vector time_ids = {static_cast(generation_config.width), static_cast(generation_config.height), @@ -182,9 +182,9 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { negative_prompt_1_str, batch_size_multiplier > 1, infer_duration); - m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration.count(); + m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration / 1000.0f; m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, batch_size_multiplier > 1, infer_duration); - m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration.count(); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration / 1000.0f; // prompt_embeds = prompt_embeds.hidden_states[-2] ov::Tensor encoder_hidden_states_1 = m_clip_text_encoder->get_output_tensor(idx_hidden_state_1); @@ -194,9 +194,9 @@ class StableDiffusionXLPipeline : public StableDiffusionPipeline { } else { ov::Tensor add_text_embeds_positive = m_clip_text_encoder_with_projection->infer(positive_prompt, negative_prompt_1_str, false, infer_duration); - m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration.count(); + m_perf_metrics.encoder_inference_duration["text_encoder_2"] = infer_duration / 1000.0f; m_clip_text_encoder->infer(prompt_2_str, negative_prompt_2_str, false, infer_duration); - m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration.count(); + m_perf_metrics.encoder_inference_duration["text_encoder"] = infer_duration / 1000.0f; ov::Tensor encoder_hidden_states_1_positive = m_clip_text_encoder->get_output_tensor(idx_hidden_state_1); ov::Tensor encoder_hidden_states_2_positive = m_clip_text_encoder_with_projection->get_output_tensor(idx_hidden_state_2); diff --git a/src/cpp/src/image_generation/text2image_pipeline.cpp b/src/cpp/src/image_generation/text2image_pipeline.cpp index f03336f6c8..c7ee90f804 100644 --- a/src/cpp/src/image_generation/text2image_pipeline.cpp +++ b/src/cpp/src/image_generation/text2image_pipeline.cpp @@ -190,7 +190,7 @@ ov::Tensor Text2ImagePipeline::generate(const std::string& positive_prompt, cons } ov::Tensor Text2ImagePipeline::decode(const ov::Tensor latent) { - MicroSeconds infer_duration; + float infer_duration; return m_impl->decode(latent, infer_duration); } diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index a0b0faf58c..dd98b25da1 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -75,6 +75,8 @@ Generator, CppStdGenerator, TorchGenerator, + ImageGenerationPerfMetrics, + RawImageGenerationPerfMetrics, ) # Continuous batching diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 187e0a0a06..a5aa727eba 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -42,7 +42,9 @@ from openvino_genai.py_openvino_genai import WhisperPerfMetrics from openvino_genai.py_openvino_genai import WhisperPipeline from openvino_genai.py_openvino_genai import WhisperRawPerfMetrics from openvino_genai.py_openvino_genai import draft_model +from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics +from openvino_genai.py_openvino_genai import ImageGenerationPerfMetrics import os as os from . 
import py_openvino_genai -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'openvino', 'os', 'py_openvino_genai'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'ImageGenerationPerfMetrics', 'RawImageGenerationPerfMetrics', 'draft_model', 'openvino', 'os', 'py_openvino_genai'] __version__: str = '2025.0.0.0' diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index e734f76460..1e1886d01f 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import openvino._pyopenvino import os import typing -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 
'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'ImageGenerationPerfMetrics', 'RawImageGenerationPerfMetrics'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -2203,3 +2203,91 @@ def draft_model(models_path: os.PathLike, device: str = '', **kwargs) -> openvin """ device on which inference will be performed """ +class RawImageGenerationPerfMetrics: + """ + + Structure with raw performance metrics for each generation before any statistics are calculated. + + :param unet_inference_durations: Inference time for each unet inference in microseconds. + :type unet_inference_durations: List[float] + + :param transformer_inference_durations: Inference time for each transformer inference in microseconds. + :type transformer_inference_durations: List[float] + + :param iteration_durations: Durations for each step iteration in microseconds. + :type iteration_durations: List[float] + """ + def __init__(self) -> None: + ... + @property + def unet_inference_durations(self) -> list[float]: + ... + @property + def transformer_inference_durations(self) -> list[float]: + ... + @property + def iteration_durations(self) -> list[float]: + ... +class ImageGenerationPerfMetrics: + """ + + Holds performance metrics for each image generate call. + + PerfMetrics holds fields with mean and standard deviations for the following metrics: + - one generation iteration, ms + - inference duration for unet model, ms + - inference duration for transformer model, ms + + Additional fields include: + - Load time, ms + - total duration of image generation, ms + - inference duration of vae_encoder model, ms + - inference duration of vae_decoder model, ms + - inference duration of each encoder model, ms + + Preferable way to access values is via get functions. Getters calculate mean and std values from raw_metrics and return pairs. + If mean and std were already calculated, getters return cached values. + + :param get_load_time: Returns the load time in milliseconds. + :type get_load_time: float + + :param get_unet_inference_duration: Returns the mean and standard deviation of unet inference in milliseconds. + :type get_unet_inference_duration: MeanStdPair + + :param get_transformer_inference_duration: Returns the mean and standard deviation of transformer inference in milliseconds. + :type get_transformer_inference_duration: MeanStdPair + + :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in milliseconds. + :type get_iteration_duration: MeanStdPair + + :param get_inference_total_duration: Returns all inference duration including encoder, decoder and transformer/unet inference. + :type get_inference_total_duration: float + + :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics. + :type raw_metrics: RawImageGenerationPerfMetrics + """ + def __init__(self) -> None: + ...
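For intuition, the MeanStdPair getters documented above reduce the raw per-step samples to a mean/std pair; a minimal sketch of that reduction, assuming (per the raw-metrics docstring) microsecond samples reported back in milliseconds:

import statistics

def mean_std_ms(durations_us: list[float]) -> tuple[float, float]:
    # raw samples are microseconds; the getters report milliseconds
    ms = [d / 1000.0 for d in durations_us]
    return statistics.mean(ms), statistics.pstdev(ms)

# e.g. mean_std_ms(metrics.raw_metrics.unet_inference_durations)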
+ @property + def load_time(self) -> float: + ... + @property + def generate_duration(self) -> float: + ... + @property + def vae_encoder_inference_duration(self) -> float: + ... + @property + def vae_decoder_inference_duration(self) -> float: + ... + @property + def encoder_inference_duration(self) -> dict[str, float]: + ... + def get_unet_inference_duration(self) -> MeanStdPair: + ... + def get_transformer_inference_duration(self) -> MeanStdPair: + ... + def get_iteration_duration(self) -> MeanStdPair: + ... + def get_inference_total_duration(self) -> float: + ... \ No newline at end of file diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp index c246557a97..3499712c3e 100644 --- a/src/python/py_image_generation_pipelines.cpp +++ b/src/python/py_image_generation_pipelines.cpp @@ -13,6 +13,7 @@ #include "openvino/genai/image_generation/text2image_pipeline.hpp" #include "openvino/genai/image_generation/image2image_pipeline.hpp" #include "openvino/genai/image_generation/inpainting_pipeline.hpp" +#include "openvino/genai/image_generation/image_generation_perf_metrics.hpp" #include "tokenizers_path.hpp" #include "py_utils.hpp" @@ -21,6 +22,8 @@ namespace py = pybind11; namespace pyutils = ov::genai::pybind::utils; using namespace pybind11::literals; +using ov::genai::ImageGenerationPerfMetrics; +using ov::genai::RawImageGenerationPerfMetrics; namespace { @@ -54,6 +57,59 @@ auto text2image_generate_docstring = R"( :rtype: ov.Tensor )"; +auto raw_image_generation_perf_metrics_docstring = R"( + Structure with raw performance metrics for each generation before any statistics are calculated. + + :param unet_inference_durations: Durations for each unet inference in microseconds. + :type unet_inference_durations: List[float] + + :param transformer_inference_durations: Durations for each transformer inference in microseconds. + :type transformer_inference_durations: List[float] + + :param iteration_durations: Durations for each step iteration in microseconds. + :type iteration_durations: List[float] +)"; + +auto image_generation_perf_metrics_docstring = R"( + Holds performance metrics for each generate call. + + PerfMetrics holds fields with mean and standard deviations for the following metrics: + - Generate iteration duration, ms + - Inference duration for unet model, ms + - Inference duration for transformer model, ms + + Additional fields include: + - Load time, ms + - Generate total duration, ms + - inference durations for each encoder, ms + - inference duration of vae_encoder model, ms + - inference duration of vae_decoder model, ms + + Preferable way to access values is via get functions. Getters calculate mean and std values from raw_metrics and return pairs. + If mean and std were already calculated, getters return cached values. + + :param get_load_time: Returns the load time in milliseconds. + :type get_load_time: float + + :param get_generate_duration: Returns the generate duration in milliseconds. + :type get_generate_duration: float + + :param get_inference_total_duration: Returns the total inference durations (including encoder, unet/transformer and decoder inference) in milliseconds. + :type get_inference_total_duration: float + + :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in milliseconds. + :type get_iteration_duration: MeanStdPair + + :param unet_inference_duration: Returns the mean and standard deviation of one unet inference in milliseconds. 
+ :type unet_inference_duration: MeanStdPair + + :param get_transformer_inference_duration: Returns the mean and standard deviation of one transformer inference in milliseconds. + :type get_transformer_inference_duration: MeanStdPair + + :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics. + :type raw_metrics: RawImageGenerationPerfMetrics +)"; + // Trampoline class to support inheritance from Generator in Python class PyGenerator : public ov::genai::Generator { public: @@ -229,6 +285,28 @@ void init_image_generation_pipelines(py::module_& m) { config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); }); + py::class_(m, "RawImageGenerationPerfMetrics", raw_image_generation_perf_metrics_docstring) + .def(py::init<>()) + .def_property_readonly("unet_inference_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::unet_inference_durations); + }) + .def_property_readonly("transformer_inference_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::transformer_inference_durations); + }) + .def_property_readonly("iteration_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::iteration_durations); + }); + + py::class_(m, "ImageGenerationPerfMetrics", image_generation_perf_metrics_docstring) + .def(py::init<>()) + .def("get_load_time", &ImageGenerationPerfMetrics::get_load_time) + .def("get_generate_duration", &ImageGenerationPerfMetrics::get_generate_duration) + .def("get_unet_inference_duration", &ImageGenerationPerfMetrics::get_unet_inference_duration) + .def("get_transformer_inference_duration", &ImageGenerationPerfMetrics::get_transformer_inference_duration) + .def("get_iteration_duration", &ImageGenerationPerfMetrics::get_iteration_duration) + .def("get_inference_total_duration", &ImageGenerationPerfMetrics::get_inference_total_duration) + .def_readonly("raw_metrics", &ImageGenerationPerfMetrics::raw_metrics); + auto text2image_pipeline = py::class_(m, "Text2ImagePipeline", "This class is used for generation with text-to-image models.") .def(py::init([](const std::filesystem::path& models_path) { ScopedVar env_manager(pyutils::ov_tokenizers_module_path()); @@ -294,7 +372,8 @@ void init_image_generation_pipelines(py::module_& m) { }, py::arg("prompt"), "Input string", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::Text2ImagePipeline::get_perfomance_metrics); auto image2image_pipeline = py::class_(m, "Image2ImagePipeline", "This class is used for generation with image-to-image models.") @@ -357,7 +436,8 @@ void init_image_generation_pipelines(py::module_& m) { py::arg("prompt"), "Input string", py::arg("image"), "Initial image", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::Image2ImagePipeline::get_perfomance_metrics); auto inpainting_pipeline = py::class_(m, "InpaintingPipeline", "This class is used for generation with inpainting models.") @@ -422,7 +502,8 @@ void init_image_generation_pipelines(py::module_& m) { 
py::arg("image"), "Initial image", py::arg("mask_image"), "Mask image", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::InpaintingPipeline::get_perfomance_metrics); // define constructors to create one pipeline from another // NOTE: needs to be defined once all pipelines are created From 1fccaad9ce37aa2365956d52a776c99dfe7ddddb Mon Sep 17 00:00:00 2001 From: xufang Date: Thu, 9 Jan 2025 10:11:24 +0800 Subject: [PATCH 08/19] fix error --- src/python/openvino_genai/py_openvino_genai.pyi | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 1e1886d01f..cc706895c2 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -2263,6 +2263,9 @@ class ImageGenerationPerfMetrics: :param get_inference_total_duration: Returns all inference duration including encoder, decoder and transformer/unet inference. :type get_inference_total_duration: float + :param get_generate_duration: Returns generate duration in millionseconds. + :type get_generate_duration: float + :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics. :type raw_metrics: RawImageGenerationPerfMetrics """ @@ -2290,4 +2293,8 @@ class ImageGenerationPerfMetrics: def get_iteration_duration(self) -> MeanStdPair: ... def get_inference_total_duration(self) -> float: + ... + def get_load_time(self) -> float: + ... + def get_generate_duration(self) -> float: ... \ No newline at end of file From a09aad0ca8f92256d5f3df2aea607f586177ac4e Mon Sep 17 00:00:00 2001 From: xufang Date: Thu, 9 Jan 2025 15:06:24 +0800 Subject: [PATCH 09/19] restore sample --- .../image_generation/heterogeneous_stable_diffusion.cpp | 4 ---- samples/cpp/image_generation/image2image.cpp | 4 ---- samples/cpp/image_generation/inpainting.cpp | 5 ----- samples/cpp/image_generation/lora_text2image.cpp | 8 -------- samples/cpp/image_generation/text2image.cpp | 5 ----- 5 files changed, 26 deletions(-) diff --git a/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp b/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp index 9a29235fa0..8203c37345 100644 --- a/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp +++ b/samples/cpp/image_generation/heterogeneous_stable_diffusion.cpp @@ -105,10 +105,6 @@ int32_t main(int32_t argc, char* argv[]) try { ov::genai::num_inference_steps(number_of_inference_steps_per_image)); imwrite("image_" + std::to_string(imagei) + ".bmp", image, true); - auto perf_metrics = pipe.get_perfomance_metrics(); - std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl; - std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl; - std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl; } return EXIT_SUCCESS; diff --git a/samples/cpp/image_generation/image2image.cpp b/samples/cpp/image_generation/image2image.cpp index bab0954361..c071b88362 100644 --- a/samples/cpp/image_generation/image2image.cpp +++ b/samples/cpp/image_generation/image2image.cpp @@ -21,10 +21,6 @@ int32_t main(int32_t argc, char* argv[]) try { // writes `num_images_per_prompt` images by pattern name 
imwrite("image_%d.bmp", generated_image, true); - auto perf_metrics = pipe.get_perfomance_metrics(); - std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl; - std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl; - std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl; return EXIT_SUCCESS; } catch (const std::exception& error) { diff --git a/samples/cpp/image_generation/inpainting.cpp b/samples/cpp/image_generation/inpainting.cpp index 42d4ccc0d4..4c7a758450 100644 --- a/samples/cpp/image_generation/inpainting.cpp +++ b/samples/cpp/image_generation/inpainting.cpp @@ -21,11 +21,6 @@ int32_t main(int32_t argc, char* argv[]) try { // writes `num_images_per_prompt` images by pattern name imwrite("image_%d.bmp", generated_image, true); - auto perf_metrics = pipe.get_perfomance_metrics(); - std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl; - std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl; - std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl; - return EXIT_SUCCESS; } catch (const std::exception& error) { try { diff --git a/samples/cpp/image_generation/lora_text2image.cpp b/samples/cpp/image_generation/lora_text2image.cpp index d2f60af613..c1e6461db9 100644 --- a/samples/cpp/image_generation/lora_text2image.cpp +++ b/samples/cpp/image_generation/lora_text2image.cpp @@ -29,10 +29,6 @@ int32_t main(int32_t argc, char* argv[]) try { ov::genai::num_inference_steps(20), ov::genai::rng_seed(42)); imwrite("lora.bmp", image, true); - auto perf_metrics = pipe.get_perfomance_metrics(); - std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl; - std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl; - std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl; std::cout << "Generating image without LoRA adapters applied, resulting image will be in baseline.bmp\n"; image = pipe.generate(prompt, @@ -42,10 +38,6 @@ int32_t main(int32_t argc, char* argv[]) try { ov::genai::num_inference_steps(20), ov::genai::rng_seed(42)); imwrite("baseline.bmp", image, true); - perf_metrics = pipe.get_perfomance_metrics(); - std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl; - std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl; - std::cout << "pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl; return EXIT_SUCCESS; } catch (const std::exception& error) { diff --git a/samples/cpp/image_generation/text2image.cpp b/samples/cpp/image_generation/text2image.cpp index d6870ea7fe..6a97b3a074 100644 --- a/samples/cpp/image_generation/text2image.cpp +++ b/samples/cpp/image_generation/text2image.cpp @@ -21,11 +21,6 @@ int32_t main(int32_t argc, char* argv[]) try { // writes `num_images_per_prompt` images by pattern name imwrite("image_%d.bmp", image, true); - auto perf_metrics = pipe.get_perfomance_metrics(); - std::cout << "pipeline generate duration ms:" << perf_metrics.generate_duration / 1000.0f << std::endl; - std::cout << "pipeline inference duration ms:" << perf_metrics.get_inference_total_duration() << std::endl; - std::cout << 
"pipeline iteration:" << perf_metrics.raw_metrics.iteration_durations.size() << std::endl; - return EXIT_SUCCESS; } catch (const std::exception& error) { try { From 5fa1f251f34356443c4cda36efed18bd7ebc680e Mon Sep 17 00:00:00 2001 From: xufang Date: Thu, 9 Jan 2025 17:08:13 +0800 Subject: [PATCH 10/19] fix error --- src/python/openvino_genai/py_openvino_genai.pyi | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 8f16b6f9fa..4ecb9e7abe 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,11 +5,7 @@ from __future__ import annotations import openvino._pyopenvino import os import typing -<<<<<<< HEAD -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'ImageGenerationPerfMetrics', 'RawImageGenerationPerfMetrics'] -======= -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] ->>>>>>> master +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 
'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'ImageGenerationPerfMetrics', 'RawImageGenerationPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. From 8bb7e2ae8751067df6524661d9b582049dc1d35b Mon Sep 17 00:00:00 2001 From: xufang Date: Thu, 9 Jan 2025 18:40:32 +0800 Subject: [PATCH 11/19] remove python api --- src/python/openvino_genai/__init__.py | 2 - src/python/openvino_genai/__init__.pyi | 4 +- .../openvino_genai/py_openvino_genai.pyi | 97 +------------------ src/python/py_image_generation_pipelines.cpp | 31 +----- 4 files changed, 5 insertions(+), 129 deletions(-) diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index 7620fd22d4..0ad7ba3f12 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -76,8 +76,6 @@ Generator, CppStdGenerator, TorchGenerator, - ImageGenerationPerfMetrics, - RawImageGenerationPerfMetrics, ) # Continuous batching diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index d9211d1e72..0a401ae958 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -41,11 +41,9 @@ from openvino_genai.py_openvino_genai import WhisperGenerationConfig from openvino_genai.py_openvino_genai import WhisperPerfMetrics from openvino_genai.py_openvino_genai import WhisperPipeline from openvino_genai.py_openvino_genai import WhisperRawPerfMetrics -from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics -from openvino_genai.py_openvino_genai import ImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'ImageGenerationPerfMetrics', 'RawImageGenerationPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 4ecb9e7abe..a8063ca4ab 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import openvino._pyopenvino import os import typing -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'ImageGenerationPerfMetrics', 'RawImageGenerationPerfMetrics', 'draft_model', 'get_version'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 
'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -2200,101 +2200,6 @@ class WhisperRawPerfMetrics: @property def features_extraction_durations(self) -> list[float]: ... -class RawImageGenerationPerfMetrics: - """ - - Structure with raw performance metrics for each generation before any statistics are calculated. - - :param unet_inference_durations: Inference time for each unet inference in microseconds. - :type unet_inference_durations: List[float] - - :param transformer_inference_durations: Inference time for each transformer inference in microseconds. - :type transformer_inference_durations: List[float] - - :param iteration_durations: durations for each step iteration in microseconds. - :type iteration_durations: List[float] - """ - def __init__(self) -> None: - ... - @property - def unet_inference_durations(self) -> list[float]: - ... - @property - def transformer_inference_durations(self) -> list[float]: - ... - @property - def iteration_durations(self) -> list[float]: - ... -class ImageGenerationPerfMetrics: - """ - - Holds performance metrics for each image generate call. - - PerfMetrics holds fields with mean and standard deviations for the following metrics: - - one generation iteration, ms - - inference duration for unet model, ms - - inference duration for transformer model, ms - - Additional fields include: - - Load time, ms - - total duration of image generation, ms - - inference duration of vae_encoder model, ms - - inference duration of vae_decoder model, ms - - inference duration of each encoder model, ms - - Preferable way to access values is via get functions. Getters calculate mean and std values from raw_metrics and return pairs. - If mean and std were already calculated, getters return cached values. - - :param get_load_time: Returns the load time in milliseconds. - :type get_load_time: float - - :param get_unet_inference_duration: Returns the mean and standard deviation of unet inference in millionseconds. - :type get_unet_inference_duration: MeanStdPair - - :param get_transformer_inference_duration: Returns the mean and standard deviation of transformer inference in millionseconds. - :type get_transformer_inference_duration: MeanStdPair - - :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in millionseconds. - :type get_iteration_duration: MeanStdPair - - :param get_inference_total_duration: Returns all inference duration including encoder, decoder and transformer/unet inference. - :type get_inference_total_duration: float - - :param get_generate_duration: Returns generate duration in millionseconds. 
- :type get_generate_duration: float - - :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics. - :type raw_metrics: RawImageGenerationPerfMetrics - """ - def __init__(self) -> None: - ... - @property - def load_time(self) -> float: - ... - @property - def generate_duration(self) -> float: - ... - @property - def vae_encoder_inference_duration(self) -> float: - ... - @property - def vae_decoder_inference_duration(self) -> float: - ... - @property - def encoder_inference_duration(self) -> dict[str, float]: - ... - def get_unet_inference_duration(self) -> MeanStdPair: - ... - def get_transformer_inference_duration(self) -> MeanStdPair: - ... - def get_iteration_duration(self) -> MeanStdPair: - ... - def get_inference_total_duration(self) -> float: - ... - def get_load_time(self) -> float: - ... - def get_generate_duration(self) -> float: - ... def draft_model(models_path: os.PathLike, device: str = '', **kwargs) -> openvino._pyopenvino.OVAny: """ device on which inference will be performed diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp index 3499712c3e..cc156cb209 100644 --- a/src/python/py_image_generation_pipelines.cpp +++ b/src/python/py_image_generation_pipelines.cpp @@ -285,28 +285,6 @@ void init_image_generation_pipelines(py::module_& m) { config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); }); - py::class_(m, "RawImageGenerationPerfMetrics", raw_image_generation_perf_metrics_docstring) - .def(py::init<>()) - .def_property_readonly("unet_inference_durations", [](const RawImageGenerationPerfMetrics &rw) { - return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::unet_inference_durations); - }) - .def_property_readonly("transformer_inference_durations", [](const RawImageGenerationPerfMetrics &rw) { - return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::transformer_inference_durations); - }) - .def_property_readonly("iteration_durations", [](const RawImageGenerationPerfMetrics &rw) { - return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::iteration_durations); - }); - - py::class_(m, "ImageGenerationPerfMetrics", image_generation_perf_metrics_docstring) - .def(py::init<>()) - .def("get_load_time", &ImageGenerationPerfMetrics::get_load_time) - .def("get_generate_duration", &ImageGenerationPerfMetrics::get_generate_duration) - .def("get_unet_inference_duration", &ImageGenerationPerfMetrics::get_unet_inference_duration) - .def("get_transformer_inference_duration", &ImageGenerationPerfMetrics::get_transformer_inference_duration) - .def("get_iteration_duration", &ImageGenerationPerfMetrics::get_iteration_duration) - .def("get_inference_total_duration", &ImageGenerationPerfMetrics::get_inference_total_duration) - .def_readonly("raw_metrics", &ImageGenerationPerfMetrics::raw_metrics); - auto text2image_pipeline = py::class_(m, "Text2ImagePipeline", "This class is used for generation with text-to-image models.") .def(py::init([](const std::filesystem::path& models_path) { ScopedVar env_manager(pyutils::ov_tokenizers_module_path()); @@ -372,8 +350,7 @@ void init_image_generation_pipelines(py::module_& m) { }, py::arg("prompt"), "Input string", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent")) - .def("get_perfomance_metrics", &ov::genai::Text2ImagePipeline::get_perfomance_metrics); + .def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent")); auto image2image_pipeline = 
py::class_(m, "Image2ImagePipeline", "This class is used for generation with image-to-image models.") @@ -436,8 +413,7 @@ void init_image_generation_pipelines(py::module_& m) { py::arg("prompt"), "Input string", py::arg("image"), "Initial image", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent")) - .def("get_perfomance_metrics", &ov::genai::Image2ImagePipeline::get_perfomance_metrics); + .def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent")); auto inpainting_pipeline = py::class_(m, "InpaintingPipeline", "This class is used for generation with inpainting models.") @@ -502,8 +478,7 @@ void init_image_generation_pipelines(py::module_& m) { py::arg("image"), "Initial image", py::arg("mask_image"), "Mask image", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent")) - .def("get_perfomance_metrics", &ov::genai::InpaintingPipeline::get_perfomance_metrics); + .def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent")); // define constructors to create one pipeline from another // NOTE: needs to be defined once all pipelines are created From 2e8aad690b528aa336504ca4fc6230a8681dd993 Mon Sep 17 00:00:00 2001 From: xufang Date: Fri, 10 Jan 2025 13:47:25 +0800 Subject: [PATCH 12/19] add python api --- src/python/openvino_genai/__init__.py | 2 + src/python/openvino_genai/__init__.pyi | 4 +- .../openvino_genai/py_openvino_genai.pyi | 97 ++++++++++++++++++- src/python/py_image_generation_pipelines.cpp | 31 +++++- 4 files changed, 129 insertions(+), 5 deletions(-) diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index 0ad7ba3f12..7620fd22d4 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -76,6 +76,8 @@ Generator, CppStdGenerator, TorchGenerator, + ImageGenerationPerfMetrics, + RawImageGenerationPerfMetrics, ) # Continuous batching diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 0a401ae958..0fa661e96f 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -21,9 +21,11 @@ from openvino_genai.py_openvino_genai import GenerationResult from openvino_genai.py_openvino_genai import Generator from openvino_genai.py_openvino_genai import Image2ImagePipeline from openvino_genai.py_openvino_genai import ImageGenerationConfig +from openvino_genai.py_openvino_genai import ImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import InpaintingPipeline from openvino_genai.py_openvino_genai import LLMPipeline from openvino_genai.py_openvino_genai import PerfMetrics +from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import RawPerfMetrics from openvino_genai.py_openvino_genai import SD3Transformer2DModel from openvino_genai.py_openvino_genai import Scheduler @@ -45,5 +47,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'ImageGenerationPerfMetrics', 'RawImageGenerationPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index a8063ca4ab..bb34f19bb6 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import openvino._pyopenvino import os import typing -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 
'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -2200,6 +2200,101 @@ class WhisperRawPerfMetrics: @property def features_extraction_durations(self) -> list[float]: ... +class RawImageGenerationPerfMetrics: + """ + + Structure with raw performance metrics for each generation before any statistics are calculated. + + :param unet_inference_durations: Inference time for each unet inference in microseconds. + :type unet_inference_durations: List[float] + + :param transformer_inference_durations: Inference time for each transformer inference in microseconds. + :type transformer_inference_durations: List[float] + + :param iteration_durations: durations for each step iteration in microseconds. + :type iteration_durations: List[float] + """ + def __init__(self) -> None: + ... + @property + def unet_inference_durations(self) -> list[float]: + ... + @property + def transformer_inference_durations(self) -> list[float]: + ... + @property + def iteration_durations(self) -> list[float]: + ... +class ImageGenerationPerfMetrics: + """ + + Holds performance metrics for each image generate call. + + PerfMetrics holds fields with mean and standard deviations for the following metrics: + - one generation iteration, ms + - inference duration for unet model, ms + - inference duration for transformer model, ms + + Additional fields include: + - Load time, ms + - total duration of image generation, ms + - inference duration of vae_encoder model, ms + - inference duration of vae_decoder model, ms + - inference duration of each encoder model, ms + + Preferable way to access values is via get functions. Getters calculate mean and std values from raw_metrics and return pairs. + If mean and std were already calculated, getters return cached values. + + :param get_load_time: Returns the load time in milliseconds. + :type get_load_time: float + + :param get_unet_inference_duration: Returns the mean and standard deviation of unet inference in millionseconds. + :type get_unet_inference_duration: MeanStdPair + + :param get_transformer_inference_duration: Returns the mean and standard deviation of transformer inference in millionseconds. + :type get_transformer_inference_duration: MeanStdPair + + :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in millionseconds. + :type get_iteration_duration: MeanStdPair + + :param get_inference_total_duration: Returns all inference duration including encoder, decoder and transformer/unet inference. + :type get_inference_total_duration: float + + :param get_generate_duration: Returns generate duration in millionseconds. 
+ :type get_generate_duration: float + + :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics. + :type raw_metrics: RawImageGenerationPerfMetrics + """ + def __init__(self) -> None: + ... + @property + def load_time(self) -> float: + ... + @property + def generate_duration(self) -> float: + ... + @property + def vae_encoder_inference_duration(self) -> float: + ... + @property + def vae_decoder_inference_duration(self) -> float: + ... + @property + def encoder_inference_duration(self) -> dict[str, float]: + ... + def get_unet_inference_duration(self) -> MeanStdPair: + ... + def get_transformer_inference_duration(self) -> MeanStdPair: + ... + def get_iteration_duration(self) -> MeanStdPair: + ... + def get_inference_total_duration(self) -> float: + ... + def get_load_time(self) -> float: + ... + def get_generate_duration(self) -> float: + ... def draft_model(models_path: os.PathLike, device: str = '', **kwargs) -> openvino._pyopenvino.OVAny: """ device on which inference will be performed diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp index cc156cb209..3499712c3e 100644 --- a/src/python/py_image_generation_pipelines.cpp +++ b/src/python/py_image_generation_pipelines.cpp @@ -285,6 +285,28 @@ void init_image_generation_pipelines(py::module_& m) { config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); }); + py::class_(m, "RawImageGenerationPerfMetrics", raw_image_generation_perf_metrics_docstring) + .def(py::init<>()) + .def_property_readonly("unet_inference_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::unet_inference_durations); + }) + .def_property_readonly("transformer_inference_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::transformer_inference_durations); + }) + .def_property_readonly("iteration_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::iteration_durations); + }); + + py::class_(m, "ImageGenerationPerfMetrics", image_generation_perf_metrics_docstring) + .def(py::init<>()) + .def("get_load_time", &ImageGenerationPerfMetrics::get_load_time) + .def("get_generate_duration", &ImageGenerationPerfMetrics::get_generate_duration) + .def("get_unet_inference_duration", &ImageGenerationPerfMetrics::get_unet_inference_duration) + .def("get_transformer_inference_duration", &ImageGenerationPerfMetrics::get_transformer_inference_duration) + .def("get_iteration_duration", &ImageGenerationPerfMetrics::get_iteration_duration) + .def("get_inference_total_duration", &ImageGenerationPerfMetrics::get_inference_total_duration) + .def_readonly("raw_metrics", &ImageGenerationPerfMetrics::raw_metrics); + auto text2image_pipeline = py::class_(m, "Text2ImagePipeline", "This class is used for generation with text-to-image models.") .def(py::init([](const std::filesystem::path& models_path) { ScopedVar env_manager(pyutils::ov_tokenizers_module_path()); @@ -350,7 +372,8 @@ void init_image_generation_pipelines(py::module_& m) { }, py::arg("prompt"), "Input string", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::Text2ImagePipeline::get_perfomance_metrics); auto image2image_pipeline = 
py::class_(m, "Image2ImagePipeline", "This class is used for generation with image-to-image models.") @@ -413,7 +436,8 @@ void init_image_generation_pipelines(py::module_& m) { py::arg("prompt"), "Input string", py::arg("image"), "Initial image", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::Image2ImagePipeline::get_perfomance_metrics); auto inpainting_pipeline = py::class_(m, "InpaintingPipeline", "This class is used for generation with inpainting models.") @@ -478,7 +502,8 @@ void init_image_generation_pipelines(py::module_& m) { py::arg("image"), "Initial image", py::arg("mask_image"), "Mask image", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::InpaintingPipeline::get_perfomance_metrics); // define constructors to create one pipeline from another // NOTE: needs to be defined once all pipelines are created From 7ee74b12041e2a124e1808bc4cc0848fa0497224 Mon Sep 17 00:00:00 2001 From: xufang Date: Fri, 10 Jan 2025 16:05:37 +0800 Subject: [PATCH 13/19] add debug info --- src/python/compare_pyi.cmake | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/python/compare_pyi.cmake b/src/python/compare_pyi.cmake index 62234d60d4..4bcfc4114d 100644 --- a/src/python/compare_pyi.cmake +++ b/src/python/compare_pyi.cmake @@ -21,7 +21,10 @@ foreach(pyi_file IN LISTS pyi_files) ERROR_VARIABLE error_message RESULT_VARIABLE exit_code OUTPUT_STRIP_TRAILING_WHITESPACE) + message(STATUS "compare output_message is ${output_message}") + message(STATUS "compare error_message is ${error_message}") if(NOT exit_code EQUAL 0) + message(STATUS "commited_pyi_file is ${commited_pyi_file}, pyi_file is ${pyi_file}") message(FATAL_ERROR "File ${commited_pyi_file} is outdated and need to be regenerated with pybind11-stubgen") endif() endif() From 1ec94d14e9ebac0685a996d2157396617dd29ba1 Mon Sep 17 00:00:00 2001 From: xufang Date: Fri, 10 Jan 2025 17:07:12 +0800 Subject: [PATCH 14/19] add more debug info --- src/python/compare_pyi.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/python/compare_pyi.cmake b/src/python/compare_pyi.cmake index 4bcfc4114d..d9bb01efdc 100644 --- a/src/python/compare_pyi.cmake +++ b/src/python/compare_pyi.cmake @@ -25,6 +25,10 @@ foreach(pyi_file IN LISTS pyi_files) message(STATUS "compare error_message is ${error_message}") if(NOT exit_code EQUAL 0) message(STATUS "commited_pyi_file is ${commited_pyi_file}, pyi_file is ${pyi_file}") + file(READ "${pyi_file}" file_content) + message(STATUS "Content of ${pyi_file} is: \n${file_content}") + file(READ "${commited_pyi_file}" file_content_commit) + message(STATUS "Content of ${commited_pyi_file} is: \n${file_content_commit}") message(FATAL_ERROR "File ${commited_pyi_file} is outdated and need to be regenerated with pybind11-stubgen") endif() endif() From bec1327974f2c8272380659c3d04b53e134ed2fc Mon Sep 17 00:00:00 2001 From: xufang Date: Fri, 10 Jan 2025 17:43:28 +0800 Subject: [PATCH 15/19] fix error --- src/python/openvino_genai/__init__.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 
0fa661e96f..5e6f6da0ed 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -47,5 +47,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . import py_openvino_genai -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'ImageGenerationPerfMetrics', 'RawImageGenerationPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str From ffef587885ce510f4b4684a1d6ca0aeea1ee1372 Mon Sep 17 00:00:00 2001 From: xufang Date: Fri, 10 Jan 2025 18:07:47 +0800 Subject: [PATCH 16/19] fix error --- .../openvino_genai/py_openvino_genai.pyi | 196 +++++++++--------- 1 file changed, 101 insertions(+), 95 deletions(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index bb34f19bb6..9eb0005ecc 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -829,6 +829,8 @@ class Image2ImagePipeline: """ def get_generation_config(self) -> ImageGenerationConfig: ... + def get_perfomance_metrics(self) -> ImageGenerationPerfMetrics: + ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... def set_generation_config(self, config: ImageGenerationConfig) -> None: @@ -860,6 +862,76 @@ class ImageGenerationConfig: ... def validate(self) -> None: ... +class ImageGenerationPerfMetrics: + """ + + Holds performance metrics for each image generate call. 
+ + PerfMetrics holds fields with mean and standard deviations for the following metrics: + - one generation iteration, ms + - inference duration for unet model, ms + - inference duration for transformer model, ms + + Additional fields include: + - Load time, ms + - total duration of image generation, ms + - inference duration of vae_encoder model, ms + - inference duration of vae_decoder model, ms + - inference duration of each encoder model, ms + + Preferable way to access values is via get functions. Getters calculate mean and std values from raw_metrics and return pairs. + If mean and std were already calculated, getters return cached values. + + :param get_load_time: Returns the load time in milliseconds. + :type get_load_time: float + + :param get_unet_inference_duration: Returns the mean and standard deviation of unet inference in millionseconds. + :type get_unet_inference_duration: MeanStdPair + + :param get_transformer_inference_duration: Returns the mean and standard deviation of transformer inference in millionseconds. + :type get_transformer_inference_duration: MeanStdPair + + :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in millionseconds. + :type get_iteration_duration: MeanStdPair + + :param get_inference_total_duration: Returns all inference duration including encoder, decoder and transformer/unet inference. + :type get_inference_total_duration: float + + :param get_generate_duration: Returns generate duration in millionseconds. + :type get_generate_duration: float + + :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics. + :type raw_metrics: RawImageGenerationPerfMetrics + """ + def __init__(self) -> None: + ... + @property + def load_time(self) -> float: + ... + @property + def generate_duration(self) -> float: + ... + @property + def vae_encoder_inference_duration(self) -> float: + ... + @property + def vae_decoder_inference_duration(self) -> float: + ... + @property + def encoder_inference_duration(self) -> dict[str, float]: + ... + def get_unet_inference_duration(self) -> MeanStdPair: + ... + def get_transformer_inference_duration(self) -> MeanStdPair: + ... + def get_iteration_duration(self) -> MeanStdPair: + ... + def get_inference_total_duration(self) -> float: + ... + def get_load_time(self) -> float: + ... + def get_generate_duration(self) -> float: + ... class InpaintingPipeline: """ This class is used for generation with inpainting models. @@ -930,6 +1002,8 @@ class InpaintingPipeline: """ def get_generation_config(self) -> ImageGenerationConfig: ... + def get_perfomance_metrics(self) -> ImageGenerationPerfMetrics: + ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... def set_generation_config(self, config: ImageGenerationConfig) -> None: @@ -1229,6 +1303,31 @@ class PipelineMetrics: @property def scheduled_requests(self) -> int: ... +class RawImageGenerationPerfMetrics: + """ + + Structure with raw performance metrics for each generation before any statistics are calculated. + + :param unet_inference_durations: Inference time for each unet inference in microseconds. + :type unet_inference_durations: List[float] + + :param transformer_inference_durations: Inference time for each transformer inference in microseconds. + :type transformer_inference_durations: List[float] + + :param iteration_durations: durations for each step iteration in microseconds. 
+ :type iteration_durations: List[float] + """ + def __init__(self) -> None: + ... + @property + def unet_inference_durations(self) -> list[float]: + ... + @property + def transformer_inference_durations(self) -> list[float]: + ... + @property + def iteration_durations(self) -> list[float]: + ... class RawPerfMetrics: """ @@ -1618,6 +1717,8 @@ class Text2ImagePipeline: """ def get_generation_config(self) -> ImageGenerationConfig: ... + def get_perfomance_metrics(self) -> ImageGenerationPerfMetrics: + ... def reshape(self, num_images_per_prompt: int, height: int, width: int, guidance_scale: float) -> None: ... def set_generation_config(self, config: ImageGenerationConfig) -> None: @@ -2200,101 +2301,6 @@ class WhisperRawPerfMetrics: @property def features_extraction_durations(self) -> list[float]: ... -class RawImageGenerationPerfMetrics: - """ - - Structure with raw performance metrics for each generation before any statistics are calculated. - - :param unet_inference_durations: Inference time for each unet inference in microseconds. - :type unet_inference_durations: List[float] - - :param transformer_inference_durations: Inference time for each transformer inference in microseconds. - :type transformer_inference_durations: List[float] - - :param iteration_durations: durations for each step iteration in microseconds. - :type iteration_durations: List[float] - """ - def __init__(self) -> None: - ... - @property - def unet_inference_durations(self) -> list[float]: - ... - @property - def transformer_inference_durations(self) -> list[float]: - ... - @property - def iteration_durations(self) -> list[float]: - ... -class ImageGenerationPerfMetrics: - """ - - Holds performance metrics for each image generate call. - - PerfMetrics holds fields with mean and standard deviations for the following metrics: - - one generation iteration, ms - - inference duration for unet model, ms - - inference duration for transformer model, ms - - Additional fields include: - - Load time, ms - - total duration of image generation, ms - - inference duration of vae_encoder model, ms - - inference duration of vae_decoder model, ms - - inference duration of each encoder model, ms - - Preferable way to access values is via get functions. Getters calculate mean and std values from raw_metrics and return pairs. - If mean and std were already calculated, getters return cached values. - - :param get_load_time: Returns the load time in milliseconds. - :type get_load_time: float - - :param get_unet_inference_duration: Returns the mean and standard deviation of unet inference in millionseconds. - :type get_unet_inference_duration: MeanStdPair - - :param get_transformer_inference_duration: Returns the mean and standard deviation of transformer inference in millionseconds. - :type get_transformer_inference_duration: MeanStdPair - - :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in millionseconds. - :type get_iteration_duration: MeanStdPair - - :param get_inference_total_duration: Returns all inference duration including encoder, decoder and transformer/unet inference. - :type get_inference_total_duration: float - - :param get_generate_duration: Returns generate duration in millionseconds. - :type get_generate_duration: float - - :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics. - :type raw_metrics: RawImageGenerationPerfMetrics - """ - def __init__(self) -> None: - ... - @property - def load_time(self) -> float: - ... 
- @property - def generate_duration(self) -> float: - ... - @property - def vae_encoder_inference_duration(self) -> float: - ... - @property - def vae_decoder_inference_duration(self) -> float: - ... - @property - def encoder_inference_duration(self) -> dict[str, float]: - ... - def get_unet_inference_duration(self) -> MeanStdPair: - ... - def get_transformer_inference_duration(self) -> MeanStdPair: - ... - def get_iteration_duration(self) -> MeanStdPair: - ... - def get_inference_total_duration(self) -> float: - ... - def get_load_time(self) -> float: - ... - def get_generate_duration(self) -> float: - ... def draft_model(models_path: os.PathLike, device: str = '', **kwargs) -> openvino._pyopenvino.OVAny: """ device on which inference will be performed From 28590fee069f46a0782b1aeabceffe0b0597595b Mon Sep 17 00:00:00 2001 From: xufang Date: Fri, 10 Jan 2025 18:44:17 +0800 Subject: [PATCH 17/19] fix error --- .../openvino_genai/py_openvino_genai.pyi | 66 ++++++++----------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 9eb0005ecc..fe28a91306 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -865,19 +865,19 @@ class ImageGenerationConfig: class ImageGenerationPerfMetrics: """ - Holds performance metrics for each image generate call. + Holds performance metrics for each generate call. PerfMetrics holds fields with mean and standard deviations for the following metrics: - - one generation iteration, ms - - inference duration for unet model, ms - - inference duration for transformer model, ms + - Generate iteration duration, ms + - Inference duration for unet model, ms + - Inference duration for transformer model, ms Additional fields include: - Load time, ms - - total duration of image generation, ms + - Generate total duration, ms + - inference durations for each encoder, ms - inference duration of vae_encoder model, ms - inference duration of vae_decoder model, ms - - inference duration of each encoder model, ms Preferable way to access values is via get functions. Getters calculate mean and std values from raw_metrics and return pairs. If mean and std were already calculated, getters return cached values. @@ -885,52 +885,40 @@ class ImageGenerationPerfMetrics: :param get_load_time: Returns the load time in milliseconds. :type get_load_time: float - :param get_unet_inference_duration: Returns the mean and standard deviation of unet inference in millionseconds. - :type get_unet_inference_duration: MeanStdPair + :param get_generate_duration: Returns the generate duration in milliseconds. + :type get_generate_duration: float - :param get_transformer_inference_duration: Returns the mean and standard deviation of transformer inference in millionseconds. - :type get_transformer_inference_duration: MeanStdPair + :param get_inference_total_duration: Returns the total inference durations (including encoder, unet/transformer and decoder inference) in milliseconds. + :type get_inference_total_duration: float - :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in millionseconds. + :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in milliseconds. :type get_iteration_duration: MeanStdPair - :param get_inference_total_duration: Returns all inference duration including encoder, decoder and transformer/unet inference. 
-    :type get_inference_total_duration: float
+    :param get_unet_inference_duration: Returns the mean and standard deviation of one unet inference in milliseconds.
+    :type get_unet_inference_duration: MeanStdPair
 
-    :param get_generate_duration: Returns generate duration in millionseconds.
-    :type get_generate_duration: float
+    :param get_transformer_inference_duration: Returns the mean and standard deviation of one transformer inference in milliseconds.
+    :type get_transformer_inference_duration: MeanStdPair
 
     :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics.
     :type raw_metrics: RawImageGenerationPerfMetrics
     """
     def __init__(self) -> None:
         ...
-    @property
-    def load_time(self) -> float:
-        ...
-    @property
-    def generate_duration(self) -> float:
-        ...
-    @property
-    def vae_encoder_inference_duration(self) -> float:
+    def get_generate_duration(self) -> float:
         ...
-    @property
-    def vae_decoder_inference_duration(self) -> float:
+    def get_inference_total_duration(self) -> float:
         ...
-    @property
-    def encoder_inference_duration(self) -> dict[str, float]:
+    def get_iteration_duration(self) -> MeanStdPair:
         ...
-    def get_unet_inference_duration(self) -> MeanStdPair:
+    def get_load_time(self) -> float:
         ...
     def get_transformer_inference_duration(self) -> MeanStdPair:
         ...
-    def get_iteration_duration(self) -> MeanStdPair:
-        ...
-    def get_inference_total_duration(self) -> float:
-        ...
-    def get_load_time(self) -> float:
+    def get_unet_inference_duration(self) -> MeanStdPair:
         ...
-    def get_generate_duration(self) -> float:
+    @property
+    def raw_metrics(self) -> RawImageGenerationPerfMetrics:
         ...
 class InpaintingPipeline:
     """
@@ -1308,25 +1296,25 @@ class RawImageGenerationPerfMetrics:
 
     Structure with raw performance metrics for each generation before any statistics are calculated.
 
-    :param unet_inference_durations: Inference time for each unet inference in microseconds.
+    :param unet_inference_durations: Durations for each unet inference in microseconds.
     :type unet_inference_durations: List[float]
 
-    :param transformer_inference_durations: Inference time for each transformer inference in microseconds.
+    :param transformer_inference_durations: Durations for each transformer inference in microseconds.
    :type transformer_inference_durations: List[float]
 
-    :param iteration_durations: durations for each step iteration in microseconds.
+    :param iteration_durations: Durations for each step iteration in microseconds.
     :type iteration_durations: List[float]
     """
     def __init__(self) -> None:
         ...
     @property
-    def unet_inference_durations(self) -> list[float]:
+    def iteration_durations(self) -> list[float]:
         ...
     @property
     def transformer_inference_durations(self) -> list[float]:
         ...
     @property
-    def iteration_durations(self) -> list[float]:
+    def unet_inference_durations(self) -> list[float]:
         ...
class RawPerfMetrics: """ From 5952a9a1f9c59d852caaabb26b47d756b139b543 Mon Sep 17 00:00:00 2001 From: xufang Date: Fri, 10 Jan 2025 19:06:56 +0800 Subject: [PATCH 18/19] fix python error --- src/python/openvino_genai/py_openvino_genai.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index fe28a91306..001b4aeb84 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -2296,4 +2296,4 @@ def draft_model(models_path: os.PathLike, device: str = '', **kwargs) -> openvin def get_version() -> str: """ OpenVINO GenAI version - """ + """ \ No newline at end of file From ac68d53139a8989592e3c3387b5d1d94f6db1c77 Mon Sep 17 00:00:00 2001 From: xufang Date: Fri, 10 Jan 2025 19:50:31 +0800 Subject: [PATCH 19/19] add newline at end of file --- src/python/openvino_genai/py_openvino_genai.pyi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 001b4aeb84..fe28a91306 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -2296,4 +2296,4 @@ def draft_model(models_path: os.PathLike, device: str = '', **kwargs) -> openvin def get_version() -> str: """ OpenVINO GenAI version - """ \ No newline at end of file + """
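
Usage note (illustrative, not part of the patches): the stub declarations above describe the Python surface of this feature. A minimal sketch of exercising it end to end; the model path, device, and prompt are placeholder assumptions, and the getter name follows the corrected get_performance_metrics spelling used in the stub above:

import openvino_genai

# Placeholder model path and device; any converted text2image model should work here.
pipe = openvino_genai.Text2ImagePipeline("./stable-diffusion-v1-5", "CPU")
pipe.generate("a small house on a hill", width=512, height=512, num_inference_steps=20)

metrics = pipe.get_performance_metrics()
print(f"load time:         {metrics.get_load_time():.2f} ms")
print(f"generate duration: {metrics.get_generate_duration():.2f} ms")
print(f"total inference:   {metrics.get_inference_total_duration():.2f} ms")

iteration = metrics.get_iteration_duration()  # MeanStdPair with .mean and .std
print(f"iteration:         {iteration.mean:.2f} +/- {iteration.std:.2f} ms")

# Only one of these is expected to be meaningful, depending on the pipeline
# backbone: unet for SD/SDXL, transformer for SD3/Flux.
unet = metrics.get_unet_inference_duration()
transformer = metrics.get_transformer_inference_duration()
print(f"unet:              {unet.mean:.2f} +/- {unet.std:.2f} ms")
print(f"transformer:       {transformer.mean:.2f} +/- {transformer.std:.2f} ms")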
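The unit convention in the stubs is worth restating: the per-step lists in RawImageGenerationPerfMetrics are recorded in microseconds, while every getter on ImageGenerationPerfMetrics reports milliseconds, computing mean/std pairs over raw_metrics and caching the result. A sketch of that aggregation in plain Python, independent of the bindings (population standard deviation is an assumption; the stubs do not say which variant is used):

from statistics import mean, pstdev

def to_mean_std_ms(durations_us: list[float]) -> tuple[float, float]:
    """Aggregate per-step raw durations (microseconds) into (mean, std) in ms."""
    if not durations_us:
        return 0.0, 0.0
    durations_ms = [d / 1000.0 for d in durations_us]
    return mean(durations_ms), pstdev(durations_ms)

# e.g. the equivalent of get_iteration_duration(), computed from the raw list:
# it_mean_ms, it_std_ms = to_mean_std_ms(metrics.raw_metrics.iteration_durations)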