diff --git a/src/python/openvino_genai/__init__.py b/src/python/openvino_genai/__init__.py index 0ad7ba3f12..7620fd22d4 100644 --- a/src/python/openvino_genai/__init__.py +++ b/src/python/openvino_genai/__init__.py @@ -76,6 +76,8 @@ Generator, CppStdGenerator, TorchGenerator, + ImageGenerationPerfMetrics, + RawImageGenerationPerfMetrics, ) # Continuous batching diff --git a/src/python/openvino_genai/__init__.pyi b/src/python/openvino_genai/__init__.pyi index 0a401ae958..0fa661e96f 100644 --- a/src/python/openvino_genai/__init__.pyi +++ b/src/python/openvino_genai/__init__.pyi @@ -21,9 +21,11 @@ from openvino_genai.py_openvino_genai import GenerationResult from openvino_genai.py_openvino_genai import Generator from openvino_genai.py_openvino_genai import Image2ImagePipeline from openvino_genai.py_openvino_genai import ImageGenerationConfig +from openvino_genai.py_openvino_genai import ImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import InpaintingPipeline from openvino_genai.py_openvino_genai import LLMPipeline from openvino_genai.py_openvino_genai import PerfMetrics +from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics from openvino_genai.py_openvino_genai import RawPerfMetrics from openvino_genai.py_openvino_genai import SD3Transformer2DModel from openvino_genai.py_openvino_genai import Scheduler @@ -45,5 +47,5 @@ from openvino_genai.py_openvino_genai import draft_model from openvino_genai.py_openvino_genai import get_version import os as os from . 
import py_openvino_genai -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai'] __version__: str diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index a8063ca4ab..bb34f19bb6 100644 ---
a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -5,7 +5,7 @@ from __future__ import annotations import openvino._pyopenvino import os import typing -__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] +__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 
'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version'] class Adapter: """ Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier. @@ -2200,6 +2200,101 @@ class WhisperRawPerfMetrics: @property def features_extraction_durations(self) -> list[float]: ... +class RawImageGenerationPerfMetrics: + """ + + Structure with raw performance metrics for each generation before any statistics are calculated. + + :param unet_inference_durations: Inference time for each unet inference in microseconds. + :type unet_inference_durations: List[float] + + :param transformer_inference_durations: Inference time for each transformer inference in microseconds. + :type transformer_inference_durations: List[float] + + :param iteration_durations: durations for each step iteration in microseconds. + :type iteration_durations: List[float] + """ + def __init__(self) -> None: + ... + @property + def unet_inference_durations(self) -> list[float]: + ... + @property + def transformer_inference_durations(self) -> list[float]: + ... + @property + def iteration_durations(self) -> list[float]: + ... +class ImageGenerationPerfMetrics: + """ + + Holds performance metrics for each image generate call. 
+ + PerfMetrics holds fields with mean and standard deviations for the following metrics: + - one generation iteration, ms + - inference duration for unet model, ms + - inference duration for transformer model, ms + + Additional fields include: + - Load time, ms + - total duration of image generation, ms + - inference duration of vae_encoder model, ms + - inference duration of vae_decoder model, ms + - inference duration of each encoder model, ms + + Preferable way to access values is via get functions. Getters calculate mean and std values from raw_metrics and return pairs. + If mean and std were already calculated, getters return cached values. + + :param get_load_time: Returns the load time in milliseconds. + :type get_load_time: float + + :param get_unet_inference_duration: Returns the mean and standard deviation of unet inference in milliseconds. + :type get_unet_inference_duration: MeanStdPair + + :param get_transformer_inference_duration: Returns the mean and standard deviation of transformer inference in milliseconds. + :type get_transformer_inference_duration: MeanStdPair + + :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in milliseconds. + :type get_iteration_duration: MeanStdPair + + :param get_inference_total_duration: Returns all inference duration including encoder, decoder and transformer/unet inference. + :type get_inference_total_duration: float + + :param get_generate_duration: Returns generate duration in milliseconds. + :type get_generate_duration: float + + :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics. + :type raw_metrics: RawImageGenerationPerfMetrics + """ + def __init__(self) -> None: + ... + @property + def load_time(self) -> float: + ... + @property + def generate_duration(self) -> float: + ... + @property + def vae_encoder_inference_duration(self) -> float: + ... + @property + def vae_decoder_inference_duration(self) -> float: + ...
+ @property + def encoder_inference_duration(self) -> dict[str, float]: + ... + def get_unet_inference_duration(self) -> MeanStdPair: + ... + def get_transformer_inference_duration(self) -> MeanStdPair: + ... + def get_iteration_duration(self) -> MeanStdPair: + ... + def get_inference_total_duration(self) -> float: + ... + def get_load_time(self) -> float: + ... + def get_generate_duration(self) -> float: + ... def draft_model(models_path: os.PathLike, device: str = '', **kwargs) -> openvino._pyopenvino.OVAny: """ device on which inference will be performed diff --git a/src/python/py_image_generation_pipelines.cpp b/src/python/py_image_generation_pipelines.cpp index cc156cb209..3499712c3e 100644 --- a/src/python/py_image_generation_pipelines.cpp +++ b/src/python/py_image_generation_pipelines.cpp @@ -285,6 +285,28 @@ void init_image_generation_pipelines(py::module_& m) { config.update_generation_config(pyutils::kwargs_to_any_map(kwargs)); }); + py::class_(m, "RawImageGenerationPerfMetrics", raw_image_generation_perf_metrics_docstring) + .def(py::init<>()) + .def_property_readonly("unet_inference_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::unet_inference_durations); + }) + .def_property_readonly("transformer_inference_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::transformer_inference_durations); + }) + .def_property_readonly("iteration_durations", [](const RawImageGenerationPerfMetrics &rw) { + return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::iteration_durations); + }); + + py::class_(m, "ImageGenerationPerfMetrics", image_generation_perf_metrics_docstring) + .def(py::init<>()) + .def("get_load_time", &ImageGenerationPerfMetrics::get_load_time) + .def("get_generate_duration", &ImageGenerationPerfMetrics::get_generate_duration) + .def("get_unet_inference_duration", 
&ImageGenerationPerfMetrics::get_unet_inference_duration) + .def("get_transformer_inference_duration", &ImageGenerationPerfMetrics::get_transformer_inference_duration) + .def("get_iteration_duration", &ImageGenerationPerfMetrics::get_iteration_duration) + .def("get_inference_total_duration", &ImageGenerationPerfMetrics::get_inference_total_duration) + .def_readonly("raw_metrics", &ImageGenerationPerfMetrics::raw_metrics); + auto text2image_pipeline = py::class_(m, "Text2ImagePipeline", "This class is used for generation with text-to-image models.") .def(py::init([](const std::filesystem::path& models_path) { ScopedVar env_manager(pyutils::ov_tokenizers_module_path()); @@ -350,7 +372,8 @@ void init_image_generation_pipelines(py::module_& m) { }, py::arg("prompt"), "Input string", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::Text2ImagePipeline::get_perfomance_metrics); auto image2image_pipeline = py::class_(m, "Image2ImagePipeline", "This class is used for generation with image-to-image models.") @@ -413,7 +436,8 @@ void init_image_generation_pipelines(py::module_& m) { py::arg("prompt"), "Input string", py::arg("image"), "Initial image", (text2image_generate_docstring + std::string(" \n ")).c_str()) - .def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::Image2ImagePipeline::get_perfomance_metrics); auto inpainting_pipeline = py::class_(m, "InpaintingPipeline", "This class is used for generation with inpainting models.") @@ -478,7 +502,8 @@ void init_image_generation_pipelines(py::module_& m) { py::arg("image"), "Initial image", py::arg("mask_image"), "Mask image", (text2image_generate_docstring + std::string(" 
\n ")).c_str()) - .def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent")); + .def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent")) + .def("get_perfomance_metrics", &ov::genai::InpaintingPipeline::get_perfomance_metrics); // define constructors to create one pipeline from another // NOTE: needs to be defined once all pipelines are created