Skip to content

Commit

Permalink
add python api
Browse files Browse the repository at this point in the history
  • Loading branch information
xufang-lisa committed Jan 10, 2025
1 parent 8bb7e2a commit 2e8aad6
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 5 deletions.
2 changes: 2 additions & 0 deletions src/python/openvino_genai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@
Generator,
CppStdGenerator,
TorchGenerator,
ImageGenerationPerfMetrics,
RawImageGenerationPerfMetrics,
)

# Continuous batching
Expand Down
4 changes: 3 additions & 1 deletion src/python/openvino_genai/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,11 @@ from openvino_genai.py_openvino_genai import GenerationResult
from openvino_genai.py_openvino_genai import Generator
from openvino_genai.py_openvino_genai import Image2ImagePipeline
from openvino_genai.py_openvino_genai import ImageGenerationConfig
from openvino_genai.py_openvino_genai import ImageGenerationPerfMetrics
from openvino_genai.py_openvino_genai import InpaintingPipeline
from openvino_genai.py_openvino_genai import LLMPipeline
from openvino_genai.py_openvino_genai import PerfMetrics
from openvino_genai.py_openvino_genai import RawImageGenerationPerfMetrics
from openvino_genai.py_openvino_genai import RawPerfMetrics
from openvino_genai.py_openvino_genai import SD3Transformer2DModel
from openvino_genai.py_openvino_genai import Scheduler
Expand All @@ -45,5 +47,5 @@ from openvino_genai.py_openvino_genai import draft_model
from openvino_genai.py_openvino_genai import get_version
import os as os
from . import py_openvino_genai
__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai']
__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'ImageGenerationPerfMetrics', 'RawImageGenerationPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai']
__version__: str
97 changes: 96 additions & 1 deletion src/python/openvino_genai/py_openvino_genai.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ from __future__ import annotations
import openvino._pyopenvino
import os
import typing
__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version']
__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'ImageGenerationPerfMetrics', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawImageGenerationPerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version']
class Adapter:
"""
Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier.
Expand Down Expand Up @@ -2200,6 +2200,101 @@ class WhisperRawPerfMetrics:
@property
def features_extraction_durations(self) -> list[float]:
...
class RawImageGenerationPerfMetrics:
    """
    Structure with raw performance metrics for each generation, before any statistics are calculated.

    :param unet_inference_durations: Inference time for each unet inference in microseconds.
    :type unet_inference_durations: List[float]

    :param transformer_inference_durations: Inference time for each transformer inference in microseconds.
    :type transformer_inference_durations: List[float]

    :param iteration_durations: Durations for each step iteration in microseconds.
    :type iteration_durations: List[float]

    NOTE(review): the pybind11 binding routes these properties through
    pyutils::get_ms, which presumably converts the stored raw values to
    milliseconds before returning them to Python -- confirm the actual unit
    seen by Python callers and align this docstring with the binding.
    """
    def __init__(self) -> None:
        ...
    @property
    def unet_inference_durations(self) -> list[float]:
        # Per-step UNet inference durations (see unit note in the class docstring).
        ...
    @property
    def transformer_inference_durations(self) -> list[float]:
        # Per-step transformer inference durations (see unit note in the class docstring).
        ...
    @property
    def iteration_durations(self) -> list[float]:
        # Duration of each generation-step iteration (see unit note in the class docstring).
        ...
class ImageGenerationPerfMetrics:
    """
    Holds performance metrics for each image generate call.

    PerfMetrics holds fields with mean and standard deviations for the following metrics:
      - one generation iteration, ms
      - inference duration for unet model, ms
      - inference duration for transformer model, ms

    Additional fields include:
      - load time, ms
      - total duration of image generation, ms
      - inference duration of vae_encoder model, ms
      - inference duration of vae_decoder model, ms
      - inference duration of each encoder model, ms

    Preferable way to access values is via get functions. Getters calculate mean and
    std values from raw_metrics and return pairs. If mean and std were already
    calculated, getters return cached values.

    :param get_load_time: Returns the load time in milliseconds.
    :type get_load_time: float

    :param get_unet_inference_duration: Returns the mean and standard deviation of unet inference in milliseconds.
    :type get_unet_inference_duration: MeanStdPair

    :param get_transformer_inference_duration: Returns the mean and standard deviation of transformer inference in milliseconds.
    :type get_transformer_inference_duration: MeanStdPair

    :param get_iteration_duration: Returns the mean and standard deviation of one generation iteration in milliseconds.
    :type get_iteration_duration: MeanStdPair

    :param get_inference_total_duration: Returns all inference duration including encoder, decoder and transformer/unet inference.
    :type get_inference_total_duration: float

    :param get_generate_duration: Returns generate duration in milliseconds.
    :type get_generate_duration: float

    :param raw_metrics: A structure of RawImageGenerationPerfMetrics type that holds raw metrics.
    :type raw_metrics: RawImageGenerationPerfMetrics
    """
    def __init__(self) -> None:
        ...
    @property
    def load_time(self) -> float:
        ...
    @property
    def generate_duration(self) -> float:
        ...
    @property
    def vae_encoder_inference_duration(self) -> float:
        ...
    @property
    def vae_decoder_inference_duration(self) -> float:
        ...
    @property
    def encoder_inference_duration(self) -> dict[str, float]:
        # Mapping of encoder model name -> inference duration.
        ...
    def get_unet_inference_duration(self) -> MeanStdPair:
        ...
    def get_transformer_inference_duration(self) -> MeanStdPair:
        ...
    def get_iteration_duration(self) -> MeanStdPair:
        ...
    def get_inference_total_duration(self) -> float:
        ...
    def get_load_time(self) -> float:
        ...
    def get_generate_duration(self) -> float:
        ...
def draft_model(models_path: os.PathLike, device: str = '', **kwargs) -> openvino._pyopenvino.OVAny:
"""
device on which inference will be performed
Expand Down
31 changes: 28 additions & 3 deletions src/python/py_image_generation_pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,28 @@ void init_image_generation_pipelines(py::module_& m) {
config.update_generation_config(pyutils::kwargs_to_any_map(kwargs));
});

// Expose the per-step raw metrics. Each property lambda converts the stored raw
// durations via pyutils::get_ms before handing the list to Python.
// NOTE(review): the .pyi stub documents these values as microseconds while
// get_ms suggests a milliseconds conversion -- confirm and keep the stub's
// docstring consistent with what this binding actually returns.
py::class_<RawImageGenerationPerfMetrics>(m, "RawImageGenerationPerfMetrics", raw_image_generation_perf_metrics_docstring)
    .def(py::init<>())
    .def_property_readonly("unet_inference_durations", [](const RawImageGenerationPerfMetrics &rw) {
        return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::unet_inference_durations);
    })
    .def_property_readonly("transformer_inference_durations", [](const RawImageGenerationPerfMetrics &rw) {
        return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::transformer_inference_durations);
    })
    .def_property_readonly("iteration_durations", [](const RawImageGenerationPerfMetrics &rw) {
        return pyutils::get_ms(rw, &RawImageGenerationPerfMetrics::iteration_durations);
    });

// Expose the aggregated image-generation metrics. The get_* methods delegate
// directly to the corresponding C++ accessors; raw_metrics is exposed read-only
// so Python callers can reach the unaggregated per-step data.
py::class_<ImageGenerationPerfMetrics>(m, "ImageGenerationPerfMetrics", image_generation_perf_metrics_docstring)
    .def(py::init<>())
    .def("get_load_time", &ImageGenerationPerfMetrics::get_load_time)
    .def("get_generate_duration", &ImageGenerationPerfMetrics::get_generate_duration)
    .def("get_unet_inference_duration", &ImageGenerationPerfMetrics::get_unet_inference_duration)
    .def("get_transformer_inference_duration", &ImageGenerationPerfMetrics::get_transformer_inference_duration)
    .def("get_iteration_duration", &ImageGenerationPerfMetrics::get_iteration_duration)
    .def("get_inference_total_duration", &ImageGenerationPerfMetrics::get_inference_total_duration)
    .def_readonly("raw_metrics", &ImageGenerationPerfMetrics::raw_metrics);

auto text2image_pipeline = py::class_<ov::genai::Text2ImagePipeline>(m, "Text2ImagePipeline", "This class is used for generation with text-to-image models.")
.def(py::init([](const std::filesystem::path& models_path) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
Expand Down Expand Up @@ -350,7 +372,8 @@ void init_image_generation_pipelines(py::module_& m) {
},
py::arg("prompt"), "Input string",
(text2image_generate_docstring + std::string(" \n ")).c_str())
.def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent"));
.def("decode", &ov::genai::Text2ImagePipeline::decode, py::arg("latent"))
.def("get_perfomance_metrics", &ov::genai::Text2ImagePipeline::get_perfomance_metrics);


auto image2image_pipeline = py::class_<ov::genai::Image2ImagePipeline>(m, "Image2ImagePipeline", "This class is used for generation with image-to-image models.")
Expand Down Expand Up @@ -413,7 +436,8 @@ void init_image_generation_pipelines(py::module_& m) {
py::arg("prompt"), "Input string",
py::arg("image"), "Initial image",
(text2image_generate_docstring + std::string(" \n ")).c_str())
.def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent"));
.def("decode", &ov::genai::Image2ImagePipeline::decode, py::arg("latent"))
.def("get_perfomance_metrics", &ov::genai::Image2ImagePipeline::get_perfomance_metrics);


auto inpainting_pipeline = py::class_<ov::genai::InpaintingPipeline>(m, "InpaintingPipeline", "This class is used for generation with inpainting models.")
Expand Down Expand Up @@ -478,7 +502,8 @@ void init_image_generation_pipelines(py::module_& m) {
py::arg("image"), "Initial image",
py::arg("mask_image"), "Mask image",
(text2image_generate_docstring + std::string(" \n ")).c_str())
.def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent"));
.def("decode", &ov::genai::InpaintingPipeline::decode, py::arg("latent"))
.def("get_perfomance_metrics", &ov::genai::InpaintingPipeline::get_perfomance_metrics);

// define constructors to create one pipeline from another
// NOTE: needs to be defined once all pipelines are created
Expand Down

0 comments on commit 2e8aad6

Please sign in to comment.