Commit 2758f6b: revert changes for python

sbalandi committed Jan 9, 2025
1 parent 8ebc1d8 commit 2758f6b
Showing 13 changed files with 32 additions and 85 deletions. Taken together, the changes revert the Python streamer API from StreamerRunningStatus-returning callbacks back to callbacks that return bool (or None), remove the enum from the pybind11 bindings, and update the samples, type stubs, and docstrings to match.
1 change: 0 additions & 1 deletion samples/python/chat_sample/chat_sample.py
@@ -8,7 +8,6 @@
 def streamer(subword):
     print(subword, end='', flush=True)
-    return openvino_genai.StreamerRunningStatus.RUNNING
 
 def main():
     parser = argparse.ArgumentParser()
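After this revert, a sample streamer is once again a plain callable: it returns False (or None) to continue and True to stop. A minimal runnable sketch of the post-revert contract; 'model_dir' is a placeholder path to an exported OpenVINO model, not from the commit:

import openvino_genai

def streamer(subword):
    print(subword, end='', flush=True)
    # False (or None) means "continue generation"; True would stop early.
    return False

pipe = openvino_genai.LLMPipeline('model_dir', 'CPU')  # placeholder path
pipe.start_chat()
pipe.generate('What is OpenVINO?', streamer=streamer, max_new_tokens=100)
pipe.finish_chat()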
The same change appears in two further Python samples:

@@ -8,7 +8,8 @@
 def streamer(subword):
     print(subword, end='', flush=True)
     # Return flag corresponds whether generation should be stopped.
-    return openvino_genai.StreamerRunningStatus.RUNNING
+    # False means continue generation.
+    return False
 
 def main():
     parser = argparse.ArgumentParser()
@@ -10,7 +10,8 @@
 def streamer(subword):
     print(subword, end='', flush=True)
     # Return flag corresponds whether generation should be stopped.
-    return openvino_genai.StreamerRunningStatus.RUNNING
+    # False means continue generation.
+    return False
 
 def main():
     parser = argparse.ArgumentParser()
7 changes: 2 additions & 5 deletions samples/python/visual_language_chat/visual_language_chat.py
@@ -11,21 +11,18 @@
 from pathlib import Path
 
 
-def streamer(subword: str) -> openvino_genai.StreamerRunningStatus:
+def streamer(subword: str):
     '''
     Args:
         subword: sub-word of the generated text.
     Returns: Return flag corresponds whether generation should be stopped.
+    "return None" will be treated the same as "return False".
     '''
     print(subword, end='', flush=True)
 
-    # No value is returned as in this example we don't want to stop the generation in this method.
-    # "return None" will be treated the same as "return False".
-
-    return openvino_genai.StreamerRunningStatus.RUNNING
 
 
 def read_image(path: str) -> Tensor:
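The same bool/None contract now applies to the VLM sample. A hedged sketch of using it, with placeholder paths and a simplified image loader (the sample's own helper may differ):

import numpy as np
import openvino_genai
from openvino import Tensor
from PIL import Image

def streamer(subword: str):
    print(subword, end='', flush=True)
    # No explicit return: None is treated the same as False,
    # so generation simply continues.

def read_image(path: str) -> Tensor:
    # Pack the image into a uint8 HWC tensor.
    pic = Image.open(path).convert('RGB')
    return Tensor(np.array(pic, dtype=np.uint8))

pipe = openvino_genai.VLMPipeline('model_dir', 'CPU')  # placeholder path
pipe.start_chat()
pipe.generate('Describe this image.',
              images=[read_image('cat.png')],  # placeholder image path
              max_new_tokens=100,
              streamer=streamer)
pipe.finish_chat()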
3 changes: 3 additions & 0 deletions src/cpp/src/visual_language/pipeline.cpp
@@ -187,6 +187,9 @@ class ov::genai::VLMPipeline::VLMPipelineImpl {
     SequenceGroup::Ptr sequence_group = std::make_shared<SequenceGroup>(request_id, prompt_ids, generation_config, block_size);
     requests.push_back(sequence_group);
 
+    OPENVINO_ASSERT((!m_is_chat_conversation || !std::get_if<std::function<StreamerRunningStatus(std::string)>>(&streamer)),
+                    "For chat mode, please use a streamer derived from StreamerBase or a callback with a bool return value.");
+
     std::shared_ptr<StreamerBase> streamer_ptr = std::visit(overloaded{
         [&m_tokenizer = m_tokenizer](
             const std::function<bool(std::string)>& callback
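The assert above means chat mode accepts only a bool-returning callback or a StreamerBase subclass. A minimal StreamerBase sketch in Python against the bindings shown below; decoding token-by-token is a simplification, since real streamers buffer ids to handle multi-token characters:

import openvino_genai

class PrintStreamer(openvino_genai.StreamerBase):
    def __init__(self, tokenizer: openvino_genai.Tokenizer):
        super().__init__()
        self.tokenizer = tokenizer

    def put(self, token: int) -> bool:
        # put() is called for every newly decoded token id.
        print(self.tokenizer.decode([token]), end='', flush=True)
        return False  # keep generating; True would stop

    def end(self):
        # Called once generation finishes; flush any buffered state here.
        print()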
3 changes: 1 addition & 2 deletions src/python/openvino_genai/__init__.py
@@ -15,8 +15,7 @@
     RawPerfMetrics,
     PerfMetrics,
     StreamerBase,
-    get_version,
-    StreamerRunningStatus
+    get_version
 )
 
 __version__ = get_version()
3 changes: 1 addition & 2 deletions src/python/openvino_genai/__init__.pyi
@@ -30,7 +30,6 @@ from openvino_genai.py_openvino_genai import Scheduler
 from openvino_genai.py_openvino_genai import SchedulerConfig
 from openvino_genai.py_openvino_genai import StopCriteria
 from openvino_genai.py_openvino_genai import StreamerBase
-from openvino_genai.py_openvino_genai import StreamerRunningStatus
 from openvino_genai.py_openvino_genai import T5EncoderModel
 from openvino_genai.py_openvino_genai import Text2ImagePipeline
 from openvino_genai.py_openvino_genai import TokenizedInputs
@@ -46,5 +45,5 @@ from openvino_genai.py_openvino_genai import draft_model
 from openvino_genai.py_openvino_genai import get_version
 import os as os
 from . import py_openvino_genai
-__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'StreamerRunningStatus', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai']
+__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai']
 __version__: str
72 changes: 14 additions & 58 deletions src/python/openvino_genai/py_openvino_genai.pyi
@@ -5,7 +5,7 @@ from __future__ import annotations
 import openvino._pyopenvino
 import os
 import typing
-__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'StreamerRunningStatus', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version']
+__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version']
 class Adapter:
     """
     Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier.
@@ -373,10 +373,10 @@ class ContinuousBatchingPipeline:
     def add_request(self, request_id: int, prompt: str, generation_config: GenerationConfig) -> GenerationHandle:
         ...
     @typing.overload
-    def generate(self, input_ids: list[openvino._pyopenvino.Tensor], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None) -> list[EncodedGenerationResult]:
+    def generate(self, input_ids: list[openvino._pyopenvino.Tensor], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | typing.Callable[[str], ...] | StreamerBase | None = None) -> list[EncodedGenerationResult]:
         ...
     @typing.overload
-    def generate(self, prompts: list[str], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None) -> list[GenerationResult]:
+    def generate(self, prompts: list[str], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | typing.Callable[[str], ...] | StreamerBase | None = None) -> list[GenerationResult]:
         ...
     def get_config(self) -> GenerationConfig:
         ...
@@ -940,7 +940,7 @@ class LLMPipeline:
     """
     This class is used for generation with LLMs
     """
-    def __call__(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
+    def __call__(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
         """
         Generates sequences or tokens for LLMs. If input is a string or list of strings then resulting sequences will be already detokenized.
@@ -951,7 +951,7 @@
         :type generation_config: GenerationConfig or a Dict
 
         :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
-        :type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+        :type : Callable[[str], bool], ov.genai.StreamerBase
 
         :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
         :type : Dict
@@ -1025,7 +1025,7 @@
         """
     def finish_chat(self) -> None:
         ...
-    def generate(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
+    def generate(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
         """
         Generates sequences or tokens for LLMs. If input is a string or list of strings then resulting sequences will be already detokenized.
@@ -1036,7 +1036,7 @@
         :type generation_config: GenerationConfig or a Dict
 
         :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
-        :type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+        :type : Callable[[str], bool], ov.genai.StreamerBase
 
         :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
         :type : Dict
@@ -1497,49 +1497,6 @@ class StreamerBase:
         """
         Put is called every time new token is decoded. Returns a bool flag to indicate whether generation should be stopped, if return true generation stops
         """
-class StreamerRunningStatus:
-    """
-    Members:
-
-      UNDEF
-
-      RUNNING
-
-      STOP
-
-      CANCEL
-    """
-    CANCEL: typing.ClassVar[StreamerRunningStatus]  # value = <StreamerRunningStatus.CANCEL: 3>
-    RUNNING: typing.ClassVar[StreamerRunningStatus]  # value = <StreamerRunningStatus.RUNNING: 1>
-    STOP: typing.ClassVar[StreamerRunningStatus]  # value = <StreamerRunningStatus.STOP: 2>
-    UNDEF: typing.ClassVar[StreamerRunningStatus]  # value = <StreamerRunningStatus.UNDEF: 0>
-    __members__: typing.ClassVar[dict[str, StreamerRunningStatus]]  # value = {'UNDEF': <StreamerRunningStatus.UNDEF: 0>, 'RUNNING': <StreamerRunningStatus.RUNNING: 1>, 'STOP': <StreamerRunningStatus.STOP: 2>, 'CANCEL': <StreamerRunningStatus.CANCEL: 3>}
-    def __eq__(self, other: typing.Any) -> bool:
-        ...
-    def __getstate__(self) -> int:
-        ...
-    def __hash__(self) -> int:
-        ...
-    def __index__(self) -> int:
-        ...
-    def __init__(self, value: int) -> None:
-        ...
-    def __int__(self) -> int:
-        ...
-    def __ne__(self, other: typing.Any) -> bool:
-        ...
-    def __repr__(self) -> str:
-        ...
-    def __setstate__(self, state: int) -> None:
-        ...
-    def __str__(self) -> str:
-        ...
-    @property
-    def name(self) -> str:
-        ...
-    @property
-    def value(self) -> int:
-        ...
 class T5EncoderModel:
     """
     T5EncoderModel class.
@@ -1845,7 +1802,7 @@ class VLMPipeline:
     def finish_chat(self) -> None:
         ...
     @typing.overload
-    def generate(self, prompt: str, images: list[openvino._pyopenvino.Tensor], generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
+    def generate(self, prompt: str, images: list[openvino._pyopenvino.Tensor], generation_config: GenerationConfig, streamer: typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
         """
         Generates sequences for VLMs.
@@ -1858,8 +1815,8 @@
         :param generation_config: generation_config
         :type generation_config: GenerationConfig or a Dict
 
-        :param streamer: streamer either as a lambda with a StreamerRunningStatus returning flag whether generation should be stopped. Please, be aware that status CANCELLED is not supported and work as STOP.
-        :type : Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+        :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped.
+        :type : Callable[[str], bool], ov.genai.StreamerBase
 
         :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
         :type : Dict
@@ -1868,7 +1825,7 @@
         :rtype: VLMDecodedResults
         """
     @typing.overload
-    def generate(self, prompt: str, images: openvino._pyopenvino.Tensor, generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
+    def generate(self, prompt: str, images: openvino._pyopenvino.Tensor, generation_config: GenerationConfig, streamer: typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
         """
         Generates sequences for VLMs.
@@ -1881,8 +1838,8 @@
         :param generation_config: generation_config
         :type generation_config: GenerationConfig or a Dict
 
-        :param streamer: streamer either as a lambda with a StreamerRunningStatus returning flag whether generation should be stopped. Please, be aware that status CANCELLED is not supported and work as STOP.
-        :type : Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+        :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped.
+        :type : Callable[[str], bool], ov.genai.StreamerBase
 
         :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
         :type : Dict
@@ -1904,8 +1861,7 @@
             image: ov.Tensor - input image,
             images: List[ov.Tensor] - input images,
             generation_config: GenerationConfig,
-            streamer: Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase - streamer either as a lambda with a StreamerRunningStatus returning flag whether generation should be stopped.
-            Please, be aware that status CANCELLED is not supported and work as STOP.
+            streamer: Callable[[str], bool], ov.genai.StreamerBase - streamer either as a lambda with a boolean returning flag whether generation should be stopped.
 
         :return: return results in decoded form
         :rtype: VLMDecodedResults
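To make the stubs' contract concrete: returning True from the callback requests an early stop. A hedged sketch, again with a placeholder model path:

import openvino_genai

collected = []

def stop_after_50_chars(subword: str) -> bool:
    collected.append(subword)
    # True asks the pipeline to stop generating early.
    return sum(len(s) for s in collected) > 50

pipe = openvino_genai.LLMPipeline('model_dir', 'CPU')  # placeholder path
pipe.generate('Tell me a long story.', streamer=stop_after_50_chars, max_new_tokens=500)
print(''.join(collected))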
2 changes: 1 addition & 1 deletion src/python/py_llm_pipeline.cpp
@@ -37,7 +37,7 @@ auto generate_docstring = R"(
     :type generation_config: GenerationConfig or a Dict
 
     :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
-    :type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+    :type : Callable[[str], bool], ov.genai.StreamerBase
 
     :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
     :type : Dict
6 changes: 0 additions & 6 deletions src/python/py_openvino_genai.cpp
@@ -118,12 +118,6 @@ PYBIND11_MODULE(py_openvino_genai, m) {
         .def("put", &StreamerBase::put, "Put is called every time new token is decoded. Returns a bool flag to indicate whether generation should be stopped, if return true generation stops", py::arg("token"))
         .def("end", &StreamerBase::end, "End is called at the end of generation. It can be used to flush cache if your own streamer has one");
 
-    py::enum_<ov::genai::StreamerRunningStatus>(m, "StreamerRunningStatus")
-        .value("UNDEF", ov::genai::StreamerRunningStatus::UNDEF)
-        .value("RUNNING", ov::genai::StreamerRunningStatus::RUNNING)
-        .value("STOP", ov::genai::StreamerRunningStatus::STOP)
-        .value("CANCEL", ov::genai::StreamerRunningStatus::CANCEL);
-
     init_tokenizer(m);
     init_lora_adapter(m);
     init_generation_config(m);
4 changes: 2 additions & 2 deletions src/python/py_utils.cpp
@@ -336,10 +336,10 @@ ov::genai::StreamerVariant pystreamer_to_streamer(const PyBindStreamerVariant& p
     ov::genai::StreamerVariant streamer = std::monostate();
 
     std::visit(overloaded {
-    [&streamer](const std::function<ov::genai::StreamerRunningStatus(py::str)>& py_callback){
+    [&streamer](const std::function<bool(py::str)>& py_callback){
         // Wrap python streamer with manual utf-8 decoding. Do not rely
         // on pybind automatic decoding since it raises exceptions on incomplete strings.
-        auto callback_wrapped = [py_callback](std::string subword) -> ov::genai::StreamerRunningStatus {
+        auto callback_wrapped = [py_callback](std::string subword) -> bool {
             auto py_str = PyUnicode_DecodeUTF8(subword.data(), subword.length(), "replace");
             return py_callback(py::reinterpret_borrow<py::str>(py_str));
         };
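The comment in the wrapper above is the whole point of this code: a subword chunk may end midway through a multi-byte UTF-8 character, and strict decoding would throw. A small Python illustration of the behavior the binding relies on (the byte string is an arbitrary example):

# First two bytes of a four-byte UTF-8 sequence (a truncated emoji).
chunk = b'\xf0\x9f'

# chunk.decode('utf-8') would raise UnicodeDecodeError: unexpected end of data.
print(chunk.decode('utf-8', errors='replace'))  # prints the U+FFFD replacement character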
3 changes: 1 addition & 2 deletions src/python/py_utils.hpp
@@ -12,13 +12,12 @@
 
 namespace py = pybind11;
 using ov::genai::StreamerBase;
-using ov::genai::StreamerRunningStatus;
 
 namespace ov::genai::pybind::utils {
 
 // When StreamerVariant is used utf-8 decoding is done by pybind and can lead to exception on incomplete texts.
 // Therefore strings decoding should be handled with PyUnicode_DecodeUTF8(..., "replace") to not throw errors.
-using PyBindStreamerVariant = std::variant<std::function<StreamerRunningStatus(py::str)>, std::shared_ptr<StreamerBase>, std::monostate>;
+using PyBindStreamerVariant = std::variant<std::function<bool(py::str)>, std::shared_ptr<StreamerBase>, std::monostate>;
 
 template <class... Ts>
 struct overloaded : Ts... {