Commit 2758f6b: revert changes for python

sbalandi committed Jan 9, 2025
1 parent 8ebc1d8 commit 2758f6b
Showing 13 changed files with 32 additions and 85 deletions. Taken together, the changes revert the Python streamer API from StreamerRunningStatus-returning callbacks back to callbacks that return bool (or None), remove the enum from the pybind11 bindings, and update the samples, type stubs, and docstrings to match.
1 change: 0 additions & 1 deletion samples/python/chat_sample/chat_sample.py
@@ -8,7 +8,6 @@
 def streamer(subword):
     print(subword, end='', flush=True)
-    return openvino_genai.StreamerRunningStatus.RUNNING
 
 def main():
     parser = argparse.ArgumentParser()
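After this revert, a sample streamer is once again a plain callable: it returns False (or None) to continue and True to stop. A minimal runnable sketch of the post-revert contract; 'model_dir' is a placeholder path to an exported OpenVINO model, not from the commit:

import openvino_genai

def streamer(subword):
    print(subword, end='', flush=True)
    # False (or None) means "continue generation"; True would stop early.
    return False

pipe = openvino_genai.LLMPipeline('model_dir', 'CPU')  # placeholder path
pipe.start_chat()
pipe.generate('What is OpenVINO?', streamer=streamer, max_new_tokens=100)
pipe.finish_chat()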
The same change appears in two further Python samples:

@@ -8,7 +8,8 @@
 def streamer(subword):
     print(subword, end='', flush=True)
     # Return flag corresponds whether generation should be stopped.
-    return openvino_genai.StreamerRunningStatus.RUNNING
+    # False means continue generation.
+    return False
 
 def main():
     parser = argparse.ArgumentParser()
@@ -10,7 +10,8 @@
 def streamer(subword):
     print(subword, end='', flush=True)
     # Return flag corresponds whether generation should be stopped.
-    return openvino_genai.StreamerRunningStatus.RUNNING
+    # False means continue generation.
+    return False
 
 def main():
     parser = argparse.ArgumentParser()
7 changes: 2 additions & 5 deletions samples/python/visual_language_chat/visual_language_chat.py
@@ -11,21 +11,18 @@
 from pathlib import Path
 
 
-def streamer(subword: str) -> openvino_genai.StreamerRunningStatus:
+def streamer(subword: str):
     '''
     Args:
         subword: sub-word of the generated text.
     Returns: Return flag corresponds whether generation should be stopped.
+    "return None" will be treated the same as "return False".
     '''
     print(subword, end='', flush=True)
 
-    # No value is returned as in this example we don't want to stop the generation in this method.
-    # "return None" will be treated the same as "return False".
-
-    return openvino_genai.StreamerRunningStatus.RUNNING
 
 
 def read_image(path: str) -> Tensor:
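The same bool/None contract now applies to the VLM sample. A hedged sketch of using it, with placeholder paths and a simplified image loader (the sample's own helper may differ):

import numpy as np
import openvino_genai
from openvino import Tensor
from PIL import Image

def streamer(subword: str):
    print(subword, end='', flush=True)
    # No explicit return: None is treated the same as False,
    # so generation simply continues.

def read_image(path: str) -> Tensor:
    # Pack the image into a uint8 HWC tensor.
    pic = Image.open(path).convert('RGB')
    return Tensor(np.array(pic, dtype=np.uint8))

pipe = openvino_genai.VLMPipeline('model_dir', 'CPU')  # placeholder path
pipe.start_chat()
pipe.generate('Describe this image.',
              images=[read_image('cat.png')],  # placeholder image path
              max_new_tokens=100,
              streamer=streamer)
pipe.finish_chat()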
3 changes: 3 additions & 0 deletions src/cpp/src/visual_language/pipeline.cpp
@@ -187,6 +187,9 @@ class ov::genai::VLMPipeline::VLMPipelineImpl {
     SequenceGroup::Ptr sequence_group = std::make_shared<SequenceGroup>(request_id, prompt_ids, generation_config, block_size);
     requests.push_back(sequence_group);
 
+    OPENVINO_ASSERT((!m_is_chat_conversation || !std::get_if<std::function<StreamerRunningStatus(std::string)>>(&streamer)),
+                    "For chat mode, please use a streamer derived from StreamerBase or a callback with a bool return value.");
+
     std::shared_ptr<StreamerBase> streamer_ptr = std::visit(overloaded{
         [&m_tokenizer = m_tokenizer](
             const std::function<bool(std::string)>& callback
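The assert above means chat mode accepts only a bool-returning callback or a StreamerBase subclass. A minimal StreamerBase sketch in Python against the bindings shown below; decoding token-by-token is a simplification, since real streamers buffer ids to handle multi-token characters:

import openvino_genai

class PrintStreamer(openvino_genai.StreamerBase):
    def __init__(self, tokenizer: openvino_genai.Tokenizer):
        super().__init__()
        self.tokenizer = tokenizer

    def put(self, token: int) -> bool:
        # put() is called for every newly decoded token id.
        print(self.tokenizer.decode([token]), end='', flush=True)
        return False  # keep generating; True would stop

    def end(self):
        # Called once generation finishes; flush any buffered state here.
        print()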
3 changes: 1 addition & 2 deletions src/python/openvino_genai/__init__.py
@@ -15,8 +15,7 @@
     RawPerfMetrics,
     PerfMetrics,
     StreamerBase,
-    get_version,
-    StreamerRunningStatus
+    get_version
 )
 
 __version__ = get_version()
3 changes: 1 addition & 2 deletions src/python/openvino_genai/__init__.pyi
@@ -30,7 +30,6 @@ from openvino_genai.py_openvino_genai import Scheduler
 from openvino_genai.py_openvino_genai import SchedulerConfig
 from openvino_genai.py_openvino_genai import StopCriteria
 from openvino_genai.py_openvino_genai import StreamerBase
-from openvino_genai.py_openvino_genai import StreamerRunningStatus
 from openvino_genai.py_openvino_genai import T5EncoderModel
 from openvino_genai.py_openvino_genai import Text2ImagePipeline
 from openvino_genai.py_openvino_genai import TokenizedInputs
@@ -46,5 +45,5 @@ from openvino_genai.py_openvino_genai import draft_model
 from openvino_genai.py_openvino_genai import get_version
 import os as os
 from . import py_openvino_genai
-__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'StreamerRunningStatus', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai']
+__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationResult', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'PerfMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMPipeline', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version', 'openvino', 'os', 'py_openvino_genai']
 __version__: str
72 changes: 14 additions & 58 deletions src/python/openvino_genai/py_openvino_genai.pyi
@@ -5,7 +5,7 @@ from __future__ import annotations
 import openvino._pyopenvino
 import os
 import typing
-__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'StreamerRunningStatus', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version']
+__all__ = ['Adapter', 'AdapterConfig', 'AggregationMode', 'AutoencoderKL', 'CLIPTextModel', 'CLIPTextModelWithProjection', 'CacheEvictionConfig', 'ChunkStreamerBase', 'ContinuousBatchingPipeline', 'CppStdGenerator', 'DecodedResults', 'EncodedGenerationResult', 'EncodedResults', 'FluxTransformer2DModel', 'GenerationConfig', 'GenerationFinishReason', 'GenerationHandle', 'GenerationOutput', 'GenerationResult', 'GenerationStatus', 'Generator', 'Image2ImagePipeline', 'ImageGenerationConfig', 'InpaintingPipeline', 'LLMPipeline', 'MeanStdPair', 'PerfMetrics', 'PipelineMetrics', 'RawPerfMetrics', 'SD3Transformer2DModel', 'Scheduler', 'SchedulerConfig', 'StopCriteria', 'StreamerBase', 'T5EncoderModel', 'Text2ImagePipeline', 'TokenizedInputs', 'Tokenizer', 'TorchGenerator', 'UNet2DConditionModel', 'VLMDecodedResults', 'VLMPerfMetrics', 'VLMPipeline', 'VLMRawPerfMetrics', 'WhisperDecodedResultChunk', 'WhisperDecodedResults', 'WhisperGenerationConfig', 'WhisperPerfMetrics', 'WhisperPipeline', 'WhisperRawPerfMetrics', 'draft_model', 'get_version']
 class Adapter:
     """
     Immutable LoRA Adapter that carries the adaptation matrices and serves as unique adapter identifier.
@@ -373,10 +373,10 @@ class ContinuousBatchingPipeline:
     def add_request(self, request_id: int, prompt: str, generation_config: GenerationConfig) -> GenerationHandle:
         ...
     @typing.overload
-    def generate(self, input_ids: list[openvino._pyopenvino.Tensor], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None) -> list[EncodedGenerationResult]:
+    def generate(self, input_ids: list[openvino._pyopenvino.Tensor], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | typing.Callable[[str], ...] | StreamerBase | None = None) -> list[EncodedGenerationResult]:
         ...
     @typing.overload
-    def generate(self, prompts: list[str], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None) -> list[GenerationResult]:
+    def generate(self, prompts: list[str], generation_config: list[GenerationConfig], streamer: typing.Callable[[str], bool] | typing.Callable[[str], ...] | StreamerBase | None = None) -> list[GenerationResult]:
         ...
     def get_config(self) -> GenerationConfig:
         ...
@@ -940,7 +940,7 @@ class LLMPipeline:
     """
     This class is used for generation with LLMs
     """
-    def __call__(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
+    def __call__(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
         """
         Generates sequences or tokens for LLMs. If input is a string or list of strings then resulting sequences will be already detokenized.
@@ -951,7 +951,7 @@
         :type generation_config: GenerationConfig or a Dict
 
         :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
-        :type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+        :type : Callable[[str], bool], ov.genai.StreamerBase
 
         :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
         :type : Dict
@@ -1025,7 +1025,7 @@
         """
     def finish_chat(self) -> None:
         ...
-    def generate(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
+    def generate(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
         """
         Generates sequences or tokens for LLMs. If input is a string or list of strings then resulting sequences will be already detokenized.
@@ -1036,7 +1036,7 @@
         :type generation_config: GenerationConfig or a Dict
 
         :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
-        :type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+        :type : Callable[[str], bool], ov.genai.StreamerBase
 
         :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
         :type : Dict
@@ -1497,49 +1497,6 @@ class StreamerBase:
         """
         Put is called every time new token is decoded. Returns a bool flag to indicate whether generation should be stopped, if return true generation stops
         """
-class StreamerRunningStatus:
-    """
-    Members:
-
-      UNDEF
-
-      RUNNING
-
-      STOP
-
-      CANCEL
-    """
-    CANCEL: typing.ClassVar[StreamerRunningStatus]  # value = <StreamerRunningStatus.CANCEL: 3>
-    RUNNING: typing.ClassVar[StreamerRunningStatus]  # value = <StreamerRunningStatus.RUNNING: 1>
-    STOP: typing.ClassVar[StreamerRunningStatus]  # value = <StreamerRunningStatus.STOP: 2>
-    UNDEF: typing.ClassVar[StreamerRunningStatus]  # value = <StreamerRunningStatus.UNDEF: 0>
-    __members__: typing.ClassVar[dict[str, StreamerRunningStatus]]  # value = {'UNDEF': <StreamerRunningStatus.UNDEF: 0>, 'RUNNING': <StreamerRunningStatus.RUNNING: 1>, 'STOP': <StreamerRunningStatus.STOP: 2>, 'CANCEL': <StreamerRunningStatus.CANCEL: 3>}
-    def __eq__(self, other: typing.Any) -> bool:
-        ...
-    def __getstate__(self) -> int:
-        ...
-    def __hash__(self) -> int:
-        ...
-    def __index__(self) -> int:
-        ...
-    def __init__(self, value: int) -> None:
-        ...
-    def __int__(self) -> int:
-        ...
-    def __ne__(self, other: typing.Any) -> bool:
-        ...
-    def __repr__(self) -> str:
-        ...
-    def __setstate__(self, state: int) -> None:
-        ...
-    def __str__(self) -> str:
-        ...
-    @property
-    def name(self) -> str:
-        ...
-    @property
-    def value(self) -> int:
-        ...
 class T5EncoderModel:
     """
     T5EncoderModel class.
@@ -1845,7 +1802,7 @@ class VLMPipeline:
     def finish_chat(self) -> None:
         ...
     @typing.overload
-    def generate(self, prompt: str, images: list[openvino._pyopenvino.Tensor], generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
+    def generate(self, prompt: str, images: list[openvino._pyopenvino.Tensor], generation_config: GenerationConfig, streamer: typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
         """
         Generates sequences for VLMs.
@@ -1858,8 +1815,8 @@
         :param generation_config: generation_config
         :type generation_config: GenerationConfig or a Dict
 
-        :param streamer: streamer either as a lambda with a StreamerRunningStatus returning flag whether generation should be stopped. Please, be aware that status CANCELLED is not supported and work as STOP.
-        :type : Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+        :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped.
+        :type : Callable[[str], bool], ov.genai.StreamerBase
 
         :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
         :type : Dict
@@ -1868,7 +1825,7 @@
         :rtype: VLMDecodedResults
         """
     @typing.overload
-    def generate(self, prompt: str, images: openvino._pyopenvino.Tensor, generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
+    def generate(self, prompt: str, images: openvino._pyopenvino.Tensor, generation_config: GenerationConfig, streamer: typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
         """
         Generates sequences for VLMs.
@@ -1881,8 +1838,8 @@
         :param generation_config: generation_config
         :type generation_config: GenerationConfig or a Dict
 
-        :param streamer: streamer either as a lambda with a StreamerRunningStatus returning flag whether generation should be stopped. Please, be aware that status CANCELLED is not supported and work as STOP.
-        :type : Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+        :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped.
+        :type : Callable[[str], bool], ov.genai.StreamerBase
 
         :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
         :type : Dict
@@ -1904,8 +1861,7 @@
             image: ov.Tensor - input image,
             images: List[ov.Tensor] - input images,
             generation_config: GenerationConfig,
-            streamer: Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase - streamer either as a lambda with a StreamerRunningStatus returning flag whether generation should be stopped.
-            Please, be aware that status CANCELLED is not supported and work as STOP.
+            streamer: Callable[[str], bool], ov.genai.StreamerBase - streamer either as a lambda with a boolean returning flag whether generation should be stopped.
 
         :return: return results in decoded form
         :rtype: VLMDecodedResults
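To make the stubs' contract concrete: returning True from the callback requests an early stop. A hedged sketch, again with a placeholder model path:

import openvino_genai

collected = []

def stop_after_50_chars(subword: str) -> bool:
    collected.append(subword)
    # True asks the pipeline to stop generating early.
    return sum(len(s) for s in collected) > 50

pipe = openvino_genai.LLMPipeline('model_dir', 'CPU')  # placeholder path
pipe.generate('Tell me a long story.', streamer=stop_after_50_chars, max_new_tokens=500)
print(''.join(collected))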
2 changes: 1 addition & 1 deletion src/python/py_llm_pipeline.cpp
@@ -37,7 +37,7 @@ auto generate_docstring = R"(
     :type generation_config: GenerationConfig or a Dict
 
     :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
-    :type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+    :type : Callable[[str], bool], ov.genai.StreamerBase
 
     :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
     :type : Dict
6 changes: 0 additions & 6 deletions src/python/py_openvino_genai.cpp
@@ -118,12 +118,6 @@ PYBIND11_MODULE(py_openvino_genai, m) {
         .def("put", &StreamerBase::put, "Put is called every time new token is decoded. Returns a bool flag to indicate whether generation should be stopped, if return true generation stops", py::arg("token"))
         .def("end", &StreamerBase::end, "End is called at the end of generation. It can be used to flush cache if your own streamer has one");
 
-    py::enum_<ov::genai::StreamerRunningStatus>(m, "StreamerRunningStatus")
-        .value("UNDEF", ov::genai::StreamerRunningStatus::UNDEF)
-        .value("RUNNING", ov::genai::StreamerRunningStatus::RUNNING)
-        .value("STOP", ov::genai::StreamerRunningStatus::STOP)
-        .value("CANCEL", ov::genai::StreamerRunningStatus::CANCEL);
-
     init_tokenizer(m);
     init_lora_adapter(m);
     init_generation_config(m);
4 changes: 2 additions & 2 deletions src/python/py_utils.cpp
@@ -336,10 +336,10 @@ ov::genai::StreamerVariant pystreamer_to_streamer(const PyBindStreamerVariant& p
     ov::genai::StreamerVariant streamer = std::monostate();
 
     std::visit(overloaded {
-    [&streamer](const std::function<ov::genai::StreamerRunningStatus(py::str)>& py_callback){
+    [&streamer](const std::function<bool(py::str)>& py_callback){
         // Wrap python streamer with manual utf-8 decoding. Do not rely
         // on pybind automatic decoding since it raises exceptions on incomplete strings.
-        auto callback_wrapped = [py_callback](std::string subword) -> ov::genai::StreamerRunningStatus {
+        auto callback_wrapped = [py_callback](std::string subword) -> bool {
             auto py_str = PyUnicode_DecodeUTF8(subword.data(), subword.length(), "replace");
             return py_callback(py::reinterpret_borrow<py::str>(py_str));
         };
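The comment in the wrapper above is the whole point of this code: a subword chunk may end midway through a multi-byte UTF-8 character, and strict decoding would throw. A small Python illustration of the behavior the binding relies on (the byte string is an arbitrary example):

# First two bytes of a four-byte UTF-8 sequence (a truncated emoji).
chunk = b'\xf0\x9f'

# chunk.decode('utf-8') would raise UnicodeDecodeError: unexpected end of data.
print(chunk.decode('utf-8', errors='replace'))  # prints the U+FFFD replacement character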
3 changes: 1 addition & 2 deletions src/python/py_utils.hpp
@@ -12,13 +12,12 @@
 
 namespace py = pybind11;
 using ov::genai::StreamerBase;
-using ov::genai::StreamerRunningStatus;
 
 namespace ov::genai::pybind::utils {
 
 // When StreamerVariant is used utf-8 decoding is done by pybind and can lead to exception on incomplete texts.
 // Therefore strings decoding should be handled with PyUnicode_DecodeUTF8(..., "replace") to not throw errors.
-using PyBindStreamerVariant = std::variant<std::function<StreamerRunningStatus(py::str)>, std::shared_ptr<StreamerBase>, std::monostate>;
+using PyBindStreamerVariant = std::variant<std::function<bool(py::str)>, std::shared_ptr<StreamerBase>, std::monostate>;
 
 template <class... Ts>
 struct overloaded : Ts... {