
Commit

fix python
sbalandi committed Jan 9, 2025
1 parent 19954ee commit 8ebc1d8
Showing 9 changed files with 26 additions and 37 deletions.
3 changes: 1 addition & 2 deletions samples/python/chat_sample/chat_sample.py
@@ -6,9 +6,8 @@
 import openvino_genai
 
 
-def streamer(subword) -> openvino_genai.StreamerRunningStatus:
+def streamer(subword):
     print(subword, end='', flush=True)
 
-    return openvino_genai.StreamerRunningStatus.RUNNING
 
 def main():
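For orientation, a minimal sketch of how the simplified chat_sample streamer plugs into LLMPipeline; the model path and max_new_tokens value are placeholders, not part of this commit:

    import openvino_genai

    def streamer(subword):
        # Print chunks as they arrive; per the VLM sample's comment below,
        # returning nothing (None) is treated as "continue generation".
        print(subword, end='', flush=True)

    pipe = openvino_genai.LLMPipeline("TinyLlama-1.1B-Chat-v1.0", "CPU")  # hypothetical model path
    pipe.start_chat()
    pipe.generate("What is OpenVINO?", streamer=streamer, max_new_tokens=100)
    pipe.finish_chat()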
@@ -5,11 +5,10 @@
 import argparse
 import openvino_genai
 
-def streamer(subword):
-    print(subword, end='', flush=True)
-    # Return flag corresponds whether generation should be stopped.
-    # False means continue generation.
-    return False
+def streamer(subword):
+    print(subword, end='', flush=True)
+    # Return flag corresponds whether generation should be stopped.
+    return openvino_genai.StreamerRunningStatus.RUNNING
 
 def main():
     parser = argparse.ArgumentParser()
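The StreamerRunningStatus return value is what lets a callback stop generation early. A hedged sketch building on this sample's streamer (the length threshold is illustrative):

    import openvino_genai

    received = []

    def streamer(subword):
        print(subword, end='', flush=True)
        received.append(subword)
        # RUNNING continues generation; STOP halts it.
        if sum(len(s) for s in received) > 200:
            return openvino_genai.StreamerRunningStatus.STOP
        return openvino_genai.StreamerRunningStatus.RUNNING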
@@ -8,10 +8,9 @@
 import threading
 
 def streamer(subword):
-    print(subword, end='', flush=True)
-    # Return flag corresponds whether generation should be stopped.
-    # False means continue generation.
-    return False
+    print(subword, end='', flush=True)
+    # Return flag corresponds whether generation should be stopped.
+    return openvino_genai.StreamerRunningStatus.RUNNING
 
 def main():
     parser = argparse.ArgumentParser()
4 changes: 3 additions & 1 deletion samples/python/visual_language_chat/visual_language_chat.py
@@ -11,7 +11,7 @@
 from pathlib import Path
 
 
-def streamer(subword: str) -> bool:
+def streamer(subword: str) -> openvino_genai.StreamerRunningStatus:
     '''
     Args:
@@ -25,6 +25,8 @@ def streamer(subword: str) -> bool:
     # No value is returned as in this example we don't want to stop the generation in this method.
     # "return None" will be treated the same as "return False".
 
+    return openvino_genai.StreamerRunningStatus.RUNNING
+
 
 def read_image(path: str) -> Tensor:
     '''
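A minimal sketch of the updated visual_language_chat flow; the model directory, image file, and 1xHxWx3 uint8 tensor layout are assumptions for illustration:

    import numpy as np
    import openvino_genai
    from openvino import Tensor
    from PIL import Image

    def streamer(subword: str) -> openvino_genai.StreamerRunningStatus:
        print(subword, end='', flush=True)
        return openvino_genai.StreamerRunningStatus.RUNNING

    pipe = openvino_genai.VLMPipeline("MiniCPM-V-2_6", "CPU")   # hypothetical model dir
    pic = Image.open("cat.png").convert("RGB")                  # hypothetical image
    image = Tensor(np.array(pic, dtype=np.uint8)[np.newaxis])   # assumed 1xHxWx3 layout
    pipe.start_chat()
    pipe.generate("Describe the image.", image=image, streamer=streamer)
    pipe.finish_chat()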
3 changes: 0 additions & 3 deletions src/cpp/src/visual_language/pipeline.cpp
@@ -187,9 +187,6 @@ class ov::genai::VLMPipeline::VLMPipelineImpl {
         SequenceGroup::Ptr sequence_group = std::make_shared<SequenceGroup>(request_id, prompt_ids, generation_config, block_size);
         requests.push_back(sequence_group);
 
-        OPENVINO_ASSERT((!m_is_chat_conversation || !std::get_if<std::function<StreamerRunningStatus(std::string)>>(&streamer)),
-            "For chat mode, please, use Steamer as StreamerBase class or as callback with a bool return value.");
-
         std::shared_ptr<StreamerBase> streamer_ptr = std::visit(overloaded{
             [&m_tokenizer = m_tokenizer](
                 const std::function<bool(std::string)>& callback
19 changes: 10 additions & 9 deletions src/python/openvino_genai/py_openvino_genai.pyi
@@ -940,7 +940,7 @@ class LLMPipeline:
     """
     This class is used for generation with LLMs
     """
-    def __call__(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
+    def __call__(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
         """
         Generates sequences or tokens for LLMs. If input is a string or list of strings then resulting sequences will be already detokenized.
@@ -1025,7 +1025,7 @@ class LLMPipeline:
         """
     def finish_chat(self) -> None:
         ...
-    def generate(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
+    def generate(self, inputs: openvino._pyopenvino.Tensor | TokenizedInputs | str | list[str], generation_config: GenerationConfig | None = None, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> EncodedResults | DecodedResults:
         """
         Generates sequences or tokens for LLMs. If input is a string or list of strings then resulting sequences will be already detokenized.
@@ -1845,7 +1845,7 @@ class VLMPipeline:
     def finish_chat(self) -> None:
         ...
     @typing.overload
-    def generate(self, prompt: str, images: list[openvino._pyopenvino.Tensor], generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
+    def generate(self, prompt: str, images: list[openvino._pyopenvino.Tensor], generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
         """
         Generates sequences for VLMs.
@@ -1858,8 +1858,8 @@
         :param generation_config: generation_config
         :type generation_config: GenerationConfig or a Dict
-        :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
-        :type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+        :param streamer: streamer either as a lambda with a StreamerRunningStatus returning flag whether generation should be stopped. Please, be aware that status CANCELLED is not supported and work as STOP.
+        :type : Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
         :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
         :type : Dict
@@ -1868,7 +1868,7 @@
         :rtype: VLMDecodedResults
         """
     @typing.overload
-    def generate(self, prompt: str, images: openvino._pyopenvino.Tensor, generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | typing.Callable[[str], bool] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
+    def generate(self, prompt: str, images: openvino._pyopenvino.Tensor, generation_config: GenerationConfig, streamer: typing.Callable[[str], StreamerRunningStatus] | StreamerBase | None = None, **kwargs) -> VLMDecodedResults:
         """
         Generates sequences for VLMs.
@@ -1881,8 +1881,8 @@
         :param generation_config: generation_config
         :type generation_config: GenerationConfig or a Dict
-        :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
-        :type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+        :param streamer: streamer either as a lambda with a StreamerRunningStatus returning flag whether generation should be stopped. Please, be aware that status CANCELLED is not supported and work as STOP.
+        :type : Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
         :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
         :type : Dict
@@ -1904,7 +1904,8 @@
             image: ov.Tensor - input image,
             images: List[ov.Tensor] - input images,
             generation_config: GenerationConfig,
-            streamer: Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase - streamer either as a lambda with a boolean returning flag whether generation should be stopped
+            streamer: Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase - streamer either as a lambda with a StreamerRunningStatus returning flag whether generation should be stopped.
+                      Please, be aware that status CANCELLED is not supported and work as STOP.
         :return: return results in decoded form
         :rtype: VLMDecodedResults
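The stubs keep StreamerBase as the other accepted streamer form. A sketch of a subclass follows; the put/end semantics in the comments are assumptions about the base-class contract, not something this diff defines:

    import openvino_genai

    class CountingStreamer(openvino_genai.StreamerBase):
        def __init__(self, limit: int):
            super().__init__()
            self.count = 0
            self.limit = limit

        def put(self, token_id: int) -> bool:
            # Assumed contract: called per generated token; True requests a stop.
            self.count += 1
            return self.count >= self.limit

        def end(self):
            # Assumed contract: called once when generation finishes.
            print(f"\n[{self.count} tokens streamed]")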
9 changes: 0 additions & 9 deletions src/python/py_utils.cpp
@@ -336,15 +336,6 @@ ov::genai::StreamerVariant pystreamer_to_streamer(const PyBindStreamerVariant& p
     ov::genai::StreamerVariant streamer = std::monostate();
 
     std::visit(overloaded {
-        [&streamer](const std::function<bool(py::str)>& py_callback){
-            // Wrap python streamer with manual utf-8 decoding. Do not rely
-            // on pybind automatic decoding since it raises exceptions on incomplete strings.
-            auto callback_wrapped = [py_callback](std::string subword) -> bool {
-                auto py_str = PyUnicode_DecodeUTF8(subword.data(), subword.length(), "replace");
-                return py_callback(py::reinterpret_borrow<py::str>(py_str));
-            };
-            streamer = callback_wrapped;
-        },
         [&streamer](const std::function<ov::genai::StreamerRunningStatus(py::str)>& py_callback){
             // Wrap python streamer with manual utf-8 decoding. Do not rely
             // on pybind automatic decoding since it raises exceptions on incomplete strings.
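The "replace" decoding used by the removed visitor (and kept by its StreamerRunningStatus counterpart) matters because a detokenizer can emit a multi-byte UTF-8 character split across chunks; a pure-Python illustration:

    # First byte of the two-byte UTF-8 encoding of 'é' (an incomplete sequence).
    chunk = "é".encode("utf-8")[:1]
    print(chunk.decode("utf-8", errors="replace"))   # '\ufffd', no exception
    # chunk.decode("utf-8") would instead raise UnicodeDecodeError mid-stream.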
2 changes: 1 addition & 1 deletion src/python/py_utils.hpp
@@ -18,7 +18,7 @@ namespace ov::genai::pybind::utils {
 
 // When StreamerVariant is used utf-8 decoding is done by pybind and can lead to exception on incomplete texts.
 // Therefore strings decoding should be handled with PyUnicode_DecodeUTF8(..., "replace") to not throw errors.
-using PyBindStreamerVariant = std::variant<std::function<StreamerRunningStatus(py::str)>, std::function<bool(py::str)>, std::shared_ptr<StreamerBase>, std::monostate>;
+using PyBindStreamerVariant = std::variant<std::function<StreamerRunningStatus(py::str)>, std::shared_ptr<StreamerBase>, std::monostate>;
 
 template <class... Ts>
 struct overloaded : Ts... {
7 changes: 4 additions & 3 deletions src/python/py_vlm_pipeline.cpp
@@ -31,8 +31,8 @@ auto vlm_generate_docstring = R"(
     :param generation_config: generation_config
     :type generation_config: GenerationConfig or a Dict
-    :param streamer: streamer either as a lambda with a boolean returning flag whether generation should be stopped
-    :type : Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
+    :param streamer: streamer either as a lambda with a StreamerRunningStatus returning flag whether generation should be stopped. Please, be aware that status CANCELLED is not supported and work as STOP.
+    :type : Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase
     :param kwargs: arbitrary keyword arguments with keys corresponding to GenerationConfig fields.
     :type : Dict
@@ -53,7 +53,8 @@ auto vlm_generate_kwargs_docstring = R"(
     image: ov.Tensor - input image,
     images: List[ov.Tensor] - input images,
     generation_config: GenerationConfig,
-    streamer: Callable[[str], bool], Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase - streamer either as a lambda with a boolean returning flag whether generation should be stopped
+    streamer: Callable[[str], ov.genai.StreamerRunningStatus], ov.genai.StreamerBase - streamer either as a lambda with a StreamerRunningStatus returning flag whether generation should be stopped.
+              Please, be aware that status CANCELLED is not supported and work as STOP.
 :return: return results in decoded form
 :rtype: VLMDecodedResults
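Per these docstrings, CANCELLED is accepted but currently behaves like STOP. A short sketch of the practical effect:

    import openvino_genai

    def cancelling_streamer(subword):
        print(subword, end='', flush=True)
        # CANCELLED is not yet supported and acts as STOP, so this halts
        # generation after the first streamed chunk.
        return openvino_genai.StreamerRunningStatus.CANCELLED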
