From d308e4886f794e9ea16d86414305d4367f3dfe82 Mon Sep 17 00:00:00 2001 From: sbalandi Date: Thu, 9 Jan 2025 18:16:56 +0000 Subject: [PATCH] fix python --- samples/python/chat_sample/chat_sample.py | 3 +-- .../prompt_lookup_decoding_lm.py | 9 ++++----- .../speculative_decoding_lm/speculative_decoding_lm.py | 7 +++---- .../python/visual_language_chat/visual_language_chat.py | 8 +++++--- src/python/py_utils.cpp | 9 --------- src/python/py_utils.hpp | 2 +- 6 files changed, 14 insertions(+), 24 deletions(-) diff --git a/samples/python/chat_sample/chat_sample.py b/samples/python/chat_sample/chat_sample.py index 8d305086e5..8a6cc25ac2 100755 --- a/samples/python/chat_sample/chat_sample.py +++ b/samples/python/chat_sample/chat_sample.py @@ -6,9 +6,8 @@ import openvino_genai -def streamer(subword) -> openvino_genai.StreamerRunningStatus: +def streamer(subword): print(subword, end='', flush=True) - return openvino_genai.StreamerRunningStatus.RUNNING def main(): diff --git a/samples/python/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.py b/samples/python/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.py index 726391ba9b..9325a77d00 100755 --- a/samples/python/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.py +++ b/samples/python/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm.py @@ -5,11 +5,10 @@ import argparse import openvino_genai -def streamer(subword): - print(subword, end='', flush=True) - # Return flag corresponds whether generation should be stopped. - # False means continue generation. - return False +def streamer(subword): + print(subword, end='', flush=True) + # Return flag corresponds whether generation should be stopped. + return openvino_genai.StreamerRunningStatus.RUNNING def main(): parser = argparse.ArgumentParser() diff --git a/samples/python/speculative_decoding_lm/speculative_decoding_lm.py b/samples/python/speculative_decoding_lm/speculative_decoding_lm.py index 217b8a2730..7f82f9e1b7 100755 --- a/samples/python/speculative_decoding_lm/speculative_decoding_lm.py +++ b/samples/python/speculative_decoding_lm/speculative_decoding_lm.py @@ -8,10 +8,9 @@ import threading def streamer(subword): - print(subword, end='', flush=True) - # Return flag corresponds whether generation should be stopped. - # False means continue generation. - return False + print(subword, end='', flush=True) + # Return flag corresponds whether generation should be stopped. + return openvino_genai.StreamerRunningStatus.RUNNING def main(): parser = argparse.ArgumentParser() diff --git a/samples/python/visual_language_chat/visual_language_chat.py b/samples/python/visual_language_chat/visual_language_chat.py index 5dd7b83b3b..4fe5a0b0a4 100755 --- a/samples/python/visual_language_chat/visual_language_chat.py +++ b/samples/python/visual_language_chat/visual_language_chat.py @@ -11,7 +11,7 @@ from pathlib import Path -def streamer(subword: str) -> bool: +def streamer(subword: str) -> openvino_genai.StreamerRunningStatus: ''' Args: @@ -25,6 +25,8 @@ def streamer(subword: str) -> bool: # No value is returned as in this example we don't want to stop the generation in this method. # "return None" will be treated the same as "return False". + return openvino_genai.StreamerRunningStatus.RUNNING + def read_image(path: str) -> Tensor: ''' @@ -66,7 +68,7 @@ def main(): config = openvino_genai.GenerationConfig() config.max_new_tokens = 100 - pipe.start_chat() + # pipe.start_chat() prompt = input('question:\n') pipe.generate(prompt, images=rgbs, generation_config=config, streamer=streamer) @@ -77,7 +79,7 @@ def main(): except EOFError: break pipe.generate(prompt, generation_config=config, streamer=streamer) - pipe.finish_chat() + # pipe.finish_chat() if '__main__' == __name__: diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp index 17f5b475fe..52ce9df3bb 100644 --- a/src/python/py_utils.cpp +++ b/src/python/py_utils.cpp @@ -336,15 +336,6 @@ ov::genai::StreamerVariant pystreamer_to_streamer(const PyBindStreamerVariant& p ov::genai::StreamerVariant streamer = std::monostate(); std::visit(overloaded { - [&streamer](const std::function& py_callback){ - // Wrap python streamer with manual utf-8 decoding. Do not rely - // on pybind automatic decoding since it raises exceptions on incomplete strings. - auto callback_wrapped = [py_callback](std::string subword) -> bool { - auto py_str = PyUnicode_DecodeUTF8(subword.data(), subword.length(), "replace"); - return py_callback(py::reinterpret_borrow(py_str)); - }; - streamer = callback_wrapped; - }, [&streamer](const std::function& py_callback){ // Wrap python streamer with manual utf-8 decoding. Do not rely // on pybind automatic decoding since it raises exceptions on incomplete strings. diff --git a/src/python/py_utils.hpp b/src/python/py_utils.hpp index ec9997b14d..f085a5c922 100644 --- a/src/python/py_utils.hpp +++ b/src/python/py_utils.hpp @@ -18,7 +18,7 @@ namespace ov::genai::pybind::utils { // When StreamerVariant is used utf-8 decoding is done by pybind and can lead to exception on incomplete texts. // Therefore strings decoding should be handled with PyUnicode_DecodeUTF8(..., "replace") to not throw errors. -using PyBindStreamerVariant = std::variant, std::function, std::shared_ptr, std::monostate>; +using PyBindStreamerVariant = std::variant, std::shared_ptr, std::monostate>; template struct overloaded : Ts... {