diff --git a/src/cpp/include/openvino/genai/tokenizer.hpp b/src/cpp/include/openvino/genai/tokenizer.hpp
index 548e4dc332..96ec40afb7 100644
--- a/src/cpp/include/openvino/genai/tokenizer.hpp
+++ b/src/cpp/include/openvino/genai/tokenizer.hpp
@@ -125,7 +125,9 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
     /**
      * @brief encode a single prompt
      * @param prompt std::string with input prompt
-     * @param properties tokenization properties, e.g. ov::genai::add_special_tokens(false)
+     * @param add_special_tokens whether to add special tokens
+     * @param max_length maximum length to which output will be padded or truncated
+     * @param padding_mode padding mode applied to the result; allowed values are ["truncate", "longest", "max_length", "do_not_pad"]
      * @return pair of [input_ids, attention_mask]
      */
     template <typename... Properties>
@@ -136,7 +138,9 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
     /**
      * @brief encode batch of prompts. Left padding will be applied by default
      * @param prompts vector storing batch of prompts
-     * @param properties tokenization properties, e.g. ov::genai::add_special_tokens(false)
+     * @param add_special_tokens whether to add special tokens
+     * @param max_length maximum length to which output will be padded or truncated
+     * @param padding_mode padding mode applied to the result; allowed values are ["truncate", "longest", "max_length", "do_not_pad"]
      * @return pair of [input_ids, attention_mask]
      */
     template <typename... Properties>
@@ -240,6 +244,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
 
 static constexpr ov::Property<bool> add_special_tokens{"add_special_tokens"};
 static constexpr ov::Property<bool> skip_special_tokens{"skip_special_tokens"};
+static constexpr ov::Property<std::string> padding_mode{"padding_mode"};
 
 } // namespace genai
 } // namespace ov
diff --git a/src/cpp/src/tokenizer.cpp b/src/cpp/src/tokenizer.cpp
index e1def95931..7b5dd95708 100644
--- a/src/cpp/src/tokenizer.cpp
+++ b/src/cpp/src/tokenizer.cpp
@@ -97,6 +97,8 @@ class Tokenizer::TokenizerImpl {
     // this flag holds the current state value of the CompiledModel.
     bool m_add_special_tokens = true;
     bool m_skip_special_tokens = true;
+    int m_max_pad_length = std::numeric_limits<int>::max();
+    int m_max_trunc_length = std::numeric_limits<int>::max();
     bool m_older_than_24_5 = false;
 
     int64_t m_pad_token_id = -1;
@@ -109,17 +111,47 @@ class Tokenizer::TokenizerImpl {
     std::string m_chat_template = {};
 
+    std::pair<int, int> get_padding_values(std::string padding_mode, size_t max_length) {
+        if (padding_mode == "truncate") {
+            return {max_length, std::numeric_limits<int>::max()};
+        } else if (padding_mode == "longest") {
+            return {std::numeric_limits<int>::max(), std::numeric_limits<int>::max()};
+        } else if (padding_mode == "max_length") {
+            return {std::numeric_limits<int>::max(), max_length};
+        } else if (padding_mode == "do_not_pad") {
+            // behaves exactly like "longest"
+            // TODO: need to find a way to disable padding automatically so that it matches HF.
+            return {std::numeric_limits<int>::max(), std::numeric_limits<int>::max()};
+        } else {
+            OPENVINO_THROW("Unknown padding mode: " + padding_mode);
+        }
+    }
+
     void set_state_if_necessary(CircularBufferQueueElementGuard<ov::InferRequest>& infer_request_guard, const ov::AnyMap& params) {
         bool add_special_tokens_flag = m_add_special_tokens;
         bool skip_special_tokens_flag = m_skip_special_tokens;
-
+        size_t max_length_val;
+        std::string padding_mode_val;
+
         ov::genai::utils::read_anymap_param(params, add_special_tokens.name(), add_special_tokens_flag);
         ov::genai::utils::read_anymap_param(params, skip_special_tokens.name(), skip_special_tokens_flag);
+        ov::genai::utils::read_anymap_param(params, padding_mode.name(), padding_mode_val);
+        ov::genai::utils::read_anymap_param(params, max_length.name(), max_length_val);
+
+        int max_trunc_length_val = m_max_trunc_length;
+        int max_pad_length_val = m_max_pad_length;
+
+        if (!padding_mode_val.empty()) {
+            std::tie(max_trunc_length_val, max_pad_length_val) = get_padding_values(padding_mode_val, max_length_val);
+        }
 
         // If user requested add_special_tokens mode different from the current one,
         // need to set state variable.
         // If requested mode matches the stored state set, then don't touch states.
-        if (add_special_tokens_flag == m_add_special_tokens && skip_special_tokens_flag == m_skip_special_tokens) {
+        if (add_special_tokens_flag == m_add_special_tokens
+            && skip_special_tokens_flag == m_skip_special_tokens
+            && max_trunc_length_val == m_max_trunc_length
+            && max_pad_length_val == m_max_pad_length) {
             return;
         }
         if (m_older_than_24_5) {
@@ -137,15 +169,26 @@ class Tokenizer::TokenizerImpl {
         ov::Tensor skip_special_tensor = ov::Tensor(ov::element::i32, {1});
         *skip_special_tensor.data<int32_t>() = skip_special_tokens_flag;
 
+        ov::Tensor max_trunc_length_tensor = ov::Tensor(ov::element::i32, {1});
+        *max_trunc_length_tensor.data<int32_t>() = max_trunc_length_val;
+        ov::Tensor max_pad_length_tensor = ov::Tensor(ov::element::i32, {1});
+        *max_pad_length_tensor.data<int32_t>() = max_pad_length_val;
+
         for (auto& state: infer_request_guard.get().query_state()) {
-            if (state.get_name().find(ov::genai::ADD_SPECIAL_TOKENS_VAR_ID) != std::string::npos) {
+            if (state.get_name().find(add_special_tokens.name()) != std::string::npos) {
                 state.set_state(add_special_tensor);
-            } else if (state.get_name().find(ov::genai::SKIP_SPECIAL_TOKENS_VAR_ID) != std::string::npos) {
+            } else if (state.get_name().find(skip_special_tokens.name()) != std::string::npos) {
                 state.set_state(skip_special_tensor);
+            } else if (state.get_name().find("max_trunc_length") != std::string::npos) {
+                state.set_state(max_trunc_length_tensor);
+            } else if (state.get_name().find("max_pad_length") != std::string::npos) {
+                state.set_state(max_pad_length_tensor);
             }
         }
         m_add_special_tokens = add_special_tokens_flag;
         m_skip_special_tokens = skip_special_tokens_flag;
+        m_max_trunc_length = max_trunc_length_val;
+        m_max_pad_length = max_pad_length_val;
     }
 
     TokenizerImpl(const std::filesystem::path& models_path, const ov::AnyMap& properties) {
@@ -625,22 +668,22 @@ Tokenizer::Tokenizer(const std::string& model_str, ov::Tensor& weights_tensor, c
 }
 
 TokenizedInputs Tokenizer::encode(const std::string prompt, const ov::AnyMap& tokenization_params) {
-    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
+    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
     return m_pimpl->encode(std::move(prompt), tokenization_params);
 }
 
 TokenizedInputs Tokenizer::encode(std::vector<std::string>& prompts, const ov::AnyMap& tokenization_params) {
-    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
+    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
     return m_pimpl->encode(prompts, tokenization_params);
 }
 
 TokenizedInputs Tokenizer::encode(std::vector<std::string>&& prompts, const ov::AnyMap& tokenization_params) {
-    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
+    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
     return m_pimpl->encode(prompts, tokenization_params);
 }
 
 TokenizedInputs Tokenizer::encode(std::initializer_list<std::string>& text, const ov::AnyMap& tokenization_params) {
-    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
+    check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
     return encode(std::vector<std::string>(text.begin(), text.end()), tokenization_params);
 }
 
diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi
index 5adde32db4..fa57d9b22f 100644
--- a/src/python/openvino_genai/py_openvino_genai.pyi
+++ b/src/python/openvino_genai/py_openvino_genai.pyi
@@ -1656,12 +1656,12 @@ class Tokenizer:
         Decode a batch of tokens into a list of string prompt.
         """
     @typing.overload
-    def encode(self, prompts: list[str], add_special_tokens: bool = True) -> TokenizedInputs:
+    def encode(self, prompts: list[str], add_special_tokens: bool = True, max_length: int = 2147483647, padding_mode: str = 'truncate') -> TokenizedInputs:
         """
         Encodes a list of prompts into tokenized inputs.
         """
     @typing.overload
-    def encode(self, prompt: str, add_special_tokens: bool = True) -> TokenizedInputs:
+    def encode(self, prompt: str, add_special_tokens: bool = True, max_length: int = 2147483647, padding_mode: str = 'truncate') -> TokenizedInputs:
         """
         Encodes a single prompt into tokenized input.
""" diff --git a/src/python/py_tokenizer.cpp b/src/python/py_tokenizer.cpp index db4643a65c..1ff78e9463 100644 --- a/src/python/py_tokenizer.cpp +++ b/src/python/py_tokenizer.cpp @@ -44,21 +44,35 @@ void init_tokenizer(py::module_& m) { return std::make_unique(tokenizer_path, kwargs_properties); }), py::arg("tokenizer_path"), py::arg("properties") = ov::AnyMap({})) - .def("encode", [](Tokenizer& tok, std::vector& prompts, bool add_special_tokens) { + .def("encode", [](Tokenizer& tok, std::vector& prompts, + bool add_special_tokens, + size_t max_length, + std::string padding_mode) { ov::AnyMap tokenization_params; tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens; + tokenization_params[ov::genai::max_length.name()] = max_length; + tokenization_params[ov::genai::padding_mode.name()] = padding_mode; return tok.encode(prompts, tokenization_params); }, py::arg("prompts"), py::arg("add_special_tokens") = true, + py::arg("max_length") = std::numeric_limits::max(), + py::arg("padding_mode") = "truncate", R"(Encodes a list of prompts into tokenized inputs.)") - .def("encode", [](Tokenizer& tok, const std::string prompt, bool add_special_tokens) { + .def("encode", [](Tokenizer& tok, const std::string prompt, + bool add_special_tokens, + size_t max_length, + std::string padding_mode) { ov::AnyMap tokenization_params; tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens; + tokenization_params[ov::genai::max_length.name()] = max_length; + tokenization_params[ov::genai::padding_mode.name()] = padding_mode; return tok.encode(prompt, tokenization_params); }, - py::arg("prompt"), py::arg("add_special_tokens") = true, + py::arg("prompt"), py::arg("add_special_tokens") = true, + py::arg("max_length") = std::numeric_limits::max(), + py::arg("padding_mode") = "truncate", R"(Encodes a single prompt into tokenized input.)") .def( diff --git a/tests/python_tests/test_tokenizer.py b/tests/python_tests/test_tokenizer.py index 8129298763..a6021b8157 100644 --- a/tests/python_tests/test_tokenizer.py +++ b/tests/python_tests/test_tokenizer.py @@ -236,6 +236,26 @@ def test_encode_decode_with_special_tokens_option(add_special_tokens, skip_speci decoded_hf = hf_tokenizer.decode(hf_res[0], skip_special_tokens=skip_special_tokens) assert decoded_genai == decoded_hf +prompts = [ + ['1+1=', 'What is the previous answer?'] +] +@pytest.mark.precommit +@pytest.mark.nightly +@pytest.mark.parametrize("add_special_tokens", [True, False]) +@pytest.mark.parametrize("max_length", [10, 16, 64, 512]) +@pytest.mark.parametrize("pad_mode", ["truncate", "longest", "max_length", "do_not_pad"]) +@pytest.mark.parametrize("prompt", prompts) +def test_padding(add_special_tokens, max_length, pad_mode, prompt): + import numpy as np + model_descr = get_chat_models_list()[0] + model_id, path, hf_tokenizer, model_opt, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat')) + genai_tokenzier = ov_pipe.get_tokenizer() + + # Calling encode with 'add_special_tokens' will set state flag. + ov_res = genai_tokenzier.encode(prompt, add_special_tokens=add_special_tokens, max_length=max_length, padding_mode=pad_mode).input_ids.data + hf_res = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=add_special_tokens, max_length=max_length, padding=pad_mode)["input_ids"] + assert np.all(ov_res == hf_res) + @pytest.mark.precommit @pytest.mark.nightly