add max_length parametrisation to encode
pavel-esir committed Jan 9, 2025
1 parent 3b0ecb4 commit 2fe2e66
Showing 5 changed files with 97 additions and 15 deletions.
9 changes: 7 additions & 2 deletions src/cpp/include/openvino/genai/tokenizer.hpp
@@ -125,7 +125,9 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
/**
* @brief encode a single prompt
* @param prompt std::string with input prompt
* @param properties tokenization properties, e.g. ov::genai::add_special_tokens(false)
* @param add_special_tokens whether to add special tokens
* @param max_length maximum length to which output will be padded or truncated
* @param padding_mode how to pad the result; allowed values are ["truncate", "longest", "max_length", "do_not_pad"]
* @return pair of [input_ids, attention_mask]
*/
template <typename... Properties>
@@ -136,7 +138,9 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
/**
* @brief encode batch of prompts. Left padding will be applied by default
* @param prompts vector storing batch of prompts
* @param properties tokenization properties, e.g. ov::genai::add_special_tokens(false)
* @param add_special_tokens whether to add special tokens
* @param max_length maximum length to which output will be padded or truncated
* @param padding_mode how to pad the result; allowed values are ["truncate", "longest", "max_length", "do_not_pad"]
* @return pair of [input_ids, attention_mask]
*/
template <typename... Properties>
Expand Down Expand Up @@ -240,6 +244,7 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {

static constexpr ov::Property<bool> add_special_tokens{"add_special_tokens"};
static constexpr ov::Property<bool> skip_special_tokens{"skip_special_tokens"};
static constexpr ov::Property<std::string> padding_mode{"padding_mode"};

} // namespace genai
} // namespace ov
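
The doc comments above can be illustrated with a short usage sketch; this is not part of the commit and assumes a converted tokenizer already exists at the placeholder path models_path, and that ov::genai::max_length is the pre-existing property that the patch reads via max_length.name(). Exact output shapes depend on the chosen padding mode.

#include <filesystem>
#include <string>
#include <vector>
#include "openvino/genai/tokenizer.hpp"

int main() {
    std::filesystem::path models_path = "path/to/tokenizer";  // placeholder path
    ov::genai::Tokenizer tokenizer(models_path);

    // Single prompt: pad up to 32 tokens ("max_length" mode pads but does not truncate).
    ov::genai::TokenizedInputs single = tokenizer.encode(
        std::string("1+1="),
        ov::genai::add_special_tokens(true),
        ov::genai::max_length(32),
        ov::genai::padding_mode("max_length"));

    // Batch of prompts: truncate each prompt to at most 16 tokens.
    std::vector<std::string> prompts{"1+1=", "What is the previous answer?"};
    ov::genai::TokenizedInputs batch = tokenizer.encode(
        prompts,
        ov::genai::max_length(16),
        ov::genai::padding_mode("truncate"));

    // single.input_ids / batch.input_ids and the attention masks are ov::Tensor objects.
    return 0;
}
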
59 changes: 51 additions & 8 deletions src/cpp/src/tokenizer.cpp
@@ -97,6 +97,8 @@ class Tokenizer::TokenizerImpl {
// this flag holds the current state value of the CompiledModel.
bool m_add_special_tokens = true;
bool m_skip_special_tokens = true;
int m_max_pad_length = std::numeric_limits<int>::max();
int m_max_trunc_length = std::numeric_limits<int>::max();
bool m_older_than_24_5 = false;

int64_t m_pad_token_id = -1;
@@ -109,17 +111,47 @@ class Tokenizer::TokenizerImpl {

std::string m_chat_template = {};

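// Maps the requested padding mode onto the {max_trunc_length, max_pad_length} pair applied to the tokenizer state:
//   "truncate"   -> {max_length, INT32_MAX}   truncate to max_length, no explicit padding limit
//   "longest"    -> {INT32_MAX,  INT32_MAX}   no truncation, no explicit padding limit
//   "max_length" -> {INT32_MAX,  max_length}  no truncation, pad up to max_length
//   "do_not_pad" -> currently identical to "longest" (see the TODO below)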
std::pair<int, int> get_padding_values(std::string padding_mode, size_t max_length) {
if (padding_mode == "truncate") {
return {max_length, std::numeric_limits<int32_t>::max()};
} else if (padding_mode == "longest") {
return {std::numeric_limits<int32_t>::max(), std::numeric_limits<int32_t>::max()};
} else if (padding_mode == "max_length") {
return {std::numeric_limits<int32_t>::max(), max_length};
} else if (padding_mode == "do_not_pad") {
// behaves exactly as longest
// TODO: need to find a way to disable padding automatically so that it matches HF.
return {std::numeric_limits<int32_t>::max(), std::numeric_limits<int32_t>::max()};
} else {
OPENVINO_THROW("Unknown padding mode: " + padding_mode);
}
}

void set_state_if_necessary(CircularBufferQueueElementGuard<ov::InferRequest>& infer_request_guard, const ov::AnyMap& params) {
bool add_special_tokens_flag = m_add_special_tokens;
bool skip_special_tokens_flag = m_skip_special_tokens;

size_t max_length_val = std::numeric_limits<int>::max();
std::string padding_mode_val;

ov::genai::utils::read_anymap_param(params, add_special_tokens.name(), add_special_tokens_flag);
ov::genai::utils::read_anymap_param(params, skip_special_tokens.name(), skip_special_tokens_flag);
ov::genai::utils::read_anymap_param(params, padding_mode.name(), padding_mode_val);
ov::genai::utils::read_anymap_param(params, max_length.name(), max_length_val);

int max_trunc_length_val = m_max_trunc_length;
int max_pad_length_val = m_max_pad_length;

if (!padding_mode_val.empty()) {
std::tie(max_trunc_length_val, max_pad_length_val) = get_padding_values(padding_mode_val, max_length_val);
}

// If the requested values differ from the ones stored in the current state,
// the state variables need to be updated.
// If they match the stored state, don't touch the states.
if (add_special_tokens_flag == m_add_special_tokens && skip_special_tokens_flag == m_skip_special_tokens) {
if (add_special_tokens_flag == m_add_special_tokens
&& skip_special_tokens_flag == m_skip_special_tokens
&& max_trunc_length_val == m_max_trunc_length
&& max_pad_length_val == m_max_pad_length) {
return;
}
if (m_older_than_24_5) {
@@ -137,15 +169,26 @@ class Tokenizer::TokenizerImpl {
ov::Tensor skip_special_tensor = ov::Tensor(ov::element::i32, {1});
*skip_special_tensor.data<int>() = skip_special_tokens_flag;

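// Scalar i32 tensors carrying the truncation and padding limits into the tokenizer model state.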
ov::Tensor max_trunc_length_tensor = ov::Tensor(ov::element::i32, {1});
*max_trunc_length_tensor.data<int>() = max_trunc_length_val;
ov::Tensor max_pad_length_tensor = ov::Tensor(ov::element::i32, {1});
*max_pad_length_tensor.data<int>() = max_pad_length_val;

for (auto& state: infer_request_guard.get().query_state()) {
if (state.get_name().find(ov::genai::ADD_SPECIAL_TOKENS_VAR_ID) != std::string::npos) {
if (state.get_name().find(add_special_tokens.name()) != std::string::npos) {
state.set_state(add_special_tensor);
} else if (state.get_name().find(ov::genai::SKIP_SPECIAL_TOKENS_VAR_ID) != std::string::npos) {
} else if (state.get_name().find(skip_special_tokens.name()) != std::string::npos) {
state.set_state(skip_special_tensor);
} else if (state.get_name().find("max_trunc_length") != std::string::npos) {
state.set_state(max_trunc_length_tensor);
} else if (state.get_name().find("max_pad_length") != std::string::npos) {
state.set_state(max_pad_length_tensor);
}
}
m_add_special_tokens = add_special_tokens_flag;
m_skip_special_tokens = skip_special_tokens_flag;
m_max_trunc_length = max_trunc_length_val;
m_max_pad_length = max_pad_length_val;
}

TokenizerImpl(const std::filesystem::path& models_path, const ov::AnyMap& properties) {
@@ -625,22 +668,22 @@ Tokenizer::Tokenizer(const std::string& model_str, ov::Tensor& weights_tensor, c
}

TokenizedInputs Tokenizer::encode(const std::string prompt, const ov::AnyMap& tokenization_params) {
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
return m_pimpl->encode(std::move(prompt), tokenization_params);
}

TokenizedInputs Tokenizer::encode(std::vector<std::string>& prompts, const ov::AnyMap& tokenization_params) {
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
return m_pimpl->encode(prompts, tokenization_params);
}

TokenizedInputs Tokenizer::encode(std::vector<std::string>&& prompts, const ov::AnyMap& tokenization_params) {
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
return m_pimpl->encode(prompts, tokenization_params);
}

TokenizedInputs Tokenizer::encode(std::initializer_list<std::string>& text, const ov::AnyMap& tokenization_params) {
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name()});
check_arguments(tokenization_params, {ov::genai::add_special_tokens.name(), ov::genai::max_length.name(), ov::genai::padding_mode.name()});
return encode(std::vector<std::string>(text.begin(), text.end()), tokenization_params);
}

4 changes: 2 additions & 2 deletions src/python/openvino_genai/py_openvino_genai.pyi
@@ -1656,12 +1656,12 @@ class Tokenizer:
Decode a batch of tokens into a list of string prompt.
"""
@typing.overload
def encode(self, prompts: list[str], add_special_tokens: bool = True) -> TokenizedInputs:
def encode(self, prompts: list[str], add_special_tokens: bool = True, max_length: int = 2147483647, padding_mode: str = 'truncate') -> TokenizedInputs:
"""
Encodes a list of prompts into tokenized inputs.
"""
@typing.overload
def encode(self, prompt: str, add_special_tokens: bool = True) -> TokenizedInputs:
def encode(self, prompt: str, add_special_tokens: bool = True, max_length: int = 2147483647, padding_mode: str = 'truncate') -> TokenizedInputs:
"""
Encodes a single prompt into tokenized input.
"""
20 changes: 17 additions & 3 deletions src/python/py_tokenizer.cpp
@@ -44,21 +44,35 @@ void init_tokenizer(py::module_& m) {
return std::make_unique<ov::genai::Tokenizer>(tokenizer_path, kwargs_properties);
}), py::arg("tokenizer_path"), py::arg("properties") = ov::AnyMap({}))

.def("encode", [](Tokenizer& tok, std::vector<std::string>& prompts, bool add_special_tokens) {
.def("encode", [](Tokenizer& tok, std::vector<std::string>& prompts,
bool add_special_tokens,
size_t max_length,
std::string padding_mode) {
ov::AnyMap tokenization_params;
tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens;
tokenization_params[ov::genai::max_length.name()] = max_length;
tokenization_params[ov::genai::padding_mode.name()] = padding_mode;
return tok.encode(prompts, tokenization_params);
},
py::arg("prompts"),
py::arg("add_special_tokens") = true,
py::arg("max_length") = std::numeric_limits<int>::max(),
py::arg("padding_mode") = "truncate",
R"(Encodes a list of prompts into tokenized inputs.)")

.def("encode", [](Tokenizer& tok, const std::string prompt, bool add_special_tokens) {
.def("encode", [](Tokenizer& tok, const std::string prompt,
bool add_special_tokens,
size_t max_length,
std::string padding_mode) {
ov::AnyMap tokenization_params;
tokenization_params[ov::genai::add_special_tokens.name()] = add_special_tokens;
tokenization_params[ov::genai::max_length.name()] = max_length;
tokenization_params[ov::genai::padding_mode.name()] = padding_mode;
return tok.encode(prompt, tokenization_params);
},
py::arg("prompt"), py::arg("add_special_tokens") = true,
py::arg("prompt"), py::arg("add_special_tokens") = true,
py::arg("max_length") = std::numeric_limits<int>::max(),
py::arg("padding_mode") = "truncate",
R"(Encodes a single prompt into tokenized input.)")

.def(
20 changes: 20 additions & 0 deletions tests/python_tests/test_tokenizer.py
@@ -236,6 +236,26 @@ def test_encode_decode_with_special_tokens_option(add_special_tokens, skip_speci
decoded_hf = hf_tokenizer.decode(hf_res[0], skip_special_tokens=skip_special_tokens)
assert decoded_genai == decoded_hf

prompts = [
['1+1=', 'What is the previous answer?']
]
@pytest.mark.precommit
@pytest.mark.nightly
@pytest.mark.parametrize("add_special_tokens", [True, False])
@pytest.mark.parametrize("max_length", [10, 16, 64, 512])
@pytest.mark.parametrize("pad_mode", ["truncate", "longest", "max_length", "do_not_pad"])
@pytest.mark.parametrize("prompt", prompts)
def test_padding(add_special_tokens, max_length, pad_mode, prompt):
import numpy as np
model_descr = get_chat_models_list()[0]
model_id, path, hf_tokenizer, model_opt, ov_pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'))
genai_tokenizer = ov_pipe.get_tokenizer()

# Calling encode with these parameters sets the corresponding tokenizer state flags.
ov_res = genai_tokenizer.encode(prompt, add_special_tokens=add_special_tokens, max_length=max_length, padding_mode=pad_mode).input_ids.data
hf_res = hf_tokenizer(prompt, return_tensors="np", add_special_tokens=add_special_tokens, max_length=max_length, padding=pad_mode)["input_ids"]
assert np.all(ov_res == hf_res)


@pytest.mark.precommit
@pytest.mark.nightly
Expand Down
